summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore18
-rw-r--r--ANNOUNCE-3.098
-rw-r--r--ANNOUNCE-3.0.122
-rw-r--r--ANNOUNCE-3.0.221
-rw-r--r--ANNOUNCE-3.0.329
-rw-r--r--ANNOUNCE-3.133
-rw-r--r--ANNOUNCE-3.1.139
-rw-r--r--ANNOUNCE-3.1.246
-rw-r--r--ANNOUNCE-3.1.346
-rw-r--r--ANNOUNCE-3.1.437
-rw-r--r--ANNOUNCE-3.1.542
-rw-r--r--ANNOUNCE-3.277
-rw-r--r--ANNOUNCE-3.2.175
-rw-r--r--ANNOUNCE-3.2.236
-rw-r--r--ANNOUNCE-3.2.324
-rw-r--r--ANNOUNCE-3.2.4144
-rw-r--r--ANNOUNCE-3.2.531
-rw-r--r--ANNOUNCE-3.2.657
-rw-r--r--ANNOUNCE-3.363
-rw-r--r--ANNOUNCE-3.3.123
-rw-r--r--ANNOUNCE-3.3.216
-rw-r--r--Assemble.c2007
-rw-r--r--Build.c293
-rw-r--r--COPYING339
-rw-r--r--ChangeLog306
-rw-r--r--Create.c1069
-rw-r--r--Detail.c763
-rw-r--r--Dump.c311
-rw-r--r--Examine.c225
-rw-r--r--Grow.c4998
-rw-r--r--INSTALL13
-rw-r--r--Incremental.c1798
-rw-r--r--Kill.c146
-rw-r--r--Makefile334
-rw-r--r--Manage.c1691
-rw-r--r--Monitor.c1109
-rw-r--r--Query.c126
-rw-r--r--README.initramfs123
-rw-r--r--ReadMe.c636
-rw-r--r--TODO213
-rw-r--r--bitmap.c448
-rw-r--r--bitmap.h287
-rw-r--r--config.c1184
-rw-r--r--crc32.c360
-rw-r--r--crc32.h441
-rw-r--r--debian/FAQ581
-rw-r--r--debian/NEWS107
-rw-r--r--debian/README.Debian148
-rw-r--r--debian/README.checkarray33
-rw-r--r--debian/README.recipes149
-rw-r--r--debian/TODO29
-rwxr-xr-xdebian/bugscript219
-rw-r--r--debian/changelog1819
-rw-r--r--debian/checkarray219
-rw-r--r--debian/compat1
-rw-r--r--debian/control32
-rw-r--r--debian/copyright21
-rw-r--r--debian/initramfs/hook266
-rw-r--r--debian/initramfs/script.local-top101
-rw-r--r--debian/mdadm-raid256
-rw-r--r--debian/mdadm-waitidle56
-rw-r--r--debian/mdadm.config174
-rw-r--r--debian/mdadm.cron.d12
-rw-r--r--debian/mdadm.cron.daily18
-rw-r--r--debian/mdadm.doc-base.faq9
-rw-r--r--debian/mdadm.doc-base.recipes9
-rw-r--r--debian/mdadm.docs7
-rw-r--r--debian/mdadm.init94
-rw-r--r--debian/mdadm.logcheck.ignore.server23
-rw-r--r--debian/mdadm.logcheck.violations3
-rw-r--r--debian/mdadm.modules8
-rw-r--r--debian/mdadm.postinst112
-rw-r--r--debian/mdadm.postrm25
-rw-r--r--debian/mdadm.preinst45
-rw-r--r--debian/mdadm.templates99
-rw-r--r--debian/mkconf101
-rw-r--r--debian/patches/build-sys-no-check_rundir.patch33
-rw-r--r--debian/patches/debian-conffile-location.diff115
-rw-r--r--debian/patches/debian-no-Werror.diff24
-rw-r--r--debian/patches/mdmonitor-service-simplify.diff20
-rw-r--r--debian/patches/readlink-path.patch15
-rw-r--r--debian/patches/rebuildmap-strip-local-host-name-from-device-name.patch47
-rw-r--r--debian/patches/series9
-rw-r--r--debian/patches/sha1-includes.diff40
-rw-r--r--debian/patches/use-external-blkid.diff16
-rw-r--r--debian/patches/use-tempnode-not-devnode.patch31
-rw-r--r--debian/po/POTFILES.in1
-rw-r--r--debian/po/ca.po220
-rw-r--r--debian/po/cs.po265
-rw-r--r--debian/po/da.po210
-rw-r--r--debian/po/de.po284
-rw-r--r--debian/po/es.po254
-rw-r--r--debian/po/eu.po211
-rw-r--r--debian/po/fi.po208
-rw-r--r--debian/po/fr.po222
-rw-r--r--debian/po/gl.po210
-rw-r--r--debian/po/it.po213
-rw-r--r--debian/po/ja.po268
-rw-r--r--debian/po/nl.po223
-rw-r--r--debian/po/pt.po214
-rw-r--r--debian/po/pt_BR.po340
-rw-r--r--debian/po/ru.po224
-rw-r--r--debian/po/sk.po212
-rw-r--r--debian/po/sv.po219
-rw-r--r--debian/po/templates.pot176
-rw-r--r--debian/po/vi.po215
-rw-r--r--debian/presubj32
-rwxr-xr-xdebian/rules101
-rw-r--r--debian/source/format1
-rw-r--r--debian/watch2
-rw-r--r--dlink.c74
-rw-r--r--dlink.h25
-rw-r--r--external-reshape-design.txt280
-rwxr-xr-xinventory249
-rw-r--r--kernel-patch-2.6.1835
-rw-r--r--kernel-patch-2.6.18.635
-rw-r--r--kernel-patch-2.6.1934
-rw-r--r--kernel-patch-2.6.25199
-rw-r--r--kernel-patch-2.6.2736
-rw-r--r--lib.c475
-rwxr-xr-xmakedist96
-rw-r--r--managemon.c910
-rw-r--r--mapfile.c508
-rw-r--r--maps.c150
-rw-r--r--md.41125
-rw-r--r--md5.h136
-rw-r--r--md_p.h198
-rw-r--r--md_u.h122
-rw-r--r--mdadm.8.in3205
-rw-r--r--mdadm.c1873
-rw-r--r--mdadm.conf-example65
-rw-r--r--mdadm.conf.5641
-rw-r--r--mdadm.h1577
-rw-r--r--mdadm.spec45
-rw-r--r--mdassemble.865
-rw-r--r--mdassemble.c90
-rw-r--r--mdmon-design.txt146
-rw-r--r--mdmon.8257
-rw-r--r--mdmon.c600
-rw-r--r--mdmon.h111
-rw-r--r--mdopen.c473
-rw-r--r--mdstat.c416
-rw-r--r--misc/mdcheck158
-rw-r--r--misc/syslog-events27
-rw-r--r--mkinitramfs55
-rw-r--r--monitor.c713
-rw-r--r--msg.c475
-rw-r--r--msg.h37
-rw-r--r--part.h79
-rw-r--r--platform-intel.c513
-rw-r--r--platform-intel.h210
-rw-r--r--policy.c914
-rw-r--r--probe_roms.c317
-rw-r--r--probe_roms.h24
-rw-r--r--pwgr.c17
-rw-r--r--raid5extend.c80
-rw-r--r--raid6check.896
-rw-r--r--raid6check.c663
-rw-r--r--restripe.c994
-rw-r--r--sg_io.c42
-rw-r--r--sha1.c415
-rw-r--r--sha1.h136
-rw-r--r--super-ddf.c5303
-rw-r--r--super-gpt.c217
-rw-r--r--super-intel.c10626
-rw-r--r--super-mbr.c206
-rw-r--r--super0.c1331
-rw-r--r--super1.c2425
-rw-r--r--swap_super.c81
-rw-r--r--sysfs.c921
-rw-r--r--systemd/SUSE-mdadm_env.sh45
-rw-r--r--systemd/mdadm-grow-continue@.service17
-rw-r--r--systemd/mdadm-last-resort@.service7
-rw-r--r--systemd/mdadm-last-resort@.timer7
-rw-r--r--systemd/mdadm.shutdown4
-rw-r--r--systemd/mdmon@.service28
-rw-r--r--systemd/mdmonitor.service13
-rwxr-xr-xtest402
-rw-r--r--tests/00linear25
-rw-r--r--tests/00multipath29
-rw-r--r--tests/00names13
-rw-r--r--tests/00raid043
-rw-r--r--tests/00raid134
-rw-r--r--tests/00raid1018
-rw-r--r--tests/00raid416
-rw-r--r--tests/00raid533
-rw-r--r--tests/00raid616
-rw-r--r--tests/01r1fail29
-rw-r--r--tests/01r5fail27
-rw-r--r--tests/01r5integ33
-rw-r--r--tests/01raid6integ57
-rw-r--r--tests/01replace52
-rw-r--r--tests/02lineargrow23
-rw-r--r--tests/02r1add40
-rw-r--r--tests/02r1grow36
-rw-r--r--tests/02r5grow36
-rw-r--r--tests/02r6grow36
-rw-r--r--tests/03assem-incr17
-rw-r--r--tests/03r0assem137
-rw-r--r--tests/03r5assem109
-rw-r--r--tests/03r5assem-failed12
-rw-r--r--tests/03r5assemV1128
-rw-r--r--tests/04r0update20
-rw-r--r--tests/04r1update15
-rw-r--r--tests/04r5swap18
-rw-r--r--tests/04update-metadata48
-rw-r--r--tests/04update-uuid82
-rw-r--r--tests/05r1-add-internalbitmap20
-rw-r--r--tests/05r1-add-internalbitmap-v1a20
-rw-r--r--tests/05r1-add-internalbitmap-v1b20
-rw-r--r--tests/05r1-add-internalbitmap-v1c20
-rw-r--r--tests/05r1-bitmapfile49
-rw-r--r--tests/05r1-grow-external33
-rw-r--r--tests/05r1-grow-internal31
-rw-r--r--tests/05r1-grow-internal-131
-rw-r--r--tests/05r1-internalbitmap47
-rw-r--r--tests/05r1-internalbitmap-v1a48
-rw-r--r--tests/05r1-internalbitmap-v1b49
-rw-r--r--tests/05r1-internalbitmap-v1c48
-rw-r--r--tests/05r1-n3-bitmapfile53
-rw-r--r--tests/05r1-re-add36
-rw-r--r--tests/05r1-re-add-nosuper37
-rw-r--r--tests/05r1-remove-internalbitmap18
-rw-r--r--tests/05r1-remove-internalbitmap-v1a18
-rw-r--r--tests/05r1-remove-internalbitmap-v1b18
-rw-r--r--tests/05r1-remove-internalbitmap-v1c18
-rw-r--r--tests/05r5-bitmapfile49
-rw-r--r--tests/05r5-internalbitmap47
-rw-r--r--tests/05r6-bitmapfile49
-rw-r--r--tests/05r6tor027
-rw-r--r--tests/06name12
-rw-r--r--tests/06sysfs11
-rw-r--r--tests/06wrmostly13
-rw-r--r--tests/07autoassemble24
-rw-r--r--tests/07autodetect34
-rw-r--r--tests/07changelevelintr60
-rw-r--r--tests/07changelevels113
-rw-r--r--tests/07layouts91
-rw-r--r--tests/07reshape5intr41
-rw-r--r--tests/07revert-grow52
-rw-r--r--tests/07revert-inplace44
-rw-r--r--tests/07revert-shrink56
-rw-r--r--tests/07testreshape545
-rw-r--r--tests/09imsm-assemble73
-rw-r--r--tests/09imsm-create-fail-rebuild78
-rw-r--r--tests/09imsm-overlap30
-rw-r--r--tests/10ddf-assemble-missing61
-rw-r--r--tests/10ddf-create89
-rw-r--r--tests/10ddf-create-fail-rebuild77
-rw-r--r--tests/10ddf-fail-create-race66
-rw-r--r--tests/10ddf-fail-readd55
-rw-r--r--tests/10ddf-fail-readd-readonly71
-rw-r--r--tests/10ddf-fail-spare86
-rw-r--r--tests/10ddf-fail-stop-readd66
-rw-r--r--tests/10ddf-fail-twice59
-rw-r--r--tests/10ddf-fail-two-spares86
-rw-r--r--tests/10ddf-geometry82
-rw-r--r--tests/10ddf-incremental-wrong-order131
-rw-r--r--tests/10ddf-sudden-degraded18
-rw-r--r--tests/11spare-migration454
-rw-r--r--tests/12imsm-r0_2d-grow-r0_3d20
-rw-r--r--tests/12imsm-r0_2d-grow-r0_4d20
-rw-r--r--tests/12imsm-r0_2d-grow-r0_5d20
-rw-r--r--tests/12imsm-r0_3d-grow-r0_4d20
-rw-r--r--tests/12imsm-r5_3d-grow-r5_4d20
-rw-r--r--tests/12imsm-r5_3d-grow-r5_5d20
-rw-r--r--tests/13imsm-r0_r0_2d-grow-r0_r0_4d29
-rw-r--r--tests/13imsm-r0_r0_2d-grow-r0_r0_5d29
-rw-r--r--tests/13imsm-r0_r0_3d-grow-r0_r0_4d29
-rw-r--r--tests/13imsm-r0_r5_3d-grow-r0_r5_4d29
-rw-r--r--tests/13imsm-r0_r5_3d-grow-r0_r5_5d29
-rw-r--r--tests/13imsm-r5_r0_3d-grow-r5_r0_4d29
-rw-r--r--tests/13imsm-r5_r0_3d-grow-r5_r0_5d29
-rw-r--r--tests/14imsm-r0_3d-r5_3d-migrate-r5_4d-r5_4d29
-rw-r--r--tests/14imsm-r0_3d_no_spares-migrate-r5_3d21
-rw-r--r--tests/14imsm-r0_r0_2d-takeover-r10_4d30
-rw-r--r--tests/14imsm-r10_4d-grow-r10_5d20
-rw-r--r--tests/14imsm-r10_r5_4d-takeover-r0_2d30
-rw-r--r--tests/14imsm-r1_2d-grow-r1_3d20
-rw-r--r--tests/14imsm-r1_2d-takeover-r0_2d22
-rw-r--r--tests/14imsm-r5_3d-grow-r5_5d-no-spares20
-rw-r--r--tests/14imsm-r5_3d-migrate-r4_3d21
-rw-r--r--tests/15imsm-r0_3d_64k-migrate-r0_3d_256k21
-rw-r--r--tests/15imsm-r5_3d_4k-migrate-r5_3d_256k21
-rw-r--r--tests/15imsm-r5_3d_64k-migrate-r5_3d_256k21
-rw-r--r--tests/15imsm-r5_6d_4k-migrate-r5_6d_256k21
-rw-r--r--tests/15imsm-r5_r0_3d_64k-migrate-r5_r0_3d_256k34
-rw-r--r--tests/16imsm-r0_3d-migrate-r5_4d21
-rw-r--r--tests/16imsm-r0_5d-migrate-r5_6d21
-rw-r--r--tests/16imsm-r5_3d-migrate-r0_3d21
-rw-r--r--tests/16imsm-r5_5d-migrate-r0_5d21
-rw-r--r--tests/18imsm-1d-takeover-r0_1d23
-rw-r--r--tests/18imsm-1d-takeover-r1_2d21
-rw-r--r--tests/18imsm-r0_2d-takeover-r10_4d22
-rw-r--r--tests/18imsm-r10_4d-takeover-r0_2d22
-rw-r--r--tests/18imsm-r1_2d-takeover-r0_1d22
-rw-r--r--tests/19raid6auto-repair43
-rw-r--r--tests/19raid6repair47
-rw-r--r--tests/19repair-does-not-destroy29
-rw-r--r--tests/ToTest44
-rw-r--r--tests/check35
-rw-r--r--tests/env-ddf-template113
-rw-r--r--tests/env-imsm-template74
-rw-r--r--tests/imsm-grow-template104
-rw-r--r--tests/testdev13
-rw-r--r--tests/utils191
-rw-r--r--udev-md-raid-arrays.rules39
-rw-r--r--udev-md-raid-assembly.rules35
-rw-r--r--util.c1975
-rw-r--r--xmalloc.c72
310 files changed, 79505 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..217fe76d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,18 @@
+/*.o
+/*.man
+/*-stamp
+/mdadm
+/mdadm.8
+/mdadm.udeb
+/mdassemble
+/mdmon
+/swap_super
+/test_stripe
+/TAGS
+/mdadm.O2
+/mdadm.Os
+/mdadm.static
+/mdassemble.auto
+/mdassemble.static
+/mdmon.O2
+/raid6check
diff --git a/ANNOUNCE-3.0 b/ANNOUNCE-3.0
new file mode 100644
index 00000000..f2d4f847
--- /dev/null
+++ b/ANNOUNCE-3.0
@@ -0,0 +1,98 @@
+Subject: ANNOUNCE: mdadm 3.0 - A tool for managing Soft RAID under Linux
+
+I am pleased to (finally) announce the availability of
+ mdadm version 3.0
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git?p=mdadm
+
+
+This is a major new version and as such should be treated with some
+caution. However it has seen substantial testing and is considered
+to be ready for wide use.
+
+
+The significant change which justifies the new major version number is
+that mdadm can now handle metadata updates entirely in userspace.
+This allows mdadm to support metadata formats that the kernel knows
+nothing about.
+
+Currently two such metadata formats are supported:
+ - DDF - The SNIA standard format
+ - Intel Matrix - The metadata used by recent Intel ICH controllers.
+
+Also the approach to device names has changed significantly.
+
+If udev is installed on the system, mdadm will not create any devices
+in /dev. Rather it allows udev to manage those devices. For this to work
+as expected, the included udev rules file should be installed.
+
+If udev is not installed, mdadm will still create devices and symlinks
+as required, and will also remove them when the array is stopped.
+
+mdadm now requires all devices which do not have a standard name (mdX
+or md_dX) to live in the directory /dev/md/. Names in this directory
+will always be created as symlinks back to the standard name in /dev.
+
+The man pages contain some information about the new externally managed
+metadata. However see below for a more condensed overview.
+
+Externally managed metadata introduces the concept of a 'container'.
+A container is a collection of (normally) physical devices which have
+a common set of metadata. A container is assembled as an md array, but
+is left 'inactive'.
+
+A container can contain one or more data arrays. These are composed from
+slices (partitions?) of various devices in the container.
+
+For example, a 5-device DDF set can contain a RAID1 using the first
+half of two devices, a RAID0 using the first half of the remaining 3 devices,
+and a RAID5 over the second half of all 5 devices.
+
+A container can be created with
+
+ mdadm --create /dev/md0 -e ddf -n5 /dev/sd[abcde]
+
+or "-e imsm" to use the Intel Matrix Storage Manager.
+
+An array can be created within a container either by giving the
+container name and the only member:
+
+ mdadm -C /dev/md1 --level raid1 -n 2 /dev/md0
+
+or by listing the component devices
+
+ mdadm -C /dev/md2 --level raid0 -n 3 /dev/sd[cde]
+
+To assemble a container, it is easiest just to pass each device in turn to
+mdadm -I
+
+ for i in /dev/sd[abcde]
+ do mdadm -I $i
+ done
+
+This will assemble the container and the components.
+
+Alternately the container can be assembled explicitly
+
+ mdadm -A /dev/md0 /dev/sd[abcde]
+
+Then the components can all be assembled with
+
+ mdadm -I /dev/md0
+
+For each container, mdadm will start a program called "mdmon" which will
+monitor the array and effect any metadata updates needed. The array is
+initially assembled readonly. It is up to "mdmon" to mark the metadata
+as 'dirty' and switch the array to 'read-write'.
+
+The version 0.90 and 1.x metadata formats supported by previous
+versions for mdadm are still supported and the kernel still performs
+the same updates it used to. The new 'mdmon' approach is only used for
+newly introduced metadata types.
+
+NeilBrown 2nd June 2009
diff --git a/ANNOUNCE-3.0.1 b/ANNOUNCE-3.0.1
new file mode 100644
index 00000000..91b44284
--- /dev/null
+++ b/ANNOUNCE-3.0.1
@@ -0,0 +1,22 @@
+Subject: ANNOUNCE: mdadm 3.0.1 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.0.1
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git?p=mdadm
+
+
+This contains only minor bug fixes over 3.0. If you are using
+3.0, you could consider upgrading.
+
+The brief change log is:
+ - Fix various segfaults
+ - Fixes for --examine with containers
+ - Lots of other little fixes.
+
+NeilBrown 25th September 2009
diff --git a/ANNOUNCE-3.0.2 b/ANNOUNCE-3.0.2
new file mode 100644
index 00000000..93643d17
--- /dev/null
+++ b/ANNOUNCE-3.0.2
@@ -0,0 +1,21 @@
+Subject: ANNOUNCE: mdadm 3.0.2 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.0.2
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git?p=mdadm
+
+
+This just contains one bugfix over 3.0.1 - I was obviously a bit hasty
+in releasing that one.
+
+The brief change log is:
+ - Fix crash when homehost is not set, as often happens in
+ early boot.
+
+NeilBrown 25th September 2009
diff --git a/ANNOUNCE-3.0.3 b/ANNOUNCE-3.0.3
new file mode 100644
index 00000000..d6117a1d
--- /dev/null
+++ b/ANNOUNCE-3.0.3
@@ -0,0 +1,29 @@
+Subject: ANNOUNCE: mdadm 3.0.3 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.0.3
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git?p=mdadm
+
+
+This contains a collection of bug fixes and minor enhancements over
+3.0.1.
+
+The brief change log is:
+ - Improvements for creating arrays giving just a name, like 'foo',
+ rather than the full '/dev/md/foo'.
+ - Improvements for assembling member arrays of containers.
+ - Improvements to test suite
+ - Add option to change increment for RebuildNN messages reported
+ by "mdadm --monitor"
+ - Improvements to mdmon 'hand-over' from initrd to final root.
+ - Handle merging of devices that have left an IMSM array and are
+ being re-incorporated.
+ - Add missing space in "--detail --brief" output.
+
+NeilBrown 22nd October 2009
diff --git a/ANNOUNCE-3.1 b/ANNOUNCE-3.1
new file mode 100644
index 00000000..343b85da
--- /dev/null
+++ b/ANNOUNCE-3.1
@@ -0,0 +1,33 @@
+Subject: ANNOUNCE: mdadm 3.1 - A tool for managing Soft RAID under Linux
+
+Hot on the heels of 3.0.3 I am pleased to announce the availability of
+ mdadm version 3.1
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git?p=mdadm
+
+
+It contains significant feature enhancements over 3.0.x
+
+The brief change log is:
+ - Support --grow to change the layout of RAID4/5/6
+ - Support --grow to change the chunksize of raid 4/5/6
+ - Support --grow to change level from RAID1 -> RAID5 -> RAID6 and
+ back.
+ - Support --grow to reduce the number of devices in RAID4/5/6.
+ - Support restart of these grow options when assembling an array
+ which is partially grown.
+ - Assorted tests of this code, and of different RAID6 layouts.
+
+Note that a 2.6.31 or later kernel is needed to have access to these.
+Reducing devices in a RAID4/5/6 requires 2.6.32.
+Changing RAID5 to RAID1 requires 2.6.33.
+
+You should only upgrade if you need to use, or wish to test, these
+features.
+
+NeilBrown 22nd October 2009
diff --git a/ANNOUNCE-3.1.1 b/ANNOUNCE-3.1.1
new file mode 100644
index 00000000..9e480dc0
--- /dev/null
+++ b/ANNOUNCE-3.1.1
@@ -0,0 +1,39 @@
+Subject: ANNOUNCE: mdadm 3.1.1 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.1.1
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git?p=mdadm
+
+This is a bugfix release over 3.1, which was withdrawn due to serious
+bugs. So it might be best to ignore 3.1 and say that this is a significant
+feature release over 3.0.x
+
+Significant changes are:
+ - RAID level conversion between RAID1, RAID5, and RAID6 are
+ possible where the kernel supports it (2.6.32 at least)
+ - online chunksize and layout changing for RAID5 and RAID6
+ where the kernel supports it.
+ - reduce the number of devices in a RAID4/5/6 array.
+
+ - The default metadata is now v1.1. This metadata is stored at the
+ start of the device so is safer in many ways but could interfere with
+ boot loaders. The old default (0.90) is still available and fully
+ supported.
+
+ - The default chunksize is now 512K rather than 64K. This seems more
+ appropriate for modern devices.
+
+ - The default bitmap chunksize for internal bitmaps is now at least
+ 64Meg as fine grained bitmaps tend to impact performance more for
+ little extra gain.
+
+This release is believed to be stable and you should feel free to
+upgrade to 3.1.1.
+
+NeilBrown 19th November 2009
diff --git a/ANNOUNCE-3.1.2 b/ANNOUNCE-3.1.2
new file mode 100644
index 00000000..321b8bef
--- /dev/null
+++ b/ANNOUNCE-3.1.2
@@ -0,0 +1,46 @@
+Subject: ANNOUNCE: mdadm 3.1.2 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.1.2
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git?p=mdadm
+
+This is a bugfix/stability release over 3.1.1.
+
+Significant changes are:
+ - The default metadata has changed again (sorry about that).
+ It is now v1.2 and will hopefully stay that way. It turned
+ out there were boot-block issues with v1.1 which make it
+ unsuitable for a default, though in many cases it is still
+ suitable to use.
+ - Stopping a container is not permitted when members are still
+ active
+ - Add 'homehost' to the valid words for the "AUTO" config file
+ line. When followed by "-all", this causes mdadm to
+ auto-assemble any array belonging to this host, but not
+ auto-assemble anything else.
+ - Fix some bugs with "--grow --chunksize=" for changing chunksize.
+ - VAR_RUN can be easily changed at compile time just like ALT_RUN.
+ This gives distros more flexibility in how to manage the
+ pid and sock files that mdmon needs.
+ - Various mdmon fixes
+ - Always make bitmap 4K-aligned if at all possible.
+ - If mdadm.conf lists arrays which have inter-dependencies,
+ they previously had to be listed in the "right" order. Now
+ any order should work.
+ - Fix --force assembly of v1.x arrays which are in the process
+ of recovering.
+ - Add section on 'scrubbing' to 'md' man page.
+ - Various command-line-option parsing improvements.
+ - ... and lots of other bug fixes.
+
+
+This release is believed to be stable and you should feel free to
+upgrade to 3.1.2
+
+NeilBrown 10th March 2010
diff --git a/ANNOUNCE-3.1.3 b/ANNOUNCE-3.1.3
new file mode 100644
index 00000000..95b2b6c1
--- /dev/null
+++ b/ANNOUNCE-3.1.3
@@ -0,0 +1,46 @@
+Subject: ANNOUNCE: mdadm 3.1.3 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.1.3
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git?p=mdadm
+
+This is a bugfix/stability release over 3.1.2
+
+Significant changes are:
+ - mapfile now lives in a fixed location which defaults to
+ /dev/.mdadm/map but can be changed at compile time. This
+ location is chosen as most distros provide it during early
+ boot and preserve it through. As long as /dev exists and is
+ writable, /dev/.mdadm will be created.
+ Other files for communication with mdmon live here too.
+ This fixes a bug reported by Debian and Gentoo users where
+ udev would spin in early-boot.
+ - IMSM and DDF metadata will not be recognised on partitions
+ as they should only be used on whole-disks.
+ - Various overflows caused by 2G drives have been addressed.
+ - A subarray of an IMSM container can now be killed with
+ --kill-subarray. Also subarrays can be renamed with
+ --update-subarray
+ - -If (or --incremental --fail) can be used from udev to
+ fail and remove from all arrays a device which has been
+ unplugged from the system. i.e. hot-unplug-support.
+ - "mdadm /dev/mdX --re-add missing" will look for any device
+ that looks like it should be a member of /dev/mdX but isn't
+ and will automatically --re-add it
+ - Now compile with -Wextra to get extra warnings.
+ - Lots of minor bug fixes, documentation improvements, etc.
+
+This release is believed to be stable and you should feel free to
+upgrade to 3.1.3
+
+It is expected that the next release will be 3.2 with a number of new
+features. 3.1.4 will only happen if important bugs show up before 3.2
+is stable.
+
+NeilBrown 6th August 2010
diff --git a/ANNOUNCE-3.1.4 b/ANNOUNCE-3.1.4
new file mode 100644
index 00000000..c157a36a
--- /dev/null
+++ b/ANNOUNCE-3.1.4
@@ -0,0 +1,37 @@
+Subject: ANNOUNCE: mdadm 3.1.4 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.1.4
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git?p=mdadm
+
+This is a bugfix/stability release over 3.1.3.
+3.1.3 had a couple of embarrassing regressions and a couple of other
+issues surfaced which had easy fixes so I decided to make a 3.1.4
+release after all.
+
+Two fixes related to configs that aren't using udev:
+ - Don't remove md devices with 'standard' names on --stop
+ - Allow dev_open to work on read-only /dev
+And fixed regressions:
+ - Allow --incremental to add spares to an array
+ - Accept --no-degraded as a deprecated option rather than
+ throwing an error
+ - Return correct success status when --incremental assembling
+ a container which does not yet have enough devices.
+ - Don't link mdadm with pthreads, only mdmon needs it.
+ - Fix compiler warning due to bad use of snprintf
+ - Fix spare migration
+
+This release is believed to be stable and you should feel free to
+upgrade to 3.1.4
+
+It is expected that the next release will be 3.2 with a number of new
+features.
+
+NeilBrown 31st August 2010
diff --git a/ANNOUNCE-3.1.5 b/ANNOUNCE-3.1.5
new file mode 100644
index 00000000..baa1f921
--- /dev/null
+++ b/ANNOUNCE-3.1.5
@@ -0,0 +1,42 @@
+Subject: ANNOUNCE: mdadm 3.1.5 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.1.5
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git?p=mdadm
+
+This is a bugfix/stability release over 3.1.4. It contains all the
+important bugfixes found while working on 3.2 and 3.2.1. It will be
+the last 3.1.x release - 3.2.1 is expected to be released in a few days.
+
+Changes include:
+ - Fixes for v1.x metadata on big-endian machines.
+ - man page improvements
+ - Improve '--detail --export' when run on partitions of an md array.
+ - Fix regression with removing 'failed' or 'detached' devices.
+ - Fixes for "--assemble --force" in various unusual cases.
+ - Allow '-Y' to mean --export. This was documented but not implemented.
+ - Various fixes for handling 'ddf' metadata. This is now more reliable
+ but could benefit from more interoperability testing.
+ - Correctly list subarrays of a container in "--detail" output.
+ - Improve checks on whether the requested number of devices is supported
+ by the metadata - both for --create and --grow.
+ - Don't remove partitions from a device that is being included in an
+ array until we are fully committed to including it.
+ - Allow "--assemble --update=no-bitmap" so an array with a corrupt
+ bitmap can still be assembled.
+ - Don't allow --add to succeed if it looks like a "--re-add" is probably
+ wanted, but cannot succeed. This avoids inadvertently turning
+ devices into spares when an array is failed.
+
+This release is believed to be stable and you should feel free to
+upgrade to 3.1.5
+
+
+NeilBrown 23rd March 2011
+
diff --git a/ANNOUNCE-3.2 b/ANNOUNCE-3.2
new file mode 100644
index 00000000..9e282bc6
--- /dev/null
+++ b/ANNOUNCE-3.2
@@ -0,0 +1,77 @@
+Subject: ANNOUNCE: mdadm 3.2 - A tool for managing Soft RAID under Linux (DEVEL ONLY)
+
+I am pleased to announce the availability of
+ mdadm version 3.2
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm devel-3.2
+ http://neil.brown.name/git?p=mdadm
+
+This is a "Developers only" release. Please don't consider using it
+or making it available to others without reading the following.
+
+
+By far the most significant change in this release relates to the
+management of reshaping arrays. This code has been substantially
+re-written so that it can work with 'externally managed metadata' -
+Intel's IMSM in particular. We now support level migration and
+OnLine Capacity Expansion on these arrays.
+
+However, while the code largely works it has not been tested
+exhaustively so there are likely to be problems. As the reshape code
+for native metadata arrays was changed as part of this rewrite these
+problems could also result in regressions for reshape of native
+metadata.
+
+It is partly to encourage greater testing that this release is being
+made. Any reports of problems - particularly reproducible recipes for
+triggering the problems - will be gratefully received.
+
+It is hoped that a "3.2.1" release will be available in early March
+which will be a bugfix release over this and can be considered
+suitable for general use.
+
+Other changes of note:
+
+ - Policy framework.
+ Various policy statements can be made in the mdadm.conf to guide
+ the behaviour of mdadm, particularly with regards to how new devices
+ are treated by "mdadm -I".
+ Depending on the 'action' associated with a device (identified by
+ its 'path') such new devices can be automatically re-added to an
+ existing array that they previously fell out of, or automatically
+ added as a spare if they appear to contain no data.
+
+ - mdadm now has a limited understanding of partition tables. This
+ allows the policy framework to make decisions about partitioned
+ devices as well.
+
+ - --incremental --remove can be told what --path the device was on,
+ and this info will be recorded so that another device appearing at
+ the same physical location can be preferentially added to the same
+ array (provides the spare-same-slot action policy applied to the
+ path).
+
+ - A new "--invalid-backup" flag is available in --assemble
+ mode. This can be used to re-assemble an array which was stopped
+ in the middle of a reshape, and for which the 'backup file' is no
+ longer available or is corrupted. The array may have some
+ corruption in it at the point where reshape was up to, but at least
+ the rest of the array will become available.
+
+
+ - Various internal restructuring - more is needed.
+
+
+Any feedback and bug reports are always welcomed at:
+ linux-raid@vger.kernel.org
+
+And please: don't use this in production - particularly not the
+--grow functionality.
+
+NeilBrown 1st February 2011
+
+
diff --git a/ANNOUNCE-3.2.1 b/ANNOUNCE-3.2.1
new file mode 100644
index 00000000..0e7826ca
--- /dev/null
+++ b/ANNOUNCE-3.2.1
@@ -0,0 +1,75 @@
+
+
+I am pleased to announce the availability of
+ mdadm version 3.2.1
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git/mdadm
+
+Many of the changes in this release are of internal interest only,
+restructuring and refactoring code and so forth.
+
+Most of the bugs found and fixed during development for 3.2.1 have been
+back-ported for the recently-released 3.1.5 so this release primarily
+provides a few new features over 3.1.5.
+
+They include:
+ - policy framework
+ Policy can be expressed for moving spare devices between arrays, and
+ for how to handle hot-plugged devices. This policy can be different
+ for devices plugged in to different controllers etc.
+ This, for example, allows a configuration where when a device is plugged
+ in it is immediately included in an md array as a hot spare and
+ possibly starts recovery immediately if an array is degraded.
+
+ - some understanding of mbr and gpt partition tables
+ This is primarily to support the new hot-plug support. If a
+ device is plugged in and policy suggests it should have a partition table,
+ the partition table will be copied from a suitably similar device, and
+ then the partitions will hot-plug and can then be added to md arrays.
+
+ - "--incremental --remove" can remember where a device was removed from
+ so if a device gets plugged back in the same place, special policy applies
+ to it, allowing it to be included in an array even if a general hotplug
+ will not be included.
+
+ - enhanced reshape options, including growing a RAID0 by converting to RAID4,
+ restriping, and converting back. Also conversions between RAID0 and
+ RAID10 and between RAID1 and RAID10 are possible (with a suitably recent
+ kernel).
+
+ - spare migration for IMSM arrays.
+ Spare migration can now work across 'containers' using non-native metadata
+ and specifically Intel's IMSM arrays support spare migrations.
+
+ - OLCE and level migration for Intel IMSM arrays.
+ OnLine Capacity Expansion and level migration (e.g. RAID0 -> RAID5) is
+ supported for Intel Matrix Storage Manager arrays.
+ This support is currently 'experimental' for technical reasons. It can
+ be enabled with "export MDADM_EXPERIMENTAL=1"
+
+ - avoid including wayward devices
+ If you split a RAID1, mount the two halves as two separate degraded RAID1s,
+ and then later bring the two back together, it is possible that the md
+ metadata won't properly show that one must over-ride the other.
+ mdadm now does extra checking to detect this possibility and avoid
+ potentially corrupting data.
+
+ - remove any possible confusion between similar options.
+ e.g. --brief and --bitmap were mapped to 'b' and mdadm wouldn't
+ notice if one was used where the other was expected.
+
+ - allow K,M,G suffixes on chunk sizes
+
+
+While mdadm-3.2.1 is considered to be reasonably stable, you should
+only use it if you want to try out the new features, or if you
+generally like to be on the bleeding edge. If the new features are not
+important to you, then 3.1.5 is probably the appropriate version to be using
+until 3.2.2 comes out.
+
+NeilBrown 28th March 2011
diff --git a/ANNOUNCE-3.2.2 b/ANNOUNCE-3.2.2
new file mode 100644
index 00000000..b70d18b9
--- /dev/null
+++ b/ANNOUNCE-3.2.2
@@ -0,0 +1,36 @@
+Subject: ANNOUNCE: mdadm 3.2.2 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.2.2
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git/mdadm
+
+This release is largely a stabilising release for the 3.2 series.
+Many of the changes just fix bugs introduced in 3.2 or 3.2.1.
+
+There are some new features. They are:
+ - reshaping IMSM (Intel metadata) arrays is no longer 'experimental',
+ it should work properly and be largely compatible with IMSM drivers in
+ other platforms.
+ - --assume-clean can be used with --grow --size to avoid resyncing the
+ new part of the array. This is only supported with very new kernels.
+ - RAID0 arrays can have chunksize which is not a power of 2. This has been
+ supported in the kernel for a while but is only now supported by
+ mdadm.
+
+ - A new tool 'raid6check' is available which can check a RAID6 array,
+ or part of it, and report which device is most inconsistent with the
+ others if any stripe is inconsistent. This is still under development
+ and does not have a man page yet. If anyone tries it out and has any
+ questions or experience to report, they would be most welcome on
+ linux-raid@vger.kernel.org.
+
+Future releases in the 3.2 series will only be made if bugfixes are needed.
+The next release to add features is expected to be 3.3.
+
+NeilBrown 17th June 2011
diff --git a/ANNOUNCE-3.2.3 b/ANNOUNCE-3.2.3
new file mode 100644
index 00000000..8a8dba46
--- /dev/null
+++ b/ANNOUNCE-3.2.3
@@ -0,0 +1,24 @@
+Subject: ANNOUNCE: mdadm 3.2.3 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.2.3
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git/mdadm
+
+This release is largely a bugfix release for the 3.2 series with many
+minor fixes with little or no impact.
+
+The largest single area of change is support for reshape of Intel
+IMSM arrays (OnLine Capacity Expansion and Level Migration).
+Among other fixes, this now has a better chance of surviving if a
+device fails during reshape.
+
+Upgrading is recommended - particularly if you use mdadm for IMSM
+arrays - but not essential.
+
+NeilBrown 23rd December 2011
diff --git a/ANNOUNCE-3.2.4 b/ANNOUNCE-3.2.4
new file mode 100644
index 00000000..e3216786
--- /dev/null
+++ b/ANNOUNCE-3.2.4
@@ -0,0 +1,144 @@
+Subject: ANNOUNCE: mdadm 3.2.4 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.2.4
+
+It is available at the usual places, now including github:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://github.com/neilbrown/mdadm
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git/mdadm
+
+This release is largely a bugfix release for the 3.2 series with many
+minor fixes with little or no impact.
+
+"--oneline" log of changes is below. Some notable ones are:
+
+ - --offroot argument to improve interactions between mdmon and initrd
+ - --prefer argument to select which /dev names to display in some
+ circumstances.
+ - relax restrictions on when "--add" will be allowed
+ - Fix bug with adding write-intent-bitmap to active array
+ - Now defaults to "/run/mdadm" for storing run-time files.
+
+Upgrading is encouraged.
+
+The next mdadm release is expected to be 3.3 with a number of new
+features.
+
+NeilBrown 9th May 2012
+
+77b3ac8 monitor: make return from read_and_act more symbolic.
+68226a8 monitor: ensure we retry soon when 'remove' fails.
+8453f8d fix: Monitor sometimes crashes
+90fa1a2 Work around gcc-4.7's strict aliasing checks
+0c4304c fix: container creation with --incremental used.
+5d1c7cd FIX: External metadata sometimes is not updated
+3c20f98 FIX: mdmon check in reshape_container() can cause a problem
+59ab9f5 FIX: Typo error in fprint command
+9587c37 imsm: load_super_imsm_all function refactoring
+ec50f7b imsm: load_imsm_super_all supports loading metadata from the device list
+ca9de18 imsm: validate the number of imsm volumes per controller
+30602f5 imsm: display fd in error trace when when store_imsm_mpb failes
+eb155f6 mdmon: Use getopt_long() to parse command line options
+08ca2ad Add --offroot argument to mdadm
+da82751 Add --offroot argument to mdmon
+a0963a8 Spawn mdmon with --offroot if mdadm was launched with --offroot
+f878b24 imsm: fix, the second array need to have the whole available space on devices
+d597705 getinfo_super1: Use MaxSector in place of sb->size
+6ef8905 super1: make aread/awrite always use an aligned buffer.
+de5a472 Remove avail_disks arg from 'enough'.
+da8fe5a Assemble: fix --force assemble during reshape.
+b10c663 config: fix handing of 'homehost' in AUTO line.
+92d49ec FIX: NULL pointer to strdup() can be passed
+d2bde6d imsm: FIX: No new missing disks are allowed during general migration
+111e9fd FIX: Array is not run when expansion disks are added
+bf5cf7c imsm: FIX: imsm_get_allowed_degradation() doesn't count degradation for raid1
+50927b1 Fix: Sometimes mdmon throws core dump during reshape
+78340e2 Flush mdmon before next reshape step during container operation
+e174219 imsm: FIX: Chunk size migration problem
+f93346e FIX: use md position to reshape restart
+6a75c8c imsm: FIX: use md position to reshape restart
+51d83f5 imsm: FIX: Clear migration record when migration switches to next volume.
+e1dd332 FIX: restart reshape when reshape process is stopped just between 2 reshapes
+1ca90aa FIX: Do not try to (continue) reshape using inactive array
+9f1b0f0 config: conf_match should ignore devname when not set.
+d669228 Use posix_memalign() for memory used to write bitmaps
+178950e FIX: Changes in '0' case for reshape position verification
+9200d41 avoid double-free upon "old buggy kernel" sysfs_read failure
+4011421 Print error message if failing to write super for 1.x metadata
+0011874 Use MDMON_DIR for pid files created in Monitor.c
+56d1885 Assemble: don't use O_EXCL until we have checked device content.
+b720636 Assemble: support assembling of a RAID0 being reshaped.
+c69ffac Manage: allow --re-add to failed array.
+52f07f5 Reset bad flag on map update
+911cead super1: support superblocks up to 4K.
+ad6db3c Create: reduce the verbosity of 'default_layout'.
+b2bfdfa super1.c don't keep recalculating bitmap pointer
+4122675 Define and use SUPER1_SIZE for allocations
+1afa930 init_super1() memset full buffer allocated for superblock
+2de0b8a match_metadata_desc1(): Use calloc instead of malloc+memset
+3c0bcd4 Use 4K buffer alignment for superblock allocations
+308340a Use struct align_fd to cache fd's block size for aligned reads/writes
+65ed615 match_metadata_desc0(): Use calloc instead of malloc+memset
+de89706 Generalize ROUND_UP() macro and introduce matching ROUND_UP_PTR()
+0a2f189 super1.c: use ROUND_UP/ROUND_UP_PTR
+654a381 super-intel.c: Use ROUND_UP() instead of manually coding it
+42d5dfd __write_init_super_ddf(): Use posix_memalign() instead of static aligned buffer
+d4633e0 Examine: fix array size calculation for RAID10.
+e62b778 Assemble: improve verbose logging when including old devices.
+0073a6e Remove possible crash during RAID6 -> RAID5 reshape.
+69fe207 Incremental: fix adding devices with --incremental
+bcbb311 Manage: replace 'return 1' with 'goto abort'.
+9f58469 Manage: freeze recovery while adding multiple devices.
+ae6c05a Create: round off size for RAID1 arrays.
+5ca3a90 Grow: print useful error when converting RAID1->RAID5 will fail.
+c07d640 Fix tests/05r1-re-add-nosupper
+2d762ad Fix the new ROUND_UP macro.
+fd324b0 sysfs: fixed sysfs_freeze_array array to work properly with Manage_subdevs.
+5551b11 imsm: avoid overflows for disks over 1TB
+97f81ee clear hi bits if not used after loading metadata from disk
+e03640b simplify calculating array_blocks
+29cd082 show 2TB volumes/disks support in --detail-platform
+2cc699a check volume size in validate_geometry_imsm_orom
+9126b9a check that no disk over 2TB is used to create container when no support
+027c374 imsm: set 2tb disk attribute for spare
+3556c2f Fix typo: wan -> want
+15632a9 parse_size: distinguish between 0 and error.
+fbdef49 Bitmap_offset is a signed number
+508a7f1 super1: leave more space in front of data by default.
+40110b9 Fix two typos in fprintf messages
+342460c mdadm man page: fix typo
+0e7f69a imsm: display maximum volumes per controller and array
+36fd8cc imsm: FIX: Update function imsm_num_data_members() for Raid1/10
+7abc987 imsm: FIX: Add volume size expand support to imsm_analyze_change()
+f3871fd imsm: Add new metadata update for volume size expansion
+54397ed imsm: Execute size change for external metatdata
+016e00f FIX: Support metadata changes rollback
+fbf3d20 imsm: FIX: Support metadata changes rollback
+44f6f18 FIX: Extend size of raid0 array
+7e7e9a4 FIX: Respect metadata size limitations
+65a9798 FIX: Detect error and rollback metadata
+13bcac9 imsm: Add function imsm_get_free_size()
+b130333 imsm: Support setting max size for size change operation
+c41e00b imsm: FIX: Component size alignment check
+58d26a2 FIX: Size change is possible as standalone change only
+4aecb54 FIX: Assembled second array is in read only state during reshape
+ae2416e FIX: resolve make everything compilation error
+480f356 Raid limit of 1024 when scanning for devices.
+c2ecf5f Add --prefer option for --detail and --monitor
+0a99975 Relax restrictions on when --add is permitted.
+7ce0570 imsm: fix: rebuild does not continue after reboot
+b51702b fix: correct extending size of raid0 array
+34a1395 Fix sign extension of bitmap_offset in super1.c
+012a864 Introduce sysfs_set_num_signed() and use it to set bitmap/offset
+5d7b407 imsm: fix: thunderdome may drop 2tb attribute
+5ffdc2d Update test for "is udev active".
+96fd06e Adjust to new standard of /run
+974e039 test: don't worry too much about array size.
+b0a658f Grow: failing the set the per-device size is not an error.
+36614e9 super-intel.c: Don't try to close negative fd
+562aa10 super-intel.c: Fix resource leak from opendir()
+
diff --git a/ANNOUNCE-3.2.5 b/ANNOUNCE-3.2.5
new file mode 100644
index 00000000..396da12a
--- /dev/null
+++ b/ANNOUNCE-3.2.5
@@ -0,0 +1,31 @@
+Subject: ANNOUNCE: mdadm 3.2.5 - A tool for managing Soft RAID under Linux
+
+I am somewhat disappointed to have to announce the availability of
+ mdadm version 3.2.5
+
+It is available at the usual places, now including github:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://github.com/neilbrown/mdadm
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git/mdadm
+
+This release primarily fixes a serious regression in 3.2.4.
+This regression does *not* cause any risk to data. It simply
+means that adding a device with "--add" would sometimes fail
+when it should not.
+
+The fix also includes a couple of minor fixes such as making
+the "--layout=preserve" option to "--grow" work again.
+
+A reminder that the default location for runtime files is now
+"/run/mdadm". If you compile this for a distro that does not
+have "/run", you will need to compile with an alternate setting for
+MAP_DIR. e.g.
+ make MAP_DIR=/var/run/mdadm
+or
+ make MAP_DIR=/dev/.mdadm
+
+NeilBrown 18th May 2012
+
diff --git a/ANNOUNCE-3.2.6 b/ANNOUNCE-3.2.6
new file mode 100644
index 00000000..f5cfd492
--- /dev/null
+++ b/ANNOUNCE-3.2.6
@@ -0,0 +1,57 @@
+Subject: ANNOUNCE: mdadm 3.2.6 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.2.6
+
+It is available at the usual places, now including github:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://github.com/neilbrown/mdadm
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git/mdadm
+
+This is a stability release which adds a number of bugfixes to 3.2.5.
+There are no real stand-out fixes, just lots of little bits and pieces.
+
+Below is the "git log --oneline --reverse" list of changes since
+3.2.5.
+
+NeilBrown 25th October 2012
+
+b7e05d2 udev-rules: prevent systemd from mount devices before they are ready.
+0d478e2 mdadm: Fix Segmentation fault.
+42f0ca1 imsm: fix: correct checking volume's degradation
+fcf2195 Monitor: fix inconsistencies in values for ->percent
+5f862fb Monitor: Report NewArray when an array the disappeared, reappears.
+6f51b1c Monitor: fix reporting for Fail vs FailSpare etc.
+68ad53b mdmon: fix arg parsing.
+517f135 Assemble: don't leak memory with fdlist.
+090900c udev-rules: prevent systemd from mount devices before they are ready.
+446e000 sha1.h: remove ansidecl.h header inclusion
+ec894f5 Manage: zero metadata before adding to 'external' array.
+3a84db5 ddf: allow a non-spare to be used to recovery a missing device.
+c5d61ca ddf: hack to fix container recognition.
+23084aa mdmon: fix arg processing for -a
+c4e96a3 mdmon: allow --takeover when original was started with --offroot
+80841df find_free_devnum: avoid auto-using names in /etc/mdadm.conf
+c5c56d6 mapfile: fix mapfile rebuild for containers
+aec89f6 fix segfaults in Detail()
+2117ad1 Fix 'enough' function for RAID10.
+0bc300d Use --offroot flag when assembling md arrays via --incrmental
+ac78f24 Grow: make warning about old metadata more explicit.
+14026ab Replace sha1.h with slightly older version.
+6f6809f Add zlib license to crc32.c
+5267ba0 Handles spaces in array names better.
+c51f288 imsm: allow --assume-clean to work.
+acf7076 Grow: allow --grow --continue to work for native metadata.
+335d2a6 Grow: fix a couple of typos with --assume-clean usage
+9ff1427 Fix open_container
+3713633 mdadm: super0: do not override uuid with homehost
+31bff58 Trivial bugfix and spelling fixes.
+e1e539f Detail: don't report a faulty device as 'spare' or 'rebuilding'.
+22a6461 super0: allow creation of array on 2TB+ devices.
+a5d47a2 Create new md devices consistently
+eb48676 Monitor: don't complain about non-monitorable arrays in mdadm.conf
+ecdf2d7 Query: don't be confused by partition tables.
+f7b75c1 Query: allow member of non-0.90 arrays to be better reported.
diff --git a/ANNOUNCE-3.3 b/ANNOUNCE-3.3
new file mode 100644
index 00000000..f770aa13
--- /dev/null
+++ b/ANNOUNCE-3.3
@@ -0,0 +1,63 @@
+Subject: ANNOUNCE: mdadm 3.3 - A tool for managing md Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.3
+
+It is available at the usual places:
+ http://www.kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://github.com/neilbrown/mdadm
+ git://neil.brown.name/mdadm
+ http://git.neil.brown.name/git/mdadm
+
+This is a major new release so don't be too surprised if there are a
+few issues. If I hear about them they will be fixed in 3.3.1.
+git log reports nearly 500 changes since 3.2.6 so I won't list them
+all.
+
+Some highlights are:
+
+- Some array reshapes can proceed without needing backup file.
+ This is done by changing the 'data_offset' so we never need to write
+ any data back over where it was before. If there is no "head space"
+ or "tail space" to allow data_offset to change, the old mechanism
+ with a backup file can still be used.
+- RAID10 arrays can be reshaped to change the number of devices,
+ change the chunk size, or change the layout between 'near'
+ and 'offset'.
+ This will always change data_offset, and will fail if there is no
+ room for data_offset to be moved.
+- "--assemble --update=metadata" can convert a 0.90 array to a 1.0 array.
+- bad-block-logs are supported (but not heavily tested yet)
+- "--assemble --update=revert-reshape" can be used to undo a reshape
+ that has just been started but isn't really wanted. This is very
+ new and while it passes basic tests it cannot be guaranteed.
+- improved locking between --incremental and --assemble
+- uses systemd to run "mdmon" if systemd is configured to do that.
+- kernel names of md devices can be non-numeric. e.g. "md_home" rather than
+ "md0". This will probably confuse lots of other tools, so you need to
+ echo CREATE names=yes >> /etc/mdadm.conf
+ or the feature will not be used. (you also need a reasonably new kernel).
+- "--stop" can be given a kernel name instead of a device name. i.e
+ mdadm --stop md4
+ will work even if /dev/md4 doesn't exist.
+- "--detail --export" has some information about the devices in the array
+- --dump and --restore can be used to backup and restore the metadata on an
+ array.
+- Hot-replace is supported with
+ mdadm /dev/mdX --replace /dev/foo
+ and
+ mdadm /dev/mdX --replace /dev/foo --with /dev/bar
+- Config file can be a directory in which case all "*.conf" files are
+ read in lexical order.
+ Default is to read /etc/mdadm.conf and then /etc/mdadm.conf.d
+ Thus
+ echo CREATE names=yes > /etc/mdadm.conf.d/names.conf
+ will also enable the use of named md devices.
+
+- Lots of improvements to DDF support including adding support for
+ RAID10 (thanks Martin Wilck).
+
+and lots of bugfixes and other little changes.
+
+NeilBrown 3rd September 2013
diff --git a/ANNOUNCE-3.3.1 b/ANNOUNCE-3.3.1
new file mode 100644
index 00000000..7d5e666e
--- /dev/null
+++ b/ANNOUNCE-3.3.1
@@ -0,0 +1,23 @@
+Subject: ANNOUNCE: mdadm 3.3.1 - A tool for managing md Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.3.1
+
+It is available at the usual places:
+ http://www.kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://github.com/neilbrown/mdadm
+ git://neil.brown.name/mdadm
+ http://git.neil.brown.name/git/mdadm.git
+
+The main changes are:
+ - lots of work on "DDF" support. Hopefully it will be more stable
+ now. Bug reports are always welcome.
+ - improved interactions with 'systemd'. Where possible, background
+ tasks are run from systemd (if it is present) rather than forking and
+ disassociating from the session. This is important because udev
+ doesn't really let you disassociate.
+
+though there are a number of other little bug fixes too.
+
+NeilBrown 5th June 2014
diff --git a/ANNOUNCE-3.3.2 b/ANNOUNCE-3.3.2
new file mode 100644
index 00000000..6b549611
--- /dev/null
+++ b/ANNOUNCE-3.3.2
@@ -0,0 +1,16 @@
+Subject: ANNOUNCE: mdadm 3.3.2 - A tool for managing md Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.3.2
+
+It is available at the usual places:
+ http://www.kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://github.com/neilbrown/mdadm
+ git://neil.brown.name/mdadm
+ http://git.neil.brown.name/git/mdadm.git
+
+Changes since 3.3.1 are mostly little bugfixes and some man-page
+updates.
+
+NeilBrown 21st August 2014
diff --git a/Assemble.c b/Assemble.c
new file mode 100644
index 00000000..cdcdb0f8
--- /dev/null
+++ b/Assemble.c
@@ -0,0 +1,2007 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include <ctype.h>
+
+/*
+ * Check whether the array name 'found' satisfies the name 'required'.
+ *
+ * found:    name taken from metadata.  Only its first 32 bytes are examined,
+ *           so it does not have to be NUL-terminated within that window.
+ * required: NUL-terminated name being looked for.
+ * homehost: optional host name, may be NULL.
+ *
+ * Returns 1 when 'found' equals 'required', or when 'found' has the form
+ * "<prefix>:<required>" with <prefix> exactly strlen(homehost) bytes long.
+ * Note that only the prefix length and the ':' separator are checked, not
+ * the prefix text itself.  Returns 0 otherwise.
+ */
+static int name_matches(char *found, char *required, char *homehost)
+{
+	char fnd[33];
+
+	/* strncpy() zero-pads the destination, so every byte of fnd[] is
+	 * initialised and the fnd[l] probe below is always a defined read.
+	 */
+	strncpy(fnd, found, 32);
+	fnd[32] = 0;
+
+	/* Compare the bounded copy, not 'found': the raw name may fill all
+	 * 32 bytes without a terminator.
+	 */
+	if (strcmp(fnd, required) == 0)
+		return 1;
+	if (homehost) {
+		size_t l = strlen(homehost);
+
+		if (l < 32 && fnd[l] == ':' &&
+		    strcmp(fnd + l + 1, required) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Report whether the member array named by 'metadata_version' is active.
+ *
+ * Scans the list returned by mdstat_read() for an entry whose
+ * metadata_version starts with "external:", names a subarray, and whose
+ * remainder matches 'metadata_version'.  The first character on each side
+ * is skipped before comparing because it can be '/' or '-'.
+ *
+ * Returns 1 if such an entry exists, 0 otherwise.
+ */
+static int is_member_busy(char *metadata_version)
+{
+	struct mdstat_ent *list = mdstat_read(0, 0);
+	struct mdstat_ent *e;
+	int found = 0;
+
+	for (e = list; e; e = e->next) {
+		char *ver = e->metadata_version;
+
+		if (ver != NULL &&
+		    strncmp(ver, "external:", 9) == 0 &&
+		    is_subarray(&ver[9]) &&
+		    strcmp(&ver[10], metadata_version + 1) == 0) {
+			found = 1;
+			break;
+		}
+	}
+	free_mdstat(list);
+
+	return found;
+}
+
+/*
+ * Decide whether the array described by 'content' is acceptable for the
+ * identity 'ident'.
+ *
+ * Each criterion that is set in 'ident' (uuid, name, super-minor, level,
+ * raid_disks, member) is compared with 'content' in that order; the first
+ * mismatch ends the test.
+ *   - the uuid test is skipped when 'update' is "uuid", and never rejects
+ *     a content uuid equal to uuid_zero;
+ *   - the name test is skipped when 'update' is "name";
+ *   - the raid_disks test is skipped when the metadata reports 0 disks
+ *     (it does not know how many to expect).
+ *
+ * devname: if non-NULL, used to report the reason for a mismatch.
+ *
+ * Returns 1 on a match, 0 on the first mismatch.
+ */
+static int ident_matches(struct mddev_ident *ident,
+			 struct mdinfo *content,
+			 struct supertype *tst,
+			 char *homehost,
+			 char *update, char *devname)
+{
+	int fixing_uuid = update != NULL && strcmp(update, "uuid") == 0;
+	int fixing_name = update != NULL && strcmp(update, "name") == 0;
+	char *member;
+
+	if (ident->uuid_set && !fixing_uuid &&
+	    !same_uuid(content->uuid, ident->uuid, tst->ss->swapuuid) &&
+	    memcmp(content->uuid, uuid_zero, sizeof(int[4])) != 0) {
+		if (devname)
+			pr_err("%s has wrong uuid.\n", devname);
+		return 0;
+	}
+
+	if (ident->name[0] && !fixing_name &&
+	    !name_matches(content->name, ident->name, homehost)) {
+		if (devname)
+			pr_err("%s has wrong name.\n", devname);
+		return 0;
+	}
+
+	if (ident->super_minor != UnSet &&
+	    content->array.md_minor != ident->super_minor) {
+		if (devname)
+			pr_err("%s has wrong super-minor.\n",
+			       devname);
+		return 0;
+	}
+
+	if (ident->level != UnSet &&
+	    content->array.level != ident->level) {
+		if (devname)
+			pr_err("%s has wrong raid level.\n",
+			       devname);
+		return 0;
+	}
+
+	if (ident->raid_disks != UnSet &&
+	    content->array.raid_disks != 0 &&
+	    content->array.raid_disks != ident->raid_disks) {
+		if (devname)
+			pr_err("%s requires wrong number of drives.\n",
+			       devname);
+		return 0;
+	}
+
+	/* No member requested: nothing further to check. */
+	if (!ident->member || !ident->member[0])
+		return 1;
+
+	/* The member name is whatever follows the '/' found after the
+	 * first character of text_version.
+	 */
+	member = strchr(content->text_version+1, '/');
+	if (member == NULL) {
+		if (devname)
+			pr_err("%s is not a container and one is required.\n",
+			       devname);
+		return 0;
+	}
+	if (strcmp(ident->member, member+1) != 0) {
+		if (devname)
+			pr_err("skipping wrong member %s is %s\n",
+			       content->text_version, devname);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Walk 'devlist' and flag, via ->used, the devices that together form one
+ * consistent array matching 'ident'.
+ *
+ * Values stored in ->used by this function:
+ *   0  not selected
+ *   1  selected as part of the array
+ *   2  rejected (also assigned on entry to anything already marked used)
+ *   3  floating spare (uuid equals uuid_zero); resolved to 0, 1 or 2
+ *      before returning
+ *
+ * devlist:    candidate devices.
+ * ident:      identity the array must match.
+ * stp:        in: supertype to start from (may point to NULL);
+ *             out: supertype of the selected devices.
+ * contentp:   in: mdinfo buffer that getinfo_super() fills;
+ *             out: that buffer, or the matching member when a container
+ *             was selected.
+ * c:          context supplying verbose, homehost and update.
+ * inargv:     non-zero when the devices were given explicitly.
+ * auto_assem: non-zero when auto-assembling.
+ *
+ * Returns the number of entries in 'devlist' that were not already marked
+ * used on entry, or -1 if assembly has to be aborted.
+ */
+static int select_devices(struct mddev_dev *devlist,
+			  struct mddev_ident *ident,
+			  struct supertype **stp,
+			  struct mdinfo **contentp,
+			  struct context *c,
+			  int inargv, int auto_assem)
+{
+	struct mddev_dev *tmpdev;
+	int num_devs;
+	struct supertype *st = *stp;
+	struct mdinfo *content = NULL;
+	int report_mismatch = ((inargv && c->verbose >= 0) || c->verbose > 0);
+	struct domainlist *domains = NULL;
+
+	/* Anything used by an earlier pass is out of bounds (2); count the
+	 * rest.
+	 */
+	tmpdev = devlist; num_devs = 0;
+	while (tmpdev) {
+		if (tmpdev->used)
+			tmpdev->used = 2;
+		else
+			num_devs++;
+		tmpdev->disposition = 0;
+		tmpdev = tmpdev->next;
+	}
+
+	/* first walk the list of devices to find a consistent set
+	 * that match the criteria, if that is possible.
+	 * We flag the ones we like with 'used'.
+	 */
+	for (tmpdev = devlist;
+	     tmpdev;
+	     tmpdev = tmpdev ? tmpdev->next : NULL) {
+		char *devname = tmpdev->devname;
+		int dfd;
+		struct stat stb;
+		struct supertype *tst;
+		struct dev_policy *pol = NULL;
+		int found_container = 0;
+
+		if (tmpdev->used > 1)
+			continue;
+
+		if (ident->container) {
+			if (ident->container[0] == '/' &&
+			    !same_dev(ident->container, devname)) {
+				if (report_mismatch)
+					pr_err("%s is not the container required (%s)\n",
+					       devname, ident->container);
+				continue;
+			}
+		} else if (ident->devices &&
+			   !match_oneof(ident->devices, devname)) {
+			/* Note that we ignore the "device=" identifier if a
+			 * "container=" is given.  Checking both is unnecessarily
+			 * complicated.
+			 */
+			if (report_mismatch)
+				pr_err("%s is not one of %s\n", devname, ident->devices);
+			continue;
+		}
+
+		tst = dup_super(st);
+
+		dfd = dev_open(devname, O_RDONLY);
+		if (dfd < 0) {
+			if (report_mismatch)
+				pr_err("cannot open device %s: %s\n",
+				       devname, strerror(errno));
+			tmpdev->used = 2;
+		} else if (fstat(dfd, &stb)< 0) {
+			/* Impossible! */
+			pr_err("fstat failed for %s: %s\n",
+			       devname, strerror(errno));
+			tmpdev->used = 2;
+		} else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+			pr_err("%s is not a block device.\n",
+			       devname);
+			tmpdev->used = 2;
+		} else if (must_be_container(dfd)) {
+			if (st) {
+				/* already found some components, this cannot
+				 * be another one.
+				 */
+				if (report_mismatch)
+					pr_err("%s is a container, but we are looking for components\n",
+					       devname);
+				tmpdev->used = 2;
+#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
+			/* 'else' keeps this chain identical in shape to the
+			 * build where this region is compiled out: a
+			 * container rejected above is not examined further.
+			 */
+			} else if (!tst && (tst = super_by_fd(dfd, NULL)) == NULL) {
+				if (report_mismatch)
+					pr_err("not a recognisable container: %s\n",
+					       devname);
+				tmpdev->used = 2;
+#endif
+			} else if (!tst->ss->load_container
+				   || tst->ss->load_container(tst, dfd, NULL)) {
+				if (report_mismatch)
+					pr_err("no correct container type: %s\n",
+					       devname);
+				tmpdev->used = 2;
+			} else if (auto_assem &&
+				   !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)),
+						       tst->ss->match_home(tst, c->homehost) == 1)) {
+				if (report_mismatch)
+					pr_err("%s has metadata type %s for which "
+					       "auto-assembly is disabled\n",
+					       devname, tst->ss->name);
+				tmpdev->used = 2;
+			} else
+				found_container = 1;
+		} else {
+			if (!tst && (tst = guess_super(dfd)) == NULL) {
+				if (report_mismatch)
+					pr_err("no recogniseable superblock on %s\n",
+					       devname);
+				tmpdev->used = 2;
+			} else if (tst->ss->load_super(tst,dfd, NULL)) {
+				if (report_mismatch)
+					pr_err("no RAID superblock on %s\n",
+					       devname);
+				tmpdev->used = 2;
+			} else if (tst->ss->compare_super == NULL) {
+				if (report_mismatch)
+					pr_err("Cannot assemble %s metadata on %s\n",
+					       tst->ss->name, devname);
+				tmpdev->used = 2;
+			} else if (auto_assem && st == NULL &&
+				   !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)),
+						       tst->ss->match_home(tst, c->homehost) == 1)) {
+				if (report_mismatch)
+					pr_err("%s has metadata type %s for which "
+					       "auto-assembly is disabled\n",
+					       devname, tst->ss->name);
+				tmpdev->used = 2;
+			}
+		}
+		if (dfd >= 0) close(dfd);
+		if (tmpdev->used == 2) {
+			if (auto_assem || !inargv)
+				/* Ignore unrecognised devices during auto-assembly */
+				goto loop;
+			if (ident->uuid_set || ident->name[0] ||
+			    ident->super_minor != UnSet)
+				/* Ignore unrecognised device if looking for
+				 * specific array */
+				goto loop;
+
+			pr_err("%s has no superblock - assembly aborted\n",
+			       devname);
+			if (st)
+				st->ss->free_super(st);
+			dev_policy_free(pol);
+			domain_free(domains);
+			return -1;
+		}
+
+		if (found_container) {
+			/* tmpdev is a container.  We need to be either
+			 * looking for a member, or auto-assembling
+			 */
+			/* should be safe to try an exclusive open now, we
+			 * have rejected anything that some other mdadm might
+			 * be looking at
+			 */
+			dfd = dev_open(devname, O_RDONLY | O_EXCL);
+			if (dfd < 0) {
+				if (report_mismatch)
+					pr_err("%s is busy - skipping\n", devname);
+				goto loop;
+			}
+			close(dfd);
+
+			if (ident->container && ident->container[0] != '/') {
+				/* we have a uuid */
+				int uuid[4];
+
+				content = *contentp;
+				tst->ss->getinfo_super(tst, content, NULL);
+
+				if (!parse_uuid(ident->container, uuid) ||
+				    !same_uuid(content->uuid, uuid, tst->ss->swapuuid)) {
+					if (report_mismatch)
+						pr_err("%s has wrong UUID to be required container\n",
+						       devname);
+					goto loop;
+				}
+			}
+			/* It is worth looking inside this container.
+			 */
+			if (c->verbose > 0)
+				pr_err("looking in container %s\n",
+				       devname);
+
+			/* Stop at the first member that matches, is not
+			 * already assembled and is not blocked.
+			 */
+			for (content = tst->ss->container_content(tst, NULL);
+			     content;
+			     content = content->next) {
+
+				if (!ident_matches(ident, content, tst,
+						   c->homehost, c->update,
+						   report_mismatch ? devname : NULL))
+					/* message already printed */;
+				else if (is_member_busy(content->text_version)) {
+					if (report_mismatch)
+						pr_err("member %s in %s is already assembled\n",
+						       content->text_version,
+						       devname);
+				} else if (content->array.state & (1<<MD_SB_BLOCK_VOLUME)) {
+					/* do not assemble arrays with unsupported configurations */
+					pr_err("Cannot activate member %s in %s.\n",
+					       content->text_version,
+					       devname);
+				} else
+					break;
+			}
+			if (!content) {
+				tmpdev->used = 2;
+				goto loop; /* empty container */
+			}
+
+			/* Ownership of the loaded supertype moves to 'st'. */
+			st = tst; tst = NULL;
+			if (!auto_assem && inargv && tmpdev->next != NULL) {
+				pr_err("%s is a container, but is not "
+				       "only device given: confused and aborting\n",
+				       devname);
+				st->ss->free_super(st);
+				dev_policy_free(pol);
+				domain_free(domains);
+				return -1;
+			}
+			if (c->verbose > 0)
+				pr_err("found match on member %s in %s\n",
+				       content->text_version, devname);
+
+			/* make sure we finished the loop */
+			tmpdev = NULL;
+			goto loop;
+		} else {
+			content = *contentp;
+			tst->ss->getinfo_super(tst, content, NULL);
+
+			if (!ident_matches(ident, content, tst,
+					   c->homehost, c->update,
+					   report_mismatch ? devname : NULL))
+				goto loop;
+
+			if (auto_assem) {
+				/* Never auto-assemble things that conflict
+				 * with mdadm.conf in some way
+				 */
+				struct mddev_ident *match;
+				int rv = 0;
+
+				match = conf_match(tst, content, devname,
+						   report_mismatch ? c->verbose : -1,
+						   &rv);
+				if (!match && rv == 2)
+					goto loop;
+				if (match && match->devname &&
+				    strcasecmp(match->devname, "<ignore>") == 0) {
+					if (report_mismatch)
+						pr_err("%s is a member of an explicitly ignored array\n",
+						       devname);
+					goto loop;
+				}
+				if (match && !ident_matches(match, content, tst,
+							    c->homehost, c->update,
+							    report_mismatch ? devname : NULL))
+					/* Array exists in mdadm.conf but some
+					 * details don't match, so reject it
+					 */
+					goto loop;
+			}
+
+			/* should be safe to try an exclusive open now, we
+			 * have rejected anything that some other mdadm might
+			 * be looking at
+			 */
+			dfd = dev_open(devname, O_RDONLY | O_EXCL);
+			if (dfd < 0) {
+				if (report_mismatch)
+					pr_err("%s is busy - skipping\n", devname);
+				goto loop;
+			}
+			close(dfd);
+
+			if (st == NULL)
+				st = dup_super(tst);
+			if (st->minor_version == -1)
+				st->minor_version = tst->minor_version;
+
+			if (memcmp(content->uuid, uuid_zero,
+				   sizeof(int[4])) == 0) {
+				/* this is a floating spare.  It cannot define
+				 * an array unless there are no more arrays of
+				 * this type to be found.  It can be included
+				 * in an array of this type though.
+				 */
+				tmpdev->used = 3;
+				goto loop;
+			}
+
+			if (st->ss != tst->ss ||
+			    st->minor_version != tst->minor_version ||
+			    st->ss->compare_super(st, tst) != 0) {
+				/* Some mismatch. If exactly one array matches this host,
+				 * we can resolve on that one.
+				 * Or, if we are auto assembling, we just ignore the second
+				 * for now.
+				 */
+				if (auto_assem)
+					goto loop;
+				if (c->homehost) {
+					int first = st->ss->match_home(st, c->homehost);
+					int last = tst->ss->match_home(tst, c->homehost);
+					if (first != last &&
+					    (first == 1 || last == 1)) {
+						/* We can do something */
+						if (first) {/* just ignore this one */
+							if (report_mismatch)
+								pr_err("%s misses out due to wrong homehost\n",
+								       devname);
+							goto loop;
+						} else { /* reject all those sofar */
+							struct mddev_dev *td;
+							if (report_mismatch)
+								pr_err("%s overrides previous devices due to good homehost\n",
+								       devname);
+							for (td=devlist; td != tmpdev; td=td->next)
+								if (td->used == 1)
+									td->used = 0;
+							tmpdev->used = 1;
+							goto loop;
+						}
+					}
+				}
+				pr_err("superblock on %s doesn't match others - assembly aborted\n",
+				       devname);
+				tst->ss->free_super(tst);
+				st->ss->free_super(st);
+				dev_policy_free(pol);
+				domain_free(domains);
+				return -1;
+			}
+			tmpdev->used = 1;
+		}
+	loop:
+		/* Collect domain information from members only */
+		if (tmpdev && tmpdev->used == 1) {
+			if (!pol)
+				pol = devid_policy(stb.st_rdev);
+			domain_merge(&domains, pol, tst?tst->ss->name:NULL);
+		}
+		dev_policy_free(pol);
+		pol = NULL;
+		if (tst)
+			tst->ss->free_super(tst);
+	}
+
+	/* Check if we found some imsm spares but no members */
+	if ((auto_assem ||
+	     (ident->uuid_set &&
+	      memcmp(uuid_zero, ident->uuid,sizeof(uuid_zero)) == 0)) &&
+	    (!st || !st->sb))
+		for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
+			if (tmpdev->used != 3)
+				continue;
+			tmpdev->used = 1;
+			content = *contentp;
+
+			if (!st->sb) {
+				/* we need sb from one of the spares */
+				int dfd = dev_open(tmpdev->devname, O_RDONLY);
+				if (dfd < 0 ||
+				    st->ss->load_super(st, dfd, NULL))
+					tmpdev->used = 2;
+				/* 0 is a valid descriptor too */
+				if (dfd >= 0)
+					close(dfd);
+			}
+		}
+
+	/* Now reject spares that don't match domains of identified members */
+	for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
+		struct stat stb;
+		if (tmpdev->used != 3)
+			continue;
+		if (stat(tmpdev->devname, &stb)< 0) {
+			pr_err("stat failed for %s: %s\n",
+			       tmpdev->devname, strerror(errno));
+			tmpdev->used = 2;
+		} else {
+			struct dev_policy *pol = devid_policy(stb.st_rdev);
+			int dt = domain_test(domains, pol, NULL);
+			if (inargv && dt != 0)
+				/* take this spare as domains match
+				 * if there are any */
+				tmpdev->used = 1;
+			else if (!inargv && dt == 1)
+				/* device wasn't explicitly listed, so need
+				 * explicit domain match - which we have */
+				tmpdev->used = 1;
+			else
+				/* if domains don't match mark as unused */
+				tmpdev->used = 0;
+			dev_policy_free(pol);
+		}
+	}
+	domain_free(domains);
+	*stp = st;
+	/* Refresh the caller's buffer from the final superblock, unless
+	 * 'content' points at a container member instead.
+	 */
+	if (st && st->sb && content == *contentp)
+		st->ss->getinfo_super(st, content, NULL);
+	*contentp = content;
+
+	return num_devs;
+}
+
+struct devs {
+ char *devname;
+ int uptodate; /* set once we decide that this device is as
+ * recent as everything else in the array.
+ */
+ int included; /* set if the device is already in the array
+ * due to a previous '-I'
+ */
+ struct mdinfo i;
+};
+
+static int load_devices(struct devs *devices, char *devmap,
+ struct mddev_ident *ident, struct supertype **stp,
+ struct mddev_dev *devlist, struct context *c,
+ struct mdinfo *content,
+ int mdfd, char *mddev,
+ int *most_recentp, int *bestcntp, int **bestp,
+ int inargv)
+{
+ struct mddev_dev *tmpdev;
+ int devcnt = 0;
+ int nextspare = 0;
+#ifndef MDASSEMBLE
+ int bitmap_done = 0;
+#endif
+ int most_recent = -1;
+ int bestcnt = 0;
+ int *best = *bestp;
+ struct supertype *st = *stp;
+
+ for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) {
+ char *devname = tmpdev->devname;
+ struct stat stb;
+ struct supertype *tst;
+ int i;
+
+ if (tmpdev->used != 1)
+ continue;
+ /* looks like a good enough match to update the super block if needed */
+#ifndef MDASSEMBLE
+ if (c->update) {
+ int dfd;
+ /* prepare useful information in info structures */
+ struct stat stb2;
+ int err;
+ fstat(mdfd, &stb2);
+
+ if (strcmp(c->update, "uuid")==0 &&
+ !ident->uuid_set) {
+ int rfd;
+ if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
+ read(rfd, ident->uuid, 16) != 16) {
+ *(__u32*)(ident->uuid) = random();
+ *(__u32*)(ident->uuid+1) = random();
+ *(__u32*)(ident->uuid+2) = random();
+ *(__u32*)(ident->uuid+3) = random();
+ }
+ if (rfd >= 0) close(rfd);
+ }
+ dfd = dev_open(devname,
+ tmpdev->disposition == 'I'
+ ? O_RDWR : (O_RDWR|O_EXCL));
+
+ tst = dup_super(st);
+ if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) {
+ pr_err("cannot re-read metadata from %s - aborting\n",
+ devname);
+ if (dfd >= 0)
+ close(dfd);
+ close(mdfd);
+ free(devices);
+ free(devmap);
+ tst->ss->free_super(tst);
+ free(tst);
+ *stp = st;
+ return -1;
+ }
+ tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks);
+
+ memcpy(content->uuid, ident->uuid, 16);
+ strcpy(content->name, ident->name);
+ content->array.md_minor = minor(stb2.st_rdev);
+
+ if (strcmp(c->update, "byteorder") == 0)
+ err = 0;
+ else
+ err = tst->ss->update_super(tst, content, c->update,
+ devname, c->verbose,
+ ident->uuid_set,
+ c->homehost);
+ if (err < 0) {
+ if (err == -1)
+ pr_err("--update=%s not understood"
+ " for %s metadata\n",
+ c->update, tst->ss->name);
+ tst->ss->free_super(tst);
+ free(tst);
+ close(mdfd);
+ close(dfd);
+ free(devices);
+ free(devmap);
+ *stp = st;
+ return -1;
+ }
+ if (strcmp(c->update, "uuid")==0 &&
+ !ident->uuid_set) {
+ ident->uuid_set = 1;
+ memcpy(ident->uuid, content->uuid, 16);
+ }
+ if (tst->ss->store_super(tst, dfd))
+ pr_err("Could not re-write superblock on %s.\n",
+ devname);
+ close(dfd);
+
+ if (strcmp(c->update, "uuid")==0 &&
+ ident->bitmap_fd >= 0 && !bitmap_done) {
+ if (bitmap_update_uuid(ident->bitmap_fd,
+ content->uuid,
+ tst->ss->swapuuid) != 0)
+ pr_err("Could not update uuid on external bitmap.\n");
+ else
+ bitmap_done = 1;
+ }
+ } else
+#endif
+ {
+ int dfd = dev_open(devname,
+ tmpdev->disposition == 'I'
+ ? O_RDWR : (O_RDWR|O_EXCL));
+ tst = dup_super(st);
+
+ if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) {
+ pr_err("cannot re-read metadata from %s - aborting\n",
+ devname);
+ if (dfd >= 0)
+ close(dfd);
+ close(mdfd);
+ free(devices);
+ free(devmap);
+ tst->ss->free_super(tst);
+ free(tst);
+ *stp = st;
+ return -1;
+ }
+ tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks);
+ close(dfd);
+ }
+
+ stat(devname, &stb);
+
+ if (c->verbose > 0)
+ pr_err("%s is identified as a member of %s, slot %d%s.\n",
+ devname, mddev, content->disk.raid_disk,
+ (content->disk.state & (1<<MD_DISK_REPLACEMENT)) ? " replacement":"");
+ devices[devcnt].devname = devname;
+ devices[devcnt].uptodate = 0;
+ devices[devcnt].included = (tmpdev->disposition == 'I');
+ devices[devcnt].i = *content;
+ devices[devcnt].i.disk.major = major(stb.st_rdev);
+ devices[devcnt].i.disk.minor = minor(stb.st_rdev);
+
+ if (devices[devcnt].i.disk.state == 6) {
+ if (most_recent < 0 ||
+ devices[devcnt].i.events
+ > devices[most_recent].i.events) {
+ struct supertype *tmp = tst;
+ tst = st;
+ st = tmp;
+ most_recent = devcnt;
+ }
+ }
+ tst->ss->free_super(tst);
+ free(tst);
+
+ if (content->array.level == LEVEL_MULTIPATH)
+ /* with multipath, the raid_disk from the superblock is meaningless */
+ i = devcnt;
+ else
+ i = devices[devcnt].i.disk.raid_disk;
+ if (i+1 == 0) {
+ if (nextspare < content->array.raid_disks*2)
+ nextspare = content->array.raid_disks*2;
+ i = nextspare++;
+ } else {
+ /* i is raid_disk - double it so there is room for
+ * replacements */
+ i *= 2;
+ if (devices[devcnt].i.disk.state & (1<<MD_DISK_REPLACEMENT))
+ i++;
+ if (i >= content->array.raid_disks*2 &&
+ i >= nextspare)
+ nextspare = i+1;
+ }
+ if (i < 10000) {
+ if (i >= bestcnt) {
+ int newbestcnt = i+10;
+ int *newbest = xmalloc(sizeof(int)*newbestcnt);
+ int c;
+ for (c=0; c < newbestcnt; c++)
+ if (c < bestcnt)
+ newbest[c] = best[c];
+ else
+ newbest[c] = -1;
+ if (best)free(best);
+ best = newbest;
+ bestcnt = newbestcnt;
+ }
+ if (best[i] >=0 &&
+ devices[best[i]].i.events
+ == devices[devcnt].i.events
+ && (devices[best[i]].i.disk.minor
+ != devices[devcnt].i.disk.minor)
+ && st->ss == &super0
+ && content->array.level != LEVEL_MULTIPATH) {
+ /* two different devices with identical superblock.
+ * Could be a mis-detection caused by overlapping
+ * partitions. fail-safe.
+ */
+ pr_err("WARNING %s and %s appear"
+ " to have very similar superblocks.\n"
+ " If they are really different, "
+ "please --zero the superblock on one\n"
+ " If they are the same or overlap,"
+ " please remove one from %s.\n",
+ devices[best[i]].devname, devname,
+ inargv ? "the list" :
+ "the\n DEVICE list in mdadm.conf"
+ );
+ close(mdfd);
+ free(devices);
+ free(devmap);
+ *stp = st;
+ return -1;
+ }
+ if (best[i] == -1
+ || (devices[best[i]].i.events
+ < devices[devcnt].i.events))
+ best[i] = devcnt;
+ }
+ devcnt++;
+ }
+ if (most_recent >= 0)
+ *most_recentp = most_recent;
+ *bestcntp = bestcnt;
+ *bestp = best;
+ *stp = st;
+ return devcnt;
+}
+
+static int force_array(struct mdinfo *content,
+ struct devs *devices,
+ int *best, int bestcnt, char *avail,
+ int most_recent,
+ struct supertype *st,
+ struct context *c)
+{
+ int okcnt = 0;
+ while (!enough(content->array.level, content->array.raid_disks,
+ content->array.layout, 1,
+ avail)
+ ||
+ (content->reshape_active && content->delta_disks > 0 &&
+ !enough(content->array.level, (content->array.raid_disks
+ - content->delta_disks),
+ content->new_layout, 1,
+ avail)
+ )) {
+ /* Choose the newest best drive which is
+ * not up-to-date, update the superblock
+ * and add it.
+ */
+ int fd;
+ struct supertype *tst;
+ unsigned long long current_events;
+ int chosen_drive = -1;
+ int i;
+
+ for (i = 0;
+ i < content->array.raid_disks * 2 && i < bestcnt;
+ i += 2) {
+ int j = best[i];
+ if (j>=0 &&
+ !devices[j].uptodate &&
+ devices[j].i.recovery_start == MaxSector &&
+ (chosen_drive < 0 ||
+ devices[j].i.events
+ > devices[chosen_drive].i.events))
+ chosen_drive = j;
+ }
+ if (chosen_drive < 0)
+ break;
+ current_events = devices[chosen_drive].i.events;
+ add_another:
+ if (c->verbose >= 0)
+ pr_err("forcing event count in %s(%d) from %d upto %d\n",
+ devices[chosen_drive].devname,
+ devices[chosen_drive].i.disk.raid_disk,
+ (int)(devices[chosen_drive].i.events),
+ (int)(devices[most_recent].i.events));
+ fd = dev_open(devices[chosen_drive].devname,
+ devices[chosen_drive].included ? O_RDWR
+ : (O_RDWR|O_EXCL));
+ if (fd < 0) {
+ pr_err("Couldn't open %s for write - not updating\n",
+ devices[chosen_drive].devname);
+ devices[chosen_drive].i.events = 0;
+ continue;
+ }
+ tst = dup_super(st);
+ if (tst->ss->load_super(tst,fd, NULL)) {
+ close(fd);
+ pr_err("RAID superblock disappeared from %s - not updating.\n",
+ devices[chosen_drive].devname);
+ devices[chosen_drive].i.events = 0;
+ continue;
+ }
+ content->events = devices[most_recent].i.events;
+ tst->ss->update_super(tst, content, "force-one",
+ devices[chosen_drive].devname, c->verbose,
+ 0, NULL);
+
+ if (tst->ss->store_super(tst, fd)) {
+ close(fd);
+ pr_err("Could not re-write superblock on %s\n",
+ devices[chosen_drive].devname);
+ devices[chosen_drive].i.events = 0;
+ tst->ss->free_super(tst);
+ continue;
+ }
+ close(fd);
+ devices[chosen_drive].i.events = devices[most_recent].i.events;
+ devices[chosen_drive].uptodate = 1;
+ avail[chosen_drive] = 1;
+ okcnt++;
+ tst->ss->free_super(tst);
+
+ /* If there are any other drives of the same vintage,
+ * add them in as well. We can't lose and we might gain
+ */
+ for (i = 0;
+ i < content->array.raid_disks * 2 && i < bestcnt ;
+ i += 2) {
+ int j = best[i];
+ if (j >= 0 &&
+ !devices[j].uptodate &&
+ devices[j].i.recovery_start == MaxSector &&
+ devices[j].i.events == current_events) {
+ chosen_drive = j;
+ goto add_another;
+ }
+ }
+ }
+ return okcnt;
+}
+
+static int start_array(int mdfd,
+ char *mddev,
+ struct mdinfo *content,
+ struct supertype *st,
+ struct mddev_ident *ident,
+ int *best, int bestcnt,
+ int chosen_drive,
+ struct devs *devices,
+ unsigned int okcnt,
+ unsigned int sparecnt,
+ unsigned int rebuilding_cnt,
+ struct context *c,
+ int clean, char *avail,
+ int start_partial_ok,
+ int err_ok,
+ int was_forced
+ )
+{
+ int rv;
+ int i;
+ unsigned int req_cnt;
+
+ rv = set_array_info(mdfd, st, content);
+ if (rv && !err_ok) {
+ pr_err("failed to set array info for %s: %s\n",
+ mddev, strerror(errno));
+ return 1;
+ }
+ if (ident->bitmap_fd >= 0) {
+ if (ioctl(mdfd, SET_BITMAP_FILE, ident->bitmap_fd) != 0) {
+ pr_err("SET_BITMAP_FILE failed.\n");
+ return 1;
+ }
+ } else if (ident->bitmap_file) {
+ /* From config file */
+ int bmfd = open(ident->bitmap_file, O_RDWR);
+ if (bmfd < 0) {
+ pr_err("Could not open bitmap file %s\n",
+ ident->bitmap_file);
+ return 1;
+ }
+ if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
+ pr_err("Failed to set bitmapfile for %s\n", mddev);
+ close(bmfd);
+ return 1;
+ }
+ close(bmfd);
+ }
+
+ /* First, add the raid disks, but add the chosen one last */
+ for (i=0; i<= bestcnt; i++) {
+ int j;
+ if (i < bestcnt) {
+ j = best[i];
+ if (j == chosen_drive)
+ continue;
+ } else
+ j = chosen_drive;
+
+ if (j >= 0 && !devices[j].included) {
+ int dfd = dev_open(devices[j].devname,
+ O_RDWR|O_EXCL);
+ if (dfd >= 0) {
+ remove_partitions(dfd);
+ close(dfd);
+ }
+ rv = add_disk(mdfd, st, content, &devices[j].i);
+
+ if (rv) {
+ pr_err("failed to add "
+ "%s to %s: %s\n",
+ devices[j].devname,
+ mddev,
+ strerror(errno));
+ if (i < content->array.raid_disks * 2
+ || i == bestcnt)
+ okcnt--;
+ else
+ sparecnt--;
+ } else if (c->verbose > 0)
+ pr_err("added %s to %s as %d%s%s\n",
+ devices[j].devname, mddev,
+ devices[j].i.disk.raid_disk,
+ devices[j].uptodate?"":
+ " (possibly out of date)",
+ (devices[j].i.disk.state & (1<<MD_DISK_REPLACEMENT))?" replacement":"");
+ } else if (j >= 0) {
+ if (c->verbose > 0)
+ pr_err("%s is already in %s as %d\n",
+ devices[j].devname, mddev,
+ devices[j].i.disk.raid_disk);
+ } else if (c->verbose > 0 && i < content->array.raid_disks*2
+ && (i&1) == 0)
+ pr_err("no uptodate device for slot %d of %s\n",
+ i, mddev);
+ }
+
+ if (content->array.level == LEVEL_CONTAINER) {
+ if (c->verbose >= 0) {
+ pr_err("Container %s has been "
+ "assembled with %d drive%s",
+ mddev, okcnt+sparecnt, okcnt+sparecnt==1?"":"s");
+ if (okcnt < (unsigned)content->array.raid_disks)
+ fprintf(stderr, " (out of %d)",
+ content->array.raid_disks);
+ fprintf(stderr, "\n");
+ }
+
+ if (st->ss->validate_container) {
+ struct mdinfo *devices_list;
+ struct mdinfo *info_devices = xmalloc(sizeof(struct mdinfo)*(okcnt+sparecnt));
+ unsigned int count;
+ devices_list = NULL;
+ for (count = 0; count < okcnt+sparecnt; count++) {
+ info_devices[count] = devices[count].i;
+ info_devices[count].next = devices_list;
+ devices_list = &info_devices[count];
+ }
+ if (st->ss->validate_container(devices_list))
+ pr_err("Mismatch detected!\n");
+ free(info_devices);
+ }
+
+ st->ss->free_super(st);
+ sysfs_uevent(content, "change");
+ if (err_ok && okcnt < (unsigned)content->array.raid_disks)
+ /* Was partial, is still partial, so signal an error
+ * to ensure we don't retry */
+ return 1;
+ return 0;
+ }
+
+ /* Get number of in-sync devices according to the superblock.
+ * We must have this number to start the array without -s or -R
+ */
+ req_cnt = content->array.working_disks;
+
+ if (c->runstop == 1 ||
+ (c->runstop <= 0 &&
+ ( enough(content->array.level, content->array.raid_disks,
+ content->array.layout, clean, avail) &&
+ (okcnt + rebuilding_cnt >= req_cnt || start_partial_ok)
+ ))) {
+ /* This array is good-to-go.
+ * If a reshape is in progress then we might need to
+ * continue monitoring it. In that case we start
+ * it read-only and let the grow code make it writable.
+ */
+ int rv;
+#ifndef MDASSEMBLE
+ if (content->reshape_active &&
+ !(content->reshape_active & RESHAPE_NO_BACKUP) &&
+ content->delta_disks <= 0) {
+ if (!c->backup_file) {
+ pr_err("%s: Need a backup file to complete reshape of this array.\n",
+ mddev);
+ pr_err("Please provided one with \"--backup-file=...\"\n");
+ if (c->update &&
+ strcmp(c->update, "revert-reshape") == 0)
+ pr_err("(Don't specify --update=revert-reshape again, that part succeeded.)\n");
+ return 1;
+ }
+ rv = sysfs_set_str(content, NULL,
+ "array_state", "readonly");
+ if (rv == 0)
+ rv = Grow_continue(mdfd, st, content,
+ c->backup_file, 0,
+ c->freeze_reshape);
+ } else if (c->readonly &&
+ sysfs_attribute_available(
+ content, NULL, "array_state")) {
+ rv = sysfs_set_str(content, NULL,
+ "array_state", "readonly");
+ } else
+#endif
+ rv = ioctl(mdfd, RUN_ARRAY, NULL);
+ reopen_mddev(mdfd); /* drop O_EXCL */
+ if (rv == 0) {
+ if (c->verbose >= 0) {
+ pr_err("%s has been started with %d drive%s",
+ mddev, okcnt, okcnt==1?"":"s");
+ if (okcnt < (unsigned)content->array.raid_disks)
+ fprintf(stderr, " (out of %d)", content->array.raid_disks);
+ if (rebuilding_cnt)
+ fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt);
+ if (sparecnt)
+ fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
+ fprintf(stderr, ".\n");
+ }
+ if (content->reshape_active &&
+ content->array.level >= 4 &&
+ content->array.level <= 6) {
+ /* might need to increase the size
+ * of the stripe cache - default is 256
+ */
+ if (256 < 4 * (content->array.chunk_size/4096)) {
+ struct mdinfo *sra = sysfs_read(mdfd, NULL, 0);
+ if (sra)
+ sysfs_set_num(sra, NULL,
+ "stripe_cache_size",
+ (4 * content->array.chunk_size / 4096) + 1);
+ sysfs_free(sra);
+ }
+ }
+ if (okcnt < (unsigned)content->array.raid_disks) {
+ /* If any devices did not get added
+ * because the kernel rejected them based
+ * on event count, try adding them
+ * again providing the action policy is
+ * 're-add' or greater. The bitmap
+ * might allow them to be included, or
+ * they will become spares.
+ */
+ for (i = 0; i < bestcnt; i++) {
+ int j = best[i];
+ if (j >= 0 && !devices[j].uptodate) {
+ if (!disk_action_allows(&devices[j].i, st->ss->name, act_re_add))
+ continue;
+ rv = add_disk(mdfd, st, content,
+ &devices[j].i);
+ if (rv == 0 && c->verbose >= 0)
+ pr_err("%s has been re-added.\n",
+ devices[j].devname);
+ }
+ }
+ }
+ if (content->array.level == 6 &&
+ okcnt + 1 == (unsigned)content->array.raid_disks &&
+ was_forced) {
+ struct mdinfo *sra = sysfs_read(mdfd, NULL, 0);
+ if (sra)
+ sysfs_set_str(sra, NULL,
+ "sync_action", "repair");
+ sysfs_free(sra);
+ }
+ return 0;
+ }
+ pr_err("failed to RUN_ARRAY %s: %s\n",
+ mddev, strerror(errno));
+
+ if (!enough(content->array.level, content->array.raid_disks,
+ content->array.layout, 1, avail))
+ pr_err("Not enough devices to "
+ "start the array.\n");
+ else if (!enough(content->array.level,
+ content->array.raid_disks,
+ content->array.layout, clean,
+ avail))
+ pr_err("Not enough devices to "
+ "start the array while not clean "
+ "- consider --force.\n");
+
+ return 1;
+ }
+ if (c->runstop == -1) {
+ pr_err("%s assembled from %d drive%s",
+ mddev, okcnt, okcnt==1?"":"s");
+ if (okcnt != (unsigned)content->array.raid_disks)
+ fprintf(stderr, " (out of %d)", content->array.raid_disks);
+ fprintf(stderr, ", but not started.\n");
+ return 2;
+ }
+ if (c->verbose >= -1) {
+ pr_err("%s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s");
+ if (rebuilding_cnt)
+ fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt);
+ if (sparecnt)
+ fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
+ if (!enough(content->array.level, content->array.raid_disks,
+ content->array.layout, 1, avail))
+ fprintf(stderr, " - not enough to start the array.\n");
+ else if (!enough(content->array.level,
+ content->array.raid_disks,
+ content->array.layout, clean,
+ avail))
+ fprintf(stderr, " - not enough to start the "
+ "array while not clean - consider "
+ "--force.\n");
+ else {
+ if (req_cnt == (unsigned)content->array.raid_disks)
+ fprintf(stderr, " - need all %d to start it", req_cnt);
+ else
+ fprintf(stderr, " - need %d to start", req_cnt);
+ fprintf(stderr, " (use --run to insist).\n");
+ }
+ }
+ return 1;
+}
+
+int Assemble(struct supertype *st, char *mddev,
+ struct mddev_ident *ident,
+ struct mddev_dev *devlist,
+ struct context *c)
+{
+ /*
+ * The task of Assemble is to find a collection of
+ * devices that should (according to their superblocks)
+ * form an array, and to give this collection to the MD driver.
+ * In Linux-2.4 and later, this involves submitting a
+ * SET_ARRAY_INFO ioctl with no arg - to prepare
+ * the array - and then submit a number of
+ * ADD_NEW_DISK ioctls to add disks into
+ * the array. Finally RUN_ARRAY might
+ * be submitted to start the array.
+ *
+ * Much of the work of Assemble is in finding and/or
+ * checking the disks to make sure they look right.
+ *
+ * If mddev is not set, then scan must be set and we
+ * read through the config file for dev+uuid mapping
+ * We recurse, setting mddev, for each device that
+ * - isn't running
+ * - has a valid uuid (or any uuid if !uuidset)
+ *
+ * If mddev is set, we try to determine state of md.
+ * check version - must be at least 0.90.0
+ * check kernel version. must be at least 2.4.
+ * If not, we can possibly fall back on START_ARRAY
+ * Try to GET_ARRAY_INFO.
+ * If possible, give up
+ * If not, try to STOP_ARRAY just to make sure
+ *
+ * If !uuidset and scan, look in conf-file for uuid
+ * If not found, give up
+ * If !devlist and scan and uuidset, get list of devs from conf-file
+ *
+ * For each device:
+ * Check superblock - discard if bad
+ * Check uuid (set if we don't have one) - discard if no match
+ * Check superblock similarity if we have a superblock - discard if different
+ * Record events, devicenum
+ * This should give us a list of devices for the array
+ * We should collect the most recent event number
+ *
+ * Count disks with recent enough event count
+ * While force && !enough disks
+ * Choose newest rejected disks, update event count
+ * mark clean and rewrite superblock
+ * If recent kernel:
+ * SET_ARRAY_INFO
+ * foreach device with recent events : ADD_NEW_DISK
+ * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
+ * If old kernel:
+ * Check the device numbers in superblock are right
+ * update superblock if any changes
+ * START_ARRAY
+ *
+ */
+ int rv;
+ int mdfd;
+ int clean;
+ int auto_assem = (mddev == NULL && !ident->uuid_set &&
+ ident->super_minor == UnSet && ident->name[0] == 0
+ && (ident->container == NULL || ident->member == NULL));
+ struct devs *devices;
+ char *devmap;
+ int *best = NULL; /* indexed by raid_disk */
+ int bestcnt = 0;
+ int devcnt;
+ unsigned int okcnt, sparecnt, rebuilding_cnt, replcnt;
+ int i;
+ int was_forced = 0;
+ int most_recent = 0;
+ int chosen_drive;
+ int change = 0;
+ int inargv = 0;
+ int start_partial_ok = (c->runstop >= 0) &&
+ (c->force || devlist==NULL || auto_assem);
+ int num_devs;
+ struct mddev_dev *tmpdev;
+ struct mdinfo info;
+ struct mdinfo *content = NULL;
+ struct mdinfo *pre_exist = NULL;
+ char *avail;
+ char *name = NULL;
+ char chosen_name[1024];
+ struct map_ent *map = NULL;
+ struct map_ent *mp;
+
+ /*
+ * If any subdevs are listed, then any that don't
+ * match ident are discarded. Remainder must all match and
+ * become the array.
+ * If no subdevs, then we scan all devices in the config file, but
+ * there must be something in the identity
+ */
+
+ if (!devlist &&
+ ident->uuid_set == 0 &&
+ (ident->super_minor < 0 || ident->super_minor == UnSet) &&
+ ident->name[0] == 0 &&
+ (ident->container == NULL || ident->member == NULL) &&
+ ident->devices == NULL) {
+ pr_err("No identity information available for %s - cannot assemble.\n",
+ mddev ? mddev : "further assembly");
+ return 1;
+ }
+
+ if (devlist == NULL)
+ devlist = conf_get_devs();
+ else if (mddev)
+ inargv = 1;
+
+try_again:
+ /* We come back here when doing auto-assembly and attempting some
+ * set of devices failed. Those are now marked as ->used==2 and
+ * we ignore them and try again
+ */
+ if (!st && ident->st)
+ st = ident->st;
+ if (c->verbose>0)
+ pr_err("looking for devices for %s\n",
+ mddev ? mddev : "further assembly");
+
+ content = &info;
+ if (st)
+ st->ignore_hw_compat = 1;
+ num_devs = select_devices(devlist, ident, &st, &content, c,
+ inargv, auto_assem);
+ if (num_devs < 0)
+ return 1;
+
+ if (!st || !st->sb || !content)
+ return 2;
+
+ /* We have a full set of devices - we now need to find the
+ * array device.
+ * However there is a risk that we are racing with "mdadm -I"
+ * and the array is already partially assembled - we will have
+ * rejected any devices already in this address.
+ * So we take a lock on the map file - to prevent further races -
+ * and look for the uuid in there. If found and the array is
+ * active, we abort. If found and the array is not active
+ * we commit to that md device and add all the contained devices
+ * to our list. We flag them so that we don't try to re-add,
+ * but can remove if they turn out to not be wanted.
+ */
+ if (map_lock(&map))
+ pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
+ mp = map_by_uuid(&map, content->uuid);
+ if (mp) {
+ struct mdinfo *dv;
+ /* array already exists. */
+ pre_exist = sysfs_read(-1, mp->devnm, GET_LEVEL|GET_DEVS);
+ if (pre_exist->array.level != UnSet) {
+ pr_err("Found some drive for an array that is already active: %s\n",
+ mp->path);
+ pr_err("giving up.\n");
+ return 1;
+ }
+ for (dv = pre_exist->devs; dv; dv = dv->next) {
+ /* We want to add this device to our list,
+ * but it could already be there if "mdadm -I"
+ * started *after* we checked for O_EXCL.
+ * If we add it to the top of the list
+ * it will be preferred over later copies.
+ */
+ struct mddev_dev *newdev;
+ char *devname = map_dev(dv->disk.major,
+ dv->disk.minor,
+ 0);
+ if (!devname)
+ continue;
+ newdev = xmalloc(sizeof(*newdev));
+ newdev->devname = devname;
+ newdev->disposition = 'I';
+ newdev->used = 1;
+ newdev->next = devlist;
+ devlist = newdev;
+ num_devs++;
+ }
+ strcpy(chosen_name, mp->path);
+ if (c->verbose > 0 || mddev == NULL ||
+ strcmp(mddev, chosen_name) != 0)
+ pr_err("Merging with already-assembled %s\n",
+ chosen_name);
+ mdfd = open_dev_excl(mp->devnm);
+ } else {
+ int trustworthy = FOREIGN;
+ name = content->name;
+ switch (st->ss->match_home(st, c->homehost)
+ ?: st->ss->match_home(st, "any")) {
+ case 1:
+ trustworthy = LOCAL;
+ name = strchr(content->name, ':');
+ if (name)
+ name++;
+ else
+ name = content->name;
+ break;
+ }
+ if (!auto_assem)
+ /* If the array is listed in mdadm.conf or on
+ * command line, then we trust the name
+ * even if the array doesn't look local
+ */
+ trustworthy = LOCAL;
+
+ if (name[0] == 0 &&
+ content->array.level == LEVEL_CONTAINER) {
+ name = content->text_version;
+ trustworthy = METADATA;
+ }
+
+ if (name[0] && trustworthy != LOCAL &&
+ ! c->require_homehost &&
+ conf_name_is_free(name))
+ trustworthy = LOCAL;
+
+ if (trustworthy == LOCAL &&
+ strchr(name, ':'))
+ /* Ignore 'host:' prefix of name */
+ name = strchr(name, ':')+1;
+
+ mdfd = create_mddev(mddev, name, ident->autof, trustworthy,
+ chosen_name);
+ }
+ if (mdfd < 0) {
+ st->ss->free_super(st);
+ if (auto_assem)
+ goto try_again;
+ return 1;
+ }
+ mddev = chosen_name;
+ if (get_linux_version() < 2004000 ||
+ md_get_version(mdfd) < 9000) {
+ pr_err("Assemble requires Linux 2.4 or later, and\n"
+ " md driver version 0.90.0 or later.\n"
+ " Upgrade your kernel or try --build\n");
+ close(mdfd);
+ return 1;
+ }
+ if (pre_exist == NULL) {
+ if (mddev_busy(fd2devnm(mdfd))) {
+ pr_err("%s already active, cannot restart it!\n",
+ mddev);
+ for (tmpdev = devlist ;
+ tmpdev && tmpdev->used != 1;
+ tmpdev = tmpdev->next)
+ ;
+ if (tmpdev && auto_assem)
+ pr_err("%s needed for %s...\n",
+ mddev, tmpdev->devname);
+ close(mdfd);
+ mdfd = -3;
+ st->ss->free_super(st);
+ if (auto_assem)
+ goto try_again;
+ return 1;
+ }
+ /* just incase it was started but has no content */
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ }
+
+#ifndef MDASSEMBLE
+ if (content != &info) {
+ /* This is a member of a container. Try starting the array. */
+ int err;
+ err = assemble_container_content(st, mdfd, content, c,
+ chosen_name, NULL);
+ close(mdfd);
+ return err;
+ }
+#endif
+ /* Ok, no bad inconsistancy, we can try updating etc */
+ devices = xcalloc(num_devs, sizeof(*devices));
+ devmap = xcalloc(num_devs, content->array.raid_disks);
+ devcnt = load_devices(devices, devmap, ident, &st, devlist,
+ c, content, mdfd, mddev,
+ &most_recent, &bestcnt, &best, inargv);
+ if (devcnt < 0)
+ return 1;
+
+ if (devcnt == 0) {
+ pr_err("no devices found for %s\n",
+ mddev);
+ if (st)
+ st->ss->free_super(st);
+ close(mdfd);
+ free(devices);
+ free(devmap);
+ return 1;
+ }
+
+ if (c->update && strcmp(c->update, "byteorder")==0)
+ st->minor_version = 90;
+
+ st->ss->getinfo_super(st, content, NULL);
+ clean = content->array.state & 1;
+
+ /* now we have some devices that might be suitable.
+ * I wonder how many
+ */
+ avail = xcalloc(content->array.raid_disks, 1);
+ okcnt = 0;
+ replcnt = 0;
+ sparecnt=0;
+ rebuilding_cnt=0;
+ for (i=0; i< bestcnt; i++) {
+ int j = best[i];
+ int event_margin = 1; /* always allow a difference of '1'
+ * like the kernel does
+ */
+ if (j < 0) continue;
+ /* note: we ignore error flags in multipath arrays
+ * as they don't make sense
+ */
+ if (content->array.level != LEVEL_MULTIPATH)
+ if (!(devices[j].i.disk.state & (1<<MD_DISK_ACTIVE))) {
+ if (!(devices[j].i.disk.state
+ & (1<<MD_DISK_FAULTY))) {
+ devices[j].uptodate = 1;
+ sparecnt++;
+ }
+ continue;
+ }
+ /* If this device thinks that 'most_recent' has failed, then
+ * we must reject this device.
+ */
+ if (j != most_recent && !c->force &&
+ content->array.raid_disks > 0 &&
+ devices[most_recent].i.disk.raid_disk >= 0 &&
+ devmap[j * content->array.raid_disks + devices[most_recent].i.disk.raid_disk] == 0) {
+ if (c->verbose > -1)
+ pr_err("ignoring %s as it reports %s as failed\n",
+ devices[j].devname, devices[most_recent].devname);
+ best[i] = -1;
+ continue;
+ }
+ /* Require event counter to be same as, or just less than,
+ * most recent. If it is bigger, it must be a stray spare and
+ * should be ignored.
+ */
+ if (devices[j].i.events+event_margin >=
+ devices[most_recent].i.events &&
+ devices[j].i.events <=
+ devices[most_recent].i.events
+ ) {
+ devices[j].uptodate = 1;
+ if (i < content->array.raid_disks * 2) {
+ if (devices[j].i.recovery_start == MaxSector ||
+ (content->reshape_active &&
+ ((i >= content->array.raid_disks - content->delta_disks) ||
+ (i >= content->array.raid_disks - content->delta_disks - 1
+ && content->array.level == 4)))) {
+ if (!avail[i/2]) {
+ okcnt++;
+ avail[i/2]=1;
+ } else
+ replcnt++;
+ } else
+ rebuilding_cnt++;
+ } else
+ sparecnt++;
+ }
+ }
+ free(devmap);
+ if (c->force) {
+ int force_ok = force_array(content, devices, best, bestcnt,
+ avail, most_recent, st, c);
+ okcnt += force_ok;
+ if (force_ok)
+ was_forced = 1;
+ }
+ /* Now we want to look at the superblock which the kernel will base things on
+ * and compare the devices that we think are working with the devices that the
+ * superblock thinks are working.
+ * If there are differences and --force is given, then update this chosen
+ * superblock.
+ */
+ chosen_drive = -1;
+ st->ss->free_super(st);
+ for (i=0; chosen_drive < 0 && i<bestcnt; i+=2) {
+ int j = best[i];
+ int fd;
+
+ if (j<0)
+ continue;
+ if (!devices[j].uptodate)
+ continue;
+ if (devices[j].i.events < devices[most_recent].i.events)
+ continue;
+ chosen_drive = j;
+ if ((fd=dev_open(devices[j].devname,
+ devices[j].included ? O_RDONLY
+ : (O_RDONLY|O_EXCL)))< 0) {
+ pr_err("Cannot open %s: %s\n",
+ devices[j].devname, strerror(errno));
+ close(mdfd);
+ free(devices);
+ return 1;
+ }
+ if (st->ss->load_super(st,fd, NULL)) {
+ close(fd);
+ pr_err("RAID superblock has disappeared from %s\n",
+ devices[j].devname);
+ close(mdfd);
+ free(devices);
+ return 1;
+ }
+ close(fd);
+ }
+ if (st->sb == NULL) {
+ pr_err("No suitable drives found for %s\n", mddev);
+ close(mdfd);
+ free(devices);
+ return 1;
+ }
+ st->ss->getinfo_super(st, content, NULL);
+#ifndef MDASSEMBLE
+ sysfs_init(content, mdfd, NULL);
+#endif
+ for (i=0; i<bestcnt; i++) {
+ int j = best[i];
+ unsigned int desired_state;
+
+ if (i >= content->array.raid_disks * 2)
+ desired_state = 0;
+ else if (i & 1)
+ desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_REPLACEMENT);
+ else
+ desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
+
+ if (j<0)
+ continue;
+ if (!devices[j].uptodate)
+ continue;
+
+ devices[j].i.disk.state = desired_state;
+ if (!(devices[j].i.array.state & 1))
+ clean = 0;
+
+ if (st->ss->update_super(st, &devices[j].i, "assemble", NULL,
+ c->verbose, 0, NULL)) {
+ if (c->force) {
+ if (c->verbose >= 0)
+ pr_err("clearing FAULTY flag for device %d in %s for %s\n",
+ j, mddev, devices[j].devname);
+ change = 1;
+ } else {
+ if (c->verbose >= -1)
+ pr_err("device %d in %s has wrong state in superblock, but %s seems ok\n",
+ i, mddev, devices[j].devname);
+ }
+ }
+#if 0
+ if (!(super.disks[i].i.disk.state & (1 << MD_DISK_FAULTY))) {
+ pr_err("devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
+ i, mddev);
+ }
+#endif
+ }
+ if (c->force && !clean &&
+ !enough(content->array.level, content->array.raid_disks,
+ content->array.layout, clean,
+ avail)) {
+ change += st->ss->update_super(st, content, "force-array",
+ devices[chosen_drive].devname, c->verbose,
+ 0, NULL);
+ was_forced = 1;
+ clean = 1;
+ }
+
+ if (change) {
+ int fd;
+ fd = dev_open(devices[chosen_drive].devname,
+ devices[chosen_drive].included ?
+ O_RDWR : (O_RDWR|O_EXCL));
+ if (fd < 0) {
+ pr_err("Could not open %s for write - cannot Assemble array.\n",
+ devices[chosen_drive].devname);
+ close(mdfd);
+ free(devices);
+ return 1;
+ }
+ if (st->ss->store_super(st, fd)) {
+ close(fd);
+ pr_err("Could not re-write superblock on %s\n",
+ devices[chosen_drive].devname);
+ close(mdfd);
+ free(devices);
+ return 1;
+ }
+ if (c->verbose >= 0)
+ pr_err("Marking array %s as 'clean'\n",
+ mddev);
+ close(fd);
+ }
+
+ /* If we are in the middle of a reshape we may need to restore saved data
+ * that was moved aside due to the reshape overwriting live data
+ * The code of doing this lives in Grow.c
+ */
+#ifndef MDASSEMBLE
+ if (content->reshape_active &&
+ !(content->reshape_active & RESHAPE_NO_BACKUP)) {
+ int err = 0;
+ int *fdlist = xmalloc(sizeof(int)* bestcnt);
+ if (c->verbose > 0)
+ pr_err(":%s has an active reshape - checking "
+ "if critical section needs to be restored\n",
+ chosen_name);
+ if (!c->backup_file)
+ c->backup_file = locate_backup(content->sys_name);
+ enable_fds(bestcnt/2);
+ for (i = 0; i < bestcnt/2; i++) {
+ int j = best[i*2];
+ if (j >= 0) {
+ fdlist[i] = dev_open(devices[j].devname,
+ devices[j].included
+ ? O_RDWR : (O_RDWR|O_EXCL));
+ if (fdlist[i] < 0) {
+ pr_err("Could not open %s for write - cannot Assemble array.\n",
+ devices[j].devname);
+ err = 1;
+ break;
+ }
+ } else
+ fdlist[i] = -1;
+ }
+ if (!err) {
+ if (st->ss->external && st->ss->recover_backup)
+ err = st->ss->recover_backup(st, content);
+ else
+ err = Grow_restart(st, content, fdlist, bestcnt/2,
+ c->backup_file, c->verbose > 0);
+ if (err && c->invalid_backup) {
+ if (c->verbose > 0)
+ pr_err("continuing"
+ " without restoring backup\n");
+ err = 0;
+ }
+ }
+ while (i>0) {
+ i--;
+ if (fdlist[i]>=0) close(fdlist[i]);
+ }
+ free(fdlist);
+ if (err) {
+ pr_err("Failed to restore critical section for reshape, sorry.\n");
+ if (c->backup_file == NULL)
+ cont_err("Possibly you needed to specify the --backup-file\n");
+ close(mdfd);
+ free(devices);
+ return err;
+ }
+ }
+#endif
+
+ /* Almost ready to actually *do* something */
+ /* First, fill in the map, so that udev can find our name
+ * as soon as we become active.
+ */
+ if (c->update && strcmp(c->update, "metadata")==0) {
+ content->array.major_version = 1;
+ content->array.minor_version = 0;
+ strcpy(content->text_version, "1.0");
+ }
+
+ map_update(&map, fd2devnm(mdfd), content->text_version,
+ content->uuid, chosen_name);
+
+ rv = start_array(mdfd, mddev, content,
+ st, ident, best, bestcnt,
+ chosen_drive, devices, okcnt, sparecnt,
+ rebuilding_cnt,
+ c,
+ clean, avail, start_partial_ok,
+ pre_exist != NULL,
+ was_forced);
+ if (rv == 1 && !pre_exist)
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ free(devices);
+ map_unlock(&map);
+ if (rv == 0) {
+ wait_for(chosen_name, mdfd);
+ close(mdfd);
+ if (auto_assem) {
+ int usecs = 1;
+ /* There is a nasty race with 'mdadm --monitor'.
+ * If it opens this device before we close it,
+ * it gets an incomplete open on which IO
+ * doesn't work and the capacity is
+ * wrong.
+ * If we reopen (to check for layered devices)
+ * before --monitor closes, we lose.
+ *
+ * So: wait up to 1 second for there to be
+ * a non-zero capacity.
+ */
+ while (usecs < 1000) {
+ mdfd = open(mddev, O_RDONLY);
+ if (mdfd >= 0) {
+ unsigned long long size;
+ if (get_dev_size(mdfd, NULL, &size) &&
+ size > 0)
+ break;
+ close(mdfd);
+ }
+ usleep(usecs);
+ usecs <<= 1;
+ }
+ }
+ } else
+ close(mdfd);
+
+ /* '2' means 'OK, but not started yet' */
+ return rv == 2 ? 0 : rv;
+}
+
+#ifndef MDASSEMBLE
+int assemble_container_content(struct supertype *st, int mdfd,
+ struct mdinfo *content, struct context *c,
+ char *chosen_name, int *result)
+{
+ /* Add the devices listed in 'content' to the array open on 'mdfd' and,
+ * if enough of them are present, start it (or continue its reshape).
+ * Returns 0 when the array was started and non-zero otherwise. When
+ * c->export is set and 'result' is non-NULL the outcome is OR-ed into
+ * *result as an INCR_* flag instead of being printed.
+ */
+ struct mdinfo *dev, *sra, *dev2;
+ int working = 0, preexist = 0, expansion = 0;
+ int old_raid_disks, start_reshape, err;
+ struct map_ent *map = NULL;
+ char *avail = NULL;
+
+ sysfs_init(content, mdfd, NULL);
+
+ sra = sysfs_read(mdfd, NULL, GET_VERSION|GET_DEVS);
+ if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0) {
+ if (content->array.major_version == -1 &&
+ content->array.minor_version == -2 &&
+ c->readonly &&
+ content->text_version[0] == '/')
+ content->text_version[0] = '-';
+ if (sysfs_set_array(content, md_get_version(mdfd)) != 0) {
+ if (sra)
+ sysfs_free(sra);
+ return 1;
+ }
+ }
+
+ /* There are two types of reshape: container wide or sub-array specific
+ * Check if metadata requests blocking container wide reshapes
+ */
+ start_reshape = (content->reshape_active &&
+ !((content->reshape_active == CONTAINER_RESHAPE) &&
+ (content->array.state & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE))));
+
+ /* Block subarray here if it is under reshape now
+ * Do not allow for any changes in this array
+ */
+ if (st->ss->external && content->recovery_blocked && start_reshape)
+ block_subarray(content);
+
+ /* 'sra' is still NULL here if sysfs_read() failed: nothing to prune then */
+ for (dev2 = sra ? sra->devs : NULL; dev2; dev2 = dev2->next) {
+ for (dev = content->devs; dev; dev = dev->next)
+ if (dev2->disk.major == dev->disk.major &&
+ dev2->disk.minor == dev->disk.minor)
+ break;
+ if (dev)
+ continue;
+ /* Don't want this one any more */
+ if (sysfs_set_str(sra, dev2, "slot", "none") < 0 &&
+ errno == EBUSY) {
+ pr_err("Cannot remove old device %s: not updating %s\n", dev2->sys_name, sra->sys_name);
+ sysfs_free(sra);
+ return 1;
+ }
+ sysfs_set_str(sra, dev2, "state", "remove");
+ }
+ old_raid_disks = content->array.raid_disks - content->delta_disks;
+ avail = xcalloc(content->array.raid_disks, 1);
+ for (dev = content->devs; dev; dev = dev->next) {
+ if (dev->disk.raid_disk >= 0)
+ avail[dev->disk.raid_disk] = 1;
+ if (sysfs_add_disk(content, dev, 1) == 0) {
+ if (dev->disk.raid_disk >= old_raid_disks &&
+ content->reshape_active)
+ expansion++;
+ else
+ working++;
+ } else if (errno == EEXIST)
+ preexist++;
+ }
+ if (sra)
+ sysfs_free(sra);
+ if (working + expansion == 0 && c->runstop <= 0) {
+ free(avail);
+ return 1;/* Nothing new, don't try to start */
+ }
+ map_update(&map, fd2devnm(mdfd), content->text_version,
+ content->uuid, chosen_name);
+
+ if (enough(content->array.level, content->array.raid_disks,
+ content->array.layout, content->array.state & 1, avail) == 0) {
+ if (c->export && result)
+ *result |= INCR_NO;
+ else if (c->verbose >= 0) {
+ pr_err("%s assembled with %d device%s",
+ chosen_name, preexist + working,
+ preexist + working == 1 ? "":"s");
+ if (preexist)
+ fprintf(stderr, " (%d new)", working);
+ fprintf(stderr, " but not started\n");
+ }
+ free(avail);
+ return 1;
+ }
+ free(avail);
+
+ if (c->runstop <= 0 &&
+ (working + preexist + expansion) <
+ content->array.working_disks) {
+ if (c->export && result)
+ *result |= INCR_UNSAFE;
+ else if (c->verbose >= 0) {
+ pr_err("%s assembled with %d device%s",
+ chosen_name, preexist + working,
+ preexist + working == 1 ? "":"s");
+ if (preexist)
+ fprintf(stderr, " (%d new)", working);
+ fprintf(stderr, " but not safe to start\n");
+ }
+ return 1;
+ }
+
+ if (start_reshape) {
+ int spare = content->array.raid_disks + expansion;
+ if (restore_backup(st, content,
+ working,
+ spare, &c->backup_file, c->verbose) == 1)
+ return 1;
+
+ err = sysfs_set_str(content, NULL, "array_state", "readonly");
+ if (err)
+ return 1;
+
+ if (st->ss->external) {
+ if (!mdmon_running(st->container_devnm))
+ start_mdmon(st->container_devnm);
+ ping_monitor(st->container_devnm);
+ if (mdmon_running(st->container_devnm) &&
+ st->update_tail == NULL)
+ st->update_tail = &st->updates;
+ }
+
+ err = Grow_continue(mdfd, st, content, c->backup_file,
+ 0, c->freeze_reshape);
+ } else switch(content->array.level) {
+ case LEVEL_LINEAR:
+ case LEVEL_MULTIPATH:
+ case 0:
+ err = sysfs_set_str(content, NULL, "array_state",
+ c->readonly ? "readonly" : "active");
+ break;
+ default:
+ err = sysfs_set_str(content, NULL, "array_state", "readonly");
+ /* start mdmon if needed. */
+ if (!err) {
+ if (!mdmon_running(st->container_devnm))
+ start_mdmon(st->container_devnm);
+ ping_monitor(st->container_devnm);
+ }
+ break;
+ }
+ if (!err)
+ sysfs_set_safemode(content, content->safe_mode_delay);
+
+ /* Block subarray here if it is not being reshaped now.
+ * It has to be blocked a little later to allow mdmon to switch
+ * it in to R/W state
+ */
+ if (st->ss->external && content->recovery_blocked &&
+ !start_reshape)
+ block_subarray(content);
+
+ if (c->export && result) {
+ if (err)
+ *result |= INCR_NO;
+ else
+ *result |= INCR_YES;
+ } else if (c->verbose >= 0) {
+ if (err)
+ pr_err("array %s now has %d device%s",
+ chosen_name, working + preexist,
+ working + preexist == 1 ? "":"s");
+ else
+ pr_err("Started %s with %d device%s",
+ chosen_name, working + preexist,
+ working + preexist == 1 ? "":"s");
+ if (preexist)
+ fprintf(stderr, " (%d new)", working);
+ if (expansion)
+ fprintf(stderr, " ( + %d for expansion)",
+ expansion);
+ fprintf(stderr, "\n");
+ }
+ if (!err)
+ wait_for(chosen_name, mdfd);
+ return err;
+ /* FIXME should have an O_EXCL and wait for read-auto */
+}
+#endif
diff --git a/Build.c b/Build.c
new file mode 100644
index 00000000..da64dc89
--- /dev/null
+++ b/Build.c
@@ -0,0 +1,293 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+
+#define REGISTER_DEV _IO (MD_MAJOR, 1)
+#define START_MD _IO (MD_MAJOR, 2)
+#define STOP_MD _IO (MD_MAJOR, 3)
+
+int Build(char *mddev, struct mddev_dev *devlist,
+ struct shape *s, struct context *c)
+{
+ /* Build an array without superblocks (typically linear or raid0).
+ * We cannot really do any checks, we just do it.
+ * For md_version < 0.90.0, we call REGISTER_DEV with the device
+ * numbers, and then START_MD giving the "geometry"
+ * geometry is 0xpp00cc
+ * where pp is personality: 1==linear, 2==raid0
+ * cc = chunk size factor: 0==4k, 1==8k etc.
+ *
+ * For md_version >= 0.90.0 we call
+ * SET_ARRAY_INFO, ADD_NEW_DISK, RUN_ARRAY
+ *
+ * Returns 0 once the array is built and started, 1 on any failure.
+ */
+ int i;
+ int vers;
+ struct stat stb;
+ int subdevs = 0, missing_disks = 0;
+ struct mddev_dev *dv;
+ int bitmap_fd;
+ unsigned long long bitmapsize;
+ int mdfd;
+ char chosen_name[1024];
+ int uuid[4] = {0,0,0,0};
+ struct map_ent *map = NULL;
+
+ /* scan all devices, make sure they really are block devices */
+ for (dv = devlist; dv; dv=dv->next) {
+ subdevs++;
+ if (strcmp("missing", dv->devname) == 0) {
+ missing_disks++;
+ continue;
+ }
+ if (stat(dv->devname, &stb)) {
+ pr_err("Cannot find %s: %s\n",
+ dv->devname, strerror(errno));
+ return 1;
+ }
+ if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+ pr_err("%s is not a block device.\n",
+ dv->devname);
+ return 1;
+ }
+ }
+
+ if (s->raiddisks != subdevs) {
+ pr_err("requested %d devices in array but listed %d\n",
+ s->raiddisks, subdevs);
+ return 1;
+ }
+
+ if (s->layout == UnSet)
+ switch(s->level) {
+ default: /* no layout */
+ s->layout = 0;
+ break;
+ case 10:
+ s->layout = 0x102; /* near=2, far=1 */
+ if (c->verbose > 0)
+ pr_err("layout defaults to n1\n");
+ break;
+ case 5:
+ case 6:
+ s->layout = map_name(r5layout, "default");
+ if (c->verbose > 0)
+ pr_err("layout defaults to %s\n", map_num(r5layout, s->layout));
+ break;
+ case LEVEL_FAULTY:
+ s->layout = map_name(faultylayout, "default");
+
+ if (c->verbose > 0)
+ pr_err("layout defaults to %s\n", map_num(faultylayout, s->layout));
+ break;
+ }
+
+ /* We need to create the device. It can have no name. */
+ map_lock(&map);
+ mdfd = create_mddev(mddev, NULL, c->autof, LOCAL,
+ chosen_name);
+ if (mdfd < 0) {
+ map_unlock(&map);
+ return 1;
+ }
+ mddev = chosen_name;
+
+ map_update(&map, fd2devnm(mdfd), "none", uuid, chosen_name);
+ map_unlock(&map);
+
+ vers = md_get_version(mdfd);
+
+ /* looks Ok, go for it */
+ if (vers >= 9000) {
+ mdu_array_info_t array = {0}; /* zero any field not explicitly set below before the ioctl */
+ array.level = s->level;
+ if (s->size == MAX_SIZE)
+ s->size = 0;
+ array.size = s->size;
+ array.nr_disks = s->raiddisks;
+ array.raid_disks = s->raiddisks;
+ array.md_minor = 0;
+ if (fstat(mdfd, &stb)==0)
+ array.md_minor = minor(stb.st_rdev);
+ array.not_persistent = 1;
+ array.state = 0; /* not clean, but no errors */
+ if (s->assume_clean)
+ array.state |= 1;
+ array.active_disks = s->raiddisks - missing_disks;
+ array.working_disks = s->raiddisks - missing_disks;
+ array.spare_disks = 0;
+ array.failed_disks = missing_disks;
+ if (s->chunk == 0 && (s->level==0 || s->level==LEVEL_LINEAR))
+ s->chunk = 64;
+ array.chunk_size = s->chunk*1024;
+ array.layout = s->layout;
+ if (ioctl(mdfd, SET_ARRAY_INFO, &array)) {
+ pr_err("SET_ARRAY_INFO failed for %s: %s\n",
+ mddev, strerror(errno));
+ goto abort;
+ }
+ } else if (s->bitmap_file) {
+ pr_err("bitmaps not supported with this kernel\n");
+ goto abort;
+ }
+
+ if (s->bitmap_file && strcmp(s->bitmap_file, "none") == 0)
+ s->bitmap_file = NULL;
+ if (s->bitmap_file && s->level <= 0) {
+ pr_err("bitmaps not meaningful with level %s\n",
+ map_num(pers, s->level)?:"given");
+ goto abort;
+ }
+ /* now add the devices */
+ for ((i=0), (dv = devlist) ; dv ; i++, dv=dv->next) {
+ unsigned long long dsize;
+ int fd;
+ if (strcmp("missing", dv->devname) == 0)
+ continue; /* 'i' still advances, so the slot stays empty */
+ if (stat(dv->devname, &stb)) {
+ pr_err("Weird: %s has disappeared.\n",
+ dv->devname);
+ goto abort;
+ }
+ if ((stb.st_mode & S_IFMT)!= S_IFBLK) {
+ pr_err("Weird: %s is no longer a block device.\n",
+ dv->devname);
+ goto abort;
+ }
+ fd = open(dv->devname, O_RDONLY|O_EXCL);
+ if (fd < 0) {
+ pr_err("Cannot open %s: %s\n",
+ dv->devname, strerror(errno));
+ goto abort;
+ }
+ if (get_dev_size(fd, NULL, &dsize) &&
+ (s->size == 0 || s->size == MAX_SIZE || dsize < s->size))
+ s->size = dsize;
+ close(fd);
+ if (vers >= 9000) {
+ mdu_disk_info_t disk;
+ disk.number = i;
+ disk.raid_disk = i;
+ disk.state = (1<<MD_DISK_SYNC) | (1<<MD_DISK_ACTIVE);
+ if (dv->writemostly == 1)
+ disk.state |= 1<<MD_DISK_WRITEMOSTLY;
+ disk.major = major(stb.st_rdev);
+ disk.minor = minor(stb.st_rdev);
+ if (ioctl(mdfd, ADD_NEW_DISK, &disk)) {
+ pr_err("ADD_NEW_DISK failed for %s: %s\n",
+ dv->devname, strerror(errno));
+ goto abort;
+ }
+ } else {
+ if (ioctl(mdfd, REGISTER_DEV, &stb.st_rdev)) {
+ pr_err("REGISTER_DEV failed for %s: %s.\n",
+ dv->devname, strerror(errno));
+ goto abort;
+ }
+ }
+ }
+ /* now to start it */
+ if (vers >= 9000) {
+ mdu_param_t param; /* not used by syscall */
+ if (s->bitmap_file) {
+ bitmap_fd = open(s->bitmap_file, O_RDWR);
+ if (bitmap_fd < 0) {
+ int major = BITMAP_MAJOR_HI;
+#if 0
+ if (s->bitmap_chunk == UnSet) {
+ pr_err("%s cannot be openned.",
+ s->bitmap_file);
+ goto abort;
+ }
+#endif
+ if (vers < 9003) {
+ major = BITMAP_MAJOR_HOSTENDIAN;
+#ifdef __BIG_ENDIAN
+ pr_err("Warning - bitmaps created on this kernel are not portable\n"
+ " between different architectures. Consider upgrading the Linux kernel.\n");
+#endif
+ }
+ bitmapsize = s->size>>9; /* FIXME wrong for RAID10 */
+ if (CreateBitmap(s->bitmap_file, 1, NULL, s->bitmap_chunk,
+ c->delay, s->write_behind, bitmapsize, major)) {
+ goto abort;
+ }
+ bitmap_fd = open(s->bitmap_file, O_RDWR);
+ if (bitmap_fd < 0) {
+ pr_err("%s cannot be opened.\n",
+ s->bitmap_file);
+ goto abort;
+ }
+ }
+ if (bitmap_fd >= 0) { /* NOTE(review): bitmap_fd is never closed in this function */
+ if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
+ pr_err("Cannot set bitmap file for %s: %s\n",
+ mddev, strerror(errno));
+ goto abort;
+ }
+ }
+ }
+ if (ioctl(mdfd, RUN_ARRAY, &param)) {
+ pr_err("RUN_ARRAY failed: %s\n",
+ strerror(errno));
+ if (s->chunk & (s->chunk-1)) {
+ cont_err("Problem may be that chunk size"
+ " is not a power of 2\n");
+ }
+ goto abort;
+ }
+ } else {
+ unsigned long arg; /* legacy "geometry" word, 0xpp00cc */
+ arg=0;
+ while (s->chunk > 4096) { /* NOTE(review): s->chunk is multiplied by 1024 above, yet compared as bytes here - confirm units */
+ arg++;
+ s->chunk >>= 1;
+ }
+ if (s->level == 0)
+ arg |= 0x20000;
+ else
+ arg |= 0x10000;
+ if (ioctl(mdfd, START_MD, arg)) {
+ pr_err("START_MD failed: %s\n",
+ strerror(errno));
+ goto abort;
+ }
+ }
+ if (c->verbose >= 0)
+ pr_err("array %s built and started.\n",
+ mddev);
+ wait_for(mddev, mdfd);
+ close(mdfd);
+ return 0;
+
+ abort:
+ /* undo whatever was set up on the md device before giving up */
+ if (vers >= 9000)
+ ioctl(mdfd, STOP_ARRAY, 0);
+ else
+ ioctl(mdfd, STOP_MD, 0);
+ close(mdfd);
+ return 1;
+}
diff --git a/COPYING b/COPYING
new file mode 100644
index 00000000..d159169d
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 00000000..a3bf7007
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,306 @@
+Please see git logs for detailed change log.
+This file just contains highlights.
+
+Changes Prior to release 3.3
+- Some array reshapes can proceed without needing backup file.
+ This is done by changing the 'data_offset' so we never need to write
+ any data back over where it was before. If there is no "head space"
+ or "tail space" to allow data_offset to change, the old mechanism
+ with a backup file can still be used.
+- RAID10 arrays can be reshaped to change the number of devices,
+ change the chunk size, or change the layout between 'near'
+ and 'offset'.
+ This will always change data_offset, and will fail if there is no
+ room for data_offset to be moved.
+- "--assemble --update=metadata" can convert a 0.90 array to a 1.0 array.
+- bad-block-logs are supported (but not heavily tested yet)
+- "--assemble --update=revert-reshape" can be used to undo a reshape
+ that has just been started but isn't really wanted. This is very
+ new and while it passes basic tests it cannot be guaranteed.
+- improved locking between --incremental and --assemble
+- uses systemd to run "mdmon" if systemd is configured to do that.
+- kernel names of md devices can be non-numeric. e.g. "md_home" rather than
+ "md0". This will probably confuse lots of other tools, so you need to
+ echo CREATE names=yes >> /etc/mdadm.conf
+ or the feature will not be used. (you also need a reasonably new kernel).
+- "--stop" can be given a kernel name instead of a device name. i.e.
+ mdadm --stop md4
+ will work even if /dev/md4 doesn't exist.
+- "--detail --export" has some information about the devices in the array
+- --dump and --restore can be used to backup and restore the metadata on an
+ array.
+- Hot-replace is supported with
+ mdadm /dev/mdX --replace /dev/foo
+ and
+ mdadm /dev/mdX --replace /dev/foo --with /dev/bar
+- Config file can be a directory in which case all "*.conf" files are
+ read in lexical order.
+ Default is to read /etc/mdadm.conf and then /etc/mdadm.conf.d
+ Thus
+ echo CREATE names=yes > /etc/mdadm.conf.d/names.conf
+ will also enable the use of named md devices.
+
+- Lots of improvements to DDF support including adding support for
+ RAID10 (thanks Martin Wilck).
+
+Changes Prior to release 3.2.6
+ - There are no real stand-out fixes, just lots of little bits and pieces.
+
+Changes Prior to release 3.2.5
+ - This release primarily fixes a serious regression in 3.2.4.
+ This regression does *not* cause any risk to data. It simply
+ means that adding a device with "--add" would sometimes fail
+ when it should not.
+
+ - The fix also includes a couple of minor fixes such as making
+ the "--layout=preserve" option to "--grow" work again.
+
+
+Changes Prior to release 3.2.4
+"--oneline" log of changes is below. Some notable ones are:
+
+ - --offroot argument to improve interactions between mdmon and initrd
+ - --prefer argument to select which /dev names to display in some
+ circumstances.
+ - relax restrictions on when "--add" will be allowed
+ - Fix bug with adding write-intent-bitmap to active array
+ - Now defaults to "/run/mdadm" for storing run-time files.
+
+Changes Prior to release 3.2.3
+ - The largest single area of change is support for reshape of Intel
+ IMSM arrays (OnLine Capacity Expansion and Level Migration).
+ - Among other fixes, this now has a better chance of surviving if a
+ device fails during reshape.
+
+Changes Prior to release 3.2.2
+ - reshaping IMSM (Intel metadata) arrays is no longer 'experimental',
+ it should work properly and be largely compatible with IMSM drivers in
+ other platforms.
+ - --assume-clean can be used with --grow --size to avoid resyncing the
+ new part of the array. This is only supported with very new kernels.
+ - RAID0 arrays can have chunksize which is not a power of 2. This has been
+ supported in the kernel for a while but is only now supported by
+ mdadm.
+
+ - A new tool 'raid6check' is available which can check a RAID6 array,
+ or part of it, and report which device is most inconsistent with the
+ others if any stripe is inconsistent. This is still under development
+ and does not have a man page yet. If anyone tries it out and has any
+ questions or experience to report, they would be most welcome on
+ linux-raid@vger.kernel.org.
+
+Changes Prior to release 3.2.1
+ - policy framework
+ Policy can be expressed for moving spare devices between arrays, and
+ for how to handle hot-plugged devices. This policy can be different
+ for devices plugged in to different controllers etc.
+ This, for example, allows a configuration where when a device is plugged
+ in it is immediately included in an md array as a hot spare and
+ possibly starts recovery immediately if an array is degraded.
+
+ - some understanding of mbr and gpt partition tables
+ This is primarily to support the new hot-plug support. If a
+ device is plugged in and policy suggests it should have a partition table,
+ the partition table will be copied from a suitably similar device, and
+ then the partitions will hot-plug and can then be added to md arrays.
+
+ - "--incremental --remove" can remember where a device was removed from
+ so if a device gets plugged back in the same place, special policy applies
+ to it, allowing it to be included in an array even if a general hotplug
+ will not be included.
+
+ - enhanced reshape options, including growing a RAID0 by converting to RAID4,
+ restriping, and converting back. Also conversions between RAID0 and
+ RAID10 and between RAID1 and RAID10 are possible (with a suitably recent
+ kernel).
+
+ - spare migration for IMSM arrays.
+ Spare migration can now work across 'containers' using non-native metadata
+ and specifically Intel's IMSM arrays support spare migrations.
+
+ - OLCE and level migration for Intel IMSM arrays.
+ OnLine Capacity Expansion and level migration (e.g. RAID0 -> RAID5) is
+ supported for Intel Matrix Storage Manager arrays.
+ This support is currently 'experimental' for technical reasons. It can
+ be enabled with "export MDADM_EXPERIMENTAL=1"
+
+ - avoid including wayward devices
+ If you split a RAID1, mount the two halves as two separate degraded RAID1s,
+ and then later bring the two back together, it is possible that the md
+ metadata won't properly show that one must over-ride the other.
+ mdadm now does extra checking to detect this possibility and avoid
+ potentially corrupting data.
+
+ - remove any possible confusion between similar options.
+ e.g. --brief and --bitmap were mapped to 'b' and mdadm wouldn't
+ notice if one was used where the other was expected.
+
+ - allow K,M,G suffixes on chunk sizes
+
+Changes Prior to release 3.2
+ - By far the most significant change in this release relates to the
+ management of reshaping arrays. This code has been substantially
+ re-written so that it can work with 'externally managed metadata' -
+ Intel's IMSM in particular. We now support level migration and
+ OnLine Capacity Expansion on these arrays.
+ - Policy framework.
+ Various policy statements can be made in the mdadm.conf to guide
+ the behaviour of mdadm, particularly with regard to how new devices
+ are treated by "mdadm -I".
+ Depending on the 'action' associated with a device (identified by
+ its 'path') such new devices can be automatically re-added to an
+ existing array that they previously fell out of, or automatically
+ added as a spare if they appear to contain no data.
+
+ - mdadm now has a limited understanding of partition tables. This
+ allows the policy framework to make decisions about partitioned
+ devices as well.
+
+ - --incremental --remove can be told what --path the device was on,
+ and this info will be recorded so that another device appearing at
+ the same physical location can be preferentially added to the same
+ array (provides the spare-same-slot action policy applied to the
+ path).
+
+ - A new "--invalid-backup" flag is available in --assemble
+ mode. This can be used to re-assemble an array which was stopped
+ in the middle of a reshape, and for which the 'backup file' is no
+ longer available or is corrupted. The array may have some
+ corruption in it at the point where reshape was up to, but at least
+ the rest of the array will become available.
+
+
+ - Various internal restructuring - more is needed.
+
+Changes Prior to release 3.1.5
+ - Fixes for v1.x metadata on big-endian machines.
+ - man page improvements
+ - Improve '--detail --export' when run on partitions of an md array.
+ - Fix regression with removing 'failed' or 'detached' devices.
+ - Fixes for "--assemble --force" in various unusual cases.
+ - Allow '-Y' to mean --export. This was documented but not implemented.
+ - Various fixes for handling 'ddf' metadata. This is now more reliable
+ but could benefit from more interoperability testing.
+ - Correctly list subarrays of a container in "--detail" output.
+ - Improve checks on whether the requested number of devices is supported
+ by the metadata - both for --create and --grow.
+ - Don't remove partitions from a device that is being included in an
+ array until we are fully committed to including it.
+ - Allow "--assemble --update=no-bitmap" so an array with a corrupt
+ bitmap can still be assembled.
+ - Don't allow --add to succeed if it looks like a "--re-add" is probably
+ wanted, but cannot succeed. This avoids inadvertently turning
+ devices into spares when an array is failed.
+
+Changes Prior to release 3.1.4
+ Two fixes related to configs that aren't using udev:
+ - Don't remove md devices with 'standard' names on --stop
+ - Allow dev_open to work on read-only /dev
+ And fixed regressions:
+ - Allow --incremental to add spares to an array
+ - Accept --no-degraded as a deprecated option rather than
+ throwing an error
+ - Return correct success status when --incremental assembling
+ a container which does not yet have enough devices.
+ - Don't link mdadm with pthreads, only mdmon needs it.
+ - Fix compiler warning due to bad use of snprintf
+
+Changes Prior to release 3.1.3
+ - mapfile now lives in a fixed location which defaults to
+ /dev/.mdadm/map but can be changed at compile time. This
+ location is chosen as most distros provide it during early
+ boot and preserve it through. As long as /dev exists and is
+ writable, /dev/.mdadm will be created.
+ Other files for communication with mdmon live here too.
+ This fixes a bug reported by Debian and Gentoo users where
+ udev would spin in early-boot.
+ - IMSM and DDF metadata will not be recognised on partitions
+ as they should only be used on whole-disks.
+ - Various overflows caused by 2G drives have been addressed.
+ - A subarray of an IMSM container can now be killed with
+ --kill-subarray. Also subarrays can be renamed with
+ --update-subarray
+ - -If (or --incremental --fail) can be used from udev to
+ fail and remove from all arrays a device which has been
+ unplugged from the system. i.e. hot-unplug-support.
+ - "mdadm /dev/mdX --re-add missing" will look for any device
+ that looks like it should be a member of /dev/mdX but isn't
+ and will automatically --re-add it
+ - Now compile with -Wextra to get extra warnings.
+ - Lots of minor bug fixes, documentation improvements, etc.
+
+Changes Prior to release 3.1.2
+ - The default metadata has changed again (sorry about that).
+ It is now v1.2 and will hopefully stay that way. It turned
+ out there were boot-block issues with v1.1 which make it
+ unsuitable for a default, though in many cases it is still
+ suitable to use.
+ - Stopping a container is not permitted when members are still
+ active
+ - Add 'homehost' to the valid words for the "AUTO" config file
+ line. When followed by "-all", this causes mdadm to
+ auto-assemble any array belonging to this host, but not
+ auto-assemble anything else.
+ - Fix some bugs with "--grow --chunksize=" for changing chunksize.
+ - VAR_RUN can be easily changed at compile time just like ALT_RUN.
+ This gives distros more flexibility in how to manage the
+ pid and sock files that mdmon needs.
+ - Various mdmon fixes
+ - Always make bitmap 4K-aligned if at all possible.
+ - If mdadm.conf lists arrays which have inter-dependencies,
+ they previously had to be listed in the "right" order. Now
+ any order should work.
+ - Fix --force assembly of v1.x arrays which are in the process
+ of recovering.
+ - Add section on 'scrubbing' to 'md' man page.
+ - Various command-line-option parsing improvements.
+ - ... and lots of other bug fixes.
+
+Changes Prior to release 3.1.1
+ - Multiple fixes for new --grow levels including fixes for
+ serious data corruption problems.
+ - Change default metadata to v1.1
+ - Change default chunk size to 512K
+ - Change default bitmap chunk size to 64Meg
+ - When --re-add is used, don't fall back to
+ --add if --re-add fails as this can destroy data.
+
+Changes Prior to release 3.1
+ - Support --grow to change the layout of RAID4/5/6
+ - Support --grow to change the chunksize of raid 4/5/6
+ - Support --grow to change level from RAID1 -> RAID5 -> RAID6 and
+ back.
+ - Support --grow to reduce the number of devices in RAID4/5/6.
+ - Support restart of these grow options when assembling an array
+ which is partially grown.
+ - Assorted tests of this code, and of different RAID6 layouts.
+
+Changes Prior to release 3.0.3
+ - Improvements for creating arrays giving just a name, like 'foo',
+ rather than the full '/dev/md/foo'.
+ - Improvements for assembling member arrays of containers.
+ - Improvements to test suite
+ - Add option to change increment for RebuildNN messages reported
+ by "mdadm --monitor"
+ - Improvements to mdmon 'hand-over' from initrd to final root.
+ - Handle merging of devices that have left an IMSM array and are
+ being re-incorporated.
+ - Add missing space in "--detail --brief" output.
+
+Changes Prior to release 3.0.2
+ - Fix crash when homehost is not set, as often happens in
+ early boot.
+
+Changes Prior to release 3.0.1
+ - Fix various segfaults
+ - Fixes for --examine with containers
+ - Lots of other little fixes.
+
+Changes Prior to release 3.0
+ - Support for externally managed metadata, specifically DDF and IMSM.
+ - Depend on udev to create entries in /dev, rather than creating them
+ ourselves.
+ - remove --auto-update-home-hosts
+ - new config file line "auto"
+ - new "<ignore>" and "any" options for "homehost"
+ - numerous bug fixes and minor enhancements.
diff --git a/Create.c b/Create.c
new file mode 100644
index 00000000..330c5b42
--- /dev/null
+++ b/Create.c
@@ -0,0 +1,1069 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include "md_u.h"
+#include "md_p.h"
+#include <ctype.h>
+/* Pick the layout to use for 'level' when the caller gave none; the built-in choices for levels 10, 5/6 and FAULTY are reported via pr_err() when verbose > 0. */
+static int default_layout(struct supertype *st, int level, int verbose)
+{
+ int layout = UnSet;
+ /* If the metadata handler has a default_geometry hook it gets first say; it is handed &level and &layout, so it can overwrite either local. */
+ if (st && st->ss->default_geometry)
+ st->ss->default_geometry(st, &level, &layout, NULL);
+ /* Fall back to the built-in per-level defaults only when layout is still UnSet after the hook. */
+ if (layout == UnSet)
+ switch(level) {
+ default: /* no layout */
+ layout = 0;
+ break;
+ case 10:
+ layout = 0x102; /* near=2, far=1 */
+ if (verbose > 0)
+ pr_err("layout defaults to n2\n");
+ break;
+ case 5:
+ case 6: /* levels 5 and 6 both look up "default" in the r5layout table */
+ layout = map_name(r5layout, "default");
+ if (verbose > 0)
+ pr_err("layout defaults to %s\n", map_num(r5layout, layout));
+ break;
+ case LEVEL_FAULTY:
+ layout = map_name(faultylayout, "default");
+
+ if (verbose > 0)
+ pr_err("layout defaults to %s\n", map_num(faultylayout, layout));
+ break;
+ }
+
+ return layout;
+}
+
+int Create(struct supertype *st, char *mddev,
+ char *name, int *uuid,
+ int subdevs, struct mddev_dev *devlist,
+ struct shape *s,
+ struct context *c, unsigned long long data_offset)
+{
+ /*
+ * Create a new raid array.
+ *
+ * First check that necessary details are available
+ * (i.e. level, raid-disks)
+ *
+ * Then check each disk to see what might be on it
+ * and report anything interesting.
+ *
+ * If anything looks odd, and runstop not set,
+ * abort.
+ *
+ * SET_ARRAY_INFO and ADD_NEW_DISK, and
+ * if runstop==run, or raiddisks disks were used,
+ * RUN_ARRAY
+ */
+ int mdfd;
+ unsigned long long minsize=0, maxsize=0;
+ char *mindisc = NULL;
+ char *maxdisc = NULL;
+ int dnum;
+ struct mddev_dev *dv;
+ int fail=0, warn=0;
+ struct stat stb;
+ int first_missing = subdevs * 2;
+ int second_missing = subdevs * 2;
+ int missing_disks = 0;
+ int insert_point = subdevs * 2; /* where to insert a missing drive */
+ int total_slots;
+ int pass;
+ int vers;
+ int rv;
+ int bitmap_fd;
+ int have_container = 0;
+ int container_fd = -1;
+ int need_mdmon = 0;
+ unsigned long long bitmapsize;
+ struct mdinfo info, *infos;
+ int did_default = 0;
+ int do_default_layout = 0;
+ int do_default_chunk = 0;
+ unsigned long safe_mode_delay = 0;
+ char chosen_name[1024];
+ struct map_ent *map = NULL;
+ unsigned long long newsize;
+
+ int major_num = BITMAP_MAJOR_HI;
+
+ memset(&info, 0, sizeof(info));
+ if (s->level == UnSet && st && st->ss->default_geometry)
+ st->ss->default_geometry(st, &s->level, NULL, NULL);
+ if (s->level == UnSet) {
+ pr_err("a RAID level is needed to create an array.\n");
+ return 1;
+ }
+ if (s->raiddisks < 4 && s->level == 6) {
+ pr_err("at least 4 raid-devices needed for level 6\n");
+ return 1;
+ }
+ if (s->raiddisks > 256 && s->level == 6) {
+ pr_err("no more than 256 raid-devices supported for level 6\n");
+ return 1;
+ }
+ if (s->raiddisks < 2 && s->level >= 4) {
+ pr_err("at least 2 raid-devices needed for level 4 or 5\n");
+ return 1;
+ }
+ if (s->level <= 0 && s->sparedisks) {
+ pr_err("This level does not support spare devices\n");
+ return 1;
+ }
+
+ if (subdevs == 1 && strcmp(devlist->devname, "missing") != 0) {
+ /* If given a single device, it might be a container, and we can
+ * extract a device list from there
+ */
+ mdu_array_info_t inf;
+ int fd;
+
+ memset(&inf, 0, sizeof(inf));
+ fd = open(devlist->devname, O_RDONLY);
+ if (fd >= 0 &&
+ ioctl(fd, GET_ARRAY_INFO, &inf) == 0 &&
+ inf.raid_disks == 0) {
+ /* yep, looks like a container */
+ if (st) {
+ rv = st->ss->load_container(st, fd,
+ devlist->devname);
+ if (rv == 0)
+ have_container = 1;
+ } else {
+ st = super_by_fd(fd, NULL);
+ if (st && !(rv = st->ss->
+ load_container(st, fd,
+ devlist->devname)))
+ have_container = 1;
+ else
+ st = NULL;
+ }
+ if (have_container) {
+ subdevs = s->raiddisks;
+ first_missing = subdevs * 2;
+ second_missing = subdevs * 2;
+ insert_point = subdevs * 2;
+ }
+ }
+ if (fd >= 0)
+ close(fd);
+ }
+ if (st && st->ss->external && s->sparedisks) {
+ pr_err("This metadata type does not support "
+ "spare disks at create time\n");
+ return 1;
+ }
+ if (subdevs > s->raiddisks+s->sparedisks) {
+ pr_err("You have listed more devices (%d) than are in the array(%d)!\n", subdevs, s->raiddisks+s->sparedisks);
+ return 1;
+ }
+ if (!have_container && subdevs < s->raiddisks+s->sparedisks) {
+ pr_err("You haven't given enough devices (real or missing) to create this array\n");
+ return 1;
+ }
+ if (s->bitmap_file && s->level <= 0) {
+ pr_err("bitmaps not meaningful with level %s\n",
+ map_num(pers, s->level)?:"given");
+ return 1;
+ }
+
+ /* now set some defaults */
+
+ if (s->layout == UnSet) {
+ do_default_layout = 1;
+ s->layout = default_layout(st, s->level, c->verbose);
+ }
+
+ if (s->level == 10)
+ /* check layout fits in array*/
+ if ((s->layout&255) * ((s->layout>>8)&255) > s->raiddisks) {
+ pr_err("that layout requires at least %d devices\n",
+ (s->layout&255) * ((s->layout>>8)&255));
+ return 1;
+ }
+
+ switch(s->level) {
+ case 4:
+ case 5:
+ case 10:
+ case 6:
+ case 0:
+ if (s->chunk == 0 || s->chunk == UnSet) {
+ s->chunk = UnSet;
+ do_default_chunk = 1;
+ /* chunk will be set later */
+ }
+ break;
+ case LEVEL_LINEAR:
+ /* a chunksize of zero 0s perfectly valid (and preferred) since 2.6.16 */
+ if (get_linux_version() < 2006016 && s->chunk == 0) {
+ s->chunk = 64;
+ if (c->verbose > 0)
+ pr_err("chunk size defaults to 64K\n");
+ }
+ break;
+ case 1:
+ case LEVEL_FAULTY:
+ case LEVEL_MULTIPATH:
+ case LEVEL_CONTAINER:
+ if (s->chunk) {
+ s->chunk = 0;
+ if (c->verbose > 0)
+ pr_err("chunk size ignored for this level\n");
+ }
+ break;
+ default:
+ pr_err("unknown level %d\n", s->level);
+ return 1;
+ }
+ if (s->size == MAX_SIZE)
+ /* use '0' to mean 'max' now... */
+ s->size = 0;
+ if (s->size && s->chunk && s->chunk != UnSet)
+ s->size &= ~(unsigned long long)(s->chunk - 1);
+ newsize = s->size * 2;
+ if (st && ! st->ss->validate_geometry(st, s->level, s->layout, s->raiddisks,
+ &s->chunk, s->size*2,
+ data_offset, NULL,
+ &newsize, c->verbose>=0))
+ return 1;
+
+ if (s->chunk && s->chunk != UnSet) {
+ newsize &= ~(unsigned long long)(s->chunk*2 - 1);
+ if (do_default_chunk) {
+ /* default chunk was just set */
+ if (c->verbose > 0)
+ pr_err("chunk size "
+ "defaults to %dK\n", s->chunk);
+ s->size &= ~(unsigned long long)(s->chunk - 1);
+ do_default_chunk = 0;
+ }
+ }
+
+ if (s->size == 0) {
+ s->size = newsize / 2;
+ if (s->level == 1)
+ /* If this is ever reshaped to RAID5, we will
+ * need a chunksize. So round it off a bit
+ * now just to be safe
+ */
+ s->size &= ~(64ULL-1);
+
+ if (s->size && c->verbose > 0)
+ pr_err("setting size to %lluK\n", s->size);
+ }
+
+ /* now look at the subdevs */
+ info.array.active_disks = 0;
+ info.array.working_disks = 0;
+ dnum = 0;
+ for (dv = devlist; dv ; dv = dv->next)
+ if (data_offset == VARIABLE_OFFSET)
+ dv->data_offset = INVALID_SECTORS;
+ else
+ dv->data_offset = data_offset;
+
+ for (dv=devlist; dv && !have_container; dv=dv->next, dnum++) {
+ char *dname = dv->devname;
+ unsigned long long freesize;
+ int dfd;
+ char *doff;
+
+ if (strcasecmp(dname, "missing")==0) {
+ if (first_missing > dnum)
+ first_missing = dnum;
+ if (second_missing > dnum && dnum > first_missing)
+ second_missing = dnum;
+ missing_disks ++;
+ continue;
+ }
+ if (data_offset == VARIABLE_OFFSET) {
+ doff = strchr(dname, ':');
+ if (doff) {
+ *doff++ = 0;
+ dv->data_offset = parse_size(doff);
+ } else
+ dv->data_offset = INVALID_SECTORS;
+ } else
+ dv->data_offset = data_offset;
+
+ dfd = open(dname, O_RDONLY);
+ if (dfd < 0) {
+ pr_err("cannot open %s: %s\n",
+ dname, strerror(errno));
+ exit(2);
+ }
+ if (fstat(dfd, &stb) != 0 ||
+ (stb.st_mode & S_IFMT) != S_IFBLK) {
+ close(dfd);
+ pr_err("%s is not a block device\n",
+ dname);
+ exit(2);
+ }
+ close(dfd);
+ info.array.working_disks++;
+ if (dnum < s->raiddisks)
+ info.array.active_disks++;
+ if (st == NULL) {
+ struct createinfo *ci = conf_get_create_info();
+ if (ci)
+ st = ci->supertype;
+ }
+ if (st == NULL) {
+ /* Need to choose a default metadata, which is different
+ * depending on geometry of array.
+ */
+ int i;
+ char *name = "default";
+ for(i=0; !st && superlist[i]; i++) {
+ st = superlist[i]->match_metadata_desc(name);
+ if (!st)
+ continue;
+ if (do_default_layout)
+ s->layout = default_layout(st, s->level, c->verbose);
+ switch (st->ss->validate_geometry(
+ st, s->level, s->layout, s->raiddisks,
+ &s->chunk, s->size*2,
+ dv->data_offset, dname,
+ &freesize, c->verbose > 0)) {
+ case -1: /* Not valid, message printed, and not
+ * worth checking any further */
+ exit(2);
+ break;
+ case 0: /* Geometry not valid */
+ free(st);
+ st = NULL;
+ s->chunk = do_default_chunk ? UnSet : s->chunk;
+ break;
+ case 1: /* All happy */
+ break;
+ }
+ }
+
+ if (!st) {
+ int dfd = open(dname, O_RDONLY|O_EXCL);
+ if (dfd < 0) {
+ pr_err("cannot open %s: %s\n",
+ dname, strerror(errno));
+ exit(2);
+ }
+ pr_err("device %s not suitable "
+ "for any style of array\n",
+ dname);
+ exit(2);
+ }
+ if (st->ss != &super0 ||
+ st->minor_version != 90)
+ did_default = 1;
+ } else {
+ if (do_default_layout)
+ s->layout = default_layout(st, s->level, 0);
+ if (!st->ss->validate_geometry(st, s->level, s->layout,
+ s->raiddisks,
+ &s->chunk, s->size*2,
+ dv->data_offset,
+ dname, &freesize,
+ c->verbose >= 0)) {
+
+ pr_err("%s is not suitable for "
+ "this array.\n",
+ dname);
+ fail = 1;
+ continue;
+ }
+ }
+
+ freesize /= 2; /* convert to K */
+ if (s->chunk && s->chunk != UnSet) {
+ /* round to chunk size */
+ freesize = freesize & ~(s->chunk-1);
+ if (do_default_chunk) {
+ /* default chunk was just set */
+ if (c->verbose > 0)
+ pr_err("chunk size "
+ "defaults to %dK\n", s->chunk);
+ s->size &= ~(unsigned long long)(s->chunk - 1);
+ do_default_chunk = 0;
+ }
+ }
+ if (!freesize) {
+ pr_err("no free space left on %s\n", dname);
+ fail = 1;
+ continue;
+ }
+
+ if (s->size && freesize < s->size) {
+ pr_err("%s is smaller than given size."
+ " %lluK < %lluK + metadata\n",
+ dname, freesize, s->size);
+ fail = 1;
+ continue;
+ }
+ if (maxdisc == NULL || (maxdisc && freesize > maxsize)) {
+ maxdisc = dname;
+ maxsize = freesize;
+ }
+ if (mindisc ==NULL || (mindisc && freesize < minsize)) {
+ mindisc = dname;
+ minsize = freesize;
+ }
+ if (c->runstop != 1 || c->verbose >= 0) {
+ int fd = open(dname, O_RDONLY);
+ if (fd <0 ) {
+ pr_err("Cannot open %s: %s\n",
+ dname, strerror(errno));
+ fail=1;
+ continue;
+ }
+ warn |= check_ext2(fd, dname);
+ warn |= check_reiser(fd, dname);
+ warn |= check_raid(fd, dname);
+ if (strcmp(st->ss->name, "1.x") == 0 &&
+ st->minor_version >= 1)
+ /* metadata at front */
+ warn |= check_partitions(fd, dname, 0, 0);
+ else if (s->level == 1 || s->level == LEVEL_CONTAINER
+ || (s->level == 0 && s->raiddisks == 1))
+ /* partitions could be meaningful */
+ warn |= check_partitions(fd, dname, freesize*2, s->size*2);
+ else
+ /* partitions cannot be meaningful */
+ warn |= check_partitions(fd, dname, 0, 0);
+ if (strcmp(st->ss->name, "1.x") == 0 &&
+ st->minor_version >= 1 &&
+ did_default &&
+ s->level == 1 &&
+ (warn & 1024) == 0) {
+ warn |= 1024;
+ pr_err("Note: this array has metadata at the start and\n"
+ " may not be suitable as a boot device. If you plan to\n"
+ " store '/boot' on this device please ensure that\n"
+ " your boot-loader understands md/v1.x metadata, or use\n"
+ " --metadata=0.90\n");
+ }
+ close(fd);
+ }
+ }
+ if (s->raiddisks + s->sparedisks > st->max_devs) {
+ pr_err("Too many devices:"
+ " %s metadata only supports %d\n",
+ st->ss->name, st->max_devs);
+ return 1;
+ }
+ if (have_container)
+ info.array.working_disks = s->raiddisks;
+ if (fail) {
+ pr_err("create aborted\n");
+ return 1;
+ }
+ if (s->size == 0) {
+ if (mindisc == NULL && !have_container) {
+ pr_err("no size and no drives given - aborting create.\n");
+ return 1;
+ }
+ if (s->level > 0 || s->level == LEVEL_MULTIPATH
+ || s->level == LEVEL_FAULTY
+ || st->ss->external ) {
+ /* size is meaningful */
+ if (!st->ss->validate_geometry(st, s->level, s->layout,
+ s->raiddisks,
+ &s->chunk, minsize*2,
+ data_offset,
+ NULL, NULL, 0)) {
+ pr_err("devices too large for RAID level %d\n", s->level);
+ return 1;
+ }
+ s->size = minsize;
+ if (s->level == 1)
+ /* If this is ever reshaped to RAID5, we will
+ * need a chunksize. So round it off a bit
+ * now just to be safe
+ */
+ s->size &= ~(64ULL-1);
+ if (c->verbose > 0)
+ pr_err("size set to %lluK\n", s->size);
+ }
+ }
+
+ if (!s->bitmap_file &&
+ s->level >= 1 &&
+ st->ss->add_internal_bitmap &&
+ (s->write_behind || s->size > 100*1024*1024ULL)) {
+ if (c->verbose > 0)
+ pr_err("automatically enabling write-intent bitmap on large array\n");
+ s->bitmap_file = "internal";
+ }
+ if (s->bitmap_file && strcmp(s->bitmap_file, "none") == 0)
+ s->bitmap_file = NULL;
+
+ if (!have_container && s->level > 0 && ((maxsize-s->size)*100 > maxsize)) {
+ if (c->runstop != 1 || c->verbose >= 0)
+ pr_err("largest drive (%s) exceeds size (%lluK) by more than 1%%\n",
+ maxdisc, s->size);
+ warn = 1;
+ }
+
+ if (st->ss->detail_platform && st->ss->detail_platform(0, 1, NULL) != 0) {
+ if (c->runstop != 1 || c->verbose >= 0)
+ pr_err("%s unable to enumerate platform support\n"
+ " array may not be compatible with hardware/firmware\n",
+ st->ss->name);
+ warn = 1;
+ }
+
+ if (warn) {
+ if (c->runstop!= 1) {
+ if (!ask("Continue creating array? ")) {
+ pr_err("create aborted.\n");
+ return 1;
+ }
+ } else {
+ if (c->verbose > 0)
+ pr_err("creation continuing despite oddities due to --run\n");
+ }
+ }
+
+ /* If this is raid4/5, we want to configure the last active slot
+ * as missing, so that a reconstruct happens (faster than re-parity)
+ * FIX: Can we do this for raid6 as well?
+ */
+ if (st->ss->external == 0 &&
+ s->assume_clean==0 && c->force == 0 && first_missing >= s->raiddisks) {
+ switch ( s->level ) {
+ case 4:
+ case 5:
+ insert_point = s->raiddisks-1;
+ s->sparedisks++;
+ info.array.active_disks--;
+ missing_disks++;
+ break;
+ default:
+ break;
+ }
+ }
+ /* For raid6, if creating with 1 missing drive, make a good drive
+ * into a spare, else the create will fail
+ */
+ if (s->assume_clean == 0 && c->force == 0 && first_missing < s->raiddisks &&
+ st->ss->external == 0 &&
+ second_missing >= s->raiddisks && s->level == 6) {
+ insert_point = s->raiddisks - 1;
+ if (insert_point == first_missing)
+ insert_point--;
+ s->sparedisks ++;
+ info.array.active_disks--;
+ missing_disks++;
+ }
+
+ if (s->level <= 0 && first_missing < subdevs * 2) {
+ pr_err("This level does not support missing devices\n");
+ return 1;
+ }
+
+ /* We need to create the device */
+ map_lock(&map);
+ mdfd = create_mddev(mddev, name, c->autof, LOCAL, chosen_name);
+ if (mdfd < 0) {
+ map_unlock(&map);
+ return 1;
+ }
+ /* verify if chosen_name is not in use,
+ * it could be in conflict with already existing device
+ * e.g. container, array
+ */
+ if (strncmp(chosen_name, "/dev/md/", 8) == 0
+ && map_by_name(&map, chosen_name+8) != NULL) {
+ pr_err("Array name %s is in use already.\n",
+ chosen_name);
+ close(mdfd);
+ map_unlock(&map);
+ return 1;
+ }
+ mddev = chosen_name;
+
+ vers = md_get_version(mdfd);
+ if (vers < 9000) {
+ pr_err("Create requires md driver version 0.90.0 or later\n");
+ goto abort_locked;
+ } else {
+ mdu_array_info_t inf;
+ memset(&inf, 0, sizeof(inf));
+ ioctl(mdfd, GET_ARRAY_INFO, &inf);
+ if (inf.working_disks != 0) {
+ pr_err("another array by this name"
+ " is already running.\n");
+ goto abort_locked;
+ }
+ }
+
+ /* Ok, lets try some ioctls */
+
+ info.array.level = s->level;
+ info.array.size = s->size;
+ info.array.raid_disks = s->raiddisks;
+ /* The kernel should *know* what md_minor we are dealing
+ * with, but it chooses to trust me instead. Sigh
+ */
+ info.array.md_minor = 0;
+ if (fstat(mdfd, &stb)==0)
+ info.array.md_minor = minor(stb.st_rdev);
+ info.array.not_persistent = 0;
+
+ if ( ( (s->level == 4 || s->level == 5) &&
+ (insert_point < s->raiddisks || first_missing < s->raiddisks) )
+ ||
+ ( s->level == 6 && (insert_point < s->raiddisks
+ || second_missing < s->raiddisks))
+ ||
+ ( s->level <= 0 )
+ ||
+ s->assume_clean
+ ) {
+ info.array.state = 1; /* clean, but one+ drive will be missing*/
+ info.resync_start = MaxSector;
+ } else {
+ info.array.state = 0; /* not clean, but no errors */
+ info.resync_start = 0;
+ }
+ if (s->level == 10) {
+ /* for raid10, the bitmap size is the capacity of the array,
+ * which is array.size * raid_disks / ncopies;
+ * .. but convert to sectors.
+ */
+ int ncopies = ((s->layout>>8) & 255) * (s->layout & 255);
+ bitmapsize = s->size * s->raiddisks / ncopies * 2;
+/* printf("bms=%llu as=%d rd=%d nc=%d\n", bitmapsize, s->size, s->raiddisks, ncopies);*/
+ } else
+ bitmapsize = s->size * 2;
+
+ /* There is lots of redundancy in these disk counts,
+ * raid_disks is the most meaningful value
+ * it describes the geometry of the array
+ * it is constant
+ * nr_disks is total number of used slots.
+ * it should be raid_disks+spare_disks
+ * spare_disks is the number of extra disks present
+ * see above
+ * active_disks is the number of working disks in
+ * active slots. (With raid_disks)
+ * working_disks is the total number of working disks,
+ * including spares
+ * failed_disks is the number of disks marked failed
+ *
+ * Ideally, the kernel would keep these (except raid_disks)
+ * up-to-date as we ADD_NEW_DISK, but it doesn't (yet).
+ * So for now, we assume that all raid and spare
+ * devices will be given.
+ */
+ info.array.spare_disks=s->sparedisks;
+ info.array.failed_disks=missing_disks;
+ info.array.nr_disks = info.array.working_disks
+ + info.array.failed_disks;
+ info.array.layout = s->layout;
+ info.array.chunk_size = s->chunk*1024;
+
+ if (name == NULL || *name == 0) {
+ /* base name on mddev */
+ /* /dev/md0 -> 0
+ * /dev/md_d0 -> d0
+ * /dev/md_foo -> foo
+ * /dev/md/1 -> 1
+ * /dev/md/d1 -> d1
+ * /dev/md/home -> home
+ * /dev/mdhome -> home
+ */
+ /* FIXME compare this with rules in create_mddev */
+ name = strrchr(mddev, '/');
+ if (name) {
+ name++;
+ if (strncmp(name, "md_", 3)==0 &&
+ strlen(name) > 3 &&
+ (name-mddev) == 5 /* /dev/ */)
+ name += 3;
+ else if (strncmp(name, "md", 2)==0 &&
+ strlen(name) > 2 &&
+ isdigit(name[2]) &&
+ (name-mddev) == 5 /* /dev/ */)
+ name += 2;
+ }
+ }
+ if (!st->ss->init_super(st, &info.array, s->size, name, c->homehost, uuid,
+ data_offset))
+ goto abort_locked;
+
+ total_slots = info.array.nr_disks;
+ st->ss->getinfo_super(st, &info, NULL);
+ sysfs_init(&info, mdfd, NULL);
+
+ if (did_default && c->verbose >= 0) {
+ if (is_subarray(info.text_version)) {
+ char devnm[32];
+ char *ep;
+ struct mdinfo *mdi;
+
+ strncpy(devnm, info.text_version+1, 32);
+ devnm[31] = 0;
+ ep = strchr(devnm, '/');
+ if (ep)
+ *ep = 0;
+
+ mdi = sysfs_read(-1, devnm, GET_VERSION);
+
+ pr_err("Creating array inside %s container %s\n",
+ mdi?mdi->text_version:"managed", devnm);
+ sysfs_free(mdi);
+ } else
+ pr_err("Defaulting to version"
+ " %s metadata\n", info.text_version);
+ }
+
+ map_update(&map, fd2devnm(mdfd), info.text_version,
+ info.uuid, chosen_name);
+ /* Keep map locked until devices have been added to array
+ * to stop another mdadm from finding and using those devices.
+ */
+
+ if (s->bitmap_file && vers < 9003) {
+ major_num = BITMAP_MAJOR_HOSTENDIAN;
+#ifdef __BIG_ENDIAN
+ pr_err("Warning - bitmaps created on this kernel are not portable\n"
+ " between different architectured. Consider upgrading the Linux kernel.\n");
+#endif
+ }
+
+ if (s->bitmap_file && strcmp(s->bitmap_file, "internal")==0) {
+ if ((vers%100) < 2) {
+ pr_err("internal bitmaps not supported by this kernel.\n");
+ goto abort_locked;
+ }
+ if (!st->ss->add_internal_bitmap) {
+ pr_err("internal bitmaps not supported with %s metadata\n",
+ st->ss->name);
+ goto abort_locked;
+ }
+ if (!st->ss->add_internal_bitmap(st, &s->bitmap_chunk,
+ c->delay, s->write_behind,
+ bitmapsize, 1, major_num)) {
+ pr_err("Given bitmap chunk size not supported.\n");
+ goto abort_locked;
+ }
+ s->bitmap_file = NULL;
+ }
+
+ sysfs_init(&info, mdfd, NULL);
+
+ if (st->ss->external && st->container_devnm[0]) {
+ /* member */
+
+ /* When creating a member, we need to be careful
+ * to negotiate with mdmon properly.
+ * If it is already running, we cannot write to
+ * the devices and must ask it to do that part.
+ * If it isn't running, we write to the devices,
+ * and then start it.
+ * We hold an exclusive open on the container
+ * device to make sure mdmon doesn't exit after
+ * we checked that it is running.
+ *
+ * For now, fail if it is already running.
+ */
+ container_fd = open_dev_excl(st->container_devnm);
+ if (container_fd < 0) {
+ pr_err("Cannot get exclusive "
+ "open on container - weird.\n");
+ goto abort_locked;
+ }
+ if (mdmon_running(st->container_devnm)) {
+ if (c->verbose)
+ pr_err("reusing mdmon "
+ "for %s.\n",
+ st->container_devnm);
+ st->update_tail = &st->updates;
+ } else
+ need_mdmon = 1;
+ }
+ rv = set_array_info(mdfd, st, &info);
+ if (rv) {
+ pr_err("failed to set array info for %s: %s\n",
+ mddev, strerror(errno));
+ goto abort_locked;
+ }
+
+ if (s->bitmap_file) {
+ int uuid[4];
+
+ st->ss->uuid_from_super(st, uuid);
+ if (CreateBitmap(s->bitmap_file, c->force, (char*)uuid, s->bitmap_chunk,
+ c->delay, s->write_behind,
+ bitmapsize,
+ major_num)) {
+ goto abort_locked;
+ }
+ bitmap_fd = open(s->bitmap_file, O_RDWR);
+ if (bitmap_fd < 0) {
+ pr_err("weird: %s cannot be openned\n",
+ s->bitmap_file);
+ goto abort_locked;
+ }
+ if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
+ pr_err("Cannot set bitmap file for %s: %s\n",
+ mddev, strerror(errno));
+ goto abort_locked;
+ }
+ }
+
+ infos = xmalloc(sizeof(*infos) * total_slots);
+ enable_fds(total_slots);
+ for (pass=1; pass <=2 ; pass++) {
+ struct mddev_dev *moved_disk = NULL; /* the disk that was moved out of the insert point */
+
+ for (dnum=0, dv = devlist ; dv ;
+ dv=(dv->next)?(dv->next):moved_disk, dnum++) {
+ int fd;
+ struct stat stb;
+ struct mdinfo *inf = &infos[dnum];
+
+ if (dnum >= total_slots)
+ abort();
+ if (dnum == insert_point) {
+ moved_disk = dv;
+ continue;
+ }
+ if (strcasecmp(dv->devname, "missing")==0)
+ continue;
+ if (have_container)
+ moved_disk = NULL;
+ if (have_container && dnum < info.array.raid_disks - 1)
+ /* repeatedly use the container */
+ moved_disk = dv;
+
+ switch(pass) {
+ case 1:
+ *inf = info;
+
+ inf->disk.number = dnum;
+ inf->disk.raid_disk = dnum;
+ if (inf->disk.raid_disk < s->raiddisks)
+ inf->disk.state = (1<<MD_DISK_ACTIVE) |
+ (1<<MD_DISK_SYNC);
+ else
+ inf->disk.state = 0;
+
+ if (dv->writemostly == 1)
+ inf->disk.state |= (1<<MD_DISK_WRITEMOSTLY);
+
+ if (have_container)
+ fd = -1;
+ else {
+ if (st->ss->external &&
+ st->container_devnm[0])
+ fd = open(dv->devname, O_RDWR);
+ else
+ fd = open(dv->devname, O_RDWR|O_EXCL);
+
+ if (fd < 0) {
+ pr_err("failed to open %s "
+ "after earlier success - aborting\n",
+ dv->devname);
+ goto abort_locked;
+ }
+ fstat(fd, &stb);
+ inf->disk.major = major(stb.st_rdev);
+ inf->disk.minor = minor(stb.st_rdev);
+ }
+ if (fd >= 0)
+ remove_partitions(fd);
+ if (st->ss->add_to_super(st, &inf->disk,
+ fd, dv->devname,
+ dv->data_offset)) {
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ goto abort_locked;
+ }
+ st->ss->getinfo_super(st, inf, NULL);
+ safe_mode_delay = inf->safe_mode_delay;
+
+ if (have_container && c->verbose > 0)
+ pr_err("Using %s for device %d\n",
+ map_dev(inf->disk.major,
+ inf->disk.minor,
+ 0), dnum);
+
+ if (!have_container) {
+ /* getinfo_super might have lost these ... */
+ inf->disk.major = major(stb.st_rdev);
+ inf->disk.minor = minor(stb.st_rdev);
+ }
+ break;
+ case 2:
+ inf->errors = 0;
+
+ rv = add_disk(mdfd, st, &info, inf);
+
+ if (rv) {
+ pr_err("ADD_NEW_DISK for %s "
+ "failed: %s\n",
+ dv->devname, strerror(errno));
+ goto abort_locked;
+ }
+ break;
+ }
+ if (!have_container &&
+ dv == moved_disk && dnum != insert_point) break;
+ }
+ if (pass == 1) {
+ struct mdinfo info_new;
+ struct map_ent *me = NULL;
+
+ /* check to see if the uuid has changed due to these
+ * metadata changes, and if so update the member array
+ * and container uuid. Note ->write_init_super clears
+ * the subarray cursor such that ->getinfo_super once
+ * again returns container info.
+ */
+ st->ss->getinfo_super(st, &info_new, NULL);
+ if (st->ss->external && s->level != LEVEL_CONTAINER &&
+ !same_uuid(info_new.uuid, info.uuid, 0)) {
+ map_update(&map, fd2devnm(mdfd),
+ info_new.text_version,
+ info_new.uuid, chosen_name);
+ me = map_by_devnm(&map, st->container_devnm);
+ }
+
+ if (st->ss->write_init_super(st)) {
+ st->ss->free_super(st);
+ goto abort_locked;
+ }
+
+ /* update parent container uuid */
+ if (me) {
+ char *path = xstrdup(me->path);
+
+ st->ss->getinfo_super(st, &info_new, NULL);
+ map_update(&map, st->container_devnm,
+ info_new.text_version,
+ info_new.uuid, path);
+ free(path);
+ }
+
+ flush_metadata_updates(st);
+ st->ss->free_super(st);
+ }
+ }
+ map_unlock(&map);
+ free(infos);
+
+ if (s->level == LEVEL_CONTAINER) {
+ /* No need to start. But we should signal udev to
+ * create links */
+ sysfs_uevent(&info, "change");
+ if (c->verbose >= 0)
+ pr_err("container %s prepared.\n", mddev);
+ wait_for(chosen_name, mdfd);
+ } else if (c->runstop == 1 || subdevs >= s->raiddisks) {
+ if (st->ss->external) {
+ int err;
+ switch(s->level) {
+ case LEVEL_LINEAR:
+ case LEVEL_MULTIPATH:
+ case 0:
+ err = sysfs_set_str(&info, NULL, "array_state",
+ c->readonly
+ ? "readonly"
+ : "active");
+ need_mdmon = 0;
+ break;
+ default:
+ err = sysfs_set_str(&info, NULL, "array_state",
+ "readonly");
+ break;
+ }
+ sysfs_set_safemode(&info, safe_mode_delay);
+ if (err) {
+ pr_err("failed to"
+ " activate array.\n");
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ goto abort;
+ }
+ } else if (c->readonly &&
+ sysfs_attribute_available(
+ &info, NULL, "array_state")) {
+ if (sysfs_set_str(&info, NULL,
+ "array_state", "readonly") < 0) {
+ pr_err("Failed to start array: %s\n",
+ strerror(errno));
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ goto abort;
+ }
+ } else {
+ /* param is not actually used */
+ mdu_param_t param;
+ if (ioctl(mdfd, RUN_ARRAY, &param)) {
+ pr_err("RUN_ARRAY failed: %s\n",
+ strerror(errno));
+ if (info.array.chunk_size & (info.array.chunk_size-1)) {
+ cont_err("Problem may be that "
+ "chunk size is not a power of 2\n");
+ }
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ goto abort;
+ }
+ /* if start_ro module parameter is set, array is
+ * auto-read-only, which is bad as the resync won't
+ * start. So lets make it read-write now.
+ */
+ ioctl(mdfd, RESTART_ARRAY_RW, NULL);
+ }
+ if (c->verbose >= 0)
+ pr_err("array %s started.\n", mddev);
+ if (st->ss->external && st->container_devnm[0]) {
+ if (need_mdmon)
+ start_mdmon(st->container_devnm);
+
+ ping_monitor(st->container_devnm);
+ close(container_fd);
+ }
+ wait_for(chosen_name, mdfd);
+ } else {
+ pr_err("not starting array - not enough devices.\n");
+ }
+ close(mdfd);
+ return 0;
+
+ abort:
+ map_lock(&map);
+ abort_locked:
+ map_remove(&map, fd2devnm(mdfd));
+ map_unlock(&map);
+
+ if (mdfd >= 0)
+ close(mdfd);
+ return 1;
+}
diff --git a/Detail.c b/Detail.c
new file mode 100644
index 00000000..c4fcad96
--- /dev/null
+++ b/Detail.c
@@ -0,0 +1,763 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include "md_p.h"
+#include "md_u.h"
+#include <dirent.h>
+
+/*
+ * qsort() comparison callback for an array of C strings: each argument
+ * points at an array element (a char *), and the pointed-to strings are
+ * ordered with strcmp().
+ */
+static int cmpstringp(const void *p1, const void *p2)
+{
+	const char *lhs = *(char * const *)p1;
+	const char *rhs = *(char * const *)p2;
+
+	return strcmp(lhs, rhs);
+}
+
+/*
+ * Append a private copy of 'dev' to the string array *p_devices.
+ *
+ * dev:           name to duplicate (with xstrdup) and store.
+ * p_devices:     address of the array; updated when the array is resized.
+ * p_max_devices: address of the allocated capacity; raised by 16 each
+ *                time the array is resized.
+ * n_devices:     number of entries already stored.
+ *
+ * Returns the new entry count (n_devices + 1), or 0 after resetting
+ * *p_max_devices to 0 if the resized array comes back NULL.
+ * NOTE(review): xrealloc presumably never returns NULL (aborts instead),
+ * which would make that branch unreachable - confirm.
+ */
+static int add_device(const char *dev, char ***p_devices,
+		      int *p_max_devices, int n_devices)
+{
+	int used_after = n_devices + 1;
+
+	if (used_after >= *p_max_devices) {
+		char **grown;
+
+		*p_max_devices += 16;
+		grown = xrealloc(*p_devices,
+				 *p_max_devices * sizeof(**p_devices));
+		*p_devices = grown;
+		if (grown == NULL) {
+			*p_max_devices = 0;
+			return 0;
+		}
+	}
+
+	(*p_devices)[n_devices] = xstrdup(dev);
+	return used_after;
+}
+
+/*
+ * Detail - print a description of the md array behind 'dev'.
+ *
+ * dev: path of the md device node to open and query.
+ * c:   invocation context; this function reads c->test, c->export,
+ *      c->brief, c->verbose, c->prefer and c->homehost.
+ *
+ * Output goes to stdout in one of three forms: KEY=value lines when
+ * c->export is set, a one-line "ARRAY ..." summary when c->brief is set,
+ * and a full multi-line report otherwise.
+ *
+ * Returns 0 on success. If the device cannot be opened or queried the
+ * return value is 4 when c->test is set and 1 otherwise. With c->test set
+ * on a successful query, 1 is OR-ed in when one of the first raid_disks
+ * table entries is not in sync, and the result is 2 when enough() reports
+ * that too few devices are available.
+ */
+int Detail(char *dev, struct context *c)
+{
+ /*
+ * Print out details for an md array by using
+ * GET_ARRAY_INFO and GET_DISK_INFO ioctl calls
+ */
+
+ int fd = open(dev, O_RDONLY);
+ int vers;
+ mdu_array_info_t array;
+ mdu_disk_info_t *disks;
+ int next;
+ int d;
+ time_t atime;
+ char *str;
+ char **devices = NULL;
+ int max_devices = 0, n_devices = 0;
+ int spares = 0;
+ struct stat stb;
+ int is_26 = get_linux_version() >= 2006000;
+ int is_rebuilding = 0;
+ int failed = 0;
+ struct supertype *st;
+ char *subarray = NULL;
+ int max_disks = MD_SB_DISKS; /* just a default */
+ int n_slots; /* number of entries allocated in disks[] */
+ struct mdinfo *info = NULL;
+ struct mdinfo *sra;
+ struct mdinfo *subdev;
+ char *member = NULL;
+ char *container = NULL;
+
+ int rv = c->test ? 4 : 1;
+ int avail_disks = 0;
+ char *avail = NULL;
+ int external;
+ int inactive;
+
+ if (fd < 0) {
+ pr_err("cannot open %s: %s\n",
+ dev, strerror(errno));
+ return rv;
+ }
+ vers = md_get_version(fd);
+ if (vers < 0) {
+ pr_err("%s does not appear to be an md device\n",
+ dev);
+ close(fd);
+ return rv;
+ }
+ if (vers < 9000) {
+ pr_err("cannot get detail for md device %s: driver version too old.\n",
+ dev);
+ close(fd);
+ return rv;
+ }
+ sra = sysfs_read(fd, NULL, GET_VERSION|GET_DEVS);
+ external = (sra != NULL && sra->array.major_version == -1
+ && sra->array.minor_version == -2);
+ st = super_by_fd(fd, &subarray);
+ if (ioctl(fd, GET_ARRAY_INFO, &array) == 0) {
+ inactive = 0;
+ } else if (errno == ENODEV && sra) {
+ array = sra->array;
+ inactive = 1;
+ } else {
+ pr_err("cannot get array detail for %s: %s\n",
+ dev, strerror(errno));
+ /* leave through the common exit so that sra and subarray
+ * are released as well as fd; rv still holds the error code.
+ */
+ goto out;
+ }
+
+ /* Forget the device number if fstat() failed OR the fd is not a
+ * block device. st_mode is only inspected after fstat() succeeded.
+ */
+ if (fstat(fd, &stb) != 0 || !S_ISBLK(stb.st_mode))
+ stb.st_rdev = 0;
+ rv = 0;
+
+ if (st)
+ max_disks = st->max_devs;
+
+ if (subarray) {
+ /* This is a subarray of some container.
+ * We want the name of the container, and the member
+ */
+ int devid = devnm2devid(st->container_devnm);
+ int cfd, err;
+
+ member = subarray;
+ container = map_dev_preferred(major(devid), minor(devid),
+ 1, c->prefer);
+ cfd = open_dev(st->container_devnm);
+ if (cfd >= 0) {
+ err = st->ss->load_container(st, cfd, NULL);
+ close(cfd);
+ if (err == 0)
+ info = st->ss->container_content(st, subarray);
+ }
+ }
+
+ /* try to load a superblock. Try sra->devs first, then try ioctl */
+ if (st && !info) for (d = 0, subdev = sra ? sra->devs : NULL;
+ d < max_disks || subdev;
+ subdev ? (void)(subdev = subdev->next) : (void)(d++)){
+ mdu_disk_info_t disk;
+ char *dv;
+ int fd2;
+ int err;
+ if (subdev)
+ disk = subdev->disk;
+ else {
+ disk.number = d;
+ if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ continue;
+ if (d >= array.raid_disks &&
+ disk.major == 0 &&
+ disk.minor == 0)
+ continue;
+ }
+
+ if (array.raid_disks > 0 &&
+ (disk.state & (1 << MD_DISK_ACTIVE)) == 0)
+ continue;
+
+ dv = map_dev(disk.major, disk.minor, 1);
+ if (!dv)
+ continue;
+
+ fd2 = dev_open(dv, O_RDONLY);
+ if (fd2 < 0)
+ continue;
+
+ if (st->sb)
+ st->ss->free_super(st);
+
+ err = st->ss->load_super(st, fd2, NULL);
+ close(fd2);
+ if (err)
+ continue;
+ if (info)
+ free(info);
+ if (subarray)
+ info = st->ss->container_content(st, subarray);
+ else {
+ info = xmalloc(sizeof(*info));
+ st->ss->getinfo_super(st, info, NULL);
+ }
+ if (!info)
+ continue;
+
+ if (array.raid_disks != 0 && /* container */
+ (info->array.ctime != array.ctime ||
+ info->array.level != array.level)) {
+ st->ss->free_super(st);
+ continue;
+ }
+ /* some formats (imsm) have free-floating-spares
+ * with a uuid of uuid_zero, they don't
+ * have very good info about the rest of the
+ * container, so keep searching when
+ * encountering such a device. Otherwise, stop
+ * after the first successful call to
+ * ->load_super.
+ */
+ if (memcmp(uuid_zero,
+ info->uuid,
+ sizeof(uuid_zero)) == 0) {
+ st->ss->free_super(st);
+ continue;
+ }
+ break;
+ }
+
+ /* Ok, we have some info to print... */
+ str = map_num(pers, array.level);
+
+ if (c->export) {
+ if (array.raid_disks) {
+ if (str)
+ printf("MD_LEVEL=%s\n", str);
+ printf("MD_DEVICES=%d\n", array.raid_disks);
+ } else {
+ if (!inactive)
+ printf("MD_LEVEL=container\n");
+ printf("MD_DEVICES=%d\n", array.nr_disks);
+ }
+ if (container) {
+ printf("MD_CONTAINER=%s\n", container);
+ printf("MD_MEMBER=%s\n", member);
+ } else {
+ if (sra && sra->array.major_version < 0)
+ printf("MD_METADATA=%s\n", sra->text_version);
+ else
+ printf("MD_METADATA=%d.%d\n",
+ array.major_version, array.minor_version);
+ }
+
+ if (st && st->sb && info) {
+ char nbuf[64];
+ struct map_ent *mp, *map = NULL;
+
+ fname_from_uuid(st, info, nbuf, ':');
+ printf("MD_UUID=%s\n", nbuf+5);
+ mp = map_by_uuid(&map, info->uuid);
+ if (mp && mp->path &&
+ strncmp(mp->path, "/dev/md/", 8) == 0) {
+ printf("MD_DEVNAME=");
+ print_escape(mp->path+8);
+ putchar('\n');
+ }
+
+ if (st->ss->export_detail_super)
+ st->ss->export_detail_super(st);
+ } else {
+ struct map_ent *mp, *map = NULL;
+ char nbuf[64];
+ mp = map_by_devnm(&map, fd2devnm(fd));
+ if (mp) {
+ __fname_from_uuid(mp->uuid, 0, nbuf, ':');
+ printf("MD_UUID=%s\n", nbuf+5);
+ }
+ if (mp && mp->path &&
+ strncmp(mp->path, "/dev/md/", 8) == 0) {
+ printf("MD_DEVNAME=");
+ print_escape(mp->path+8);
+ putchar('\n');
+ }
+ }
+ if (sra) {
+ struct mdinfo *mdi;
+ for (mdi = sra->devs; mdi; mdi = mdi->next) {
+ char *path =
+ map_dev(mdi->disk.major,
+ mdi->disk.minor, 0);
+
+ if (mdi->disk.raid_disk >= 0)
+ printf("MD_DEVICE_%s_ROLE=%d\n",
+ mdi->sys_name+4,
+ mdi->disk.raid_disk);
+ else
+ printf("MD_DEVICE_%s_ROLE=spare\n",
+ mdi->sys_name+4);
+ if (path)
+ printf("MD_DEVICE_%s_DEV=%s\n",
+ mdi->sys_name+4, path);
+ }
+ }
+ /* the non-export path frees info further down; this path
+ * jumps past that, so release it here.
+ */
+ free(info);
+ goto out;
+ }
+
+ /* disks[] holds two entries per raid slot (index 2*n and 2*n+1)
+ * followed by every other device. Size it from whichever of
+ * raid_disks and max_disks is larger, doubled, so that none of
+ * the indexing below can run past the allocation.
+ */
+ n_slots = max_disks;
+ if (array.raid_disks > n_slots)
+ n_slots = array.raid_disks;
+ n_slots *= 2;
+
+ disks = xmalloc(n_slots * sizeof(mdu_disk_info_t));
+ for (d = 0; d < n_slots; d++) {
+ disks[d].state = (1<<MD_DISK_REMOVED);
+ disks[d].major = disks[d].minor = 0;
+ disks[d].number = disks[d].raid_disk = d;
+ }
+
+ next = array.raid_disks*2;
+ if (inactive) {
+ struct mdinfo *mdi;
+ if (sra != NULL)
+ /* stop once the table is full rather than overrun it */
+ for (mdi = sra->devs; mdi && next < n_slots; mdi = mdi->next) {
+ disks[next++] = mdi->disk;
+ disks[next-1].number = -1;
+ }
+ } else for (d = 0; d < max_disks; d++) {
+ mdu_disk_info_t disk;
+ disk.number = d;
+ if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
+ if (d < array.raid_disks)
+ pr_err("cannot get device detail for device %d: %s\n",
+ d, strerror(errno));
+ continue;
+ }
+ if (disk.major == 0 && disk.minor == 0)
+ continue;
+ if (disk.raid_disk >= 0 && disk.raid_disk < array.raid_disks
+ && disks[disk.raid_disk*2].state == (1<<MD_DISK_REMOVED))
+ disks[disk.raid_disk*2] = disk;
+ else if (disk.raid_disk >= 0 && disk.raid_disk < array.raid_disks
+ && disks[disk.raid_disk*2+1].state == (1<<MD_DISK_REMOVED))
+ disks[disk.raid_disk*2+1] = disk;
+ else if (next < n_slots)
+ disks[next++] = disk;
+ }
+
+ avail = xcalloc(array.raid_disks, 1);
+
+ /* a raid slot counts as available if either of its two entries is in sync */
+ for (d= 0; d < array.raid_disks; d++) {
+
+ if ((disks[d*2].state & (1<<MD_DISK_SYNC)) ||
+ (disks[d*2+1].state & (1<<MD_DISK_SYNC))) {
+ avail_disks ++;
+ avail[d] = 1;
+ }
+ }
+
+ if (c->brief) {
+ mdu_bitmap_file_t bmf;
+ printf("%sARRAY %s", inactive ? "INACTIVE-":"", dev);
+ if (c->verbose > 0) {
+ if (array.raid_disks)
+ printf(" level=%s num-devices=%d",
+ str?str:"-unknown-",
+ array.raid_disks );
+ else if (!inactive)
+ printf(" level=container num-devices=%d",
+ array.nr_disks);
+ else
+ printf(" num-devices=%d", array.nr_disks);
+ }
+ if (container) {
+ printf(" container=%s", container);
+ printf(" member=%s", member);
+ } else {
+ if (sra && sra->array.major_version < 0)
+ printf(" metadata=%s", sra->text_version);
+ else
+ printf(" metadata=%d.%d",
+ array.major_version, array.minor_version);
+ }
+
+ /* Only try GET_BITMAP_FILE for 0.90.01 and later */
+ if (vers >= 9001 &&
+ ioctl(fd, GET_BITMAP_FILE, &bmf) == 0 &&
+ bmf.pathname[0]) {
+ printf(" bitmap=%s", bmf.pathname);
+ }
+ } else {
+ mdu_bitmap_file_t bmf;
+ unsigned long long larray_size;
+ struct mdstat_ent *ms = mdstat_read(0, 0);
+ struct mdstat_ent *e;
+ char *devnm;
+
+ devnm = stat2devnm(&stb);
+ /* NOTE(review): stat2devnm() is assumed to be able to return
+ * NULL (e.g. when st_rdev was cleared above); in that case no
+ * mdstat entry is matched and e ends up NULL.
+ */
+ for (e=ms; e; e=e->next)
+ if (devnm && strcmp(e->devnm, devnm) == 0)
+ break;
+ if (!get_dev_size(fd, NULL, &larray_size))
+ larray_size = 0;
+
+ printf("%s:\n", dev);
+
+ if (container)
+ printf(" Container : %s, member %s\n", container, member);
+ else {
+ if (sra && sra->array.major_version < 0)
+ printf(" Version : %s\n", sra->text_version);
+ else
+ printf(" Version : %d.%d\n",
+ array.major_version, array.minor_version);
+ }
+
+ atime = array.ctime;
+ if (atime)
+ printf(" Creation Time : %.24s\n", ctime(&atime));
+ if (array.raid_disks == 0 && external)
+ str = "container";
+ if (str)
+ printf(" Raid Level : %s\n", str);
+ if (larray_size)
+ printf(" Array Size : %llu%s\n", (larray_size>>10),
+ human_size(larray_size));
+ if (array.level >= 1) {
+ if (sra)
+ array.major_version = sra->array.major_version;
+ if (array.major_version != 0 &&
+ (larray_size >= 0xFFFFFFFFULL|| array.size == 0)) {
+ unsigned long long dsize = get_component_size(fd);
+ if (dsize > 0)
+ printf(" Used Dev Size : %llu%s\n",
+ dsize/2,
+ human_size((long long)dsize<<9));
+ else
+ printf(" Used Dev Size : unknown\n");
+ } else
+ printf(" Used Dev Size : %d%s\n", array.size,
+ human_size((long long)array.size<<10));
+ }
+ if (array.raid_disks)
+ printf(" Raid Devices : %d\n", array.raid_disks);
+ printf(" Total Devices : %d\n", array.nr_disks);
+ if (!container &&
+ ((sra == NULL && array.major_version == 0) ||
+ (sra && sra->array.major_version == 0)))
+ printf("Preferred Minor : %d\n", array.md_minor);
+ if (sra == NULL || sra->array.major_version >= 0)
+ printf(" Persistence : Superblock is %spersistent\n",
+ array.not_persistent?"not ":"");
+ printf("\n");
+ /* Only try GET_BITMAP_FILE for 0.90.01 and later */
+ if (vers >= 9001 &&
+ ioctl(fd, GET_BITMAP_FILE, &bmf) == 0 &&
+ bmf.pathname[0]) {
+ printf(" Intent Bitmap : %s\n", bmf.pathname);
+ printf("\n");
+ } else if (array.state & (1<<MD_SB_BITMAP_PRESENT))
+ printf(" Intent Bitmap : Internal\n\n");
+ atime = array.utime;
+ if (atime)
+ printf(" Update Time : %.24s\n", ctime(&atime));
+ if (array.raid_disks) {
+ static char *sync_action[] = {
+ ", recovering", ", resyncing",
+ ", reshaping", ", checking" };
+ char *st;
+ if (avail_disks == array.raid_disks)
+ st = "";
+ else if (!enough(array.level, array.raid_disks,
+ array.layout, 1, avail))
+ st = ", FAILED";
+ else
+ st = ", degraded";
+
+ printf(" State : %s%s%s%s%s%s \n",
+ (array.state&(1<<MD_SB_CLEAN))?"clean":"active", st,
+ (!e || (e->percent < 0 && e->percent != RESYNC_PENDING &&
+ e->percent != RESYNC_DELAYED)) ? "" : sync_action[e->resync],
+ larray_size ? "": ", Not Started",
+ (e && e->percent == RESYNC_DELAYED) ? " (DELAYED)": "",
+ (e && e->percent == RESYNC_PENDING) ? " (PENDING)": "");
+ } else if (inactive) {
+ printf(" State : inactive\n");
+ }
+ if (array.raid_disks)
+ printf(" Active Devices : %d\n", array.active_disks);
+ if (array.working_disks > 0)
+ printf("Working Devices : %d\n", array.working_disks);
+ if (array.raid_disks) {
+ printf(" Failed Devices : %d\n", array.failed_disks);
+ printf(" Spare Devices : %d\n", array.spare_disks);
+ }
+ printf("\n");
+ if (array.level == 5) {
+ str = map_num(r5layout, array.layout);
+ printf(" Layout : %s\n", str?str:"-unknown-");
+ }
+ if (array.level == 6) {
+ str = map_num(r6layout, array.layout);
+ printf(" Layout : %s\n", str?str:"-unknown-");
+ }
+ if (array.level == 10) {
+ printf(" Layout :");
+ print_r10_layout(array.layout);
+ printf("\n");
+ }
+ switch (array.level) {
+ case 0:
+ case 4:
+ case 5:
+ case 10:
+ case 6:
+ if (array.chunk_size)
+ printf(" Chunk Size : %dK\n\n",
+ array.chunk_size/1024);
+ break;
+ case -1:
+ printf(" Rounding : %dK\n\n", array.chunk_size/1024);
+ break;
+ default: break;
+ }
+
+ if (e && e->percent >= 0) {
+ static char *sync_action[] = {
+ "Rebuild", "Resync",
+ "Reshape", "Check"};
+ printf(" %7s Status : %d%% complete\n", sync_action[e->resync], e->percent);
+ is_rebuilding = 1;
+ }
+ free_mdstat(ms);
+
+ if ((st && st->sb) && (info && info->reshape_active)) {
+#if 0
+This is pretty boring
+ printf(" Reshape pos'n : %llu%s\n", (unsigned long long) info->reshape_progress<<9,
+ human_size((unsigned long long)info->reshape_progress<<9));
+#endif
+ if (info->delta_disks != 0)
+ printf(" Delta Devices : %d, (%d->%d)\n",
+ info->delta_disks,
+ array.raid_disks - info->delta_disks,
+ array.raid_disks);
+ if (info->new_level != array.level) {
+ str = map_num(pers, info->new_level);
+ printf(" New Level : %s\n", str?str:"-unknown-");
+ }
+ if (info->new_level != array.level ||
+ info->new_layout != array.layout) {
+ if (info->new_level == 5) {
+ str = map_num(r5layout, info->new_layout);
+ printf(" New Layout : %s\n",
+ str?str:"-unknown-");
+ }
+ if (info->new_level == 6) {
+ str = map_num(r6layout, info->new_layout);
+ printf(" New Layout : %s\n",
+ str?str:"-unknown-");
+ }
+ if (info->new_level == 10) {
+ printf(" New Layout : near=%d, %s=%d\n",
+ info->new_layout&255,
+ (info->new_layout&0x10000)?"offset":"far",
+ (info->new_layout>>8)&255);
+ }
+ }
+ if (info->new_chunk != array.chunk_size)
+ printf(" New Chunksize : %dK\n", info->new_chunk/1024);
+ printf("\n");
+ } else if (e && e->percent >= 0)
+ printf("\n");
+ if (st && st->sb)
+ st->ss->detail_super(st, c->homehost);
+
+ if (array.raid_disks == 0 && sra && sra->array.major_version == -1
+ && sra->array.minor_version == -2 && sra->text_version[0] != '/') {
+ /* This looks like a container. Find any active arrays
+ * That claim to be a member.
+ */
+ DIR *dir = opendir("/sys/block");
+ struct dirent *de;
+
+ printf(" Member Arrays :");
+
+ while (dir && (de = readdir(dir)) != NULL) {
+ char path[200];
+ char vbuf[1024];
+ int nlen = strlen(sra->sys_name);
+ int devid;
+ if (de->d_name[0] == '.')
+ continue;
+ sprintf(path, "/sys/block/%s/md/metadata_version",
+ de->d_name);
+ if (load_sys(path, vbuf) < 0)
+ continue;
+ if (strncmp(vbuf, "external:", 9) != 0 ||
+ !is_subarray(vbuf+9) ||
+ strncmp(vbuf+10, sra->sys_name, nlen) != 0 ||
+ vbuf[10+nlen] != '/')
+ continue;
+ devid = devnm2devid(de->d_name);
+ printf(" %s", map_dev_preferred(
+ major(devid),
+ minor(devid), 1, c->prefer));
+ }
+ if (dir)
+ closedir(dir);
+ printf("\n\n");
+ }
+
+ if (array.raid_disks)
+ printf(" Number Major Minor RaidDevice State\n");
+ else
+ printf(" Number Major Minor RaidDevice\n");
+ }
+ free(info);
+
+ /* walk the whole table, not just the first max_disks entries */
+ for (d= 0; d < n_slots; d++) {
+ char *dv;
+ mdu_disk_info_t disk = disks[d];
+
+ if (d >= array.raid_disks*2 &&
+ disk.major == 0 &&
+ disk.minor == 0)
+ continue;
+ if ((d & 1) &&
+ disk.major == 0 &&
+ disk.minor == 0)
+ continue;
+ if (!c->brief) {
+ if (d == array.raid_disks*2) printf("\n");
+ if (disk.number < 0)
+ printf(" - %5d %5d - ",
+ disk.major, disk.minor);
+ else if (disk.raid_disk < 0)
+ printf(" %5d %5d %5d - ",
+ disk.number, disk.major, disk.minor);
+ else
+ printf(" %5d %5d %5d %5d ",
+ disk.number, disk.major, disk.minor, disk.raid_disk);
+ }
+ if (!c->brief && array.raid_disks) {
+
+ if (disk.state & (1<<MD_DISK_FAULTY)) {
+ printf(" faulty");
+ if (disk.raid_disk < array.raid_disks &&
+ disk.raid_disk >= 0)
+ failed++;
+ }
+ if (disk.state & (1<<MD_DISK_ACTIVE)) printf(" active");
+ if (disk.state & (1<<MD_DISK_SYNC)) {
+ printf(" sync");
+ if (array.level == 10 && (array.layout & ~0x1FFFF) == 0) {
+ int nc = array.layout & 0xff;
+ int fc = (array.layout >> 8) & 0xff;
+ int copies = nc*fc;
+ if (fc == 1 && array.raid_disks % copies == 0 && copies <= 26) {
+ /* We can divide the devices into 'sets' */
+ int set = disk.raid_disk % copies;
+ printf(" set-%c", set + 'A');
+ }
+ }
+ }
+ if (disk.state & (1<<MD_DISK_REMOVED)) printf(" removed");
+ if (disk.state & (1<<MD_DISK_WRITEMOSTLY)) printf(" writemostly");
+ if ((disk.state &
+ ((1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC)
+ |(1<<MD_DISK_REMOVED)|(1<<MD_DISK_FAULTY)))
+ == 0) {
+ printf(" spare");
+ if (is_26) {
+ if (disk.raid_disk < array.raid_disks && disk.raid_disk >= 0)
+ printf(" rebuilding");
+ } else if (is_rebuilding && failed) {
+ /* Taking a bit of a risk here, we remove the
+ * device from the array, and then put it back.
+ * If this fails, we are rebuilding
+ */
+ int err = ioctl(fd, HOT_REMOVE_DISK, makedev(disk.major, disk.minor));
+ if (err == 0) ioctl(fd, HOT_ADD_DISK, makedev(disk.major, disk.minor));
+ if (err && errno == EBUSY)
+ printf(" rebuilding");
+ }
+ }
+ }
+ if (disk.state == 0) spares++;
+ if (c->test && d < array.raid_disks
+ && !(disk.state & (1<<MD_DISK_SYNC)))
+ rv |= 1;
+ dv=map_dev_preferred(disk.major, disk.minor, 0, c->prefer);
+ if (dv != NULL) {
+ if (c->brief)
+ n_devices = add_device(dv, &devices,
+ &max_devices,
+ n_devices);
+ else
+ printf(" %s", dv);
+ }
+ if (!c->brief) printf("\n");
+ }
+ if (spares && c->brief && array.raid_disks) printf(" spares=%d", spares);
+ if (c->brief && st && st->sb)
+ st->ss->brief_detail_super(st);
+ if (st)
+ st->ss->free_super(st);
+
+ if (c->brief && c->verbose > 0 && devices) {
+ qsort(devices, n_devices, sizeof(*devices), cmpstringp);
+ printf("\n devices=%s", devices[0]);
+ for (d = 1; d < n_devices; d++)
+ printf(",%s", devices[d]);
+ }
+ if (c->brief)
+ printf("\n");
+ if (c->test &&
+ !enough(array.level, array.raid_disks, array.layout,
+ 1, avail))
+ rv = 2;
+
+ free(disks);
+out:
+ /* common exit: everything released here is either valid or NULL/empty */
+ close(fd);
+ free(subarray);
+ free(avail);
+ for (d = 0; d < n_devices; d++)
+ free(devices[d]);
+ free(devices);
+ sysfs_free(sra);
+ return rv;
+}
+
+/*
+ * Detail_Platform - display platform capabilities for a metadata format.
+ *
+ * ss:              metadata handler to report on, or NULL.
+ * scan:            when non-zero, also walk every handler in superlist[]
+ *                  other than 'ss'.
+ * verbose:         passed to the handlers; informational messages are only
+ *                  printed when it is > 0.
+ * export:          when non-zero, prefer a handler's export_detail_platform
+ *                  method over detail_platform if it has one.
+ * controller_path: passed through to the handler methods unchanged.
+ *
+ * Without 'scan' the result is whatever the selected handler method
+ * returned, or 1 if no method was called. With 'scan' the result is the
+ * bitwise OR of the values returned while walking superlist[].
+ */
+int Detail_Platform(struct superswitch *ss, int scan, int verbose, int export, char *controller_path)
+{
+	int err = 1;
+	int idx;
+
+	if (ss) {
+		if (export && ss->export_detail_platform)
+			err = ss->export_detail_platform(verbose, controller_path);
+		else if (ss->detail_platform)
+			err = ss->detail_platform(verbose, 0, controller_path);
+		else if (verbose > 0)
+			pr_err("%s metadata is platform independent\n",
+			       ss->name ? : "[no name]");
+	} else if (!scan && verbose > 0)
+		pr_err("specify a metadata type or --scan\n");
+
+	if (!scan)
+		return err;
+
+	/* scanning: the result now only reflects the handlers visited below */
+	err = 0;
+	for (idx = 0; superlist[idx]; idx++) {
+		struct superswitch *meta = superlist[idx];
+
+		/* the explicitly requested handler was dealt with above */
+		if (meta == ss)
+			continue;
+
+		if (verbose > 0)
+			pr_err("checking metadata %s\n",
+			       meta->name ? : "[no name]");
+
+		if (!meta->detail_platform) {
+			if (verbose > 0)
+				pr_err("%s metadata is platform independent\n",
+				       meta->name ? : "[no name]");
+			continue;
+		}
+
+		if (export && meta->export_detail_platform)
+			err |= meta->export_detail_platform(verbose, controller_path);
+		else
+			err |= meta->detail_platform(verbose, 0, controller_path);
+	}
+
+	return err;
+}
diff --git a/Dump.c b/Dump.c
new file mode 100644
index 00000000..7bdbf6f7
--- /dev/null
+++ b/Dump.c
@@ -0,0 +1,311 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2013 Neil Brown <neilb@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include <sys/dir.h>
+
+int Dump_metadata(char *dev, char *dir, struct context *c,
+ struct supertype *st)
+{
+ /* create a new file in 'dir' named for the basename of 'dev'.
+ * Truncate to the same size as 'dev' and ask the metadata
+ * handler to copy metadata there.
+ * For every name in /dev/disk/by-id that points to this device,
+ * create a hardlink in 'dir'.
+ * Complain if any of those hardlinks cannot be created.
+ *
+ * 'st' may be NULL, in which case the metadata type is guessed
+ * from the device.  'c' is only consulted for its verbosity.
+ *
+ * Returns 16 if 'dir' is not an existing directory, 1 if the
+ * dump file could not be produced, and 0 otherwise.  A hardlink
+ * that cannot be created is reported but does not change the
+ * return value.
+ */
+ int fd, fl;
+ struct stat stb, dstb;
+ char *base;
+ char *fname = NULL;
+ unsigned long long size;
+ DIR *dirp;
+ struct dirent *de;
+
+ if (stat(dir, &stb) != 0 ||
+ (S_IFMT & stb.st_mode) != S_IFDIR) {
+ pr_err("--dump requires an existing directory, not: %s\n",
+ dir);
+ return 16;
+ }
+
+ fd = dev_open(dev, O_RDONLY);
+ if (fd < 0) {
+ pr_err("Cannot open %s to dump metadata: %s\n",
+ dev, strerror(errno));
+ return 1;
+ }
+ if (!get_dev_size(fd, dev, &size)) {
+ close(fd);
+ return 1;
+ }
+
+ if (st == NULL)
+ st = guess_super_type(fd, guess_array);
+ if (!st) {
+ pr_err("Cannot find RAID metadata on %s\n", dev);
+ close(fd);
+ return 1;
+ }
+
+ st->ignore_hw_compat = 1;
+ if (st->ss->load_super(st, fd, NULL) != 0) {
+ pr_err("No %s metadata found on %s\n",
+ st->ss->name, dev);
+ close(fd);
+ return 1;
+ }
+ if (st->ss->copy_metadata == NULL) {
+ pr_err("%s metadata on %s cannot be copied\n",
+ st->ss->name, dev);
+ close(fd);
+ return 1;
+ }
+
+ base = strrchr(dev, '/'); /* dump file is named after the last path component of 'dev' */
+ if (base)
+ base++;
+ else
+ base = dev;
+ xasprintf(&fname, "%s/%s", dir, base);
+ fl = open(fname, O_RDWR|O_CREAT|O_EXCL, 0666); /* O_EXCL: an existing dump file is never overwritten */
+ if (fl < 0) {
+ pr_err("Cannot create dump file %s: %s\n",
+ fname, strerror(errno));
+ close(fd);
+ free(fname);
+ return 1;
+ }
+ if (ftruncate(fl, size) < 0) { /* NOTE(review): the newly created file is left in place on this path */
+ pr_err("failed to set size of dump file: %s\n",
+ strerror(errno));
+ close(fd);
+ close(fl);
+ free(fname);
+ return 1;
+ }
+
+ if (st->ss->copy_metadata(st, fd, fl) != 0) {
+ pr_err("Failed to copy metadata from %s to %s\n",
+ dev, fname);
+ close(fd);
+ close(fl);
+ unlink(fname); /* do not leave a partial dump behind */
+ free(fname);
+ return 1;
+ }
+ if (c->verbose >= 0)
+ printf("%s saved as %s.\n", dev, fname);
+ fstat(fd, &dstb); /* device identity for the by-id search below; result is not checked */
+ close(fd);
+ close(fl);
+ if ((dstb.st_mode & S_IFMT) != S_IFBLK) {
+ /* Not a block device, so cannot create links */
+ free(fname);
+ return 0;
+ }
+ /* mostly done: just want to find some other names */
+ dirp = opendir("/dev/disk/by-id");
+ if (!dirp) { /* no by-id directory: the dump itself succeeded, so still return 0 */
+ free(fname);
+ return 0;
+ }
+ while ((de = readdir(dirp)) != NULL) {
+ char *p = NULL;
+ if (de->d_name[0] == '.')
+ continue;
+ xasprintf(&p, "/dev/disk/by-id/%s", de->d_name);
+ if (stat(p, &stb) != 0 ||
+ (stb.st_mode & S_IFMT) != S_IFBLK ||
+ stb.st_rdev != dstb.st_rdev) {
+ /* Not this one */
+ free(p);
+ continue;
+ }
+ free(p);
+ xasprintf(&p, "%s/%s", dir, de->d_name); /* same by-id name, but inside the dump directory */
+ if (link(fname, p) == 0) {
+ if (c->verbose >= 0)
+ printf("%s also saved as %s.\n",
+ dev, p);
+ } else {
+ pr_err("Could not save %s as %s!!\n",
+ dev, p);
+ }
+ free(p);
+ }
+ closedir(dirp);
+ free(fname);
+ return 0;
+}
+
+int Restore_metadata(char *dev, char *dir, struct context *c,
+		     struct supertype *st, int only)
+{
+	/* If 'dir' really is a directory we choose a name
+	 * from it that matches a suitable name in /dev/disk/by-id,
+	 * and copy metadata from the file to the device.
+	 * If two names from by-id match and aren't both the same
+	 * inode, we fail.  If none match and basename of 'dev'
+	 * can be found in dir, use that.
+	 * If 'dir' is really a file then it is only permitted if
+	 * 'only' is set (meaning there was only one device given)
+	 * and the metadata is restored irrespective of file names.
+	 *
+	 * 'st' may be NULL, in which case the metadata type is guessed
+	 * from the chosen file.  'c' is only consulted for its verbosity.
+	 *
+	 * Returns 16 when 'dir' is unusable, 1 on any other failure and
+	 * 0 once the metadata has been copied to 'dev'.  Both descriptors
+	 * and the chosen file name are released on every path.
+	 */
+	int fd, fl = -1;
+	int rv = 1;
+	struct stat stb, dstb;
+	char *fname = NULL;
+	unsigned long long size;
+
+	if (stat(dir, &stb) != 0) {
+		pr_err("%s does not exist: cannot restore from there.\n",
+		       dir);
+		return 16;
+	} else if ((S_IFMT & stb.st_mode) != S_IFDIR && !only) {
+		pr_err("--restore requires a directory when multiple devices given\n");
+		return 16;
+	}
+
+	fd = dev_open(dev, O_RDWR);
+	if (fd < 0) {
+		pr_err("Cannot open %s to restore metadata: %s\n",
+		       dev, strerror(errno));
+		return 1;
+	}
+	if (!get_dev_size(fd, dev, &size))
+		goto out;
+
+	if ((S_IFMT & stb.st_mode) == S_IFDIR) {
+		/* choose one name from the directory. */
+		DIR *d;
+		struct dirent *de;
+		char *chosen = NULL;
+		ino_t chosen_inode = 0;
+
+		/* dstb identifies the target device for the by-id matching */
+		if (fstat(fd, &dstb) != 0) {
+			pr_err("Cannot stat %s: %s\n", dev, strerror(errno));
+			goto out;
+		}
+
+		d = opendir(dir);
+		while (d && (de = readdir(d)) != NULL) {
+			char *cand = NULL;
+
+			if (de->d_name[0] == '.')
+				continue;
+			xasprintf(&cand, "/dev/disk/by-id/%s", de->d_name);
+			if (stat(cand, &stb) != 0) {
+				free(cand);
+				continue;
+			}
+			free(cand);
+			if ((S_IFMT & stb.st_mode) != S_IFBLK)
+				continue;
+			if (stb.st_rdev != dstb.st_rdev)
+				continue;
+			/* This file is a good match for our device. */
+			xasprintf(&cand, "%s/%s", dir, de->d_name);
+			if (stat(cand, &stb) != 0) {
+				/* Weird! */
+				free(cand);
+				continue;
+			}
+			if (chosen == NULL) {
+				chosen = cand;
+				chosen_inode = stb.st_ino;
+				continue;
+			}
+			if (chosen_inode == stb.st_ino) {
+				/* same, no need to change */
+				free(cand);
+				continue;
+			}
+			/* Oh dear, two names both match.  Must give up. */
+			pr_err("Both %s and %s seem suitable for %s.  Please choose one.\n",
+			       chosen, cand, dev);
+			free(cand);
+			free(chosen);
+			closedir(d);
+			goto out;
+		}
+		/* opendir() may have failed; closedir(NULL) is not allowed */
+		if (d)
+			closedir(d);
+		if (!chosen) {
+			/* One last chance: try basename of device */
+			char *base = strrchr(dev, '/');
+			char *cand = NULL;
+
+			if (base)
+				base++;
+			else
+				base = dev;
+			xasprintf(&cand, "%s/%s", dir, base);
+			if (stat(cand, &stb) == 0)
+				chosen = cand;
+			else
+				free(cand);
+		}
+		fname = chosen;
+	} else
+		fname = strdup(dir);
+
+	if (!fname) {
+		pr_err("Cannot find suitable file in %s for %s\n",
+		       dir, dev);
+		goto out;
+	}
+
+	fl = open(fname, O_RDONLY);
+	if (fl < 0) {
+		/* open() reports failure with -1; 0 is a valid descriptor */
+		pr_err("Could not open %s for --restore.\n",
+		       fname);
+		goto out;
+	}
+	/* Take the size from the file actually opened: 'stb' may have been
+	 * overwritten by later directory entries while scanning above.
+	 */
+	if (fstat(fl, &stb) != 0 ||
+	    ((unsigned long long)stb.st_size) != size) {
+		pr_err("%s is not the same size as %s - cannot restore.\n",
+		       fname, dev);
+		goto out;
+	}
+	if (st == NULL)
+		st = guess_super_type(fl, guess_array);
+	if (!st) {
+		pr_err("Cannot find metadata on %s\n", fname);
+		goto out;
+	}
+	st->ignore_hw_compat = 1;
+	if (st->ss->load_super(st, fl, NULL) != 0) {
+		pr_err("No %s metadata found on %s\n",
+		       st->ss->name, fname);
+		goto out;
+	}
+	if (st->ss->copy_metadata == NULL) {
+		pr_err("%s metadata on %s cannot be copied\n",
+		       st->ss->name, dev);
+		goto out;
+	}
+	if (st->ss->copy_metadata(st, fl, fd) != 0) {
+		pr_err("Failed to copy metadata from %s to %s\n",
+		       fname, dev);
+		goto out;
+	}
+	if (c->verbose >= 0)
+		printf("%s restored from %s.\n", dev, fname);
+	rv = 0;
+
+out:
+	close(fd);
+	if (fl >= 0)
+		close(fl);
+	free(fname);
+	return rv;
+}
diff --git a/Examine.c b/Examine.c
new file mode 100644
index 00000000..953b8eee
--- /dev/null
+++ b/Examine.c
@@ -0,0 +1,225 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include "dlink.h"
+
+#if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
+#error no endian defined
+#endif
+#include "md_u.h"
+#include "md_p.h"
+int Examine(struct mddev_dev *devlist,
+ struct context *c,
+ struct supertype *forcest)
+{
+
+ /* Read the raid superblock from a device and
+ * display important content.
+ *
+ * If cannot be found, print reason: too small, bad magic
+ *
+ * Print:
+ * version, ctime, level, size, raid+spare+
+ * preferred minor
+ * uuid
+ *
+ * utime, state etc
+ *
+ * If (brief) gather devices for same array and just print a mdadm.conf
+ * line including devices=
+ * if devlist==NULL, use conf_get_devs()
+ *
+ * 'forcest', when given, is duplicated for each device instead of
+ * guessing the metadata type.
+ *
+ * Returns 1 if a device could not be opened (and c->scan is not set)
+ * or no superblock was detected on a device (and c->brief is not
+ * set); otherwise 0.
+ */
+ int fd;
+ int rv = 0;
+ int err = 0;
+
+ struct array {
+ struct supertype *st;
+ struct mdinfo info;
+ void *devs;
+ struct array *next;
+ int spares;
+ } *arrays = NULL; /* one entry per distinct array seen; only used in brief mode */
+
+ for (; devlist ; devlist = devlist->next) {
+ struct supertype *st;
+ int have_container = 0;
+
+ fd = dev_open(devlist->devname, O_RDONLY);
+ if (fd < 0) {
+ if (!c->scan) {
+ pr_err("cannot open %s: %s\n",
+ devlist->devname, strerror(errno));
+ rv = 1;
+ }
+ err = 1; /* skip this device; 'st' is not set on this path */
+ }
+ else {
+ int container = 0;
+ if (forcest)
+ st = dup_super(forcest);
+ else if (must_be_container(fd)) {
+ /* might be a container */
+ st = super_by_fd(fd, NULL);
+ container = 1;
+ } else
+ st = guess_super(fd);
+ if (st) {
+ err = 1; /* stays set unless one of the loaders below succeeds */
+ st->ignore_hw_compat = 1;
+ if (!container)
+ err = st->ss->load_super(st, fd,
+ (c->brief||c->scan) ? NULL
+ :devlist->devname);
+ if (err && st->ss->load_container) {
+ err = st->ss->load_container(st, fd,
+ (c->brief||c->scan) ? NULL
+ :devlist->devname);
+ if (!err)
+ have_container = 1;
+ }
+ st->ignore_hw_compat = 0;
+ } else {
+ if (!c->brief) {
+ pr_err("No md superblock detected on %s.\n", devlist->devname);
+ rv = 1;
+ }
+ err = 1;
+ }
+ close(fd);
+ }
+ if (err)
+ continue;
+
+ if (c->SparcAdjust)
+ st->ss->update_super(st, NULL, "sparc2.2",
+ devlist->devname, 0, 0, NULL);
+ /* Ok, its good enough to try, though the checksum could be wrong */
+
+ if (c->brief && st->ss->brief_examine_super == NULL) {
+ if (!c->scan)
+ pr_err("No brief listing for %s on %s\n",
+ st->ss->name, devlist->devname);
+ } else if (c->brief) {
+ struct array *ap;
+ char *d;
+ for (ap = arrays; ap; ap = ap->next) { /* look for an array this device already belongs to */
+ if (st->ss == ap->st->ss &&
+ st->ss->compare_super(ap->st, st) == 0)
+ break;
+ }
+ if (!ap) {
+ ap = xmalloc(sizeof(*ap));
+ ap->devs = dl_head();
+ ap->next = arrays;
+ ap->spares = 0;
+ ap->st = st; /* the list entry keeps this supertype until the summary below */
+ arrays = ap;
+ st->ss->getinfo_super(st, &ap->info, NULL);
+ } else
+ st->ss->getinfo_super(st, &ap->info, NULL); /* NOTE(review): 'st' is not released on this path - confirm that is intended */
+ if (!have_container &&
+ !(ap->info.disk.state & (1<<MD_DISK_SYNC)))
+ ap->spares++; /* a non-container device without the SYNC flag is counted as a spare */
+ d = dl_strdup(devlist->devname);
+ dl_add(ap->devs, d);
+ } else if (c->export) {
+ if (st->ss->export_examine_super)
+ st->ss->export_examine_super(st);
+ st->ss->free_super(st);
+ } else {
+ printf("%s:\n",devlist->devname);
+ st->ss->examine_super(st, c->homehost);
+ st->ss->free_super(st);
+ }
+ }
+ if (c->brief) {
+ struct array *ap;
+ for (ap = arrays; ap; ap = ap->next) {
+ char sep='=';
+ char *d;
+ int newline = 0; /* non-zero once something was printed after the brief line */
+
+ ap->st->ss->brief_examine_super(ap->st, c->verbose > 0);
+ if (ap->spares)
+ newline += printf(" spares=%d", ap->spares);
+ if (c->verbose > 0) {
+ newline += printf(" devices");
+ for (d = dl_next(ap->devs);
+ d != ap->devs;
+ d=dl_next(d)) {
+ printf("%c%s", sep, d); /* first device follows '=', later ones ',' */
+ sep=',';
+ }
+ }
+ if (ap->st->ss->brief_examine_subarrays) {
+ if (newline)
+ printf("\n");
+ ap->st->ss->brief_examine_subarrays(ap->st, c->verbose);
+ }
+ ap->st->ss->free_super(ap->st);
+ /* FIXME free ap */
+ if (ap->spares || c->verbose > 0)
+ printf("\n");
+ }
+ }
+ return rv;
+}
+
+int ExamineBadblocks(char *devname, int brief, struct supertype *forcest)
+{
+	/* Load the superblock found on 'devname' and hand it to the
+	 * metadata handler's examine_badblocks method.  The metadata
+	 * type is taken from 'forcest' when given, otherwise guessed.
+	 *
+	 * Returns 1 when the device cannot be opened, no superblock is
+	 * found or the handler has no examine_badblocks method; otherwise
+	 * the result of load_super (if non-zero) or of examine_badblocks.
+	 * The supertype in use - including 'forcest' - is released with
+	 * free_super() and free() before returning.
+	 */
+	struct supertype *st = forcest;
+	int result = 1;
+	int fd;
+
+	fd = dev_open(devname, O_RDONLY);
+	if (fd < 0) {
+		pr_err("cannot open %s: %s\n", devname, strerror(errno));
+		return 1;
+	}
+
+	if (st == NULL)
+		st = guess_super(fd);
+
+	if (st == NULL) {
+		if (!brief) {
+			pr_err("No md superblock detected on %s\n", devname);
+		}
+	} else if (st->ss->examine_badblocks == NULL) {
+		pr_err("%s metadata does not support badblocks\n", st->ss->name);
+	} else {
+		result = st->ss->load_super(st, fd, brief ? NULL : devname);
+		if (result == 0)
+			result = st->ss->examine_badblocks(st, fd, devname);
+	}
+
+	close(fd);
+	if (st != NULL) {
+		st->ss->free_super(st);
+		free(st);
+	}
+	return result;
+}
diff --git a/Grow.c b/Grow.c
new file mode 100644
index 00000000..a9c8589c
--- /dev/null
+++ b/Grow.c
@@ -0,0 +1,4998 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+#include "mdadm.h"
+#include "dlink.h"
+#include <sys/mman.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <signal.h>
+#include <sys/wait.h>
+
+#if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
+#error no endian defined
+#endif
+#include "md_u.h"
+#include "md_p.h"
+
+int restore_backup(struct supertype *st,
+		   struct mdinfo *content,
+		   int working_disks,
+		   int next_spare,
+		   char **backup_filep,
+		   int verbose)
+{
+	/* Open every device listed in 'content' and restore the critical
+	 * section of an interrupted reshape, either through the metadata
+	 * handler's recover_backup method (external metadata) or through
+	 * Grow_restart().
+	 *
+	 * Devices with a raid_disk slot are stored at that index of the
+	 * descriptor table; the others are appended from 'next_spare' on.
+	 * If *backup_filep is NULL it is set to whatever locate_backup()
+	 * finds for this array.
+	 *
+	 * Returns 0 on success, 1 on failure.
+	 */
+	char *backup_file = *backup_filep;
+	int disk_count = next_spare + working_disks;
+	struct mdinfo *dev;
+	int *fdlist;
+	int err;
+	int i;
+
+	dprintf("Called restore_backup()\n");
+	fdlist = xmalloc(sizeof(int) * disk_count);
+
+	enable_fds(next_spare);
+	i = 0;
+	while (i < next_spare)
+		fdlist[i++] = -1;
+
+	for (dev = content->devs; dev; dev = dev->next) {
+		char buf[22];
+		int slot = dev->disk.raid_disk;
+		int fd;
+
+		sprintf(buf, "%d:%d",
+			dev->disk.major,
+			dev->disk.minor);
+		fd = dev_open(buf, O_RDWR);
+
+		/* no slot in the array: treat it as the next spare */
+		if (slot < 0)
+			slot = next_spare++;
+		fdlist[slot] = fd;
+	}
+
+	if (backup_file == NULL) {
+		backup_file = locate_backup(content->sys_name);
+		*backup_filep = backup_file;
+	}
+
+	if (st->ss->external && st->ss->recover_backup)
+		err = st->ss->recover_backup(st, content);
+	else
+		err = Grow_restart(st, content, fdlist, next_spare,
+				   backup_file, verbose > 0);
+
+	/* close from the last used entry down to the first */
+	for (i = next_spare - 1; i >= 0; i--) {
+		if (fdlist[i] >= 0)
+			close(fdlist[i]);
+	}
+	free(fdlist);
+
+	if (!err) {
+		dprintf("restore_backup() returns status OK.\n");
+		return 0;
+	}
+
+	pr_err("Failed to restore critical section for reshape - sorry.\n");
+	if (backup_file == NULL) {
+		pr_err("Possibly you need to specify a --backup-file\n");
+	}
+	return 1;
+}
+
+int Grow_Add_device(char *devname, int fd, char *newdev)
+{
+	/* Add a device to an active array.
+	 * Currently, just extend a linear array.
+	 * This requires writing a new superblock on the
+	 * new device, calling the kernel to add the device,
+	 * and if that succeeds, update the superblock on
+	 * all other devices.
+	 * This means that we need to *find* all other devices.
+	 *
+	 * 'fd' is the open array and stays owned by the caller: it is
+	 * never closed here.  'devname' is only used in messages.
+	 *
+	 * Returns 0 on success, 1 on any failure.
+	 */
+	struct mdinfo info;
+
+	struct stat stb;
+	int nfd, fd2;
+	int d, nd;
+	struct supertype *st = NULL;
+	char *subarray = NULL;
+
+	if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
+		pr_err("cannot get array info for %s\n", devname);
+		return 1;
+	}
+
+	if (info.array.level != -1) {
+		pr_err("can only add devices to linear arrays\n");
+		return 1;
+	}
+
+	st = super_by_fd(fd, &subarray);
+	if (!st) {
+		pr_err("cannot handle arrays with superblock version %d\n",
+		       info.array.major_version);
+		return 1;
+	}
+
+	if (subarray) {
+		pr_err("Cannot grow linear sub-arrays yet\n");
+		free(subarray);
+		free(st);
+		return 1;
+	}
+
+	nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT);
+	if (nfd < 0) {
+		pr_err("cannot open %s\n", newdev);
+		free(st);
+		return 1;
+	}
+	fstat(nfd, &stb);
+	if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+		pr_err("%s is not a block device!\n", newdev);
+		close(nfd);
+		free(st);
+		return 1;
+	}
+	/* now check out all the devices and make sure we can read the
+	 * superblock */
+	for (d = 0; d < info.array.raid_disks; d++) {
+		mdu_disk_info_t disk;
+		char *dv;
+
+		st->ss->free_super(st);
+
+		disk.number = d;
+		if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
+			pr_err("cannot get device detail for device %d\n",
+			       d);
+			close(nfd);
+			free(st);
+			return 1;
+		}
+		dv = map_dev(disk.major, disk.minor, 1);
+		if (!dv) {
+			pr_err("cannot find device file for device %d\n",
+			       d);
+			close(nfd);
+			free(st);
+			return 1;
+		}
+		fd2 = dev_open(dv, O_RDWR);
+		if (fd2 < 0) {
+			pr_err("cannot open device file %s\n", dv);
+			close(nfd);
+			free(st);
+			return 1;
+		}
+
+		if (st->ss->load_super(st, fd2, NULL)) {
+			pr_err("cannot find super block on %s\n", dv);
+			close(nfd);
+			close(fd2);
+			free(st);
+			return 1;
+		}
+		close(fd2);
+	}
+	/* Ok, looks good. Lets update the superblock and write it out to
+	 * newdev.
+	 * 'st' still holds the superblock of the last member; 'd' is now
+	 * the old raid_disks, i.e. the slot of the new device.
+	 */
+
+	info.disk.number = d;
+	info.disk.major = major(stb.st_rdev);
+	info.disk.minor = minor(stb.st_rdev);
+	info.disk.raid_disk = d;
+	info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
+	st->ss->update_super(st, &info, "linear-grow-new", newdev,
+			     0, 0, NULL);
+
+	if (st->ss->store_super(st, nfd)) {
+		pr_err("Cannot store new superblock on %s\n",
+		       newdev);
+		close(nfd);
+		return 1;
+	}
+	close(nfd);
+
+	if (ioctl(fd, ADD_NEW_DISK, &info.disk) != 0) {
+		pr_err("Cannot add new disk to this array\n");
+		return 1;
+	}
+	/* Well, that seems to have worked.
+	 * Now go through and update all superblocks
+	 */
+
+	if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
+		pr_err("cannot get array info for %s\n", devname);
+		return 1;
+	}
+
+	nd = d;
+	for (d = 0; d < info.array.raid_disks; d++) {
+		mdu_disk_info_t disk;
+		char *dv;
+
+		disk.number = d;
+		if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
+			pr_err("cannot get device detail for device %d\n",
+			       d);
+			return 1;
+		}
+		dv = map_dev(disk.major, disk.minor, 1);
+		if (!dv) {
+			pr_err("cannot find device file for device %d\n",
+			       d);
+			return 1;
+		}
+		fd2 = dev_open(dv, O_RDWR);
+		if (fd2 < 0) {
+			pr_err("cannot open device file %s\n", dv);
+			return 1;
+		}
+		if (st->ss->load_super(st, fd2, NULL)) {
+			pr_err("cannot find super block on %s\n", dv);
+			/* release the member we just opened, not the
+			 * caller's array descriptor
+			 */
+			close(fd2);
+			return 1;
+		}
+		info.array.raid_disks = nd+1;
+		info.array.nr_disks = nd+1;
+		info.array.active_disks = nd+1;
+		info.array.working_disks = nd+1;
+
+		st->ss->update_super(st, &info, "linear-grow-update", dv,
+				     0, 0, NULL);
+
+		if (st->ss->store_super(st, fd2)) {
+			pr_err("Cannot store new superblock on %s\n", dv);
+			close(fd2);
+			return 1;
+		}
+		close(fd2);
+	}
+
+	return 0;
+}
+
+int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
+{
+ /*
+ * First check that array doesn't have a bitmap
+ * Then create the bitmap
+ * Then add it
+ *
+ * For internal bitmaps, we need to check the version,
+ * find all the active devices, and write the bitmap block
+ * to all devices
+ *
+ * s->bitmap_file selects the action: "none" removes an existing
+ * file or internal bitmap, "internal" adds an internal bitmap,
+ * anything else is used as the name of a bitmap file to create.
+ *
+ * Returns 0 on success and 1 on any failure.
+ *
+ * NOTE(review): several failure paths below return without
+ * releasing 'st' (and, in the file case, 'bitmap_fd') - confirm
+ * whether that is acceptable for the callers.
+ */
+ mdu_bitmap_file_t bmf;
+ mdu_array_info_t array;
+ struct supertype *st;
+ char *subarray = NULL;
+ int major = BITMAP_MAJOR_HI;
+ int vers = md_get_version(fd);
+ unsigned long long bitmapsize, array_size;
+
+ if (vers < 9003) {
+ major = BITMAP_MAJOR_HOSTENDIAN;
+ pr_err("Warning - bitmaps created on this kernel"
+ " are not portable\n"
+ " between different architectures. Consider upgrading"
+ " the Linux kernel.\n");
+ }
+
+ if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
+ if (errno == ENOMEM)
+ pr_err("Memory allocation failure.\n");
+ else
+ pr_err("bitmaps not supported by this kernel.\n");
+ return 1;
+ }
+ if (bmf.pathname[0]) { /* a bitmap file is already attached */
+ if (strcmp(s->bitmap_file,"none")==0) {
+ if (ioctl(fd, SET_BITMAP_FILE, -1)!= 0) {
+ pr_err("failed to remove bitmap %s\n",
+ bmf.pathname);
+ return 1;
+ }
+ return 0;
+ }
+ pr_err("%s already has a bitmap (%s)\n",
+ devname, bmf.pathname);
+ return 1;
+ }
+ if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+ pr_err("cannot get array status for %s\n", devname);
+ return 1;
+ }
+ if (array.state & (1<<MD_SB_BITMAP_PRESENT)) { /* an internal bitmap is already present */
+ if (strcmp(s->bitmap_file, "none")==0) {
+ array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
+ if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
+ pr_err("failed to remove internal bitmap.\n");
+ return 1;
+ }
+ return 0;
+ }
+ pr_err("Internal bitmap already present on %s\n",
+ devname);
+ return 1;
+ }
+
+ if (strcmp(s->bitmap_file, "none") == 0) { /* asked to remove, but neither kind of bitmap exists */
+ pr_err("no bitmap found on %s\n", devname);
+ return 1;
+ }
+ if (array.level <= 0) {
+ pr_err("Bitmaps not meaningful with level %s\n",
+ map_num(pers, array.level)?:"of this array");
+ return 1;
+ }
+ bitmapsize = array.size;
+ bitmapsize <<= 1; /* presumably converts array.size from KiB to 512-byte sectors - TODO confirm */
+ if (get_dev_size(fd, NULL, &array_size) &&
+ array_size > (0x7fffffffULL<<9)) {
+ /* Array is big enough that we cannot trust array.size
+ * try other approaches
+ */
+ bitmapsize = get_component_size(fd);
+ }
+ if (bitmapsize == 0) {
+ pr_err("Cannot reliably determine size of array to create bitmap - sorry.\n");
+ return 1;
+ }
+
+ if (array.level == 10) {
+ int ncopies = (array.layout&255)*((array.layout>>8)&255); /* product of the two low bytes of the layout; presumably near*far copies */
+ bitmapsize = bitmapsize * array.raid_disks / ncopies;
+ }
+
+ st = super_by_fd(fd, &subarray);
+ if (!st) {
+ pr_err("Cannot understand version %d.%d\n",
+ array.major_version, array.minor_version);
+ return 1;
+ }
+ if (subarray) {
+ pr_err("Cannot add bitmaps to sub-arrays yet\n");
+ free(subarray);
+ free(st);
+ return 1;
+ }
+ if (strcmp(s->bitmap_file, "internal") == 0) {
+ int rv;
+ int d;
+ int offset_setable = 0;
+ struct mdinfo *mdi;
+ if (st->ss->add_internal_bitmap == NULL) {
+ pr_err("Internal bitmaps not supported "
+ "with %s metadata\n", st->ss->name);
+ return 1;
+ }
+ mdi = sysfs_read(fd, NULL, GET_BITMAP_LOCATION);
+ if (mdi)
+ offset_setable = 1; /* sysfs read worked, so the bitmap is enabled via sysfs below */
+ for (d=0; d< st->max_devs; d++) { /* write a bitmap to every in-sync member */
+ mdu_disk_info_t disk;
+ char *dv;
+ disk.number = d;
+ if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ continue;
+ if (disk.major == 0 &&
+ disk.minor == 0)
+ continue;
+ if ((disk.state & (1<<MD_DISK_SYNC))==0)
+ continue;
+ dv = map_dev(disk.major, disk.minor, 1);
+ if (dv) {
+ int fd2 = dev_open(dv, O_RDWR);
+ if (fd2 < 0)
+ continue;
+ if (st->ss->load_super(st, fd2, NULL)==0) {
+ if (st->ss->add_internal_bitmap(
+ st,
+ &s->bitmap_chunk, c->delay, s->write_behind,
+ bitmapsize, offset_setable,
+ major)
+ )
+ st->ss->write_bitmap(st, fd2);
+ else {
+ pr_err("failed to create internal bitmap"
+ " - chunksize problem.\n");
+ close(fd2);
+ return 1;
+ }
+ }
+ close(fd2);
+ }
+ }
+ if (offset_setable) {
+ st->ss->getinfo_super(st, mdi, NULL);
+ sysfs_init(mdi, fd, NULL);
+ rv = sysfs_set_num_signed(mdi, NULL, "bitmap/location",
+ mdi->bitmap_offset);
+ } else {
+ array.state |= (1<<MD_SB_BITMAP_PRESENT);
+ rv = ioctl(fd, SET_ARRAY_INFO, &array);
+ }
+ if (rv < 0) {
+ if (errno == EBUSY)
+ pr_err("Cannot add bitmap while array is"
+ " resyncing or reshaping etc.\n");
+ pr_err("failed to set internal bitmap.\n");
+ return 1;
+ }
+ } else {
+ int uuid[4];
+ int bitmap_fd;
+ int d;
+ int max_devs = st->max_devs;
+
+ /* try to load a superblock */
+ for (d = 0; d < max_devs; d++) { /* stops at the first member whose superblock loads */
+ mdu_disk_info_t disk;
+ char *dv;
+ int fd2;
+ disk.number = d;
+ if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ continue;
+ if ((disk.major==0 && disk.minor==0) ||
+ (disk.state & (1<<MD_DISK_REMOVED)))
+ continue;
+ dv = map_dev(disk.major, disk.minor, 1);
+ if (!dv)
+ continue;
+ fd2 = dev_open(dv, O_RDONLY);
+ if (fd2 >= 0) {
+ if (st->ss->load_super(st, fd2, NULL) == 0) {
+ close(fd2);
+ st->ss->uuid_from_super(st, uuid);
+ break;
+ }
+ close(fd2);
+ }
+ }
+ if (d == max_devs) { /* loop ran to the end: no superblock could be loaded */
+ pr_err("cannot find UUID for array!\n");
+ return 1;
+ }
+ if (CreateBitmap(s->bitmap_file, c->force, (char*)uuid, s->bitmap_chunk,
+ c->delay, s->write_behind, bitmapsize, major)) {
+ return 1;
+ }
+ bitmap_fd = open(s->bitmap_file, O_RDWR);
+ if (bitmap_fd < 0) {
+ pr_err("weird: %s cannot be opened\n",
+ s->bitmap_file);
+ return 1;
+ }
+ if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) {
+ int err = errno; /* saved before the messages below can change errno */
+ if (errno == EBUSY)
+ pr_err("Cannot add bitmap while array is"
+ " resyncing or reshaping etc.\n");
+ pr_err("Cannot set bitmap file for %s: %s\n",
+ devname, strerror(err));
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * When reshaping an array we might need to backup some data.
+ * This is written to all spares with a 'super_block' describing it.
+ * The superblock goes 4K from the end of the used space on the
+ * device.
+ * It is written after the backup is complete.
+ * It has the following structure.
+ */
+
+static struct mdp_backup_super {
+ char magic[16]; /* md_backup_data-1 or -2 */
+ __u8 set_uuid[16];
+ __u64 mtime;
+ /* start/sizes in 512byte sectors */
+ __u64 devstart; /* address on backup device/file of data */
+ __u64 arraystart;
+ __u64 length;
+ __u32 sb_csum; /* csum of preceding bytes. */
+ __u32 pad1;
+ __u64 devstart2; /* offset in to data of second section */
+ __u64 arraystart2;
+ __u64 length2;
+ __u32 sb_csum2; /* csum of preceding bytes. */
+ __u8 pad[512-68-32]; /* 68 = bytes up to and including sb_csum, 32 = pad1 through sb_csum2 (assuming no compiler padding) */
+} __attribute__((aligned(512))) bsb, bsb2;
+
+static __u32 bsb_csum(char *buf, int len)
+{
+	/* Checksum used for the backup superblock: starting from 0, the
+	 * accumulator is shifted left by 3 and has a byte added, once per
+	 * byte of 'len'.  The result is returned as little-endian.
+	 *
+	 * NOTE(review): every round adds buf[0], never buf[i].  That may
+	 * be unintended, but changing it would change the checksum values
+	 * this function produces - confirm before touching it.
+	 */
+	int sum = 0;
+	int remaining = len;
+
+	while (remaining-- > 0)
+		sum = (sum << 3) + buf[0];
+
+	return __cpu_to_le32(sum);
+}
+
+static int check_idle(struct supertype *st)
+{
+	/* Check that all member arrays for this container, or the
+	 * container of this array, are idle.
+	 * An mdstat entry with percent >= 0 counts as busy.
+	 * Returns 1 when idle, 0 otherwise.
+	 */
+	char *container = st->container_devnm[0] ? st->container_devnm
+						 : st->devnm;
+	struct mdstat_ent *list = mdstat_read(0, 0);
+	struct mdstat_ent *e;
+	int is_idle = 1;
+
+	/* stop at the first busy member */
+	for (e = list; e && is_idle; e = e->next) {
+		if (is_container_member(e, container) && e->percent >= 0)
+			is_idle = 0;
+	}
+
+	free_mdstat(list);
+	return is_idle;
+}
+
+static int freeze_container(struct supertype *st)
+{
+	/* Block the monitor of the container that 'st' is, or belongs to.
+	 * Returns -1 if a member array is busy, -2 if the monitor could
+	 * not be blocked, and 1 on success.
+	 */
+	char *container;
+
+	if (check_idle(st) == 0)
+		return -1;
+
+	container = st->container_devnm[0] ? st->container_devnm
+					   : st->devnm;
+	if (block_monitor(container, 1) != 0) {
+		pr_err("failed to freeze container\n");
+		return -2;
+	}
+
+	return 1;
+}
+
+static void unfreeze_container(struct supertype *st)
+{
+	/* Unblock the monitor of the container named by
+	 * st->container_devnm, or by st->devnm when that is empty.
+	 */
+	if (st->container_devnm[0])
+		unblock_monitor(st->container_devnm, 1);
+	else
+		unblock_monitor(st->devnm, 1);
+}
+
+static int freeze(struct supertype *st)
+{
+	/* Try to freeze resync/rebuild on this array/container.
+	 * Return -1 if the array is busy,
+	 * return -2 container cannot be frozen,
+	 * return 0 if this kernel doesn't support 'frozen'
+	 * return 1 if it worked.
+	 * -1 is also returned when the array's sysfs data cannot be read.
+	 */
+	struct mdinfo *sra;
+	char state[20];
+	int err;
+
+	if (st->ss->external)
+		return freeze_container(st);
+
+	sra = sysfs_read(-1, st->devnm, GET_VERSION);
+	if (sra == NULL)
+		return -1;
+
+	/* Need to clear any 'read-auto' status */
+	if (sysfs_get_str(sra, NULL, "array_state", state, sizeof(state)) > 0 &&
+	    strncmp(state, "read-auto", 9) == 0)
+		sysfs_set_str(sra, NULL, "array_state", "clean");
+
+	err = sysfs_freeze_array(sra);
+	sysfs_free(sra);
+	return err;
+}
+
+static void unfreeze(struct supertype *st)
+{
+	/* Undo freeze(): unblock the container monitor for external
+	 * metadata, otherwise set sync_action back to "idle" if it
+	 * currently reads "frozen".
+	 */
+	struct mdinfo *sra;
+	char action[20];
+
+	if (st->ss->external) {
+		unfreeze_container(st);
+		return;
+	}
+
+	sra = sysfs_read(-1, st->devnm, GET_VERSION);
+	if (sra != NULL) {
+		if (sysfs_get_str(sra, NULL, "sync_action",
+				  action, sizeof(action)) > 0 &&
+		    strcmp(action, "frozen\n") == 0) {
+			printf("unfreeze\n");
+			sysfs_set_str(sra, NULL, "sync_action", "idle");
+		}
+	}
+	sysfs_free(sra);
+}
+
+static void wait_reshape(struct mdinfo *sra)
+{
+	/* Block until the array's sync_action no longer starts with
+	 * "reshape", or until it can no longer be read.
+	 */
+	char action[20];
+	int fd;
+
+	fd = sysfs_get_fd(sra, NULL, "sync_action");
+	if (fd < 0)
+		return;
+
+	for (;;) {
+		if (sysfs_fd_get_str(fd, action, 20) <= 0)
+			break;
+		if (strncmp(action, "reshape", 7) != 0)
+			break;
+		sysfs_wait(fd, NULL);
+	}
+	close(fd);
+}
+
+static int reshape_super(struct supertype *st, unsigned long long size,
+			 int level, int layout, int chunksize, int raid_disks,
+			 int delta_disks, char *backup_file, char *dev,
+			 int direction, int verbose)
+{
+	/* Forward a reshape request to the metadata handler.
+	 * Native (non-external) metadata has nothing extra to check and
+	 * yields 0.  External metadata must provide both reshape_super
+	 * and manage_reshape; otherwise 1 is returned.
+	 */
+	if (!st->ss->external)
+		return 0;
+
+	if (st->ss->reshape_super && st->ss->manage_reshape)
+		return st->ss->reshape_super(st, size, level, layout,
+					     chunksize, raid_disks,
+					     delta_disks, backup_file, dev,
+					     direction, verbose);
+
+	pr_err("%s metadata does not support reshape\n",
+	       st->ss->name);
+	return 1;
+}
+
+static void sync_metadata(struct supertype *st)
+{
+	/* Push pending metadata changes out for external metadata:
+	 * flush the queued updates when an update queue is in use,
+	 * otherwise call the handler's sync_metadata directly.
+	 * Does nothing for native metadata.
+	 */
+	if (!st->ss->external)
+		return;
+
+	if (!st->update_tail) {
+		st->ss->sync_metadata(st);
+		return;
+	}
+
+	flush_metadata_updates(st);
+	st->update_tail = &st->updates;
+}
+
+static int subarray_set_num(char *container, struct mdinfo *sra, char *name, int n)
+{
+	/* when dealing with external metadata subarrays we need to be
+	 * prepared to handle EAGAIN.  The kernel may need to wait for
+	 * mdmon to mark the array active so the kernel can handle
+	 * allocations/writeback when preparing the reshape action
+	 * (md_allow_write()).  We temporarily disable safe_mode_delay
+	 * to close a race with the array_state going clean before the
+	 * next write to raid_disks / stripe_cache_size
+	 *
+	 * Returns the result of the sysfs write, or -1 if the current
+	 * safe_mode_delay could not be read.
+	 */
+	char saved_delay[50];
+	int rc;
+
+	/* only 'raid_disks' and 'stripe_cache_size' trigger md_allow_write */
+	if (container == NULL)
+		return sysfs_set_num(sra, NULL, name, n);
+	if (strcmp(name, "raid_disks") != 0 &&
+	    strcmp(name, "stripe_cache_size") != 0)
+		return sysfs_set_num(sra, NULL, name, n);
+
+	if (sysfs_get_str(sra, NULL, "safe_mode_delay",
+			  saved_delay, sizeof(saved_delay)) <= 0)
+		return -1;
+	sysfs_set_num(sra, NULL, "safe_mode_delay", 0);
+
+	rc = sysfs_set_num(sra, NULL, name, n);
+	if (rc < 0 && errno == EAGAIN) {
+		ping_monitor(container);
+		/* if we get EAGAIN here then the monitor is not active
+		 * so stop trying
+		 */
+		rc = sysfs_set_num(sra, NULL, name, n);
+	}
+
+	/* put the saved delay back whatever the outcome */
+	sysfs_set_str(sra, NULL, "safe_mode_delay", saved_delay);
+	return rc;
+}
+
+int start_reshape(struct mdinfo *sra, int already_running,
+		  int before_data_disks, int data_disks)
+{
+	/* Set the suspend window and sync_max from sra->reshape_progress
+	 * and, unless a reshape is 'already_running', also set sync_min
+	 * and write "reshape" to sync_action.
+	 *
+	 * Returns the first non-zero result among the checked sysfs
+	 * writes, or 0.  The initial suspend_lo write and the sync_min
+	 * write are not checked.
+	 */
+	unsigned long long target;
+	int err;
+
+	sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
+	err = sysfs_set_num(sra, NULL, "suspend_hi", sra->reshape_progress);
+	if (!err)
+		err = sysfs_set_num(sra, NULL, "suspend_lo",
+				    sra->reshape_progress);
+
+	/* per-device position matching the current reshape progress */
+	target = (before_data_disks <= data_disks)
+		? sra->reshape_progress / data_disks
+		: (sra->component_size * data_disks
+		   - sra->reshape_progress) / data_disks;
+
+	if (!already_running)
+		sysfs_set_num(sra, NULL, "sync_min", target);
+	if (!err)
+		err = sysfs_set_num(sra, NULL, "sync_max", target);
+	if (!already_running && !err)
+		err = sysfs_set_str(sra, NULL, "sync_action", "reshape");
+
+	return err;
+}
+
+void abort_reshape(struct mdinfo *sra)
+{ /* Stop a reshape: write "idle" to sync_action, then reset the
+ * suspend window and sync_min to 0 through sysfs.  None of the
+ * sysfs writes is checked for failure.
+ */
+ sysfs_set_str(sra, NULL, "sync_action", "idle");
+ sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
+ sysfs_set_num(sra, NULL, "suspend_hi", 0);
+ sysfs_set_num(sra, NULL, "suspend_lo", 0);
+ sysfs_set_num(sra, NULL, "sync_min", 0);
+ // It isn't safe to reset sync_max as we aren't monitoring.
+ // Array really should be stopped at this point.
+}
+
+int remove_disks_for_takeover(struct supertype *st,
+			      struct mdinfo *sra,
+			      int layout)
+{
+	/* Keep one working device per group of copies and remove every
+	 * other device from the array.
+	 * The group size is the low byte of 'layout' for level 10 and
+	 * raid_disks for level 1; any other level yields 1.
+	 *
+	 * On success (0) sra->devs lists all devices again, the removed
+	 * ones flagged MD_DISK_REMOVED.  If some group has no usable
+	 * device, nothing is removed, sra->devs gets all devices back
+	 * (possibly reordered) and 1 is returned.
+	 */
+	struct mdinfo *unused;
+	int copies;
+	int first;
+
+	switch (sra->array.level) {
+	case 10:
+		copies = layout & 0xff;
+		break;
+	case 1:
+		copies = sra->array.raid_disks;
+		break;
+	default:
+		return 1;
+	}
+
+	unused = sra->devs;
+	sra->devs = NULL;
+
+	/* for each 'copy', select one device and remove from the list. */
+	for (first = 0; first < sra->array.raid_disks; first += copies) {
+		struct mdinfo **link = &unused;
+		struct mdinfo *keep = NULL;
+
+		/* Find a working device to keep */
+		while (*link) {
+			struct mdinfo *cand = *link;
+			int in_group = cand->disk.raid_disk >= first &&
+				       cand->disk.raid_disk < first + copies;
+			int bad = cand->disk.state &
+				  ((1<<MD_DISK_REMOVED) | (1<<MD_DISK_FAULTY));
+
+			if (in_group && !bad &&
+			    (cand->disk.state & (1<<MD_DISK_SYNC))) {
+				keep = cand;
+				break;
+			}
+			link = &cand->next;
+		}
+		if (keep == NULL)
+			break;
+
+		/* We have found a good disk to use! */
+		*link = keep->next;
+		keep->next = sra->devs;
+		sra->devs = keep;
+	}
+
+	if (first < sra->array.raid_disks) {
+		/* didn't find all slots: put the kept devices back */
+		struct mdinfo **tail = &unused;
+
+		while (*tail)
+			tail = &(*tail)->next;
+		*tail = sra->devs;
+		sra->devs = unused;
+		return 1;
+	}
+
+	/* Remove all leftover devices from the array */
+	while (unused) {
+		struct mdinfo *sd = unused;
+
+		unused = sd->next;
+
+		sysfs_set_str(sra, sd, "state", "faulty");
+		sysfs_set_str(sra, sd, "slot", "none");
+		/* for external metadata disks should be removed in mdmon */
+		if (!st->ss->external)
+			sysfs_set_str(sra, sd, "state", "remove");
+		sd->disk.state |= (1<<MD_DISK_REMOVED);
+		sd->disk.state &= ~(1<<MD_DISK_SYNC);
+		sd->next = sra->devs;
+		sra->devs = sd;
+	}
+	return 0;
+}
+
+ /*
+  * Close every non-negative descriptor in the first 'size' entries of
+  * 'fdlist', then free both the 'fdlist' and 'offsets' arrays.
+  */
+ void reshape_free_fdlist(int *fdlist,
+ 			 unsigned long long *offsets,
+ 			 int size)
+ {
+ 	int idx = 0;
+
+ 	while (idx < size) {
+ 		int fd = fdlist[idx++];
+
+ 		/* negative entries are treated as unused slots */
+ 		if (fd >= 0)
+ 			close(fd);
+ 	}
+
+ 	free(fdlist);
+ 	free(offsets);
+ }
+
+ /*
+  * Open the component devices of 'sra' and record a descriptor and a
+  * byte offset for each in 'fdlist' / 'offsets'.
+  *
+  * Entries 0..nrdisks of 'fdlist' are first set to -1.  In-sync devices
+  * are opened read-only and stored at their raid_disk index.  Devices
+  * that are neither faulty nor in-sync are opened read-write and stored
+  * from index 'raid_disks' upwards, but only when 'backup_file' is NULL.
+  *
+  * Returns the index following the last such non-sync entry (equal to
+  * 'raid_disks' when there were none), or -1 if a device could not be
+  * opened; descriptors opened before the failure are left in 'fdlist'.
+  */
+ int reshape_prepare_fdlist(char *devname,
+ 			   struct mdinfo *sra,
+ 			   int raid_disks,
+ 			   int nrdisks,
+ 			   unsigned long blocks,
+ 			   char *backup_file,
+ 			   int *fdlist,
+ 			   unsigned long long *offsets)
+ {
+ 	struct mdinfo *dev;
+ 	int next_spare;
+ 	int i;
+
+ 	enable_fds(nrdisks);
+ 	for (i = 0; i <= nrdisks; i++)
+ 		fdlist[i] = -1;
+
+ 	next_spare = raid_disks;
+ 	for (dev = sra->devs; dev; dev = dev->next) {
+ 		char *path;
+ 		int is_spare;
+ 		int index;
+
+ 		if (dev->disk.state & (1<<MD_DISK_FAULTY))
+ 			continue;
+
+ 		if (dev->disk.state & (1<<MD_DISK_SYNC)) {
+ 			is_spare = 0;
+ 			index = dev->disk.raid_disk;
+ 			path = map_dev(dev->disk.major,
+ 				       dev->disk.minor, 1);
+ 			fdlist[index] = dev_open(path, O_RDONLY);
+ 			offsets[index] = dev->data_offset*512;
+ 		} else if (backup_file == NULL) {
+ 			/* spare */
+ 			is_spare = 1;
+ 			index = next_spare;
+ 			path = map_dev(dev->disk.major,
+ 				       dev->disk.minor, 1);
+ 			fdlist[index] = dev_open(path, O_RDWR);
+ 			offsets[index] = (dev->data_offset + sra->component_size - blocks - 8)*512;
+ 		} else {
+ 			/* a backup file is in use, so spares are not needed */
+ 			continue;
+ 		}
+
+ 		if (fdlist[index] < 0) {
+ 			pr_err("%s: cannot open component %s\n",
+ 			       devname, path ? path : "-unknown-");
+ 			return -1;
+ 		}
+ 		if (is_spare)
+ 			next_spare++;
+ 	}
+
+ 	return next_spare;
+ }
+
+ /*
+  * Create (or, when 'restart' is set, truncate) the backup file and
+  * fill it with zeros.
+  *
+  * backup_file - path of the file to create
+  * fd          - open descriptor of the array, used only to check that
+  *               the backup file does not live on the array itself
+  * devname     - array name used in error messages
+  * blocks      - number of 512-byte blocks to reserve; 8 more are added
+  * fdlist      - receives the backup file descriptor
+  * offsets     - receives the data offset within the file (8 * 512)
+  * sys_name    - name handed to make_backup() to build the symlink path
+  * restart     - non-zero: open with O_TRUNC and skip the symlink;
+  *               zero: open with O_EXCL
+  *
+  * Return 1 on success, 0 on any form of failure.  On failure after the
+  * file was opened, the descriptor is closed and *fdlist is set to -1 so
+  * that it is neither leaked nor closed twice by the caller.
+  */
+ int reshape_open_backup_file(char *backup_file,
+ 			     int fd,
+ 			     char *devname,
+ 			     long blocks,
+ 			     int *fdlist,
+ 			     unsigned long long *offsets,
+ 			     char *sys_name,
+ 			     int restart)
+ {
+ 	/* need to check backup file is large enough */
+ 	char buf[512];
+ 	struct stat stb;
+ 	unsigned int dev;
+ 	long i;	/* same width as 'blocks' so the loop bound cannot wrap */
+
+ 	*fdlist = open(backup_file, O_RDWR|O_CREAT|(restart ? O_TRUNC : O_EXCL),
+ 		       S_IRUSR | S_IWUSR);
+ 	*offsets = 8 * 512;
+ 	if (*fdlist < 0) {
+ 		pr_err("%s: cannot create backup file %s: %s\n",
+ 		       devname, backup_file, strerror(errno));
+ 		return 0;
+ 	}
+ 	/* Guard against backup file being on array device.
+ 	 * If array is partitioned or if LVM etc is in the
+ 	 * way this will not notice, but it is better than
+ 	 * nothing.
+ 	 */
+ 	fstat(*fdlist, &stb);
+ 	dev = stb.st_dev;
+ 	fstat(fd, &stb);
+ 	if (stb.st_rdev == dev) {
+ 		pr_err("backup file must NOT be"
+ 		       " on the array being reshaped.\n");
+ 		goto fail;
+ 	}
+
+ 	memset(buf, 0, sizeof(buf));
+ 	for (i = 0; i < blocks + 8 ; i++) {
+ 		if (write(*fdlist, buf, 512) != 512) {
+ 			pr_err("%s: cannot create"
+ 			       " backup file %s: %s\n",
+ 			       devname, backup_file, strerror(errno));
+ 			goto fail;
+ 		}
+ 	}
+ 	if (fsync(*fdlist) != 0) {
+ 		pr_err("%s: cannot create backup file %s: %s\n",
+ 		       devname, backup_file, strerror(errno));
+ 		goto fail;
+ 	}
+
+ 	/* Record where the backup lives, unless it is already under MAP_DIR.
+ 	 * A failure here is reported but does not fail the call.
+ 	 */
+ 	if (!restart && strncmp(backup_file, MAP_DIR, strlen(MAP_DIR)) != 0) {
+ 		char *bu = make_backup(sys_name);
+ 		if (symlink(backup_file, bu))
+ 			pr_err("Recording backup file in " MAP_DIR " failed: %s\n",
+ 			       strerror(errno));
+ 		free(bu);
+ 	}
+
+ 	return 1;
+
+ fail:
+ 	close(*fdlist);
+ 	*fdlist = -1;
+ 	return 0;
+ }
+
+ /*
+  * Work out how much data has to be backed up for one reshape step.
+  *
+  * nchunk / ochunk - new and old chunk sizes in bytes
+  * ndata / odata   - new and old number of data disks
+  *
+  * Returns the least common multiple of the old and new stripe sizes,
+  * in 512-byte sectors.
+  */
+ unsigned long compute_backup_blocks(int nchunk, int ochunk,
+ 				    unsigned int ndata, unsigned int odata)
+ {
+ 	/* So how much do we need to backup.
+ 	 * We need an amount of data which is both a whole number of
+ 	 * old stripes and a whole number of new stripes.
+ 	 * So LCM for (chunksize*datadisks).
+ 	 */
+ 	unsigned long old_stripe = (unsigned long)(ochunk/512) * odata;
+ 	unsigned long new_stripe = (unsigned long)(nchunk/512) * ndata;
+ 	unsigned long gcd = GCD(old_stripe, new_stripe);
+
+ 	/* LCM == product / GCD.  Divide before multiplying, and work in
+ 	 * unsigned long throughout, so the intermediate value cannot
+ 	 * overflow a 32-bit int when chunks are large.
+ 	 */
+ 	return old_stripe / gcd * new_stripe;
+ }
+
+ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
+ {
+ 	/* Based on the current array state in info->array and
+ 	 * the changes in info->new_* etc, determine:
+ 	 *  - whether the change is possible
+ 	 *  - Intermediate level/raid_disks/layout
+ 	 *  - whether a restriping reshape is needed
+ 	 *  - number of sectors in minimum change unit.  This
+ 	 *    will cover a whole number of stripes in 'before' and
+ 	 *    'after'.
+ 	 *
+ 	 * Return message if the change should be rejected
+ 	 *        NULL if the change can be achieved
+ 	 * An empty string is returned when the explanation has already
+ 	 * been printed with pr_err().
+ 	 *
+ 	 * 're' is zeroed and then filled in.  Unset fields of 'info'
+ 	 * (new_level, new_chunk, new_layout, delta_disks) are replaced by
+ 	 * defaults, and array.chunk_size / array.layout are overwritten
+ 	 * for some RAID1 and RAID4 conversions.
+ 	 *
+ 	 * This can be called as part of starting a reshape, or
+ 	 * when assembling an array that is undergoing reshape.
+ 	 */
+ 	int near, far, offset, copies;
+ 	int new_disks;
+ 	int old_chunk, new_chunk;
+ 	/* delta_parity records change in number of devices
+ 	 * caused by level change
+ 	 */
+ 	int delta_parity = 0;
+
+ 	memset(re, 0, sizeof(*re));
+
+ 	/* If a new level not explicitly given, we assume no-change */
+ 	if (info->new_level == UnSet)
+ 		info->new_level = info->array.level;
+
+ 	if (info->new_chunk)
+ 		switch (info->new_level) {
+ 		case 0:
+ 		case 4:
+ 		case 5:
+ 		case 6:
+ 		case 10:
+ 			/* chunk size is meaningful, must divide component_size
+ 			 * evenly
+ 			 */
+ 			if (info->component_size % (info->new_chunk/512)) {
+ 				unsigned long long shrink = info->component_size;
+ 				shrink &= ~(unsigned long long)(info->new_chunk/512-1);
+ 				pr_err("New chunk size (%dK) does not evenly divide device size (%lluk)\n",
+ 				       info->new_chunk/1024, info->component_size/2);
+ 				pr_err("After shrinking any filesystem, \"mdadm --grow %s --size %llu\"\n",
+ 				       devname, shrink/2);
+ 				pr_err("will shrink the array so the given chunk size would work.\n");
+ 				return "";
+ 			}
+ 			break;
+ 		default:
+ 			return "chunk size not meaningful for this level";
+ 		}
+ 	else
+ 		info->new_chunk = info->array.chunk_size;
+
+ 	/* First switch: keyed on the CURRENT level.  Either finishes the
+ 	 * analysis (return) or records the intermediate level and the
+ 	 * 'before' geometry and breaks out for the restripe analysis.
+ 	 */
+ 	switch (info->array.level) {
+ 	default:
+ 		return "No reshape is possible for this RAID level";
+ 	case LEVEL_LINEAR:
+ 		if (info->delta_disks != UnSet)
+ 			return "Only --add is supported for LINEAR, setting --raid-disks is not needed";
+ 		else
+ 			return "Only --add is supported for LINEAR, other --grow options are not meaningful";
+ 	case 1:
+ 		/* RAID1 can convert to RAID1 with different disks, or
+ 		 * raid5 with 2 disks, or
+ 		 * raid0 with 1 disk
+ 		 */
+ 		if (info->new_level > 1 &&
+ 		    (info->component_size & 7))
+ 			return "Cannot convert RAID1 of this size - "
+ 				"reduce size to multiple of 4K first.";
+ 		if (info->new_level == 0) {
+ 			if (info->delta_disks != UnSet &&
+ 			    info->delta_disks != 0)
+ 				return "Cannot change number of disks "
+ 					"with RAID1->RAID0 conversion";
+ 			re->level = 0;
+ 			re->before.data_disks = 1;
+ 			re->after.data_disks = 1;
+ 			return NULL;
+ 		}
+ 		if (info->new_level == 1) {
+ 			if (info->delta_disks == UnSet)
+ 				/* Don't know what to do */
+ 				return "no change requested for Growing RAID1";
+ 			re->level = 1;
+ 			return NULL;
+ 		}
+ 		if (info->array.raid_disks == 2 &&
+ 		    info->new_level == 5) {
+
+ 			re->level = 5;
+ 			re->before.data_disks = 1;
+ 			if (info->delta_disks != UnSet &&
+ 			    info->delta_disks != 0)
+ 				re->after.data_disks = 1 + info->delta_disks;
+ 			else
+ 				re->after.data_disks = 1;
+ 			if (re->after.data_disks < 1)
+ 				return "Number of disks too small for RAID5";
+
+ 			re->before.layout = ALGORITHM_LEFT_SYMMETRIC;
+ 			info->array.chunk_size = 65536;
+ 			break;
+ 		}
+ 		/* Could do some multi-stage conversions, but leave that to
+ 		 * later.
+ 		 */
+ 		return "Impossible level change request for RAID1";
+
+ 	case 10:
+ 		/* RAID10 can be converted from near mode to
+ 		 * RAID0 by removing some devices.
+ 		 * It can also be reshaped if the kernel supports
+ 		 * new_data_offset.
+ 		 * Every branch of this inner switch returns.
+ 		 */
+ 		switch (info->new_level) {
+ 		case 0:
+ 			if ((info->array.layout & ~0xff) != 0x100)
+ 				return "Cannot Grow RAID10 with far/offset layout";
+ 			/* number of devices must be multiple of number of copies */
+ 			if (info->array.raid_disks % (info->array.layout & 0xff))
+ 				return "RAID10 layout too complex for Grow operation";
+
+ 			new_disks = (info->array.raid_disks
+ 				     / (info->array.layout & 0xff));
+ 			if (info->delta_disks == UnSet)
+ 				info->delta_disks = (new_disks
+ 						     - info->array.raid_disks);
+
+ 			if (info->delta_disks != new_disks - info->array.raid_disks)
+ 				return "New number of raid-devices impossible for RAID10";
+ 			if (info->new_chunk &&
+ 			    info->new_chunk != info->array.chunk_size)
+ 				return "Cannot change chunk-size with RAID10 Grow";
+
+ 			/* looks good */
+ 			re->level = 0;
+ 			re->before.data_disks = new_disks;
+ 			re->after.data_disks = re->before.data_disks;
+ 			return NULL;
+
+ 		case 10:
+ 			near = info->array.layout & 0xff;
+ 			far = (info->array.layout >> 8) & 0xff;
+ 			offset = info->array.layout & 0x10000;
+ 			if (far > 1 && !offset)
+ 				return "Cannot reshape RAID10 in far-mode";
+ 			copies = near * far;
+
+ 			old_chunk = info->array.chunk_size * far;
+
+ 			if (info->new_layout == UnSet)
+ 				info->new_layout = info->array.layout;
+ 			else {
+ 				near = info->new_layout & 0xff;
+ 				far = (info->new_layout >> 8) & 0xff;
+ 				offset = info->new_layout & 0x10000;
+ 				if (far > 1 && !offset)
+ 					return "Cannot reshape RAID10 to far-mode";
+ 				if (near * far != copies)
+ 					return "Cannot change number of copies"
+ 						" when reshaping RAID10";
+ 			}
+ 			if (info->delta_disks == UnSet)
+ 				info->delta_disks = 0;
+ 			new_disks = (info->array.raid_disks +
+ 				     info->delta_disks);
+
+ 			new_chunk = info->new_chunk * far;
+
+ 			re->level = 10;
+ 			re->before.layout = info->array.layout;
+ 			re->before.data_disks = info->array.raid_disks;
+ 			re->after.layout = info->new_layout;
+ 			re->after.data_disks = new_disks;
+ 			/* For RAID10 we don't do backup but do allow reshape,
+ 			 * so set backup_blocks to INVALID_SECTORS rather than
+ 			 * zero.
+ 			 * And there is no need to synchronise stripes on both
+ 			 * 'old' and 'new'.  So the important
+ 			 * number is the minimum data_offset difference
+ 			 * which is the larger of (offset copies * chunk).
+ 			 */
+ 			re->backup_blocks = INVALID_SECTORS;
+ 			re->min_offset_change = max(old_chunk, new_chunk) / 512;
+ 			if (new_disks < re->before.data_disks &&
+ 			    info->space_after < re->min_offset_change)
+ 				/* Reduce component size by one chunk */
+ 				re->new_size = (info->component_size -
+ 						re->min_offset_change);
+ 			else
+ 				re->new_size = info->component_size;
+ 			re->new_size = re->new_size * new_disks / copies;
+ 			return NULL;
+
+ 		default:
+ 			return "RAID10 can only be changed to RAID0";
+ 		}
+ 	case 0:
+ 		/* RAID0 can be converted to RAID10, or to RAID456 */
+ 		if (info->new_level == 10) {
+ 			if (info->new_layout == UnSet && info->delta_disks == UnSet) {
+ 				/* Assume near=2 layout */
+ 				info->new_layout = 0x102;
+ 				info->delta_disks = info->array.raid_disks;
+ 			}
+ 			if (info->new_layout == UnSet) {
+ 				int copies = 1 + (info->delta_disks
+ 						  / info->array.raid_disks);
+ 				if (info->array.raid_disks * (copies-1)
+ 				    != info->delta_disks)
+ 					return "Impossible number of devices"
+ 						" for RAID0->RAID10";
+ 				info->new_layout = 0x100 + copies;
+ 			}
+ 			if (info->delta_disks == UnSet) {
+ 				int copies = info->new_layout & 0xff;
+ 				if (info->new_layout != 0x100 + copies)
+ 					return "New layout impossible"
+ 						" for RAID0->RAID10";
+ 				info->delta_disks = (copies - 1) *
+ 					info->array.raid_disks;
+ 			}
+ 			if (info->new_chunk &&
+ 			    info->new_chunk != info->array.chunk_size)
+ 				return "Cannot change chunk-size with RAID0->RAID10";
+ 			/* looks good */
+ 			re->level = 10;
+ 			re->before.data_disks = (info->array.raid_disks +
+ 						 info->delta_disks);
+ 			re->after.data_disks = re->before.data_disks;
+ 			re->before.layout = info->new_layout;
+ 			return NULL;
+ 		}
+
+ 		/* RAID0 can also convert to RAID0/4/5/6 by first converting to
+ 		 * a raid4 style layout of the final level.
+ 		 */
+ 		switch (info->new_level) {
+ 		case 4:
+ 			delta_parity = 1;
+ 			/* fall through */
+ 		case 0:
+ 			re->level = 4;
+ 			re->before.layout = 0;
+ 			break;
+ 		case 5:
+ 			delta_parity = 1;
+ 			re->level = 5;
+ 			re->before.layout = ALGORITHM_PARITY_N;
+ 			if (info->new_layout == UnSet)
+ 				info->new_layout = map_name(r5layout, "default");
+ 			break;
+ 		case 6:
+ 			delta_parity = 2;
+ 			re->level = 6;
+ 			re->before.layout = ALGORITHM_PARITY_N;
+ 			if (info->new_layout == UnSet)
+ 				info->new_layout = map_name(r6layout, "default");
+ 			break;
+ 		default:
+ 			return "Impossible level change requested";
+ 		}
+ 		re->before.data_disks = info->array.raid_disks;
+ 		/* determining 'after' layout happens outside this 'switch' */
+ 		break;
+
+ 	case 4:
+ 		info->array.layout = ALGORITHM_PARITY_N;
+ 		/* fall through - RAID4 is then handled like RAID5 */
+ 	case 5:
+ 		switch (info->new_level) {
+ 		case 0:
+ 			delta_parity = -1;
+ 			/* fall through */
+ 		case 4:
+ 			re->level = info->array.level;
+ 			re->before.data_disks = info->array.raid_disks - 1;
+ 			re->before.layout = info->array.layout;
+ 			break;
+ 		case 5:
+ 			re->level = 5;
+ 			re->before.data_disks = info->array.raid_disks - 1;
+ 			re->before.layout = info->array.layout;
+ 			break;
+ 		case 6:
+ 			delta_parity = 1;
+ 			re->level = 6;
+ 			re->before.data_disks = info->array.raid_disks - 1;
+ 			/* pick the RAID6 ("_6") counterpart of the
+ 			 * current RAID5 layout
+ 			 */
+ 			switch (info->array.layout) {
+ 			case ALGORITHM_LEFT_ASYMMETRIC:
+ 				re->before.layout = ALGORITHM_LEFT_ASYMMETRIC_6;
+ 				break;
+ 			case ALGORITHM_RIGHT_ASYMMETRIC:
+ 				re->before.layout = ALGORITHM_RIGHT_ASYMMETRIC_6;
+ 				break;
+ 			case ALGORITHM_LEFT_SYMMETRIC:
+ 				re->before.layout = ALGORITHM_LEFT_SYMMETRIC_6;
+ 				break;
+ 			case ALGORITHM_RIGHT_SYMMETRIC:
+ 				re->before.layout = ALGORITHM_RIGHT_SYMMETRIC_6;
+ 				break;
+ 			case ALGORITHM_PARITY_0:
+ 				re->before.layout = ALGORITHM_PARITY_0_6;
+ 				break;
+ 			case ALGORITHM_PARITY_N:
+ 				re->before.layout = ALGORITHM_PARITY_N_6;
+ 				break;
+ 			default:
+ 				return "Cannot convert an array with this layout";
+ 			}
+ 			break;
+ 		case 1:
+ 			if (info->array.raid_disks != 2)
+ 				return "Can only convert a 2-device array to RAID1";
+ 			if (info->delta_disks != UnSet &&
+ 			    info->delta_disks != 0)
+ 				return "Cannot set raid_disk when "
+ 					"converting RAID5->RAID1";
+ 			re->level = 1;
+ 			info->new_chunk = 0;
+ 			return NULL;
+ 		default:
+ 			return "Impossible level change requested";
+ 		}
+ 		break;
+ 	case 6:
+ 		switch (info->new_level) {
+ 		case 4:
+ 		case 5:
+ 			delta_parity = -1;
+ 			/* fall through */
+ 		case 6:
+ 			re->level = 6;
+ 			re->before.data_disks = info->array.raid_disks - 2;
+ 			re->before.layout = info->array.layout;
+ 			break;
+ 		default:
+ 			return "Impossible level change requested";
+ 		}
+ 		break;
+ 	}
+
+ 	/* If we reached here then it looks like a re-stripe is
+ 	 * happening.  We have determined the intermediate level
+ 	 * and initial raid_disks/layout and stored these in 're'.
+ 	 *
+ 	 * We need to deduce the final layout that can be atomically
+ 	 * converted to the end state.
+ 	 */
+ 	switch (info->new_level) {
+ 	case 0:
+ 		/* We can only get to RAID0 from RAID4 or RAID5
+ 		 * with appropriate layout and one extra device
+ 		 */
+ 		if (re->level != 4 && re->level != 5)
+ 			return "Cannot convert to RAID0 from this level";
+
+ 		switch (re->level) {
+ 		case 4:
+ 			re->before.layout = 0;
+ 			re->after.layout = 0;
+ 			break;
+ 		case 5:
+ 			re->after.layout = ALGORITHM_PARITY_N;
+ 			break;
+ 		}
+ 		break;
+
+ 	case 4:
+ 		/* We can only get to RAID4 from RAID5 */
+ 		if (re->level != 4 && re->level != 5)
+ 			return "Cannot convert to RAID4 from this level";
+
+ 		switch (re->level) {
+ 		case 4:
+ 			re->after.layout = 0;
+ 			break;
+ 		case 5:
+ 			re->after.layout = ALGORITHM_PARITY_N;
+ 			break;
+ 		}
+ 		break;
+
+ 	case 5:
+ 		/* We get to RAID5 from RAID5 or RAID6 */
+ 		if (re->level != 5 && re->level != 6)
+ 			return "Cannot convert to RAID5 from this level";
+
+ 		switch (re->level) {
+ 		case 5:
+ 			if (info->new_layout == UnSet)
+ 				re->after.layout = re->before.layout;
+ 			else
+ 				re->after.layout = info->new_layout;
+ 			break;
+ 		case 6:
+ 			if (info->new_layout == UnSet)
+ 				info->new_layout = re->before.layout;
+
+ 			/* after.layout needs to be raid6 version of new_layout */
+ 			if (info->new_layout == ALGORITHM_PARITY_N)
+ 				re->after.layout = ALGORITHM_PARITY_N;
+ 			else {
+ 				char layout[40];
+ 				char *ls = map_num(r5layout, info->new_layout);
+ 				int l;
+ 				if (ls) {
+ 					/* Current RAID6 layout has a RAID5
+ 					 * equivalent - good
+ 					 */
+ 					strcat(strcpy(layout, ls), "-6");
+ 					l = map_name(r6layout, layout);
+ 					if (l == UnSet)
+ 						return "Cannot find RAID6 layout"
+ 							" to convert to";
+ 				} else {
+ 					/* Current RAID6 has no equivalent.
+ 					 * If it is already a '-6' layout we
+ 					 * can leave it unchanged, else we must
+ 					 * fail
+ 					 */
+ 					ls = map_num(r6layout, info->new_layout);
+ 					if (!ls ||
+ 					    strcmp(ls+strlen(ls)-2, "-6") != 0)
+ 						return "Please specify new layout";
+ 					l = info->new_layout;
+ 				}
+ 				re->after.layout = l;
+ 			}
+ 		}
+ 		break;
+
+ 	case 6:
+ 		/* We must already be at level 6 */
+ 		if (re->level != 6)
+ 			return "Impossible level change";
+ 		if (info->new_layout == UnSet)
+ 			re->after.layout = info->array.layout;
+ 		else
+ 			re->after.layout = info->new_layout;
+ 		break;
+ 	default:
+ 		return "Impossible level change requested";
+ 	}
+ 	if (info->delta_disks == UnSet)
+ 		info->delta_disks = delta_parity;
+
+ 	/* delta_disks counts all devices; take out the parity change to
+ 	 * get the change in data devices.
+ 	 */
+ 	re->after.data_disks = (re->before.data_disks
+ 				+ info->delta_disks
+ 				- delta_parity);
+ 	switch (re->level) {
+ 	case 6: re->parity = 2;
+ 		break;
+ 	case 4:
+ 	case 5: re->parity = 1;
+ 		break;
+ 	default: re->parity = 0;
+ 		break;
+ 	}
+ 	/* So we have a restripe operation, we need to calculate the number
+ 	 * of blocks per reshape operation.
+ 	 */
+ 	re->new_size = info->component_size * re->before.data_disks;
+ 	if (info->new_chunk == 0)
+ 		info->new_chunk = info->array.chunk_size;
+ 	if (re->after.data_disks == re->before.data_disks &&
+ 	    re->after.layout == re->before.layout &&
+ 	    info->new_chunk == info->array.chunk_size) {
+ 		/* Nothing to change, can change level immediately. */
+ 		re->level = info->new_level;
+ 		re->backup_blocks = 0;
+ 		return NULL;
+ 	}
+ 	if (re->after.data_disks == 1 && re->before.data_disks == 1) {
+ 		/* chunk and layout changes make no difference */
+ 		re->level = info->new_level;
+ 		re->backup_blocks = 0;
+ 		return NULL;
+ 	}
+
+ 	if (re->after.data_disks == re->before.data_disks &&
+ 	    get_linux_version() < 2006032)
+ 		return "in-place reshape is not safe before 2.6.32 - sorry.";
+
+ 	if (re->after.data_disks < re->before.data_disks &&
+ 	    get_linux_version() < 2006030)
+ 		return "reshape to fewer devices is not supported before 2.6.30 - sorry.";
+
+ 	re->backup_blocks = compute_backup_blocks(
+ 		info->new_chunk, info->array.chunk_size,
+ 		re->after.data_disks,
+ 		re->before.data_disks);
+ 	re->min_offset_change = re->backup_blocks / re->before.data_disks;
+
+ 	re->new_size = info->component_size * re->after.data_disks;
+ 	return NULL;
+ }
+
+ /*
+  * Raise the "array_size" sysfs attribute of 'sra' to the size recorded
+  * in the metadata for its subarray.
+  *
+  * st           - metadata handler; st->ss->container_content() supplies
+  *                the subarray information
+  * sra          - array whose "array_size" is read and written
+  * text_version - version string containing the subarray name after the
+  *                second '/'; when NULL, sra->text_version is used
+  *
+  * Returns 0 only when the metadata size (custom_array_size / 2) is
+  * larger than the current size and was written successfully.  Returns
+  * -1 otherwise: NULL arguments, no subarray component in the version
+  * string, no container content, size not larger, or a failed write.
+  */
+ static int set_array_size(struct supertype *st, struct mdinfo *sra,
+ 			  char *text_version)
+ {
+ 	struct mdinfo *info;
+ 	char *subarray = NULL;
+ 	int ret_val = -1;
+
+ 	if ((st == NULL) || (sra == NULL))
+ 		return ret_val;
+
+ 	if (text_version == NULL)
+ 		text_version = sra->text_version;
+
+ 	/* Skip the leading character, then find the '/' that precedes the
+ 	 * subarray name.  Never step past the end of an empty string or
+ 	 * do arithmetic on a NULL result from strchr().
+ 	 */
+ 	if (text_version[0] != '\0')
+ 		subarray = strchr(text_version+1, '/');
+ 	if (subarray == NULL) {
+ 		dprintf("Error: set_array_size(): no subarray in version string\n");
+ 		return ret_val;
+ 	}
+ 	subarray++;
+
+ 	info = st->ss->container_content(st, subarray);
+ 	if (info) {
+ 		unsigned long long current_size = 0;
+ 		unsigned long long new_size =
+ 			info->custom_array_size/2;
+
+ 		/* only ever grow the array size here */
+ 		if (sysfs_get_ll(sra, NULL, "array_size", &current_size) == 0 &&
+ 		    new_size > current_size) {
+ 			if (sysfs_set_num(sra, NULL, "array_size", new_size)
+ 			    < 0)
+ 				dprintf("Error: Cannot set array size");
+ 			else {
+ 				ret_val = 0;
+ 				dprintf("Array size changed");
+ 			}
+ 			dprintf(" from %llu to %llu.\n",
+ 				current_size, new_size);
+ 		}
+ 		sysfs_free(info);
+ 	} else
+ 		dprintf("Error: set_array_size(): info pointer in NULL\n");
+
+ 	return ret_val;
+ }
+
+ /* Forward declarations of two file-local helpers; their definitions
+  * are not part of this section of the file.
+  */
+ static int reshape_array(char *container, int fd, char *devname,
+ struct supertype *st, struct mdinfo *info,
+ int force, struct mddev_dev *devlist,
+ unsigned long long data_offset,
+ char *backup_file, int verbose, int forked,
+ int restart, int freeze_reshape);
+ static int reshape_container(char *container, char *devname,
+ int mdfd,
+ struct supertype *st,
+ struct mdinfo *info,
+ int force,
+ char *backup_file, int verbose,
+ int forked, int restart, int freeze_reshape);
+
+int Grow_reshape(char *devname, int fd,
+ struct mddev_dev *devlist,
+ unsigned long long data_offset,
+ struct context *c, struct shape *s)
+{
+ /* Make some changes in the shape of an array.
+ * The kernel must support the change.
+ *
+ * There are three different changes. Each can trigger
+ * a resync or recovery so we freeze that until we have
+ * requested everything (if kernel supports freezing - 2.6.30).
+ * The steps are:
+ * - change size (i.e. component_size)
+ * - change level
+ * - change layout/chunksize/ndisks
+ *
+ * The last can require a reshape. It is different on different
+ * levels so we need to check the level before actioning it.
+ * Some times the level change needs to be requested after the
+ * reshape (e.g. raid6->raid5, raid5->raid0)
+ *
+ */
+ struct mdu_array_info_s array;
+ int rv = 0;
+ struct supertype *st;
+ char *subarray = NULL;
+
+ int frozen;
+ int changed = 0;
+ char *container = NULL;
+ int cfd = -1;
+
+ struct mddev_dev *dv;
+ int added_disks;
+
+ struct mdinfo info;
+ struct mdinfo *sra;
+
+ if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
+ pr_err("%s is not an active md array - aborting\n",
+ devname);
+ return 1;
+ }
+ if (data_offset != INVALID_SECTORS && array.level != 10
+ && (array.level < 4 || array.level > 6)) {
+ pr_err("--grow --data-offset not yet supported\n");
+ return 1;
+ }
+
+ if (s->size > 0 &&
+ (s->chunk || s->level!= UnSet || s->layout_str || s->raiddisks)) {
+ pr_err("cannot change component size at the same time "
+ "as other changes.\n"
+ " Change size first, then check data is intact before "
+ "making other changes.\n");
+ return 1;
+ }
+
+ if (s->raiddisks && s->raiddisks < array.raid_disks && array.level > 1 &&
+ get_linux_version() < 2006032 &&
+ !check_env("MDADM_FORCE_FEWER")) {
+ pr_err("reducing the number of devices is not safe before Linux 2.6.32\n"
+ " Please use a newer kernel\n");
+ return 1;
+ }
+
+ st = super_by_fd(fd, &subarray);
+ if (!st) {
+ pr_err("Unable to determine metadata format for %s\n", devname);
+ return 1;
+ }
+ if (s->raiddisks > st->max_devs) {
+ pr_err("Cannot increase raid-disks on this array"
+ " beyond %d\n", st->max_devs);
+ return 1;
+ }
+
+ /* in the external case we need to check that the requested reshape is
+ * supported, and perform an initial check that the container holds the
+ * pre-requisite spare devices (mdmon owns final validation)
+ */
+ if (st->ss->external) {
+ int rv;
+
+ if (subarray) {
+ container = st->container_devnm;
+ cfd = open_dev_excl(st->container_devnm);
+ } else {
+ container = st->devnm;
+ close(fd);
+ cfd = open_dev_excl(st->devnm);
+ fd = cfd;
+ }
+ if (cfd < 0) {
+ pr_err("Unable to open container for %s\n",
+ devname);
+ free(subarray);
+ return 1;
+ }
+
+ rv = st->ss->load_container(st, cfd, NULL);
+
+ if (rv) {
+ pr_err("Cannot read superblock for %s\n",
+ devname);
+ free(subarray);
+ return 1;
+ }
+
+ /* check if operation is supported for metadata handler */
+ if (st->ss->container_content) {
+ struct mdinfo *cc = NULL;
+ struct mdinfo *content = NULL;
+
+ cc = st->ss->container_content(st, subarray);
+ for (content = cc; content ; content = content->next) {
+ int allow_reshape = 1;
+
+ /* check if reshape is allowed based on metadata
+ * indications stored in content.array.status
+ */
+ if (content->array.state & (1<<MD_SB_BLOCK_VOLUME))
+ allow_reshape = 0;
+ if (content->array.state
+ & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE))
+ allow_reshape = 0;
+ if (!allow_reshape) {
+ pr_err("cannot reshape arrays in"
+ " container with unsupported"
+ " metadata: %s(%s)\n",
+ devname, container);
+ sysfs_free(cc);
+ free(subarray);
+ return 1;
+ }
+ }
+ sysfs_free(cc);
+ }
+ if (mdmon_running(container))
+ st->update_tail = &st->updates;
+ }
+
+ added_disks = 0;
+ for (dv = devlist; dv; dv = dv->next)
+ added_disks++;
+ if (s->raiddisks > array.raid_disks &&
+ array.spare_disks +added_disks < (s->raiddisks - array.raid_disks) &&
+ !c->force) {
+ pr_err("Need %d spare%s to avoid degraded array,"
+ " and only have %d.\n"
+ " Use --force to over-ride this check.\n",
+ s->raiddisks - array.raid_disks,
+ s->raiddisks - array.raid_disks == 1 ? "" : "s",
+ array.spare_disks + added_disks);
+ return 1;
+ }
+
+ sra = sysfs_read(fd, NULL, GET_LEVEL | GET_DISKS | GET_DEVS
+ | GET_STATE | GET_VERSION);
+ if (sra) {
+ if (st->ss->external && subarray == NULL) {
+ array.level = LEVEL_CONTAINER;
+ sra->array.level = LEVEL_CONTAINER;
+ }
+ } else {
+ pr_err("failed to read sysfs parameters for %s\n",
+ devname);
+ return 1;
+ }
+ frozen = freeze(st);
+ if (frozen < -1) {
+ /* freeze() already spewed the reason */
+ sysfs_free(sra);
+ return 1;
+ } else if (frozen < 0) {
+ pr_err("%s is performing resync/recovery and cannot"
+ " be reshaped\n", devname);
+ sysfs_free(sra);
+ return 1;
+ }
+
+ /* ========= set size =============== */
+ if (s->size > 0 && (s->size == MAX_SIZE || s->size != (unsigned)array.size)) {
+ unsigned long long orig_size = get_component_size(fd)/2;
+ unsigned long long min_csize;
+ struct mdinfo *mdi;
+ int raid0_takeover = 0;
+
+ if (orig_size == 0)
+ orig_size = (unsigned) array.size;
+
+ if (orig_size == 0) {
+ pr_err("Cannot set device size in this type of array.\n");
+ rv = 1;
+ goto release;
+ }
+
+ if (reshape_super(st, s->size, UnSet, UnSet, 0, 0, UnSet, NULL,
+ devname, APPLY_METADATA_CHANGES, c->verbose > 0)) {
+ rv = 1;
+ goto release;
+ }
+ sync_metadata(st);
+ if (st->ss->external) {
+ /* metadata can have size limitation
+ * update size value according to metadata information
+ */
+ struct mdinfo *sizeinfo =
+ st->ss->container_content(st, subarray);
+ if (sizeinfo) {
+ unsigned long long new_size =
+ sizeinfo->custom_array_size/2;
+ int data_disks = get_data_disks(
+ sizeinfo->array.level,
+ sizeinfo->array.layout,
+ sizeinfo->array.raid_disks);
+ new_size /= data_disks;
+ dprintf("Metadata size correction from %llu to "
+ "%llu (%llu)\n", orig_size, new_size,
+ new_size * data_disks);
+ s->size = new_size;
+ sysfs_free(sizeinfo);
+ }
+ }
+
+ /* Update the size of each member device in case
+ * they have been resized. This will never reduce
+ * below the current used-size. The "size" attribute
+ * understands '0' to mean 'max'.
+ */
+ min_csize = 0;
+ rv = 0;
+ for (mdi = sra->devs; mdi; mdi = mdi->next) {
+ if (sysfs_set_num(sra, mdi, "size",
+ s->size == MAX_SIZE ? 0 : s->size) < 0) {
+ /* Probably kernel refusing to let us
+ * reduce the size - not an error.
+ */
+ break;
+ }
+ if (array.not_persistent == 0 &&
+ array.major_version == 0 &&
+ get_linux_version() < 3001000) {
+ /* Dangerous to allow size to exceed 2TB */
+ unsigned long long csize;
+ if (sysfs_get_ll(sra, mdi, "size", &csize) == 0) {
+ if (csize >= 2ULL*1024*1024*1024)
+ csize = 2ULL*1024*1024*1024;
+ if ((min_csize == 0 || (min_csize
+ > csize)))
+ min_csize = csize;
+ }
+ }
+ }
+ if (rv) {
+ pr_err("Cannot set size on "
+ "array members.\n");
+ goto size_change_error;
+ }
+ if (min_csize && s->size > min_csize) {
+ pr_err("Cannot safely make this array "
+ "use more than 2TB per device on this kernel.\n");
+ rv = 1;
+ goto size_change_error;
+ }
+ if (min_csize && s->size == MAX_SIZE) {
+ /* Don't let the kernel choose a size - it will get
+ * it wrong
+ */
+ pr_err("Limited v0.90 array to "
+ "2TB per device\n");
+ s->size = min_csize;
+ }
+ if (st->ss->external) {
+ if (sra->array.level == 0) {
+ rv = sysfs_set_str(sra, NULL, "level",
+ "raid5");
+ if (!rv) {
+ raid0_takeover = 1;
+ /* get array parametes after takeover
+ * to chane one parameter at time only
+ */
+ rv = ioctl(fd, GET_ARRAY_INFO, &array);
+ }
+ }
+ /* make sure mdmon is
+ * aware of the new level */
+ if (!mdmon_running(st->container_devnm))
+ start_mdmon(st->container_devnm);
+ ping_monitor(container);
+ if (mdmon_running(st->container_devnm) &&
+ st->update_tail == NULL)
+ st->update_tail = &st->updates;
+ }
+
+ if (s->size == MAX_SIZE)
+ s->size = 0;
+ array.size = s->size;
+ if (array.size != (signed)s->size) {
+ /* got truncated to 32bit, write to
+ * component_size instead
+ */
+ if (sra)
+ rv = sysfs_set_num(sra, NULL,
+ "component_size", s->size);
+ else
+ rv = -1;
+ } else {
+ rv = ioctl(fd, SET_ARRAY_INFO, &array);
+
+ /* manage array size when it is managed externally
+ */
+ if ((rv == 0) && st->ss->external)
+ rv = set_array_size(st, sra, sra->text_version);
+ }
+
+ if (raid0_takeover) {
+ /* do not recync non-existing parity,
+ * we will drop it anyway
+ */
+ sysfs_set_str(sra, NULL, "sync_action", "frozen");
+ /* go back to raid0, drop parity disk
+ */
+ sysfs_set_str(sra, NULL, "level", "raid0");
+ ioctl(fd, GET_ARRAY_INFO, &array);
+ }
+
+size_change_error:
+ if (rv != 0) {
+ int err = errno;
+
+ /* restore metadata */
+ if (reshape_super(st, orig_size, UnSet, UnSet, 0, 0,
+ UnSet, NULL, devname,
+ ROLLBACK_METADATA_CHANGES,
+ c->verbose) == 0)
+ sync_metadata(st);
+ pr_err("Cannot set device size for %s: %s\n",
+ devname, strerror(err));
+ if (err == EBUSY &&
+ (array.state & (1<<MD_SB_BITMAP_PRESENT)))
+ cont_err("Bitmap must be removed before size can be changed\n");
+ rv = 1;
+ goto release;
+ }
+ if (s->assume_clean) {
+ /* This will fail on kernels older than 3.0 unless
+ * a backport has been arranged.
+ */
+ if (sra == NULL ||
+ sysfs_set_str(sra, NULL, "resync_start", "none") < 0)
+ pr_err("--assume-clean not supported with --grow on this kernel\n");
+ }
+ ioctl(fd, GET_ARRAY_INFO, &array);
+ s->size = get_component_size(fd)/2;
+ if (s->size == 0)
+ s->size = array.size;
+ if (c->verbose >= 0) {
+ if (s->size == orig_size)
+ pr_err("component size of %s "
+ "unchanged at %lluK\n",
+ devname, s->size);
+ else
+ pr_err("component size of %s "
+ "has been set to %lluK\n",
+ devname, s->size);
+ }
+ changed = 1;
+ } else if (array.level != LEVEL_CONTAINER) {
+ s->size = get_component_size(fd)/2;
+ if (s->size == 0)
+ s->size = array.size;
+ }
+
+ /* See if there is anything else to do */
+ if ((s->level == UnSet || s->level == array.level) &&
+ (s->layout_str == NULL) &&
+ (s->chunk == 0 || s->chunk == array.chunk_size) &&
+ data_offset == INVALID_SECTORS &&
+ (s->raiddisks == 0 || s->raiddisks == array.raid_disks)) {
+ /* Nothing more to do */
+ if (!changed && c->verbose >= 0)
+ pr_err("%s: no change requested\n",
+ devname);
+ goto release;
+ }
+
+ /* ========= check for Raid10/Raid1 -> Raid0 conversion ===============
+ * current implementation assumes that following conditions must be met:
+ * - RAID10:
+ * - far_copies == 1
+ * - near_copies == 2
+ */
+ if ((s->level == 0 && array.level == 10 && sra &&
+ array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) ||
+ (s->level == 0 && array.level == 1 && sra)) {
+ int err;
+ err = remove_disks_for_takeover(st, sra, array.layout);
+ if (err) {
+ dprintf(Name": Array cannot be reshaped\n");
+ if (cfd > -1)
+ close(cfd);
+ rv = 1;
+ goto release;
+ }
+ /* Make sure mdmon has seen the device removal
+ * and updated metadata before we continue with
+ * level change
+ */
+ if (container)
+ ping_monitor(container);
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.array = array;
+ sysfs_init(&info, fd, NULL);
+ strcpy(info.text_version, sra->text_version);
+ info.component_size = s->size*2;
+ info.new_level = s->level;
+ info.new_chunk = s->chunk * 1024;
+ if (info.array.level == LEVEL_CONTAINER) {
+ info.delta_disks = UnSet;
+ info.array.raid_disks = s->raiddisks;
+ } else if (s->raiddisks)
+ info.delta_disks = s->raiddisks - info.array.raid_disks;
+ else
+ info.delta_disks = UnSet;
+ if (s->layout_str == NULL) {
+ info.new_layout = UnSet;
+ if (info.array.level == 6 &&
+ (info.new_level == 6 || info.new_level == UnSet) &&
+ info.array.layout >= 16) {
+ pr_err("%s has a non-standard layout. If you"
+ " wish to preserve this\n", devname);
+ cont_err("during the reshape, please specify"
+ " --layout=preserve\n");
+ cont_err("If you want to change it, specify a"
+ " layout or use --layout=normalise\n");
+ rv = 1;
+ goto release;
+ }
+ } else if (strcmp(s->layout_str, "normalise") == 0 ||
+ strcmp(s->layout_str, "normalize") == 0) {
+ /* If we have a -6 RAID6 layout, remove the '-6'. */
+ info.new_layout = UnSet;
+ if (info.array.level == 6 && info.new_level == UnSet) {
+ char l[40], *h;
+ strcpy(l, map_num(r6layout, info.array.layout));
+ h = strrchr(l, '-');
+ if (h && strcmp(h, "-6") == 0) {
+ *h = 0;
+ info.new_layout = map_name(r6layout, l);
+ }
+ } else {
+ pr_err("%s is only meaningful when reshaping"
+ " a RAID6 array.\n", s->layout_str);
+ rv = 1;
+ goto release;
+ }
+ } else if (strcmp(s->layout_str, "preserve") == 0) {
+ /* This means that a non-standard RAID6 layout
+ * is OK.
+ * In particular:
+ * - When reshape a RAID6 (e.g. adding a device)
+ * which is in a non-standard layout, it is OK
+ * to preserve that layout.
+ * - When converting a RAID5 to RAID6, leave it in
+ * the XXX-6 layout, don't re-layout.
+ */
+ if (info.array.level == 6 && info.new_level == UnSet)
+ info.new_layout = info.array.layout;
+ else if (info.array.level == 5 && info.new_level == 6) {
+ char l[40];
+ strcpy(l, map_num(r5layout, info.array.layout));
+ strcat(l, "-6");
+ info.new_layout = map_name(r6layout, l);
+ } else {
+ pr_err("%s in only meaningful when reshaping"
+ " to RAID6\n", s->layout_str);
+ rv = 1;
+ goto release;
+ }
+ } else {
+ int l = info.new_level;
+ if (l == UnSet)
+ l = info.array.level;
+ switch (l) {
+ case 5:
+ info.new_layout = map_name(r5layout, s->layout_str);
+ break;
+ case 6:
+ info.new_layout = map_name(r6layout, s->layout_str);
+ break;
+ case 10:
+ info.new_layout = parse_layout_10(s->layout_str);
+ break;
+ case LEVEL_FAULTY:
+ info.new_layout = parse_layout_faulty(s->layout_str);
+ break;
+ default:
+ pr_err("layout not meaningful"
+ " with this level\n");
+ rv = 1;
+ goto release;
+ }
+ if (info.new_layout == UnSet) {
+ pr_err("layout %s not understood"
+ " for this level\n",
+ s->layout_str);
+ rv = 1;
+ goto release;
+ }
+ }
+
+ if (array.level == LEVEL_FAULTY) {
+ if (s->level != UnSet && s->level != array.level) {
+ pr_err("cannot change level of Faulty device\n");
+ rv =1 ;
+ }
+ if (s->chunk) {
+ pr_err("cannot set chunksize of Faulty device\n");
+ rv =1 ;
+ }
+ if (s->raiddisks && s->raiddisks != 1) {
+ pr_err("cannot set raid_disks of Faulty device\n");
+ rv =1 ;
+ }
+ if (s->layout_str) {
+ if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+ dprintf("Cannot get array information.\n");
+ goto release;
+ }
+ array.layout = info.new_layout;
+ if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ pr_err("failed to set new layout\n");
+ rv = 1;
+ } else if (c->verbose >= 0)
+ printf("layout for %s set to %d\n",
+ devname, array.layout);
+ }
+ } else if (array.level == LEVEL_CONTAINER) {
+ /* This change is to be applied to every array in the
+ * container. This is only needed when the metadata imposes
+ * restraints of the various arrays in the container.
+ * Currently we only know that IMSM requires all arrays
+ * to have the same number of devices so changing the
+ * number of devices (On-Line Capacity Expansion) must be
+ * performed at the level of the container
+ */
+ rv = reshape_container(container, devname, -1, st, &info,
+ c->force, c->backup_file, c->verbose, 0, 0, 0);
+ frozen = 0;
+ } else {
+ /* get spare devices from external metadata
+ */
+ if (st->ss->external) {
+ struct mdinfo *info2;
+
+ info2 = st->ss->container_content(st, subarray);
+ if (info2) {
+ info.array.spare_disks =
+ info2->array.spare_disks;
+ sysfs_free(info2);
+ }
+ }
+
+ /* Impose these changes on a single array. First
+ * check that the metadata is OK with the change. */
+
+ if (reshape_super(st, 0, info.new_level,
+ info.new_layout, info.new_chunk,
+ info.array.raid_disks, info.delta_disks,
+ c->backup_file, devname, APPLY_METADATA_CHANGES,
+ c->verbose)) {
+ rv = 1;
+ goto release;
+ }
+ sync_metadata(st);
+ rv = reshape_array(container, fd, devname, st, &info, c->force,
+ devlist, data_offset, c->backup_file, c->verbose,
+ 0, 0, 0);
+ frozen = 0;
+ }
+release:
+ sysfs_free(sra);
+ if (frozen > 0)
+ unfreeze(st);
+ return rv;
+}
+
+/* verify_reshape_position()
+ *	Function checks if reshape position in metadata is not farther
+ *	than position in md.
+ *	The md position is read from the "sync_max" sysfs attribute and
+ *	multiplied by the number of data disks before being compared with
+ *	info->reshape_progress.  When md is ahead of the metadata,
+ *	info->reshape_progress is moved forward to the md position.
+ * Parameters:
+ *	info  : array whose sysfs entry is read and whose reshape_progress
+ *		is checked (and possibly updated)
+ *	level : RAID level passed to get_data_disks()
+ * Return value:
+ *	 0 : not valid sysfs entry
+ *	     it can be caused by not started reshape, it should be started
+ *	     by reshape array or raid0 array is before takeover.
+ *	     Also returned when the entry does not hold a plain number.
+ *	-1 : error, reshape position is obviously wrong
+ *	     (or the sysfs entry was readable but empty)
+ *	 1 : success, reshape progress correct or updated
+ */
+static int verify_reshape_position(struct mdinfo *info, int level)
+{
+	char buf[40];
+	char *ep;
+	unsigned long long position;
+	int rv;
+
+	/* read sync_max, failure can mean raid0 array */
+	rv = sysfs_get_str(info, NULL, "sync_max", buf, sizeof(buf));
+	if (rv < 0)
+		return 0;
+	if (rv == 0)
+		/* for valid sysfs entry, 0-length content
+		 * should be indicated as error
+		 */
+		return -1;
+
+	position = strtoull(buf, &ep, 0);
+	dprintf(Name": Read sync_max sysfs entry is: %s\n", buf);
+
+	/* Only accept a number that is followed by end-of-string,
+	 * a newline or a space; anything else is left unverified.
+	 */
+	if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))
+		return 0;
+
+	position *= get_data_disks(level,
+				   info->new_layout,
+				   info->array.raid_disks);
+
+	if (info->reshape_progress > position) {
+		/* metadata claims more progress than md has made */
+		pr_err("Fatal error: array "
+		       "reshape was not properly frozen "
+		       "(expected reshape position is %llu, "
+		       "but reshape progress is %llu).\n",
+		       position, info->reshape_progress);
+		return -1;
+	}
+
+	if (info->reshape_progress < position) {
+		dprintf("Corrected reshape progress (%llu) to "
+			"md position (%llu)\n",
+			info->reshape_progress, position);
+		info->reshape_progress = position;
+	} else
+		dprintf("Reshape position in md and metadata "
+			"are the same\n");
+
+	return 1;
+}
+
+static unsigned long long choose_offset(unsigned long long lo,
+					unsigned long long hi,
+					unsigned long long min,
+					unsigned long long max)
+{
+	/* Choose a new offset between hi and lo.
+	 * It must be between min and max, but
+	 * we would prefer something near the middle of hi/lo, and also
+	 * prefer to be aligned to a big power of 2.
+	 *
+	 * So we start with the middle (pulled into the min..max range
+	 * if it falls outside it), then for each bit,
+	 * starting at '1' and increasing, if it is set, we either
+	 * add it or subtract it if possible, preferring the option
+	 * which is furthest from the boundary.
+	 *
+	 * We stop once we get a 1MB alignment. As units are in sectors,
+	 * 1MB = 2*1024 sectors.
+	 *
+	 * Callers are expected to pass lo <= min <= max <= hi.
+	 * Returns the chosen offset, in sectors.
+	 */
+	unsigned long long choice = (lo + hi) / 2;
+	unsigned long long bit;
+
+	/* The middle of lo/hi can lie outside min..max when the usable
+	 * window is small compared with the required move.  Every step
+	 * of the loop below keeps 'choice' inside min..max only if it
+	 * starts there, so clamp first.
+	 */
+	if (choice < min)
+		choice = min;
+	else if (choice > max)
+		choice = max;
+
+	for (bit = 1; bit < 2*1024; bit = bit << 1) {
+		unsigned long long bigger, smaller;
+
+		if (!(bit & choice))
+			continue;
+		/* 'bit' is set in 'choice', so the subtraction cannot wrap */
+		bigger = choice + bit;
+		smaller = choice - bit;
+		if (bigger > max && smaller < min)
+			/* cannot align any further without leaving the range */
+			break;
+		if (bigger > max)
+			choice = smaller;
+		else if (smaller < min)
+			choice = bigger;
+		else if (hi - bigger > smaller - lo)
+			choice = bigger;
+		else
+			choice = smaller;
+	}
+	return choice;
+}
+
+/* set_new_data_offset()
+ *	Choose a new data offset for every non-faulty device listed in
+ *	'sra' and write it to that device's "new_offset" sysfs attribute.
+ * Parameters:
+ *	sra          : array description; sra->devs is walked twice
+ *	st           : metadata handler, duplicated to load each
+ *		       device's superblock
+ *	devname      : array name, used in messages only
+ *	delta_disks  : <0 the offset is moved up, >0 it is moved down,
+ *		       0 the direction is taken from 'data_offset' or,
+ *		       failing that, from whichever side has more space
+ *	data_offset  : requested offset, or INVALID_SECTORS to choose one
+ *	min          : smallest distance the offset must move
+ *	can_fallback : when set, lack of space returns 1 instead of an
+ *		       error
+ * Return value:
+ *	 0 : new_offset was set on every non-faulty device
+ *	 1 : data offset was not changed and the caller should use another
+ *	     method: metadata reports no space before or after, there are
+ *	     no usable devices, the kernel refused the first device with
+ *	     ENOENT/E2BIG, or space is short and 'can_fallback' is set
+ *	<0 : error; a message has been printed
+ */
+static int set_new_data_offset(struct mdinfo *sra, struct supertype *st,
+			       char *devname, int delta_disks,
+			       unsigned long long data_offset,
+			       unsigned long long min,
+			       int can_fallback)
+{
+	struct mdinfo *sd;
+	int dir = 0;
+	int err = 0;
+	unsigned long long before, after;
+
+	/* Need to find min space before and after so same is used
+	 * on all devices
+	 */
+	before = UINT64_MAX;
+	after = UINT64_MAX;
+	for (sd = sra->devs; sd; sd = sd->next) {
+		char *dn;
+		int dfd;
+		int rv;
+		struct supertype *st2;
+		struct mdinfo info2;
+
+		if (sd->disk.state & (1<<MD_DISK_FAULTY))
+			continue;
+		dn = map_dev(sd->disk.major, sd->disk.minor, 0);
+		dfd = dev_open(dn, O_RDONLY);
+		if (dfd < 0) {
+			pr_err("%s: cannot open component %s\n",
+			       devname, dn ? dn : "-unknown-");
+			goto release;
+		}
+		st2 = dup_super(st);
+		rv = st2->ss->load_super(st2,dfd, NULL);
+		close(dfd);
+		if (rv) {
+			free(st2);
+			pr_err("%s: cannot get superblock from %s\n",
+			       devname, dn);
+			goto release;
+		}
+		st2->ss->getinfo_super(st2, &info2, NULL);
+		st2->ss->free_super(st2);
+		free(st2);
+		if (info2.space_before == 0 &&
+		    info2.space_after == 0) {
+			/* Metadata doesn't support data_offset changes */
+			return 1;
+		}
+		if (before > info2.space_before)
+			before = info2.space_before;
+		if (after > info2.space_after)
+			after = info2.space_after;
+
+		if (data_offset != INVALID_SECTORS) {
+			/* The first device fixes the direction; every
+			 * later device must need to move the same way.
+			 */
+			if (dir == 0) {
+				if (info2.data_offset == data_offset) {
+					pr_err("%s: already has that data_offset\n",
+					       dn);
+					goto release;
+				}
+				if (data_offset < info2.data_offset)
+					dir = -1;
+				else
+					dir = 1;
+			} else if ((data_offset <= info2.data_offset && dir == 1) ||
+				   (data_offset >= info2.data_offset && dir == -1)) {
+				pr_err("%s: differing data offsets on devices make this --data-offset setting impossible\n",
+				       dn);
+				goto release;
+			}
+		}
+	}
+	if (before == UINT64_MAX)
+		/* impossible really, there must be no devices */
+		return 1;
+
+	for (sd = sra->devs; sd; sd = sd->next) {
+		char *dn = map_dev(sd->disk.major, sd->disk.minor, 0);
+		unsigned long long new_data_offset;
+
+		if (sd->disk.state & (1<<MD_DISK_FAULTY))
+			continue;
+		if (delta_disks < 0) {
+			/* Don't need any space as array is shrinking
+			 * just move data_offset up by min
+			 */
+			if (data_offset == INVALID_SECTORS)
+				new_data_offset = sd->data_offset + min;
+			else {
+				if (data_offset < sd->data_offset + min) {
+					pr_err("--data-offset too small for %s\n",
+					       dn);
+					goto release;
+				}
+				new_data_offset = data_offset;
+			}
+		} else if (delta_disks > 0) {
+			/* need space before */
+			if (before < min) {
+				if (can_fallback)
+					goto fallback;
+				pr_err("Insufficient head-space for reshape on %s\n",
+				       dn);
+				goto release;
+			}
+			if (data_offset == INVALID_SECTORS)
+				new_data_offset = sd->data_offset - min;
+			else {
+				if (data_offset > sd->data_offset - min) {
+					pr_err("--data-offset too large for %s\n",
+					       dn);
+					goto release;
+				}
+				new_data_offset = data_offset;
+			}
+		} else {
+			if (dir == 0) {
+				/* can move up or down.  If 'data_offset'
+				 * was set we would have already decided,
+				 * so just choose direction with most space.
+				 */
+				if (before > after)
+					dir = -1;
+				else
+					dir = 1;
+			}
+			sysfs_set_str(sra, NULL, "reshape_direction",
+				      dir == 1 ? "backwards" : "forwards");
+			if (dir > 0) {
+				/* Increase data offset */
+				if (after < min) {
+					if (can_fallback)
+						goto fallback;
+					pr_err("Insufficient tail-space for reshape on %s\n",
+					       dn);
+					goto release;
+				}
+				if (data_offset != INVALID_SECTORS &&
+				    data_offset < sd->data_offset + min) {
+					pr_err("--data-offset too small on %s\n",
+					       dn);
+					goto release;
+				}
+				if (data_offset != INVALID_SECTORS)
+					new_data_offset = data_offset;
+				else
+					new_data_offset = choose_offset(sd->data_offset,
+									sd->data_offset + after,
+									sd->data_offset + min,
+									sd->data_offset + after);
+			} else {
+				/* Decrease data offset */
+				if (before < min) {
+					if (can_fallback)
+						goto fallback;
+					pr_err("insufficient head-room on %s\n",
+					       dn);
+					goto release;
+				}
+				/* The offset has to drop by at least 'min',
+				 * so a requested value above
+				 * (current - min) does not move far enough.
+				 */
+				if (data_offset != INVALID_SECTORS &&
+				    data_offset > sd->data_offset - min) {
+					pr_err("--data-offset too large on %s\n",
+					       dn);
+					goto release;
+				}
+				if (data_offset != INVALID_SECTORS)
+					new_data_offset = data_offset;
+				else
+					new_data_offset = choose_offset(sd->data_offset - before,
+									sd->data_offset,
+									sd->data_offset - before,
+									sd->data_offset - min);
+			}
+		}
+		err = sysfs_set_num(sra, sd, "new_offset", new_data_offset);
+		if (err < 0 && errno == E2BIG) {
+			/* try again after increasing data size to max */
+			err = sysfs_set_num(sra, sd, "size", 0);
+			if (err < 0 && errno == EINVAL &&
+			    !(sd->disk.state & (1<<MD_DISK_SYNC))) {
+				/* some kernels have a bug where you cannot
+				 * use '0' on spare devices. */
+				sysfs_set_num(sra, sd, "size",
+					      (sra->component_size + after)/2);
+			}
+			err = sysfs_set_num(sra, sd, "new_offset",
+					    new_data_offset);
+		}
+		if (err < 0) {
+			if (errno == E2BIG && data_offset != INVALID_SECTORS) {
+				pr_err("data-offset is too big for %s\n",
+				       dn);
+				goto release;
+			}
+			if (sd == sra->devs &&
+			    (errno == ENOENT || errno == E2BIG))
+				/* Early kernel, no 'new_offset' file,
+				 * or kernel doesn't like us.
+				 * For RAID5/6 this is not fatal
+				 */
+				return 1;
+			pr_err("Cannot set new_offset for %s\n",
+			       dn);
+			break;
+		}
+	}
+	return err;
+release:
+	return -1;
+fallback:
+	/* Just use a backup file */
+	return 1;
+}
+
+static int raid10_reshape(char *container, int fd, char *devname,
+ struct supertype *st, struct mdinfo *info,
+ struct reshape *reshape,
+ unsigned long long data_offset,
+ int force, int verbose)
+{
+ /* Changing raid_disks, layout, chunksize or possibly
+ * just data_offset for a RAID10.
+ * We must always change data_offset. We change by at least
+ * ->min_offset_change which is the largest of the old and new
+ * chunk sizes.
+ * If raid_disks is increasing, then data_offset must decrease
+ * by at least this copy size.
+ * If raid_disks is unchanged, data_offset must increase or
+ * decrease by at least min_offset_change but preferably by much more.
+ * We choose half of the available space.
+ * If raid_disks is decreasing, data_offset must increase by
+ * at least min_offset_change. To allow of this, component_size
+ * must be decreased by the same amount.
+ *
+ * So we calculate the required minimum and direction, possibly
+ * reduce the component_size, then iterate through the devices
+ * and set the new_data_offset.
+ * If that all works, we set chunk_size, layout, raid_disks, and start
+ * 'reshape'
+ *
+ * Returns 0 once "reshape" has been written to sync_action, and 1 on
+ * any failure (a message is printed first).
+ * The sysfs description read here ('sra') is freed on every path.
+ * 'container', 'force' and 'verbose' are accepted but not used in
+ * this function.
+ */
+ struct mdinfo *sra;
+ unsigned long long min;
+ int err = 0;
+
+ sra = sysfs_read(fd, NULL,
+ GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK
+ );
+ if (!sra) {
+ pr_err("%s: Cannot get array details from sysfs\n",
+ devname);
+ goto release;
+ }
+ min = reshape->min_offset_change; /* smallest move of data_offset */
+
+ if (info->delta_disks)
+ sysfs_set_str(sra, NULL, "reshape_direction",
+ info->delta_disks < 0 ? "backwards" : "forwards");
+ if (info->delta_disks < 0 &&
+ info->space_after < min) { /* shrinking without enough room after the data */
+ int rv = sysfs_set_num(sra, NULL, "component_size",
+ (sra->component_size -
+ min)/2);
+ if (rv) {
+ pr_err("cannot reduce component size\n");
+ goto release;
+ }
+ }
+ err = set_new_data_offset(sra, st, devname, info->delta_disks, data_offset,
+ min, 0); /* last argument 0: no fallback is allowed here */
+ if (err == 1) { /* offset could not be changed; treated as an error */
+ pr_err("Cannot set new_data_offset: RAID10 reshape not\n");
+ cont_err("supported on this kernel\n");
+ err = -1;
+ }
+ if (err < 0)
+ goto release;
+
+ /* err is 0 here; from now on it holds the errno of the first
+ * sysfs write that fails, and later writes are skipped.
+ */
+ if (!err && sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
+ err = errno;
+ if (!err && sysfs_set_num(sra, NULL, "layout", reshape->after.layout) < 0)
+ err = errno;
+ if (!err && sysfs_set_num(sra, NULL, "raid_disks",
+ info->array.raid_disks + info->delta_disks) < 0)
+ err = errno;
+ if (!err && sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0)
+ err = errno;
+ if (err) {
+ pr_err("Cannot set array shape for %s\n",
+ devname);
+ if (err == EBUSY &&
+ (info->array.state & (1<<MD_SB_BITMAP_PRESENT)))
+ cont_err(" Bitmap must be removed before"
+ " shape can be changed\n");
+ goto release;
+ }
+ sysfs_free(sra);
+ return 0;
+release:
+ sysfs_free(sra); /* also reached with sra == NULL when sysfs_read failed */
+ return 1;
+}
+
+static void get_space_after(int fd, struct supertype *st, struct mdinfo *info)
+{
+	/* Find the smallest space_before and space_after reported by the
+	 * superblocks of the non-faulty devices of the array open on
+	 * 'fd', and store them in 'info'.
+	 * 'info' is left untouched when sysfs cannot be read, when any
+	 * device cannot be opened or its superblock cannot be loaded,
+	 * or when no non-faulty device was found.
+	 */
+	struct mdinfo *members, *dev;
+	/* Initialisation to silence compiler warning */
+	unsigned long long least_before = 0, least_after = 0;
+	int sampled = 0;	/* set once one device has been examined */
+	int complete = 1;	/* cleared when a device could not be read */
+
+	members = sysfs_read(fd, NULL, GET_DEVS);
+	if (members == NULL)
+		return;
+
+	for (dev = members->devs; dev != NULL; dev = dev->next) {
+		struct supertype *clone;
+		struct mdinfo member_info;
+		char *path;
+		int member_fd;
+		int load_failed;
+
+		if (dev->disk.state & (1<<MD_DISK_FAULTY))
+			continue;
+
+		path = map_dev(dev->disk.major, dev->disk.minor, 0);
+		member_fd = dev_open(path, O_RDONLY);
+		if (member_fd < 0) {
+			complete = 0;
+			break;
+		}
+
+		clone = dup_super(st);
+		load_failed = clone->ss->load_super(clone, member_fd, NULL);
+		close(member_fd);
+		if (load_failed) {
+			free(clone);
+			complete = 0;
+			break;
+		}
+		clone->ss->getinfo_super(clone, &member_info, NULL);
+		clone->ss->free_super(clone);
+		free(clone);
+
+		if (!sampled || member_info.space_before < least_before)
+			least_before = member_info.space_before;
+		if (!sampled || member_info.space_after < least_after)
+			least_after = member_info.space_after;
+		sampled = 1;
+	}
+
+	if (complete && sampled) {
+		info->space_after = least_after;
+		info->space_before = least_before;
+	}
+	sysfs_free(members);
+}
+
+static void update_cache_size(char *container, struct mdinfo *sra,
+			      struct mdinfo *info,
+			      int disks, unsigned long long blocks)
+{
+	/* The internal stripe cache has to be big enough for the
+	 * reshape to work: at least 4 stripes of whichever chunk size
+	 * (old or new) is larger, and room for 'blocks' spread over
+	 * 'disks' with a little to spare.
+	 * If the current cache (sra->cache_size) is smaller than that,
+	 * "stripe_cache_size" is raised to one more than the computed
+	 * value; otherwise nothing is written.
+	 */
+	unsigned long need;
+
+	/* 4 stripes minimum, converted to sectors */
+	need = max(info->array.chunk_size, info->new_chunk);
+	need = need * 4 / 512;
+
+	/* make sure there is room for 'blocks' with a bit to spare */
+	if (need < 16 + blocks / disks)
+		need = 16 + blocks / disks;
+
+	/* Convert from sectors to pages */
+	need /= 4096 / 512;
+
+	if (sra->cache_size >= need)
+		return;
+	subarray_set_num(container, sra, "stripe_cache_size", need + 1);
+}
+
+/* impose_reshape()
+ *	Record the reshape starting point in 'sra' and, unless the reshape
+ *	is being restarted, tell the kernel about the new array shape.
+ * Parameters:
+ *	sra       : sysfs view of the array; new_chunk and reshape_progress
+ *		    are filled in here
+ *	info      : requested change (new_chunk, reshape_progress, ...)
+ *	st        : metadata handler; st->ss->external selects the method
+ *	fd        : open array device, used for the array-info ioctls
+ *	restart   : non-zero when continuing an already started reshape
+ *	devname   : array name, used in messages only
+ *	container : passed to subarray_set_num() when setting raid_disks
+ *	reshape   : geometry before/after the change
+ * Return value:
+ *	 0 : success
+ *	-1 : array information could not be read or the new shape was
+ *	     rejected; a message has been printed
+ */
+static int impose_reshape(struct mdinfo *sra,
+			  struct mdinfo *info,
+			  struct supertype *st,
+			  int fd,
+			  int restart,
+			  char *devname, char *container,
+			  struct reshape *reshape)
+{
+	struct mdu_array_info_s array;
+
+	sra->new_chunk = info->new_chunk;
+
+	if (restart) {
+		/* for external metadata checkpoint saved by mdmon can be lost
+		 * or missed /due to e.g. crash/. Check if md is not during
+		 * restart farther than metadata points to.
+		 * If so, this means metadata information is obsolete.
+		 */
+		if (st->ss->external)
+			verify_reshape_position(info, reshape->level);
+		sra->reshape_progress = info->reshape_progress;
+	} else {
+		sra->reshape_progress = 0;
+		if (reshape->after.data_disks < reshape->before.data_disks)
+			/* start from the end of the new array */
+			sra->reshape_progress = (sra->component_size
+						 * reshape->after.data_disks);
+	}
+
+	/* 'array' is handed back to the kernel below and its state is
+	 * inspected when reporting errors, so it must really have been
+	 * filled in.
+	 */
+	if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+		pr_err("%s: Cannot get array information.\n", devname);
+		return -1;
+	}
+
+	if (info->array.chunk_size == info->new_chunk &&
+	    reshape->before.layout == reshape->after.layout &&
+	    st->ss->external == 0) {
+		/* use SET_ARRAY_INFO but only if reshape hasn't started */
+		array.raid_disks = reshape->after.data_disks + reshape->parity;
+		if (!restart &&
+		    ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+			/* keep errno before anything can overwrite it */
+			int err = errno;
+
+			pr_err("Cannot set device shape for %s: %s\n",
+			       devname, strerror(err));
+
+			if (err == EBUSY &&
+			    (array.state & (1<<MD_SB_BITMAP_PRESENT)))
+				cont_err("Bitmap must be removed before"
+					 " shape can be changed\n");
+
+			return -1;
+		}
+	} else if (!restart) {
+		/* set them all just in case some old 'new_*' value
+		 * persists from some earlier problem.
+		 */
+		int err = 0;
+
+		if (sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
+			err = errno;
+		if (!err && sysfs_set_num(sra, NULL, "layout",
+					  reshape->after.layout) < 0)
+			err = errno;
+		if (!err && subarray_set_num(container, sra, "raid_disks",
+					     reshape->after.data_disks +
+					     reshape->parity) < 0)
+			err = errno;
+		if (err) {
+			pr_err("Cannot set device shape for %s\n",
+			       devname);
+
+			if (err == EBUSY &&
+			    (array.state & (1<<MD_SB_BITMAP_PRESENT)))
+				cont_err("Bitmap must be removed before"
+					 " shape can be changed\n");
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* impose_level()
+ *	Switch the array open on 'fd' to RAID level 'level' by writing the
+ *	level name to the "level" sysfs attribute.
+ *	When a RAID4/5/6 array is converted to RAID0, every device that is
+ *	not an active data device is first removed (failing it if needed).
+ * Parameters:
+ *	fd      : open array device
+ *	level   : target level, looked up in 'pers' for its name
+ *	devname : array name, used in messages only
+ *	verbose : a confirmation is printed when >= 0
+ * Return value:
+ *	 0 : level changed, or 'level' has no name in 'pers' and nothing
+ *	     was written
+ *	-1 : array information could not be read, or a RAID5/RAID6 array
+ *	     is not in the parity-last layout needed to become RAID0
+ *	>0 : errno from the failed write to "level"
+ */
+static int impose_level(int fd, int level, char *devname, int verbose)
+{
+	char *c;
+	struct mdu_array_info_s array;
+	struct mdinfo info;
+	sysfs_init(&info, fd, NULL);
+
+	/* array.level, raid_disks and nr_disks decide which devices get
+	 * removed below, so do not continue with unread data.
+	 */
+	if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+		pr_err("%s: Cannot get array information.\n", devname);
+		return -1;
+	}
+	if (level == 0 &&
+	    (array.level >= 4 && array.level <= 6)) {
+		/* To convert to RAID0 we need to fail and
+		 * remove any non-data devices. */
+		int found = 0;
+		int d;
+		int data_disks = array.raid_disks - 1;
+		if (array.level == 6)
+			data_disks -= 1;
+		if (array.level == 5 &&
+		    array.layout != ALGORITHM_PARITY_N)
+			return -1;
+		if (array.level == 6 &&
+		    array.layout != ALGORITHM_PARITY_N_6)
+			return -1;
+		sysfs_set_str(&info, NULL,"sync_action", "idle");
+		/* First remove any spares so no recovery starts */
+		for (d = 0, found = 0;
+		     d < MAX_DISKS && found < array.nr_disks;
+		     d++) {
+			mdu_disk_info_t disk;
+			disk.number = d;
+			if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+				continue;
+			if (disk.major == 0 && disk.minor == 0)
+				continue;
+			found++;
+			if ((disk.state & (1 << MD_DISK_ACTIVE))
+			    && disk.raid_disk < data_disks)
+				/* keep this */
+				continue;
+			/* best effort: devices still in use are failed
+			 * and removed by the next loop
+			 */
+			ioctl(fd, HOT_REMOVE_DISK,
+			      makedev(disk.major, disk.minor));
+		}
+		/* Now fail anything left */
+		/* Refresh nr_disks after the removals; if this fails the
+		 * earlier, larger count is still a safe loop bound.
+		 */
+		ioctl(fd, GET_ARRAY_INFO, &array);
+		for (d = 0, found = 0;
+		     d < MAX_DISKS && found < array.nr_disks;
+		     d++) {
+			int cnt;
+			mdu_disk_info_t disk;
+			disk.number = d;
+			if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+				continue;
+			if (disk.major == 0 && disk.minor == 0)
+				continue;
+			found++;
+			if ((disk.state & (1 << MD_DISK_ACTIVE))
+			    && disk.raid_disk < data_disks)
+				/* keep this */
+				continue;
+			ioctl(fd, SET_DISK_FAULTY,
+			      makedev(disk.major, disk.minor));
+			/* retry the removal a few times, 10ms apart,
+			 * while the kernel reports the device as busy
+			 */
+			cnt = 5;
+			while (ioctl(fd, HOT_REMOVE_DISK,
+				     makedev(disk.major, disk.minor)) < 0
+			       && errno == EBUSY
+			       && cnt--) {
+				usleep(10000);
+			}
+		}
+	}
+	c = map_num(pers, level);
+	if (c) {
+		int err = sysfs_set_str(&info, NULL, "level", c);
+		if (err) {
+			err = errno;
+			pr_err("%s: could not set level to %s\n",
+			       devname, c);
+			if (err == EBUSY &&
+			    (array.state & (1<<MD_SB_BITMAP_PRESENT)))
+				cont_err("Bitmap must be removed"
+					 " before level can be changed\n");
+			return err;
+		}
+		if (verbose >= 0)
+			pr_err("level of %s changed to %s\n",
+			       devname, c);
+	}
+	return 0;
+}
+
+int sigterm = 0; /* flag: becomes 1 once catch_term() has run; never cleared in this block */
+static void catch_term(int sig) /* presumably installed as a signal handler - confirm at the call site; 'sig' is ignored */
+{
+ sigterm = 1; /* only records the event; NOTE(review): plain int rather than volatile sig_atomic_t */
+}
+
+/* continue_via_systemd()
+ *	Ask systemd to start "mdadm-grow-continue@<devnm>.service" by
+ *	running "systemctl start" in a forked child and waiting for it.
+ * Return value:
+ *	1 : systemctl ran and exited with status 0
+ *	0 : fork failed, systemctl could not be executed or it reported
+ *	    failure; the caller has to continue the reshape itself
+ */
+static int continue_via_systemd(char *devnm)
+{
+	int skipped, i, pid, status;
+	char pathbuf[1024];
+	/* In a systemd/udev world, it is best to get systemd to
+	 * run "mdadm --grow --continue" rather than running in the
+	 * background.
+	 */
+	pid = fork();
+	switch (pid) {
+	case 0:
+		/* FIXME yuk. CLOSE_EXEC?? */
+		/* Close descriptors from 3 upwards, stopping after 20
+		 * consecutive close() failures.
+		 */
+		skipped = 0;
+		for (i = 3; skipped < 20; i++)
+			if (close(i) < 0)
+				skipped++;
+			else
+				skipped = 0;
+
+		/* Don't want to see error messages from
+		 * systemctl.  If the service doesn't exist,
+		 * we fork ourselves.
+		 */
+		close(2);
+		open("/dev/null", O_WRONLY);
+		snprintf(pathbuf, sizeof(pathbuf), "mdadm-grow-continue@%s.service",
+			 devnm);
+		/* execl() only returns on failure; try both locations */
+		execl("/usr/bin/systemctl", "systemctl",
+		      "start",
+		      pathbuf, NULL);
+		execl("/bin/systemctl", "systemctl", "start",
+		      pathbuf, NULL);
+		/* _exit, not exit: this is a forked copy and must not
+		 * flush stdio buffers inherited from the parent.
+		 */
+		_exit(1);
+	case -1: /* Just do it ourselves. */
+		break;
+	default: /* parent - good */
+		/* wait for this child only, so that some other child of
+		 * the process is not reaped and mistaken for systemctl
+		 */
+		if (waitpid(pid, &status, 0) == pid && status == 0)
+			return 1;
+	}
+	return 0;
+}
+
+static int reshape_array(char *container, int fd, char *devname,
+ struct supertype *st, struct mdinfo *info,
+ int force, struct mddev_dev *devlist,
+ unsigned long long data_offset,
+ char *backup_file, int verbose, int forked,
+ int restart, int freeze_reshape)
+{
+ struct reshape reshape;
+ int spares_needed;
+ char *msg;
+ int orig_level = UnSet;
+ int odisks;
+ int delayed;
+
+ struct mdu_array_info_s array;
+ char *c;
+
+ struct mddev_dev *dv;
+ int added_disks;
+
+ int *fdlist = NULL;
+ unsigned long long *offsets = NULL;
+ int d;
+ int nrdisks;
+ int err;
+ unsigned long blocks;
+ unsigned long long array_size;
+ int done;
+ struct mdinfo *sra = NULL;
+ char buf[20];
+
+ /* when reshaping a RAID0, the component_size might be zero.
+ * So try to fix that up.
+ */
+ if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+ dprintf("Cannot get array information.\n");
+ goto release;
+ }
+ if (array.level == 0 && info->component_size == 0) {
+ get_dev_size(fd, NULL, &array_size);
+ info->component_size = array_size / array.raid_disks;
+ }
+
+ if (array.level == 10)
+ /* Need space_after info */
+ get_space_after(fd, st, info);
+
+ if (info->reshape_active) {
+ int new_level = info->new_level;
+ info->new_level = UnSet;
+ if (info->delta_disks > 0)
+ info->array.raid_disks -= info->delta_disks;
+ msg = analyse_change(devname, info, &reshape);
+ info->new_level = new_level;
+ if (info->delta_disks > 0)
+ info->array.raid_disks += info->delta_disks;
+ if (!restart)
+ /* Make sure the array isn't read-only */
+ ioctl(fd, RESTART_ARRAY_RW, 0);
+ } else
+ msg = analyse_change(devname, info, &reshape);
+ if (msg) {
+ /* if msg == "", error has already been printed */
+ if (msg[0])
+ pr_err("%s\n", msg);
+ goto release;
+ }
+ if (restart &&
+ (reshape.level != info->array.level ||
+ reshape.before.layout != info->array.layout ||
+ reshape.before.data_disks + reshape.parity
+ != info->array.raid_disks - max(0, info->delta_disks))) {
+ pr_err("reshape info is not in native format -"
+ " cannot continue.\n");
+ goto release;
+ }
+
+ if (st->ss->external && restart && (info->reshape_progress == 0) &&
+ !((sysfs_get_str(info, NULL, "sync_action", buf, sizeof(buf)) > 0) &&
+ (strncmp(buf, "reshape", 7) == 0))) {
+ /* When reshape is restarted from '0', very begin of array
+ * it is possible that for external metadata reshape and array
+ * configuration doesn't happen.
+ * Check if md has the same opinion, and reshape is restarted
+ * from 0. If so, this is regular reshape start after reshape
+ * switch in metadata to next array only.
+ */
+ if ((verify_reshape_position(info, reshape.level) >= 0) &&
+ (info->reshape_progress == 0))
+ restart = 0;
+ }
+ if (restart) {
+ /* reshape already started. just skip to monitoring the reshape */
+ if (reshape.backup_blocks == 0)
+ return 0;
+ if (restart & RESHAPE_NO_BACKUP)
+ return 0;
+
+ /* Need 'sra' down at 'started:' */
+ sra = sysfs_read(fd, NULL,
+ GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK|
+ GET_CACHE);
+ if (!sra) {
+ pr_err("%s: Cannot get array details from sysfs\n",
+ devname);
+ goto release;
+ }
+
+ if (!backup_file)
+ backup_file = locate_backup(sra->sys_name);
+
+ goto started;
+ }
+ /* The container is frozen but the array may not be.
+ * So freeze the array so spares don't get put to the wrong use
+ * FIXME there should probably be a cleaner separation between
+ * freeze_array and freeze_container.
+ */
+ sysfs_freeze_array(info);
+ /* Check we have enough spares to not be degraded */
+ added_disks = 0;
+ for (dv = devlist; dv ; dv=dv->next)
+ added_disks++;
+ spares_needed = max(reshape.before.data_disks,
+ reshape.after.data_disks)
+ + reshape.parity - array.raid_disks;
+
+ if (!force &&
+ info->new_level > 1 && info->array.level > 1 &&
+ spares_needed > info->array.spare_disks + added_disks) {
+ pr_err("Need %d spare%s to avoid degraded array,"
+ " and only have %d.\n"
+ " Use --force to over-ride this check.\n",
+ spares_needed,
+ spares_needed == 1 ? "" : "s",
+ info->array.spare_disks + added_disks);
+ goto release;
+ }
+ /* Check we have enough spares to not fail */
+ spares_needed = max(reshape.before.data_disks,
+ reshape.after.data_disks)
+ - array.raid_disks;
+ if ((info->new_level > 1 || info->new_level == 0) &&
+ spares_needed > info->array.spare_disks +added_disks) {
+ pr_err("Need %d spare%s to create working array,"
+ " and only have %d.\n",
+ spares_needed,
+ spares_needed == 1 ? "" : "s",
+ info->array.spare_disks + added_disks);
+ goto release;
+ }
+
+ if (reshape.level != array.level) {
+ int err = impose_level(fd, reshape.level, devname, verbose);
+ if (err)
+ goto release;
+ info->new_layout = UnSet; /* after level change,
+ * layout is meaningless */
+ orig_level = array.level;
+ sysfs_freeze_array(info);
+
+ if (reshape.level > 0 && st->ss->external) {
+ /* make sure mdmon is aware of the new level */
+ if (mdmon_running(container))
+ flush_mdmon(container);
+
+ if (!mdmon_running(container))
+ start_mdmon(container);
+ ping_monitor(container);
+ if (mdmon_running(container) &&
+ st->update_tail == NULL)
+ st->update_tail = &st->updates;
+ }
+ }
+ /* ->reshape_super might have chosen some spares from the
+ * container that it wants to be part of the new array.
+ * We can collect them with ->container_content and give
+ * them to the kernel.
+ */
+ if (st->ss->reshape_super && st->ss->container_content) {
+ char *subarray = strchr(info->text_version+1, '/')+1;
+ struct mdinfo *info2 =
+ st->ss->container_content(st, subarray);
+ struct mdinfo *d;
+
+ if (info2) {
+ sysfs_init(info2, fd, st->devnm);
+ /* When increasing number of devices, we need to set
+ * new raid_disks before adding these, or they might
+ * be rejected.
+ */
+ if (reshape.backup_blocks &&
+ reshape.after.data_disks > reshape.before.data_disks)
+ subarray_set_num(container, info2, "raid_disks",
+ reshape.after.data_disks +
+ reshape.parity);
+ for (d = info2->devs; d; d = d->next) {
+ if (d->disk.state == 0 &&
+ d->disk.raid_disk >= 0) {
+ /* This is a spare that wants to
+ * be part of the array.
+ */
+ add_disk(fd, st, info2, d);
+ }
+ }
+ sysfs_free(info2);
+ }
+ }
+ /* We might have been given some devices to add to the
+ * array. Now that the array has been changed to the right
+ * level and frozen, we can safely add them.
+ */
+ if (devlist)
+ Manage_subdevs(devname, fd, devlist, verbose,
+ 0,NULL, 0);
+
+ if (reshape.backup_blocks == 0 && data_offset != INVALID_SECTORS)
+ reshape.backup_blocks = reshape.before.data_disks * info->array.chunk_size/512;
+ if (reshape.backup_blocks == 0) {
+ /* No restriping needed, but we might need to impose
+ * some more changes: layout, raid_disks, chunk_size
+ */
+ /* read current array info */
+ if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+ dprintf("Cannot get array information.\n");
+ goto release;
+ }
+ /* compare current array info with new values and if
+ * it is different update them to new */
+ if (info->new_layout != UnSet &&
+ info->new_layout != array.layout) {
+ array.layout = info->new_layout;
+ if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ pr_err("failed to set new layout\n");
+ goto release;
+ } else if (verbose >= 0)
+ printf("layout for %s set to %d\n",
+ devname, array.layout);
+ }
+ if (info->delta_disks != UnSet &&
+ info->delta_disks != 0 &&
+ array.raid_disks != (info->array.raid_disks + info->delta_disks)) {
+ array.raid_disks += info->delta_disks;
+ if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ pr_err("failed to set raid disks\n");
+ goto release;
+ } else if (verbose >= 0) {
+ printf("raid_disks for %s set to %d\n",
+ devname, array.raid_disks);
+ }
+ }
+ if (info->new_chunk != 0 &&
+ info->new_chunk != array.chunk_size) {
+ if (sysfs_set_num(info, NULL,
+ "chunk_size", info->new_chunk) != 0) {
+ pr_err("failed to set chunk size\n");
+ goto release;
+ } else if (verbose >= 0)
+ printf("chunk size for %s set to %d\n",
+ devname, array.chunk_size);
+ }
+ unfreeze(st);
+ return 0;
+ }
+
+ /*
+ * There are three possibilities.
+ * 1/ The array will shrink.
+ * We need to ensure the reshape will pause before reaching
+ * the 'critical section'. We also need to fork and wait for
+ * that to happen. When it does we
+ * suspend/backup/complete/unfreeze
+ *
+ * 2/ The array will not change size.
+ * This requires that we keep a backup of a sliding window
+ * so that we can restore data after a crash. So we need
+ * to fork and monitor progress.
+ * In future we will allow the data_offset to change, so
+ * a sliding backup becomes unnecessary.
+ *
+ * 3/ The array will grow. This is relatively easy.
+ * However the kernel's restripe routines will cheerfully
+ * overwrite some early data before it is safe. So we
+ * need to make a backup of the early parts of the array
+ * and be ready to restore it if rebuild aborts very early.
+ * For externally managed metadata, we still need a forked
+ * child to monitor the reshape and suspend IO over the region
+ * that is being reshaped.
+ *
+ * We backup data by writing it to one spare, or to a
+ * file which was given on command line.
+ *
+ * In each case, we first make sure that storage is available
+ * for the required backup.
+ * Then we:
+ * - request the shape change.
+ * - fork to handle backup etc.
+ */
+ /* Check that we can hold all the data */
+ get_dev_size(fd, NULL, &array_size);
+ if (reshape.new_size < (array_size/512)) {
+ pr_err("this change will reduce the size of the array.\n"
+ " use --grow --array-size first to truncate array.\n"
+ " e.g. mdadm --grow %s --array-size %llu\n",
+ devname, reshape.new_size/2);
+ goto release;
+ }
+
+ if (array.level == 10) {
+ /* Reshaping RAID10 does not require any data backup by
+ * user-space. Instead it requires that the data_offset
+ * is changed to avoid the need for backup.
+ * So this is handled very separately
+ */
+ if (restart)
+ /* Nothing to do. */
+ return 0;
+ return raid10_reshape(container, fd, devname, st, info,
+ &reshape, data_offset,
+ force, verbose);
+ }
+ sra = sysfs_read(fd, NULL,
+ GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK|
+ GET_CACHE);
+ if (!sra) {
+ pr_err("%s: Cannot get array details from sysfs\n",
+ devname);
+ goto release;
+ }
+
+ if (!backup_file)
+ switch(set_new_data_offset(sra, st, devname,
+ reshape.after.data_disks - reshape.before.data_disks,
+ data_offset,
+ reshape.min_offset_change, 1)) {
+ case -1:
+ goto release;
+ case 0:
+ /* Updated data_offset, so it's easy now */
+ update_cache_size(container, sra, info,
+ min(reshape.before.data_disks,
+ reshape.after.data_disks),
+ reshape.backup_blocks);
+
+ /* Right, everything seems fine. Let's kick things off.
+ */
+ sync_metadata(st);
+
+ if (impose_reshape(sra, info, st, fd, restart,
+ devname, container, &reshape) < 0)
+ goto release;
+ if (sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0) {
+ pr_err("Failed to initiate reshape!\n");
+ goto release;
+ }
+ if (info->new_level == reshape.level)
+ return 0;
+ /* need to adjust level when reshape completes */
+ switch(fork()) {
+ case -1: /* ignore error, but don't wait */
+ return 0;
+ default: /* parent */
+ return 0;
+ case 0:
+ map_fork();
+ break;
+ }
+ close(fd);
+ wait_reshape(sra);
+ fd = open_dev(sra->sys_name);
+ if (fd >= 0)
+ impose_level(fd, info->new_level, devname, verbose);
+ return 0;
+ case 1: /* Couldn't set data_offset, try the old way */
+ if (data_offset != INVALID_SECTORS) {
+ pr_err("Cannot update data_offset on this array\n");
+ goto release;
+ }
+ break;
+ }
+
+started:
+ /* Decide how many blocks (sectors) for a reshape
+ * unit. The number we have so far is just a minimum
+ */
+ blocks = reshape.backup_blocks;
+ if (reshape.before.data_disks ==
+ reshape.after.data_disks) {
+ /* Make 'blocks' bigger for better throughput, but
+ * not so big that we reject it below.
+ * Try for 16 megabytes
+ */
+ while (blocks * 32 < sra->component_size &&
+ blocks < 16*1024*2)
+ blocks *= 2;
+ } else
+ pr_err("Need to backup %luK of critical "
+ "section..\n", blocks/2);
+
+ if (blocks >= sra->component_size/2) {
+ pr_err("%s: Something wrong"
+ " - reshape aborted\n",
+ devname);
+ goto release;
+ }
+
+ /* Now we need to open all these devices so we can read/write.
+ */
+ nrdisks = max(reshape.before.data_disks,
+ reshape.after.data_disks) + reshape.parity
+ + sra->array.spare_disks;
+ fdlist = xcalloc((1+nrdisks), sizeof(int));
+ offsets = xcalloc((1+nrdisks), sizeof(offsets[0]));
+
+ odisks = reshape.before.data_disks + reshape.parity;
+ d = reshape_prepare_fdlist(devname, sra, odisks,
+ nrdisks, blocks, backup_file,
+ fdlist, offsets);
+ if (d < 0) {
+ goto release;
+ }
+ if ((st->ss->manage_reshape == NULL) ||
+ (st->ss->recover_backup == NULL)) {
+ if (backup_file == NULL) {
+ if (reshape.after.data_disks <=
+ reshape.before.data_disks) {
+ pr_err("%s: Cannot grow - need backup-file\n",
+ devname);
+ pr_err(" Please provide one with \"--backup=...\"\n");
+ goto release;
+ } else if (sra->array.spare_disks == 0) {
+ pr_err("%s: Cannot grow - "
+ "need a spare or backup-file to backup "
+ "critical section\n", devname);
+ goto release;
+ }
+ } else {
+ if (!reshape_open_backup_file(backup_file, fd, devname,
+ (signed)blocks,
+ fdlist+d, offsets+d,
+ sra->sys_name,
+ restart)) {
+ goto release;
+ }
+ d++;
+ }
+ }
+
+ update_cache_size(container, sra, info,
+ min(reshape.before.data_disks, reshape.after.data_disks),
+ blocks);
+
+ /* Right, everything seems fine. Let's kick things off.
+ * If only changing raid_disks, use ioctl, else use
+ * sysfs.
+ */
+ sync_metadata(st);
+
+ if (impose_reshape(sra, info, st, fd, restart,
+ devname, container, &reshape) < 0)
+ goto release;
+
+ err = start_reshape(sra, restart, reshape.before.data_disks,
+ reshape.after.data_disks);
+ if (err) {
+ pr_err("Cannot %s reshape for %s\n",
+ restart ? "continue" : "start",
+ devname);
+ goto release;
+ }
+ if (restart)
+ sysfs_set_str(sra, NULL, "array_state", "active");
+ if (freeze_reshape) {
+ free(fdlist);
+ free(offsets);
+ sysfs_free(sra);
+ pr_err("Reshape has to be continued from"
+ " location %llu when root filesystem has been mounted.\n",
+ sra->reshape_progress);
+ return 1;
+ }
+
+ if (!forked && !check_env("MDADM_NO_SYSTEMCTL"))
+ if (continue_via_systemd(container ?: sra->sys_name)) {
+ free(fdlist);
+ free(offsets);
+ sysfs_free(sra);
+ return 0;
+ }
+
+ /* Now we just need to kick off the reshape and watch, while
+ * handling backups of the data...
+ * This is all done by a forked background process.
+ */
+ switch(forked ? 0 : fork()) {
+ case -1:
+ pr_err("Cannot run child to monitor reshape: %s\n",
+ strerror(errno));
+ abort_reshape(sra);
+ goto release;
+ default:
+ free(fdlist);
+ free(offsets);
+ sysfs_free(sra);
+ return 0;
+ case 0:
+ map_fork();
+ break;
+ }
+
+ /* If another array on the same devices is busy, the
+ * reshape will wait for them. This would mean that
+ * the first section that we suspend will stay suspended
+ * for a long time. So check on that possibility
+ * by looking for "DELAYED" in /proc/mdstat, and if found,
+ * wait a while
+ */
+ do {
+ struct mdstat_ent *mds, *m;
+ delayed = 0;
+ mds = mdstat_read(1, 0);
+ for (m = mds; m; m = m->next)
+ if (strcmp(m->devnm, sra->sys_name) == 0) {
+ if (m->resync &&
+ m->percent == RESYNC_DELAYED)
+ delayed = 1;
+ if (m->resync == 0)
+ /* Haven't started the reshape thread
+ * yet, wait a bit
+ */
+ delayed = 2;
+ break;
+ }
+ free_mdstat(mds);
+ if (delayed == 1 && get_linux_version() < 3007000) {
+ pr_err("Reshape is delayed, but cannot wait carefully with this kernel.\n"
+ " You might experience problems until other reshapes complete.\n");
+ delayed = 0;
+ }
+ if (delayed)
+ mdstat_wait(30 - (delayed-1) * 25);
+ } while (delayed);
+ mdstat_close();
+ close(fd);
+ if (check_env("MDADM_GROW_VERIFY"))
+ fd = open(devname, O_RDONLY | O_DIRECT);
+ else
+ fd = -1;
+ mlockall(MCL_FUTURE);
+
+ signal(SIGTERM, catch_term);
+
+ if (st->ss->external) {
+ /* metadata handler takes it from here */
+ done = st->ss->manage_reshape(
+ fd, sra, &reshape, st, blocks,
+ fdlist, offsets,
+ d - odisks, fdlist+odisks,
+ offsets+odisks);
+ } else
+ done = child_monitor(
+ fd, sra, &reshape, st, blocks,
+ fdlist, offsets,
+ d - odisks, fdlist+odisks,
+ offsets+odisks);
+
+ free(fdlist);
+ free(offsets);
+
+ if (backup_file && done) {
+ char *bul;
+ bul = make_backup(sra->sys_name);
+ if (bul) {
+ char buf[1024];
+ int l = readlink(bul, buf, sizeof(buf));
+ if (l > 0) {
+ buf[l]=0;
+ unlink(buf);
+ }
+ unlink(bul);
+ free(bul);
+ }
+ unlink(backup_file);
+ }
+ if (!done) {
+ abort_reshape(sra);
+ goto out;
+ }
+
+ if (!st->ss->external &&
+ !(reshape.before.data_disks != reshape.after.data_disks
+ && info->custom_array_size) &&
+ info->new_level == reshape.level &&
+ !forked) {
+ /* no need to wait for the reshape to finish as
+ * there is nothing more to do.
+ */
+ sysfs_free(sra);
+ exit(0);
+ }
+ wait_reshape(sra);
+
+ if (st->ss->external) {
+ /* Re-load the metadata as much could have changed */
+ int cfd = open_dev(st->container_devnm);
+ if (cfd >= 0) {
+ flush_mdmon(container);
+ st->ss->free_super(st);
+ st->ss->load_container(st, cfd, container);
+ close(cfd);
+ }
+ }
+
+ /* set new array size if required customer_array_size is used
+ * by this metadata.
+ */
+ if (reshape.before.data_disks !=
+ reshape.after.data_disks &&
+ info->custom_array_size)
+ set_array_size(st, info, info->text_version);
+
+ if (info->new_level != reshape.level) {
+ if (fd < 0)
+ fd = open(devname, O_RDONLY);
+ impose_level(fd, info->new_level, devname, verbose);
+ close(fd);
+ if (info->new_level == 0)
+ st->update_tail = NULL;
+ }
+out:
+ sysfs_free(sra);
+ if (forked)
+ return 0;
+ unfreeze(st);
+ exit(0);
+
+release:
+ free(fdlist);
+ free(offsets);
+ if (orig_level != UnSet && sra) {
+ c = map_num(pers, orig_level);
+ if (c && sysfs_set_str(sra, NULL, "level", c) == 0)
+ pr_err("aborting level change\n");
+ }
+ sysfs_free(sra);
+ if (!forked)
+ unfreeze(st);
+ return 1;
+}
+
+/*
+ * reshape_container() - reshape the member arrays of a container, one
+ * member at a time.
+ *
+ * Unless 'restart' is set, the change described by 'info' is first handed
+ * to reshape_super().  The metadata is then synced and the monitor pinged.
+ * The remaining work is passed to systemd when continue_via_systemd()
+ * accepts it, otherwise it is done in a forked child (or in the calling
+ * process when 'forked' is set).
+ *
+ * mdfd handle is passed to be closed in child process (after fork).
+ *
+ * Return value, as seen by the caller:
+ *   1 if reshape_super() or fork() failed (the array is unfrozen first),
+ *   0 in the parent once the work has been handed to systemd or a child.
+ * The process that runs the reshape loop never returns: it calls exit(0).
+ */
+int reshape_container(char *container, char *devname,
+		      int mdfd,
+		      struct supertype *st,
+		      struct mdinfo *info,
+		      int force,
+		      char *backup_file, int verbose,
+		      int forked, int restart, int freeze_reshape)
+{
+	struct mdinfo *cc = NULL;
+	int rv = restart;
+	char last_devnm[32] = "";
+
+	/* component_size is not meaningful for a container,
+	 * so pass '0' meaning 'no change'
+	 */
+	if (!restart &&
+	    reshape_super(st, 0, info->new_level,
+			  info->new_layout, info->new_chunk,
+			  info->array.raid_disks, info->delta_disks,
+			  backup_file, devname, APPLY_METADATA_CHANGES,
+			  verbose)) {
+		unfreeze(st);
+		return 1;
+	}
+
+	sync_metadata(st);
+
+	/* ping monitor to be sure that update is on disk
+	 */
+	ping_monitor(container);
+
+	if (!forked && !freeze_reshape && !check_env("MDADM_NO_SYSTEMCTL"))
+		if (continue_via_systemd(container))
+			return 0;
+
+	switch (forked ? 0 : fork()) {
+	case -1: /* error */
+		/* perror() appends ": <reason>" and the newline itself */
+		perror("Cannot fork to complete reshape");
+		unfreeze(st);
+		return 1;
+	default: /* parent */
+		if (!freeze_reshape)
+			printf(Name ": multi-array reshape continues"
+			       " in background\n");
+		return 0;
+	case 0: /* child */
+		map_fork();
+		break;
+	}
+
+	/* close unused handle in child process
+	 */
+	if (mdfd > -1)
+		close(mdfd);
+
+	while(1) {
+		/* For each member array with reshape_active,
+		 * we need to perform the reshape.
+		 * We pick the first array that needs reshaping and
+		 * reshape it.  reshape_array() will re-read the metadata
+		 * so the next time through a different array should be
+		 * ready for reshape.
+		 * It is possible that the 'different' array will not
+		 * be assembled yet.  In that case we simply exit.
+		 * When it is assembled, the mdadm which assembles it
+		 * will take over the reshape.
+		 */
+		struct mdinfo *content;
+		int fd;
+		struct mdstat_ent *mdstat = NULL;
+		char *adev;
+		int devid;
+
+		sysfs_free(cc);
+
+		cc = st->ss->container_content(st, NULL);
+
+		for (content = cc; content ; content = content->next) {
+			char *subarray;
+			if (!content->reshape_active)
+				continue;
+
+			/* The subarray name follows the first '/' found
+			 * after the leading character of text_version.
+			 * Skip the member if that separator is missing
+			 * rather than computing NULL+1.
+			 */
+			subarray = strchr(content->text_version+1, '/');
+			if (!subarray)
+				continue;
+			subarray++;
+
+			mdstat = mdstat_by_subdev(subarray, container);
+			if (!mdstat)
+				continue;
+			if (mdstat->active == 0) {
+				pr_err("Skipping inactive array %s.\n",
+				       mdstat->devnm);
+				free_mdstat(mdstat);
+				mdstat = NULL;
+				continue;
+			}
+			break;
+		}
+		if (!content)
+			break;
+
+		devid = devnm2devid(mdstat->devnm);
+		adev = map_dev(major(devid), minor(devid), 0);
+		if (!adev)
+			adev = content->text_version;
+
+		fd = open_dev(mdstat->devnm);
+		if (fd < 0) {
+			pr_err("Device %s cannot be opened for reshape.\n",
+			       adev);
+			free_mdstat(mdstat);
+			break;
+		}
+
+		if (strcmp(last_devnm, mdstat->devnm) == 0) {
+			/* Do not allow for multiple reshape_array() calls for
+			 * the same array.
+			 * It can happen when reshape_array() returns without
+			 * error, when reshape is not finished (wrong reshape
+			 * starting/continuation conditions).  Mdmon doesn't
+			 * switch to next array in container and reentry
+			 * conditions for the same array occur.
+			 * This is possibly interim until the behaviour of
+			 * reshape_array() is resolved.
+			 */
+			pr_err("Multiple reshape execution detected for "
+			       "device %s.\n", adev);
+			close(fd);
+			free_mdstat(mdstat);
+			break;
+		}
+		strcpy(last_devnm, mdstat->devnm);
+
+		sysfs_init(content, fd, mdstat->devnm);
+
+		if (mdmon_running(container))
+			flush_mdmon(container);
+
+		rv = reshape_array(container, fd, adev, st,
+				   content, force, NULL, INVALID_SECTORS,
+				   backup_file, verbose, 1, restart,
+				   freeze_reshape);
+		close(fd);
+		/* The device name has been copied into last_devnm, so the
+		 * mdstat entry is no longer needed; release it so repeated
+		 * passes through the loop do not leak one entry each.
+		 */
+		free_mdstat(mdstat);
+
+		if (freeze_reshape) {
+			sysfs_free(cc);
+			exit(0);
+		}
+
+		restart = 0;
+		if (rv)
+			break;
+
+		if (mdmon_running(container))
+			flush_mdmon(container);
+	}
+	if (!rv)
+		unfreeze(st);
+	sysfs_free(cc);
+	exit(0);
+}
+
+/*
+ * We run a child process in the background which performs the following
+ * steps:
+ * - wait for resync to reach a certain point
+ * - suspend io to the following section
+ * - backup that section
+ * - allow resync to proceed further
+ * - resume io
+ * - discard the backup.
+ *
+ * These are combined in slightly different ways in the three cases.
+ * Grow:
+ * - suspend/backup/allow/wait/resume/discard
+ * Shrink:
+ * - allow/wait/suspend/backup/allow/wait/resume/discard
+ * same-size:
+ * - wait/resume/discard/suspend/backup/allow
+ *
+ * suspend/backup/allow always come together
+ * wait/resume/discard do too.
+ * For the same-size case we have two backups to improve flow.
+ *
+ */
+
+int progress_reshape(struct mdinfo *info, struct reshape *reshape,
+ unsigned long long backup_point,
+ unsigned long long wait_point,
+ unsigned long long *suspend_point,
+ unsigned long long *reshape_completed, int *frozen)
+{
+ /* This function is called repeatedly by the reshape manager.
+ * It determines how much progress can safely be made and allows
+ * that progress.
+ * - 'info' identifies the array and particularly records in
+ * ->reshape_progress the metadata's knowledge of progress
+ * This is a sector offset from the start of the array
+ * of the next array block to be relocated. This number
+ * may increase from 0 or decrease from array_size, depending
+ * on the type of reshape that is happening.
+ * Note that in contrast, 'sync_completed' is a block count of the
+ * reshape so far. It gives the distance between the start point
+ * (head or tail of device) and the next place that data will be
+ * written. It always increases.
+ * - 'reshape' is the structure created by analyse_change
+ * - 'backup_point' shows how much the metadata manager has backed-up
+ * data. For reshapes with increasing progress, it is the next address
+ * to be backed up, previous addresses have been backed-up. For
+ * decreasing progress, it is the earliest address that has been
+ * backed up - later addresses are also backed up.
+ * So addresses between reshape_progress and backup_point are
+ * backed up providing those are in the 'correct' order.
+ * - 'wait_point' is an array address. When reshape_completed
+ * passes this point, progress_reshape should return. It might
+ * return earlier if it determines that ->reshape_progress needs
+ * to be updated or further backup is needed.
+ * - suspend_point is maintained by progress_reshape and the caller
+ * should not touch it except to initialise to zero.
+ * It is an array address and it only increases in 2.6.37 and earlier.
+ * This makes it difficult to handle reducing reshapes with
+ * external metadata.
+ * However: it is similar to backup_point in that it records the
+ * other end of a suspended region from reshape_progress.
+ * it is moved to extend the region that is safe to backup and/or
+ * reshape
+ * - reshape_completed is read from sysfs and returned. The caller
+ * should copy this into ->reshape_progress when it has reason to
+ * believe that the metadata knows this, and any backup outside this
+ * has been erased.
+ * - 'frozen' points to a flag kept by the caller. While it is
+ * non-zero, 'sync_max' is not written by this function. It is
+ * updated on the recovery path at the end of this function to
+ * record whether the 'sync_max' value read back from sysfs
+ * differs from the value computed here.
+ *
+ * Return value is:
+ * 1 if more data from backup_point - but only as far as suspend_point,
+ * should be backed up
+ * 0 if things are progressing smoothly
+ * -1 if the reshape is finished because it is all done,
+ * -2 if the reshape is finished due to an error.
+ * 1 is also returned when 'sync_completed' could not be read but
+ * became readable again during the wait in the recovery path below.
+ */
+
+ int advancing = (reshape->after.data_disks
+ >= reshape->before.data_disks);
+ unsigned long long need_backup; /* All data between start of array and
+ * here will at some point need to
+ * be backed up.
+ */
+ unsigned long long read_offset, write_offset;
+ unsigned long long write_range;
+ unsigned long long max_progress, target, completed;
+ unsigned long long array_size = (info->component_size
+ * reshape->before.data_disks);
+ int fd;
+ char buf[20];
+
+ /* First, we unsuspend any region that is now known to be safe.
+ * If suspend_point is on the 'wrong' side of reshape_progress, then
+ * we don't have or need suspension at the moment. This is true for
+ * native metadata when we don't need to back-up.
+ */
+ if (advancing) {
+ if (info->reshape_progress <= *suspend_point)
+ sysfs_set_num(info, NULL, "suspend_lo",
+ info->reshape_progress);
+ } else {
+ /* Note: this won't work in 2.6.37 and before.
+ * Something somewhere should make sure we don't need it!
+ */
+ if (info->reshape_progress >= *suspend_point)
+ sysfs_set_num(info, NULL, "suspend_hi",
+ info->reshape_progress);
+ }
+
+ /* Now work out how far it is safe to progress.
+ * If the read_offset for ->reshape_progress is less than
+ * 'blocks' beyond the write_offset, we can only progress as far
+ * as a backup.
+ * Otherwise we can progress until the write_offset for the new location
+ * reaches (within 'blocks' of) the read_offset at the current location.
+ * However that region must be suspended unless we are using native
+ * metadata.
+ * If we need to suspend more, we limit it to 128M per device, which is
+ * rather arbitrary and should be some time-based calculation.
+ */
+ read_offset = info->reshape_progress / reshape->before.data_disks;
+ write_offset = info->reshape_progress / reshape->after.data_disks;
+ write_range = info->new_chunk/512;
+ if (reshape->before.data_disks == reshape->after.data_disks)
+ need_backup = array_size;
+ else
+ need_backup = reshape->backup_blocks;
+ if (advancing) {
+ if (read_offset < write_offset + write_range)
+ max_progress = backup_point;
+ else
+ max_progress =
+ read_offset *
+ reshape->after.data_disks;
+ } else {
+ if (read_offset > write_offset - write_range)
+ /* Can only progress as far as has been backed up,
+ * which must be suspended */
+ max_progress = backup_point;
+ else if (info->reshape_progress <= need_backup)
+ max_progress = backup_point;
+ else {
+ if (info->array.major_version >= 0)
+ /* Can progress until backup is needed */
+ max_progress = need_backup;
+ else {
+ /* Can progress until metadata update is required */
+ max_progress =
+ read_offset *
+ reshape->after.data_disks;
+ /* but data must be suspended */
+ if (max_progress < *suspend_point)
+ max_progress = *suspend_point;
+ }
+ }
+ }
+
+ /* We know it is safe to progress to 'max_progress' providing
+ * it is suspended or we are using native metadata.
+ * Consider extending suspend_point 128M per device if it
+ * is less than 64M per device beyond reshape_progress.
+ * But always do a multiple of 'blocks'
+ * FIXME this is too big - it takes too long to complete
+ * this much.
+ */
+ target = 64*1024*2 * min(reshape->before.data_disks,
+ reshape->after.data_disks);
+ target /= reshape->backup_blocks;
+ if (target < 2)
+ target = 2;
+ target *= reshape->backup_blocks;
+
+ /* For externally managed metadata we always need to suspend IO to
+ * the area being reshaped so we regularly push suspend_point forward.
+ * For native metadata we only need the suspend if we are going to do
+ * a backup.
+ */
+ if (advancing) {
+ if ((need_backup > info->reshape_progress
+ || info->array.major_version < 0) &&
+ *suspend_point < info->reshape_progress + target) {
+ if (need_backup < *suspend_point + 2 * target)
+ *suspend_point = need_backup;
+ else if (*suspend_point + 2 * target < array_size)
+ *suspend_point += 2 * target;
+ else
+ *suspend_point = array_size;
+ sysfs_set_num(info, NULL, "suspend_hi", *suspend_point);
+ if (max_progress > *suspend_point)
+ max_progress = *suspend_point;
+ }
+ } else {
+ if (info->array.major_version >= 0) {
+ /* Only need to suspend when about to backup */
+ if (info->reshape_progress < need_backup * 2 &&
+ *suspend_point > 0) {
+ *suspend_point = 0;
+ sysfs_set_num(info, NULL, "suspend_lo", 0);
+ sysfs_set_num(info, NULL, "suspend_hi", need_backup);
+ }
+ } else {
+ /* Need to suspend continually */
+ if (info->reshape_progress < *suspend_point)
+ *suspend_point = info->reshape_progress;
+ if (*suspend_point + target < info->reshape_progress)
+ /* No need to move suspend region yet */;
+ else {
+ if (*suspend_point >= 2 * target)
+ *suspend_point -= 2 * target;
+ else
+ *suspend_point = 0;
+ sysfs_set_num(info, NULL, "suspend_lo",
+ *suspend_point);
+ }
+ if (max_progress < *suspend_point)
+ max_progress = *suspend_point;
+ }
+ }
+
+ /* now set sync_max to allow that progress. sync_max, like
+ * sync_completed is a count of sectors written per device, so
+ * we find the difference between max_progress and the start point,
+ * and divide that by after.data_disks to get a sync_max
+ * number.
+ * At the same time we convert wait_point to a similar number
+ * for comparing against sync_completed.
+ */
+ /* scale down max_progress to per_disk */
+ max_progress /= reshape->after.data_disks;
+ /* Round to chunk size as some kernels give an erroneously high number */
+ max_progress /= info->new_chunk/512;
+ max_progress *= info->new_chunk/512;
+ /* And round to old chunk size as the kernel wants that */
+ max_progress /= info->array.chunk_size/512;
+ max_progress *= info->array.chunk_size/512;
+ /* Limit progress to the whole device */
+ if (max_progress > info->component_size)
+ max_progress = info->component_size;
+ wait_point /= reshape->after.data_disks;
+ if (!advancing) {
+ /* switch from 'device offset' to 'processed block count' */
+ max_progress = info->component_size - max_progress;
+ wait_point = info->component_size - wait_point;
+ }
+
+ if (!*frozen)
+ sysfs_set_num(info, NULL, "sync_max", max_progress);
+
+ /* Now wait. If we have already reached the point that we were
+ * asked to wait to, don't wait at all, else wait for any change.
+ * We need to select on 'sync_completed' as that is the place that
+ * notifications happen, but we are really interested in
+ * 'reshape_position'
+ */
+ fd = sysfs_get_fd(info, NULL, "sync_completed");
+ if (fd < 0)
+ goto check_progress;
+
+ if (sysfs_fd_get_ll(fd, &completed) < 0)
+ goto check_progress;
+
+ while (completed < max_progress && completed < wait_point) {
+ /* Check that sync_action is still 'reshape' to avoid
+ * waiting forever on a dead array
+ */
+ char action[20];
+ if (sysfs_get_str(info, NULL, "sync_action",
+ action, 20) <= 0 ||
+ strncmp(action, "reshape", 7) != 0)
+ break;
+ /* Some kernels reset 'sync_completed' to zero
+ * before setting 'sync_action' to 'idle'.
+ * So we need these extra tests.
+ */
+ if (completed == 0 && advancing
+ && info->reshape_progress > 0)
+ break;
+ if (completed == 0 && !advancing
+ && info->reshape_progress < (info->component_size
+ * reshape->after.data_disks))
+ break;
+ sysfs_wait(fd, NULL);
+ if (sysfs_fd_get_ll(fd, &completed) < 0)
+ goto check_progress;
+ }
+ /* Some kernels reset 'sync_completed' to zero,
+ * we need to have real point we are in md
+ */
+ if (completed == 0)
+ completed = max_progress;
+
+ /* some kernels can give an incorrectly high 'completed' number */
+ completed /= (info->new_chunk/512);
+ completed *= (info->new_chunk/512);
+ /* Convert 'completed' back in to a 'progress' number */
+ completed *= reshape->after.data_disks;
+ if (!advancing) {
+ completed = info->component_size * reshape->after.data_disks
+ - completed;
+ }
+ *reshape_completed = completed;
+
+ close(fd);
+
+ /* We return the need_backup flag. Caller will decide
+ * how much - a multiple of ->backup_blocks up to *suspend_point
+ */
+ if (advancing)
+ return need_backup > info->reshape_progress;
+ else
+ return need_backup >= info->reshape_progress;
+
+check_progress:
+ /* if we couldn't read a number from sync_completed, then
+ * either the reshape did complete, or it aborted.
+ * We can tell which by checking for 'none' in reshape_position.
+ * If it did abort, then it might immediately restart if it
+ * was just a device failure that leaves us degraded but
+ * functioning.
+ * Note that 'fd' may be negative here (the open itself failed),
+ * so every use of it below is guarded. *reshape_completed is
+ * not written on this path.
+ */
+ strcpy(buf, "hi");
+ if (sysfs_get_str(info, NULL, "reshape_position", buf, sizeof(buf)) < 0
+ || strncmp(buf, "none", 4) != 0) {
+ /* The abort might only be temporary. Wait up to 10
+ * seconds for fd to contain a valid number again.
+ */
+ int wait = 10000;
+ int rv = -2;
+ unsigned long long new_sync_max;
+ while (fd >= 0 && rv < 0 && wait > 0) {
+ if (sysfs_wait(fd, &wait) != 1)
+ break;
+ switch (sysfs_fd_get_ll(fd, &completed)) {
+ case 0:
+ /* all good again */
+ rv = 1;
+ /* If "sync_max" is no longer max_progress
+ * we need to freeze things
+ */
+ sysfs_get_ll(info, NULL, "sync_max", &new_sync_max);
+ *frozen = (new_sync_max != max_progress);
+ break;
+ case -2: /* read error - abort */
+ wait = 0;
+ break;
+ }
+ }
+ if (fd >= 0)
+ close(fd);
+ return rv; /* abort */
+ } else {
+ /* Maybe racing with array shutdown - check state */
+ if (fd >= 0)
+ close(fd);
+ if (sysfs_get_str(info, NULL, "array_state", buf, sizeof(buf)) < 0
+ || strncmp(buf, "inactive", 8) == 0
+ || strncmp(buf, "clear",5) == 0)
+ return -2; /* abort */
+ return -1; /* complete */
+ }
+}
+
+/* FIXME return status is never checked */
+static int grow_backup(struct mdinfo *sra,
+		       unsigned long long offset, /* per device */
+		       unsigned long stripes, /* per device, in old chunks */
+		       int *sources, unsigned long long *offsets,
+		       int disks, int chunk, int level, int layout,
+		       int dests, int *destfd, unsigned long long *destoffsets,
+		       int part, int *degraded,
+		       char *buf)
+{
+	/* Copy 'stripes' stripes starting at per-device sector 'offset'
+	 * from the array members ('sources'/'offsets') to every backup
+	 * destination ('destfd'/'destoffsets'), then record the saved
+	 * range in half 'part' of the backup-super-block 'bsb' and write
+	 * that super-block next to the data on each destination.
+	 *
+	 * Returns -1 if the 'degraded' attribute cannot be read or a
+	 * super-block seek/write fails, the save_stripes() result if that
+	 * is non-zero, and 0 otherwise.
+	 */
+	/* number of data disks: one parity for level >= 4, two for 6 */
+	int odata = disks - (level >= 4) - (level == 6);
+	unsigned long long array_start = offset * odata;
+	unsigned long len_sectors = stripes * (chunk/512) * odata;
+	unsigned long len_bytes = stripes * chunk * odata;
+	unsigned long long deg;
+	int now_degraded;
+	int err;
+	int n;
+
+	/* Check that array hasn't become degraded, else we might backup the wrong data */
+	if (sysfs_get_ll(sra, NULL, "degraded", &deg) < 0)
+		return -1; /* FIXME this error is ignored */
+	now_degraded = (int)deg;
+	if (now_degraded != *degraded) {
+		/* Something changed: re-check every in-sync member and
+		 * stop reading from any that has gone away.
+		 */
+		struct mdinfo *dev;
+
+		for (dev = sra->devs; dev; dev = dev->next) {
+			char state[20];
+			int slot;
+
+			if (dev->disk.state & (1<<MD_DISK_FAULTY))
+				continue;
+			if (!(dev->disk.state & (1<<MD_DISK_SYNC)))
+				continue;
+			if (sysfs_get_str(sra, dev, "state", state, 20) >= 0 &&
+			    !strstr(state, "faulty") &&
+			    strstr(state, "in_sync") != NULL)
+				continue; /* still healthy */
+
+			/* this device is dead */
+			dev->disk.state = (1<<MD_DISK_FAULTY);
+			slot = dev->disk.raid_disk;
+			if (slot >= 0 && sources[slot] >= 0) {
+				close(sources[slot]);
+				sources[slot] = -1;
+			}
+		}
+		*degraded = now_degraded;
+	}
+
+	/* Describe the range being saved in the chosen half of bsb */
+	if (part) {
+		bsb.arraystart2 = __cpu_to_le64(array_start);
+		bsb.length2 = __cpu_to_le64(len_sectors);
+		bsb.magic[15] = '2';
+	} else {
+		bsb.arraystart = __cpu_to_le64(array_start);
+		bsb.length = __cpu_to_le64(len_sectors);
+	}
+
+	/* Position every destination at the start of that half */
+	for (n = 0; n < dests; n++) {
+		unsigned long long pos = destoffsets[n];
+
+		if (part)
+			pos += __le64_to_cpu(bsb.devstart2)*512;
+		lseek64(destfd[n], pos, 0);
+	}
+
+	err = save_stripes(sources, offsets,
+			   disks, chunk, level, layout,
+			   dests, destfd,
+			   offset*512*odata, len_bytes,
+			   buf);
+	if (err)
+		return err;
+
+	bsb.mtime = __cpu_to_le64(time(0));
+	for (n = 0; n < dests; n++) {
+		unsigned long long sb_pos = destoffsets[n] - 4096;
+
+		bsb.devstart = __cpu_to_le64(destoffsets[n]/512);
+
+		bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
+		if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0)
+			bsb.sb_csum2 = bsb_csum((char*)&bsb,
+						((char*)&bsb.sb_csum2)-((char*)&bsb));
+
+		/* super-block copy in front of the data */
+		if ((unsigned long long)lseek64(destfd[n], sb_pos, 0) != sb_pos)
+			return -1;
+		if (write(destfd[n], &bsb, 512) != 512)
+			return -1;
+
+		if (destoffsets[n] > 4096) {
+			/* and a second copy just after the data */
+			unsigned long long tail_pos = destoffsets[n] + len_bytes;
+
+			if ((unsigned long long)lseek64(destfd[n], tail_pos, 0) !=
+			    tail_pos)
+				return -1;
+			if (write(destfd[n], &bsb, 512) != 512)
+				return -1;
+		}
+		fsync(destfd[n]);
+	}
+
+	return 0;
+}
+
+/* In 2.6.30, the value reported by sync_completed can be
+ * less than it should be by one stripe.
+ * This only happens when reshape hits sync_max and pauses.
+ * So allow wait_backup to either extend sync_max further
+ * than strictly necessary, or return before the
+ * sync has got quite as far as we would really like.
+ * This is what 'blocks2' is for.
+ * The various callers give appropriate values so that
+ * everything works.
+ */
+/* FIXME return value is often ignored */
+static int forget_backup(int dests, int *destfd,
+			 unsigned long long *destoffsets,
+			 int part)
+{
+	/*
+	 * Erase backup 'part' (which is 0 or 1): zero that half's start
+	 * and length in the backup-super-block 'bsb' and rewrite the
+	 * super-block 4096 bytes before the data offset of each
+	 * destination.
+	 *
+	 * Returns 0 on success, -1 once a seek or write has failed.  After
+	 * a failure the remaining destinations are still seeked and
+	 * fsync()ed, but no further super-block is written.
+	 */
+	int status = 0;
+	int n;
+
+	if (part) {
+		bsb.arraystart2 = __cpu_to_le64(0);
+		bsb.length2 = __cpu_to_le64(0);
+	} else {
+		bsb.arraystart = __cpu_to_le64(0);
+		bsb.length = __cpu_to_le64(0);
+	}
+	bsb.mtime = __cpu_to_le64(time(0));
+
+	for (n = 0; n < dests; n++) {
+		unsigned long long sb_pos = destoffsets[n] - 4096;
+
+		/* devstart and the checksums are per-destination */
+		bsb.devstart = __cpu_to_le64(destoffsets[n]/512);
+		bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
+		if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0)
+			bsb.sb_csum2 = bsb_csum((char*)&bsb,
+						((char*)&bsb.sb_csum2)-((char*)&bsb));
+
+		if ((unsigned long long)lseek64(destfd[n], sb_pos, 0) != sb_pos)
+			status = -1;
+		if (status == 0 &&
+		    write(destfd[n], &bsb, 512) != 512)
+			status = -1;
+		fsync(destfd[n]);
+	}
+	return status;
+}
+
+static void fail(char *msg)
+{
+	/* Write 'msg' and a newline to file descriptor 2, then exit:
+	 * status 2 normally, status 1 if either write came up short.
+	 */
+	int write_failed = 0;
+
+	if (write(2, msg, strlen(msg)) != (int)strlen(msg))
+		write_failed = 1;
+	if (write(2, "\n", 1) != 1)
+		write_failed = 1;
+
+	if (write_failed)
+		exit(1);
+	exit(2);
+}
+
+/* Scratch buffers used by validate(): 'abuf' receives data read from the
+ * array, 'bbuf' data read from the backup.  Both are 'abuflen' bytes long.
+ */
+static char *abuf, *bbuf;
+static unsigned long long abuflen;
+
+/* Make sure 'abuf' and 'bbuf' can each hold 'len' bytes.
+ * The buffers are always allocated 4096-byte aligned, for both backup
+ * sections, because the array descriptor may have been opened with
+ * O_DIRECT (reshape_array() does so when MDADM_GROW_VERIFY is set).
+ * Returns 0 on success.  On allocation failure returns -1 with both
+ * pointers NULL and abuflen 0, so a later call can safely try again.
+ */
+static int validate_resize_bufs(unsigned long long len)
+{
+	void *a = NULL, *b = NULL;
+
+	if (abuflen >= len)
+		return 0;
+
+	free(abuf);
+	free(bbuf);
+	/* never leave freed pointers behind: a failed allocation below
+	 * followed by another call would otherwise free them twice
+	 */
+	abuf = NULL;
+	bbuf = NULL;
+	abuflen = 0;
+
+	if (posix_memalign(&a, 4096, len) != 0)
+		return -1;
+	if (posix_memalign(&b, 4096, len) != 0) {
+		free(a);
+		return -1;
+	}
+	abuf = a;
+	bbuf = b;
+	abuflen = len;
+	return 0;
+}
+
+static void validate(int afd, int bfd, unsigned long long offset)
+{
+	/* Check the data in the backup against the array.
+	 * This is only used for regression testing and should not
+	 * be used while the array is active.
+	 *
+	 * 'afd' is the array (nothing is done if it is negative), 'bfd'
+	 * the backup device or file and 'offset' the byte offset of the
+	 * backup data within 'bfd'; the backup-super-block is read from
+	 * 4096 bytes before that.  Any mismatch or read error ends the
+	 * process through fail().  If the comparison buffers cannot be
+	 * allocated, validation is silently abandoned.
+	 */
+	if (afd < 0)
+		return;
+	lseek64(bfd, offset - 4096, 0);
+	if (read(bfd, &bsb2, 512) != 512)
+		fail("cannot read bsb");
+	if (bsb2.sb_csum != bsb_csum((char*)&bsb2,
+				     ((char*)&bsb2.sb_csum)-((char*)&bsb2)))
+		fail("first csum bad");
+	if (memcmp(bsb2.magic, "md_backup_data", 14) != 0)
+		fail("magic is bad");
+	if (memcmp(bsb2.magic, "md_backup_data-2", 16) == 0 &&
+	    bsb2.sb_csum2 != bsb_csum((char*)&bsb2,
+				      ((char*)&bsb2.sb_csum2)-((char*)&bsb2)))
+		fail("second csum bad");
+
+	if (__le64_to_cpu(bsb2.devstart)*512 != offset)
+		fail("devstart is wrong");
+
+	if (bsb2.length) {
+		unsigned long long len = __le64_to_cpu(bsb2.length)*512;
+
+		if (validate_resize_bufs(len) < 0)
+			/* just stop validating on mem-alloc failure */
+			return;
+
+		lseek64(bfd, offset, 0);
+		if ((unsigned long long)read(bfd, bbuf, len) != len)
+			fail("read first backup failed");
+		lseek64(afd, __le64_to_cpu(bsb2.arraystart)*512, 0);
+		if ((unsigned long long)read(afd, abuf, len) != len)
+			fail("read first from array failed");
+		if (memcmp(bbuf, abuf, len) != 0)
+			fail("data1 compare failed");
+	}
+	if (bsb2.length2) {
+		unsigned long long len = __le64_to_cpu(bsb2.length2)*512;
+
+		if (validate_resize_bufs(len) < 0)
+			/* just stop validating on mem-alloc failure */
+			return;
+
+		lseek64(bfd, offset+__le64_to_cpu(bsb2.devstart2)*512, 0);
+		if ((unsigned long long)read(bfd, bbuf, len) != len)
+			fail("read second backup failed");
+		lseek64(afd, __le64_to_cpu(bsb2.arraystart2)*512, 0);
+		if ((unsigned long long)read(afd, abuf, len) != len)
+			fail("read second from array failed");
+		if (memcmp(bbuf, abuf, len) != 0)
+			fail("data2 compare failed");
+	}
+}
+
+int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
+ struct supertype *st, unsigned long blocks,
+ int *fds, unsigned long long *offsets,
+ int dests, int *destfd, unsigned long long *destoffsets)
+{
+ /* Monitor a reshape where backup is being performed using
+ * 'native' mechanism - either to a backup file, or
+ * to some space in a spare.  Returns 1 if progress_reshape() gave -1 or
+ * monitoring is no longer needed; 0 on setup failure, sigterm, or any other rv < 0. */
+ char *buf;
+ int degraded = -1;
+ unsigned long long speed;
+ unsigned long long suspend_point, array_size;
+ unsigned long long backup_point, wait_point;
+ unsigned long long reshape_completed;
+ int done = 0; /* becomes the return value */
+ int increasing = reshape->after.data_disks >= reshape->before.data_disks;
+ int part = 0; /* The next part of the backup area to fill. It may already
+ * be full, so we need to check */
+ int level = reshape->level;
+ int layout = reshape->before.layout;
+ int data = reshape->before.data_disks;
+ int disks = reshape->before.data_disks + reshape->parity;
+ int chunk = sra->array.chunk_size;
+ struct mdinfo *sd;
+ unsigned long stripes;
+ int uuid[4];
+ int frozen = 0;
+
+ /* set up the backup-super-block. This requires the
+ * uuid from the array.
+ */
+ /* Find a superblock */
+ for (sd = sra->devs; sd; sd = sd->next) {
+ char *dn;
+ int devfd;
+ int ok;
+ if (sd->disk.state & (1<<MD_DISK_FAULTY))
+ continue;
+ dn = map_dev(sd->disk.major, sd->disk.minor, 1);
+ devfd = dev_open(dn, O_RDONLY);
+ if (devfd < 0)
+ continue;
+ ok = st->ss->load_super(st, devfd, NULL);
+ close(devfd);
+ if (ok == 0)
+ break;
+ }
+ if (!sd) {
+ pr_err("Cannot find a superblock\n");
+ return 0;
+ }
+
+ memset(&bsb, 0, 512);
+ memcpy(bsb.magic, "md_backup_data-1", 16);
+ st->ss->uuid_from_super(st, uuid);
+ memcpy(bsb.set_uuid, uuid, 16);
+ bsb.mtime = __cpu_to_le64(time(0));
+ bsb.devstart2 = __cpu_to_le64(blocks); /* readers decode devstart2 with __le64_to_cpu(), so store it little-endian */
+
+ stripes = blocks / (sra->array.chunk_size/512) /
+ reshape->before.data_disks;
+
+ if (posix_memalign((void**)&buf, 4096, disks * chunk))
+ /* Don't start the 'reshape' */
+ return 0;
+ if (reshape->before.data_disks == reshape->after.data_disks) {
+ sysfs_get_ll(sra, NULL, "sync_speed_min", &speed); /* NOTE(review): result unchecked; 'speed' may stay uninitialised if this fails */
+ sysfs_set_num(sra, NULL, "sync_speed_min", 200000);
+ }
+
+ if (increasing) {
+ array_size = sra->component_size * reshape->after.data_disks;
+ backup_point = sra->reshape_progress;
+ suspend_point = 0;
+ } else {
+ array_size = sra->component_size * reshape->before.data_disks;
+ backup_point = reshape->backup_blocks;
+ suspend_point = array_size;
+ }
+
+ while (!done) {
+ int rv;
+
+ /* Want to return as soon the oldest backup slot can
+ * be released as that allows us to start backing up
+ * some more, providing suspend_point has been
+ * advanced, which it should have.
+ */
+ if (increasing) {
+ wait_point = array_size;
+ if (part == 0 && __le64_to_cpu(bsb.length) > 0)
+ wait_point = (__le64_to_cpu(bsb.arraystart) +
+ __le64_to_cpu(bsb.length));
+ if (part == 1 && __le64_to_cpu(bsb.length2) > 0)
+ wait_point = (__le64_to_cpu(bsb.arraystart2) +
+ __le64_to_cpu(bsb.length2));
+ } else {
+ wait_point = 0;
+ if (part == 0 && __le64_to_cpu(bsb.length) > 0)
+ wait_point = __le64_to_cpu(bsb.arraystart);
+ if (part == 1 && __le64_to_cpu(bsb.length2) > 0)
+ wait_point = __le64_to_cpu(bsb.arraystart2);
+ }
+
+ reshape_completed = sra->reshape_progress;
+ rv = progress_reshape(sra, reshape,
+ backup_point, wait_point,
+ &suspend_point, &reshape_completed,
+ &frozen);
+ /* external metadata would need to ping_monitor here */
+ sra->reshape_progress = reshape_completed;
+
+ /* Clear any backup region that is before 'here' */
+ if (increasing) {
+ if (__le64_to_cpu(bsb.length) > 0 &&
+ reshape_completed >= (__le64_to_cpu(bsb.arraystart) +
+ __le64_to_cpu(bsb.length)))
+ forget_backup(dests, destfd,
+ destoffsets, 0);
+ if (__le64_to_cpu(bsb.length2) > 0 &&
+ reshape_completed >= (__le64_to_cpu(bsb.arraystart2) +
+ __le64_to_cpu(bsb.length2)))
+ forget_backup(dests, destfd,
+ destoffsets, 1);
+ } else {
+ if (__le64_to_cpu(bsb.length) > 0 &&
+ reshape_completed <= (__le64_to_cpu(bsb.arraystart)))
+ forget_backup(dests, destfd,
+ destoffsets, 0);
+ if (__le64_to_cpu(bsb.length2) > 0 &&
+ reshape_completed <= (__le64_to_cpu(bsb.arraystart2)))
+ forget_backup(dests, destfd,
+ destoffsets, 1);
+ }
+ if (sigterm) /* handled like an error: leave the loop with done still 0 */
+ rv = -2;
+ if (rv < 0) {
+ if (rv == -1)
+ done = 1;
+ break;
+ }
+ if (rv == 0 && increasing && !st->ss->external) {
+ /* No longer need to monitor this reshape */
+ sysfs_set_str(sra, NULL, "sync_max", "max");
+ done = 1;
+ break;
+ }
+
+ while (rv) {
+ unsigned long long offset;
+ unsigned long actual_stripes;
+ /* Need to backup some data.
+ * If 'part' is not used and the desired
+ * backup size is suspended, do a backup,
+ * then consider the next part.
+ */
+ /* Check that 'part' is unused */
+ if (part == 0 && __le64_to_cpu(bsb.length) != 0)
+ break;
+ if (part == 1 && __le64_to_cpu(bsb.length2) != 0)
+ break;
+
+ offset = backup_point / data;
+ actual_stripes = stripes;
+ if (increasing) {
+ if (offset + actual_stripes * (chunk/512) >
+ sra->component_size)
+ actual_stripes = ((sra->component_size - offset)
+ / (chunk/512));
+ if (offset + actual_stripes * (chunk/512) >
+ suspend_point/data)
+ break;
+ } else {
+ if (offset < actual_stripes * (chunk/512))
+ actual_stripes = offset / (chunk/512);
+ offset -= actual_stripes * (chunk/512);
+ if (offset < suspend_point/data)
+ break;
+ }
+ if (actual_stripes == 0)
+ break;
+ grow_backup(sra, offset, actual_stripes,
+ fds, offsets,
+ disks, chunk, level, layout,
+ dests, destfd, destoffsets,
+ part, &degraded, buf);
+ validate(afd, destfd[0], destoffsets[0]);
+ /* record where 'part' is up to */
+ part = !part; /* switch to the other of the two backup slots */
+ if (increasing)
+ backup_point += actual_stripes * (chunk/512) * data;
+ else
+ backup_point -= actual_stripes * (chunk/512) * data;
+ }
+ }
+
+ /* FIXME maybe call progress_reshape one more time instead */
+ /* remove any remaining suspension */
+ sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
+ sysfs_set_num(sra, NULL, "suspend_hi", 0);
+ sysfs_set_num(sra, NULL, "suspend_lo", 0);
+ sysfs_set_num(sra, NULL, "sync_min", 0);
+
+ if (reshape->before.data_disks == reshape->after.data_disks)
+ sysfs_set_num(sra, NULL, "sync_speed_min", speed); /* put back the value read before the loop */
+ free(buf);
+ return done;
+}
+
+/*
+ * If any spare contains md_backup_data-1 which is recent wrt mtime,
+ * write that data into the array and update the super blocks with
+ * the new reshape_progress.
+ * Returns 0 if a backup was restored or the reshape position shows none is needed, 1 otherwise. */
+int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt,
+ char *backup_file, int verbose)
+{
+ int i, j;
+ int old_disks;
+ unsigned long long *offsets;
+ unsigned long long nstripe, ostripe;
+ int ndata, odata;
+
+ odata = info->array.raid_disks - info->delta_disks - 1;
+ if (info->array.level == 6) odata--; /* number of data disks */
+ ndata = info->array.raid_disks - 1;
+ if (info->new_level == 6) ndata--;
+
+ old_disks = info->array.raid_disks - info->delta_disks;
+
+ if (info->delta_disks <= 0)
+ /* Didn't grow, so the backup file must have
+ * been used
+ */
+ old_disks = cnt;
+ for (i=old_disks-(backup_file?1:0); i<cnt; i++) { /* index old_disks-1 stands for the backup file */
+ struct mdinfo dinfo;
+ int fd;
+ int bsbsize;
+ char *devname, namebuf[20];
+ unsigned long long lo, hi;
+
+ /* This was a spare and may have some saved data on it.
+ * Load the superblock, find and load the
+ * backup_super_block.
+ * If either fail, go on to next device.
+ * If the backup contains no new info, just return
+ * else restore data and update all superblocks
+ */
+ if (i == old_disks-1) {
+ fd = open(backup_file, O_RDONLY); /* NOTE(review): this fd is never close()d anywhere in this function */
+ if (fd<0) {
+ pr_err("backup file %s inaccessible: %s\n",
+ backup_file, strerror(errno));
+ continue;
+ }
+ devname = backup_file;
+ } else {
+ fd = fdlist[i];
+ if (fd < 0)
+ continue;
+ if (st->ss->load_super(st, fd, NULL))
+ continue;
+
+ st->ss->getinfo_super(st, &dinfo, NULL);
+ st->ss->free_super(st);
+
+ if (lseek64(fd,
+ (dinfo.data_offset + dinfo.component_size - 8) <<9,
+ 0) < 0) {
+ pr_err("Cannot seek on device %d\n", i);
+ continue; /* Cannot seek */
+ }
+ sprintf(namebuf, "device-%d", i);
+ devname = namebuf;
+ }
+ if (read(fd, &bsb, sizeof(bsb)) != sizeof(bsb)) {
+ if (verbose)
+ pr_err("Cannot read from %s\n", devname);
+ continue; /* Cannot read */
+ }
+ if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0 &&
+ memcmp(bsb.magic, "md_backup_data-2", 16) != 0) {
+ if (verbose)
+ pr_err("No backup metadata on %s\n", devname);
+ continue;
+ }
+ if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb))) {
+ if (verbose)
+ pr_err("Bad backup-metadata checksum on %s\n", devname);
+ continue; /* bad checksum */
+ }
+ if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0 &&
+ bsb.sb_csum2 != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum2)-((char*)&bsb))) {
+ if (verbose)
+ pr_err("Bad backup-metadata checksum2 on %s\n", devname);
+ continue; /* Bad second checksum */
+ }
+ if (memcmp(bsb.set_uuid,info->uuid, 16) != 0) {
+ if (verbose)
+ pr_err("Wrong uuid on backup-metadata on %s\n", devname);
+ continue; /* Wrong uuid */
+ }
+
+ /* array utime and backup-mtime should be updated at much the same time, but it seems that
+ * sometimes they aren't... So allow considerable flexibility in matching (utime may be up to
+ * 2 hours after, or 10 minutes before, mtime), and allow this test to be overridden by an environment variable.
+ */
+ if (info->array.utime > (int)__le64_to_cpu(bsb.mtime) + 2*60*60 ||
+ info->array.utime < (int)__le64_to_cpu(bsb.mtime) - 10*60) {
+ if (check_env("MDADM_GROW_ALLOW_OLD")) {
+ pr_err("accepting backup with timestamp %lu "
+ "for array with timestamp %lu\n",
+ (unsigned long)__le64_to_cpu(bsb.mtime),
+ (unsigned long)info->array.utime);
+ } else {
+ pr_err("too-old timestamp on backup-metadata on %s\n", devname);
+ pr_err("If you think it is should be safe, try 'export MDADM_GROW_ALLOW_OLD=1'\n");
+ continue; /* time stamp is too bad */
+ }
+ }
+
+ if (bsb.magic[15] == '1') { /* "-1" format: only the first backup section is examined */
+ if (bsb.length == 0)
+ continue;
+ if (info->delta_disks >= 0) {
+ /* reshape_progress is increasing */
+ if (__le64_to_cpu(bsb.arraystart)
+ + __le64_to_cpu(bsb.length)
+ < info->reshape_progress) {
+ nonew: /* also reached by goto from the sibling branches below */
+ if (verbose)
+ pr_err("backup-metadata found on %s but is not needed\n", devname);
+ continue; /* No new data here */
+ }
+ } else {
+ /* reshape_progress is decreasing */
+ if (__le64_to_cpu(bsb.arraystart) >=
+ info->reshape_progress)
+ goto nonew; /* No new data here */
+ }
+ } else {
+ if (bsb.length == 0 && bsb.length2 == 0)
+ continue;
+ if (info->delta_disks >= 0) {
+ /* reshape_progress is increasing */
+ if ((__le64_to_cpu(bsb.arraystart)
+ + __le64_to_cpu(bsb.length)
+ < info->reshape_progress)
+ &&
+ (__le64_to_cpu(bsb.arraystart2)
+ + __le64_to_cpu(bsb.length2)
+ < info->reshape_progress))
+ goto nonew; /* No new data here */
+ } else {
+ /* reshape_progress is decreasing */
+ if (__le64_to_cpu(bsb.arraystart) >=
+ info->reshape_progress &&
+ __le64_to_cpu(bsb.arraystart2) >=
+ info->reshape_progress)
+ goto nonew; /* No new data here */
+ }
+ }
+ if (lseek64(fd, __le64_to_cpu(bsb.devstart)*512, 0)< 0) {
+ second_fail: /* also reached by goto from the checks just below */
+ if (verbose)
+ pr_err("Failed to verify secondary backup-metadata block on %s\n",
+ devname);
+ continue; /* Cannot seek */
+ }
+ /* There should be a duplicate backup superblock 4k before here */
+ if (lseek64(fd, -4096, 1) < 0 ||
+ read(fd, &bsb2, sizeof(bsb2)) != sizeof(bsb2))
+ goto second_fail; /* Cannot find leading superblock */
+ if (bsb.magic[15] == '1')
+ bsbsize = offsetof(struct mdp_backup_super, pad1);
+ else
+ bsbsize = offsetof(struct mdp_backup_super, pad);
+ if (memcmp(&bsb2, &bsb, bsbsize) != 0)
+ goto second_fail; /* Cannot find leading superblock */
+
+ /* Now need the data offsets for all devices. */
+ offsets = xmalloc(sizeof(*offsets)*info->array.raid_disks);
+ for(j=0; j<info->array.raid_disks; j++) {
+ if (fdlist[j] < 0)
+ continue;
+ if (st->ss->load_super(st, fdlist[j], NULL))
+ /* FIXME should this be an error */
+ continue;
+ st->ss->getinfo_super(st, &dinfo, NULL);
+ st->ss->free_super(st);
+ offsets[j] = dinfo.data_offset * 512; /* entries for skipped devices are left unset */
+ }
+ printf(Name ": restoring critical section\n");
+
+ if (restore_stripes(fdlist, offsets,
+ info->array.raid_disks,
+ info->new_chunk,
+ info->new_level,
+ info->new_layout,
+ fd, __le64_to_cpu(bsb.devstart)*512,
+ __le64_to_cpu(bsb.arraystart)*512,
+ __le64_to_cpu(bsb.length)*512, NULL)) {
+ /* didn't succeed, so giveup */
+ if (verbose)
+ pr_err("Error restoring backup from %s\n",
+ devname);
+ free(offsets);
+ return 1;
+ }
+
+ if (bsb.magic[15] == '2' &&
+ restore_stripes(fdlist, offsets,
+ info->array.raid_disks,
+ info->new_chunk,
+ info->new_level,
+ info->new_layout,
+ fd, __le64_to_cpu(bsb.devstart)*512 +
+ __le64_to_cpu(bsb.devstart2)*512,
+ __le64_to_cpu(bsb.arraystart2)*512,
+ __le64_to_cpu(bsb.length2)*512, NULL)) {
+ /* didn't succeed, so giveup */
+ if (verbose)
+ pr_err("Error restoring second backup from %s\n",
+ devname);
+ free(offsets);
+ return 1;
+ }
+
+ free(offsets);
+
+ /* Ok, so the data is restored. Let's update those superblocks. */
+
+ lo = hi = 0; /* [lo, hi) will cover the array range held by the backup section(s) */
+ if (bsb.length) {
+ lo = __le64_to_cpu(bsb.arraystart);
+ hi = lo + __le64_to_cpu(bsb.length);
+ }
+ if (bsb.magic[15] == '2' && bsb.length2) {
+ unsigned long long lo1, hi1;
+ lo1 = __le64_to_cpu(bsb.arraystart2);
+ hi1 = lo1 + __le64_to_cpu(bsb.length2);
+ if (lo == hi) {
+ lo = lo1;
+ hi = hi1;
+ } else if (lo < lo1)
+ hi = hi1;
+ else
+ lo = lo1;
+ }
+ if (lo < hi &&
+ (info->reshape_progress < lo ||
+ info->reshape_progress > hi))
+ /* backup does not affect reshape_progress*/ ;
+ else if (info->delta_disks >= 0) {
+ info->reshape_progress = __le64_to_cpu(bsb.arraystart) +
+ __le64_to_cpu(bsb.length);
+ if (bsb.magic[15] == '2') {
+ unsigned long long p2 = __le64_to_cpu(bsb.arraystart2) +
+ __le64_to_cpu(bsb.length2);
+ if (p2 > info->reshape_progress)
+ info->reshape_progress = p2;
+ }
+ } else {
+ info->reshape_progress = __le64_to_cpu(bsb.arraystart);
+ if (bsb.magic[15] == '2') {
+ unsigned long long p2 = __le64_to_cpu(bsb.arraystart2);
+ if (p2 < info->reshape_progress)
+ info->reshape_progress = p2;
+ }
+ }
+ for (j=0; j<info->array.raid_disks; j++) { /* write the new reshape_progress to every readable member */
+ if (fdlist[j] < 0)
+ continue;
+ if (st->ss->load_super(st, fdlist[j], NULL))
+ continue;
+ st->ss->getinfo_super(st, &dinfo, NULL);
+ dinfo.reshape_progress = info->reshape_progress;
+ st->ss->update_super(st, &dinfo,
+ "_reshape_progress",
+ NULL,0, 0, NULL);
+ st->ss->store_super(st, fdlist[j]);
+ st->ss->free_super(st);
+ }
+ return 0;
+ }
+ /* Didn't find any backup data, try to see if any
+ * was needed.
+ */
+ if (info->delta_disks < 0) {
+ /* When shrinking, the critical section is at the end.
+ * So see if we are before the critical section.
+ */
+ unsigned long long first_block;
+ nstripe = ostripe = 0;
+ first_block = 0;
+ while (ostripe >= nstripe) {
+ ostripe += info->array.chunk_size / 512;
+ first_block = ostripe * odata;
+ nstripe = first_block / ndata / (info->new_chunk/512) *
+ (info->new_chunk/512);
+ }
+
+ if (info->reshape_progress >= first_block)
+ return 0;
+ }
+ if (info->delta_disks > 0) {
+ /* See if we are beyond the critical section. */
+ unsigned long long last_block;
+ nstripe = ostripe = 0;
+ last_block = 0;
+ while (nstripe >= ostripe) {
+ nstripe += info->new_chunk / 512;
+ last_block = nstripe * ndata;
+ ostripe = last_block / odata / (info->array.chunk_size/512) *
+ (info->array.chunk_size/512);
+ }
+
+ if (info->reshape_progress >= last_block)
+ return 0;
+ }
+ /* needed to recover critical section! */
+ if (verbose)
+ pr_err("Failed to find backup of critical section\n");
+ return 1;
+}
+/* Continue a reshape of 'devname' (already open as 'fd') from the command line; returns 1 on failures detected here, else the result of Grow_continue(). */
+int Grow_continue_command(char *devname, int fd,
+ char *backup_file, int verbose)
+{
+ int ret_val = 0;
+ struct supertype *st = NULL;
+ struct mdinfo *content = NULL;
+ struct mdinfo array;
+ char *subarray = NULL;
+ struct mdinfo *cc = NULL;
+ struct mdstat_ent *mdstat = NULL;
+ int cfd = -1; /* container fd, external metadata only; closed at the exit label */
+ int fd2 = -1; /* member-disk or reshaped-array fd; closed at the exit label if still open */
+
+ dprintf("Grow continue from command line called for %s\n",
+ devname);
+
+ st = super_by_fd(fd, &subarray);
+ if (!st || !st->ss) {
+ pr_err("Unable to determine metadata format for %s\n",
+ devname);
+ return 1;
+ }
+ dprintf("Grow continue is run for ");
+ if (st->ss->external == 0) {
+ int d;
+ dprintf("native array (%s)\n", devname);
+ if (ioctl(fd, GET_ARRAY_INFO, &array.array) < 0) {
+ pr_err("%s is not an active md array -"
+ " aborting\n", devname);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+ content = &array;
+ /* Need to load a superblock.
+ * FIXME we should really get what we need from
+ * sysfs
+ */
+ for (d = 0; d < MAX_DISKS; d++) { /* stop at the first active disk whose superblock loads */
+ mdu_disk_info_t disk;
+ char *dv;
+ int err;
+ disk.number = d;
+ if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ continue;
+ if (disk.major == 0 && disk.minor == 0)
+ continue;
+ if ((disk.state & (1 << MD_DISK_ACTIVE)) == 0)
+ continue;
+ dv = map_dev(disk.major, disk.minor, 1);
+ if (!dv)
+ continue;
+ fd2 = dev_open(dv, O_RDONLY);
+ if (fd2 < 0)
+ continue;
+ err = st->ss->load_super(st, fd2, NULL);
+ close(fd2);
+ /* invalidate fd2 to avoid possible double close() */
+ fd2 = -1;
+ if (err)
+ continue;
+ break;
+ }
+ if (d == MAX_DISKS) {
+ pr_err("Unable to load metadata for %s\n",
+ devname);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+ st->ss->getinfo_super(st, content, NULL);
+ } else {
+ char *container;
+
+ if (subarray) {
+ dprintf("subarray (%s)\n", subarray);
+ container = st->container_devnm;
+ cfd = open_dev_excl(st->container_devnm);
+ } else {
+ container = st->devnm;
+ close(fd); /* the caller's fd is closed here and replaced by the exclusive container fd */
+ cfd = open_dev_excl(st->devnm);
+ dprintf("container (%s)\n", container);
+ fd = cfd;
+ }
+ if (cfd < 0) {
+ pr_err("Unable to open container "
+ "for %s\n", devname);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+
+ /* find in container array under reshape
+ */
+ ret_val = st->ss->load_container(st, cfd, NULL);
+ if (ret_val) {
+ pr_err("Cannot read superblock for %s\n",
+ devname);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+
+ cc = st->ss->container_content(st, subarray);
+ for (content = cc; content ; content = content->next) {
+ char *array;
+ int allow_reshape = 1;
+
+ if (content->reshape_active == 0)
+ continue;
+ /* The decision about array or container wide
+ * reshape is taken in Grow_continue based
+ * content->reshape_active state, therefore we
+ * need to check_reshape based on
+ * reshape_active and subarray name
+ */
+ if (content->array.state & (1<<MD_SB_BLOCK_VOLUME))
+ allow_reshape = 0;
+ if (content->reshape_active == CONTAINER_RESHAPE &&
+ (content->array.state
+ & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE)))
+ allow_reshape = 0;
+
+ if (!allow_reshape) {
+ pr_err("cannot continue reshape of an array"
+ " in container with unsupported"
+ " metadata: %s(%s)\n",
+ devname, container);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+
+ array = strchr(content->text_version+1, '/')+1; /* NOTE(review): assumes a '/' exists after the first character; a NULL from strchr() is not handled */
+ mdstat = mdstat_by_subdev(array, container);
+ if (!mdstat)
+ continue;
+ if (mdstat->active == 0) {
+ pr_err("Skipping inactive array %s.\n",
+ mdstat->devnm);
+ free_mdstat(mdstat);
+ mdstat = NULL;
+ continue;
+ }
+ break;
+ }
+ if (!content) {
+ pr_err("Unable to determine reshaped "
+ "array for %s\n", devname);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+ fd2 = open_dev(mdstat->devnm);
+ if (fd2 < 0) {
+ pr_err("cannot open (%s)\n", mdstat->devnm);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+
+ sysfs_init(content, fd2, mdstat->devnm);
+
+ /* start mdmon in case it is not running
+ */
+ if (!mdmon_running(container))
+ start_mdmon(container);
+ ping_monitor(container);
+
+ if (mdmon_running(container))
+ st->update_tail = &st->updates;
+ else {
+ pr_err("No mdmon found. "
+ "Grow cannot continue.\n");
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+ }
+
+ /* verify that array under reshape is started from
+ * correct position
+ */
+ if (verify_reshape_position(content, content->array.level) < 0) {
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+
+ /* continue reshape
+ */
+ ret_val = Grow_continue(fd, st, content, backup_file, 1, 0);
+
+Grow_continue_command_exit: /* common cleanup for every path after 'st' was obtained */
+ if (fd2 > -1)
+ close(fd2);
+ if (cfd > -1)
+ close(cfd);
+ st->ss->free_super(st);
+ free_mdstat(mdstat);
+ sysfs_free(cc);
+ free(subarray);
+
+ return ret_val;
+}
+
+int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
+		  char *backup_file, int forked, int freeze_reshape)
+{
+	/* Resume an interrupted reshape of 'info'.
+	 * Returns 2 when no reshape is active, 1 when the container of an
+	 * external-metadata array cannot be opened, and otherwise the result
+	 * of reshape_array() or reshape_container().
+	 */
+	int cfd;
+
+	if (!info->reshape_active)
+		return 2;
+
+	if (!st->ss->external)
+		return reshape_array(NULL, mdfd, "array", st, info, 1,
+				     NULL, INVALID_SECTORS,
+				     backup_file, 0, forked,
+				     1 | info->reshape_active, freeze_reshape);
+
+	cfd = open_dev(st->container_devnm);
+	if (cfd < 0)
+		return 1;
+
+	/* the return value of load_container() is not checked here */
+	st->ss->load_container(st, cfd, st->container_devnm);
+	close(cfd);
+	return reshape_container(st->container_devnm, NULL, mdfd,
+				 st, info, 0, backup_file, 0, forked,
+				 1 | info->reshape_active, freeze_reshape);
+}
+
+char *make_backup(char *name)
+{
+	/* Build "<MAP_DIR>/backup_file-<name>" in a fresh xmalloc() buffer
+	 * and return it; the buffer is not freed here. */
+	char *prefix = "backup_file-";
+	/* directory + '/' + prefix + name + terminating NUL */
+	int need = strlen(MAP_DIR) + 1 + strlen(prefix) + strlen(name) + 1;
+	char *path = xmalloc(need);
+	sprintf(path, "%s/%s%s", MAP_DIR, prefix, name);
+	return path;
+}
+
+char *locate_backup(char *name)
+{
+	/* Return the make_backup() path for 'name' when stat() shows it is
+	 * a regular file; otherwise free the path and return NULL. */
+	struct stat sb;
+	char *path = make_backup(name);
+	if (stat(path, &sb) != 0 || !S_ISREG(sb.st_mode)) {
+		free(path);
+		path = NULL;
+	}
+	return path;
+}
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 00000000..f7bcc3e6
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,13 @@
+
+To build mdadm, simply run:
+
+ make
+
+to install, run
+
+ make install
+
+as root.
+
+
+No configuration is necessary.
diff --git a/Incremental.c b/Incremental.c
new file mode 100644
index 00000000..c9372587
--- /dev/null
+++ b/Incremental.c
@@ -0,0 +1,1798 @@
+/*
+ * Incremental.c - support --incremental. Part of:
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2006-2013 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ * Paper: Neil Brown
+ * Novell Inc
+ * GPO Box Q1283
+ * QVB Post Office, NSW 1230
+ * Australia
+ */
+
+#include "mdadm.h"
+#include <sys/wait.h>
+#include <dirent.h>
+#include <ctype.h>
+/* Prototypes for static (file-local) helpers, declared here ahead of their use in Incremental(). */
+static int count_active(struct supertype *st, struct mdinfo *sra,
+ int mdfd, char **availp,
+ struct mdinfo *info);
+static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
+ int number, __u64 events, int verbose,
+ char *array_name);
+static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+ struct map_ent *target,
+ struct supertype *st, int verbose);
+
+static int Incremental_container(struct supertype *st, char *devname,
+ struct context *c, char *only);
+
+int Incremental(struct mddev_dev *devlist, struct context *c,
+ struct supertype *st)
+{
+ /* Add this device to an array, creating the array if necessary
+ * and starting the array if sensible or - if runstop>0 - if possible.
+ *
+ * This has several steps:
+ *
+ * 1/ Check if device is permitted by mdadm.conf, reject if not.
+ * 2/ Find metadata, reject if none appropriate (check
+ * version/name from args)
+ * 3/ Check if there is a match in mdadm.conf
+ * 3a/ if not, check for homehost match. If no match, assemble as
+ * a 'foreign' array.
+ * 4/ Determine device number.
+ * - If in mdadm.conf with std name, use that
+ * - UUID in /var/run/mdadm.map use that
+ * - If name is suggestive, use that. unless in use with different uuid.
+ * - Choose a free, high number.
+ * - Use a partitioned device unless strong suggestion not to.
+ * e.g. auto=md
+ * Don't choose partitioned for containers.
+ * 5/ Find out if array already exists
+ * 5a/ if it does not
+ * - choose a name, from mdadm.conf or 'name' field in array.
+ * - create the array
+ * - add the device
+ * 5b/ if it does
+ * - check one drive in array to make sure metadata is a reasonably
+ * close match. Reject if not (e.g. different type)
+ * - add the device
+ * 6/ Make sure /var/run/mdadm.map contains this array.
+ * 7/ Is there enough devices to possibly start the array?
+ * For a container, this means running Incremental_container.
+ * 7a/ if not, finish with success.
+ * 7b/ if yes,
+ * - read all metadata and arrange devices like -A does
+ * - if number of OK devices match expected, or -R and there are enough,
+ * start the array (auto-readonly).
+ */
+ struct stat stb;
+ struct mdinfo info, dinfo;
+ struct mdinfo *sra = NULL, *d;
+ struct mddev_ident *match;
+ char chosen_name[1024];
+ char *md_devname;
+ int rv = 1;
+ struct map_ent *mp, *map = NULL;
+ int dfd = -1, mdfd = -1;
+ char *avail = NULL;
+ int active_disks;
+ int trustworthy;
+ char *name_to_use;
+ mdu_array_info_t ainf;
+ struct dev_policy *policy = NULL;
+ struct map_ent target_array;
+ int have_target;
+ char *devname = devlist->devname;
+
+ struct createinfo *ci = conf_get_create_info();
+
+ if (stat(devname, &stb) < 0) {
+ if (c->verbose >= 0)
+ pr_err("stat failed for %s: %s.\n",
+ devname, strerror(errno));
+ return rv;
+ }
+ if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+ if (c->verbose >= 0)
+ pr_err("%s is not a block device.\n",
+ devname);
+ return rv;
+ }
+ dfd = dev_open(devname, O_RDONLY);
+ if (dfd < 0) {
+ if (c->verbose >= 0)
+ pr_err("cannot open %s: %s.\n",
+ devname, strerror(errno));
+ return rv;
+ }
+ /* If the device is a container, we do something very different */
+ if (must_be_container(dfd)) {
+ if (!st)
+ st = super_by_fd(dfd, NULL);
+ if (st)
+ st->ignore_hw_compat = 1;
+ if (st && st->ss->load_container)
+ rv = st->ss->load_container(st, dfd, NULL);
+
+ close(dfd);
+ if (!rv && st->ss->container_content) {
+ if (map_lock(&map))
+ pr_err("failed to get "
+ "exclusive lock on mapfile\n");
+ if (c->export)
+ printf("MD_DEVNAME=%s\n", devname);
+ rv = Incremental_container(st, devname, c, NULL);
+ map_unlock(&map);
+ return rv;
+ }
+
+ pr_err("%s is not part of an md array.\n",
+ devname);
+ return rv;
+ }
+
+ /* 1/ Check if device is permitted by mdadm.conf */
+
+ for (;devlist; devlist = devlist->next)
+ if (conf_test_dev(devlist->devname))
+ break;
+ if (!devlist) {
+ devlist = conf_get_devs();
+ for (;devlist; devlist = devlist->next) {
+ struct stat st2;
+ if (stat(devlist->devname, &st2) == 0 &&
+ (st2.st_mode & S_IFMT) == S_IFBLK &&
+ st2.st_rdev == stb.st_rdev)
+ break;
+ }
+ }
+ if (!devlist) {
+ if (c->verbose >= 0)
+ pr_err("%s not permitted by mdadm.conf.\n",
+ devname);
+ goto out;
+ }
+
+ /* 2/ Find metadata, reject if none appropriate (check
+ * version/name from args) */
+
+ if (fstat(dfd, &stb) < 0) {
+ if (c->verbose >= 0)
+ pr_err("fstat failed for %s: %s.\n",
+ devname, strerror(errno));
+ goto out;
+ }
+ if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+ if (c->verbose >= 0)
+ pr_err("%s is not a block device.\n",
+ devname);
+ goto out;
+ }
+
+ dinfo.disk.major = major(stb.st_rdev);
+ dinfo.disk.minor = minor(stb.st_rdev);
+
+ policy = disk_policy(&dinfo);
+ have_target = policy_check_path(&dinfo, &target_array);
+
+ if (st == NULL && (st = guess_super(dfd)) == NULL) {
+ if (c->verbose >= 0)
+ pr_err("no recognisable superblock on %s.\n",
+ devname);
+ rv = try_spare(devname, &dfd, policy,
+ have_target ? &target_array : NULL,
+ st, c->verbose);
+ goto out;
+ }
+ st->ignore_hw_compat = 1;
+ if (st->ss->compare_super == NULL ||
+ st->ss->load_super(st, dfd, NULL)) {
+ if (c->verbose >= 0)
+ pr_err("no RAID superblock on %s.\n",
+ devname);
+ rv = try_spare(devname, &dfd, policy,
+ have_target ? &target_array : NULL,
+ st, c->verbose);
+ free(st);
+ goto out;
+ }
+ close (dfd); dfd = -1;
+
+ st->ss->getinfo_super(st, &info, NULL);
+
+ /* 3/ Check if there is a match in mdadm.conf */
+ match = conf_match(st, &info, devname, c->verbose, &rv);
+ if (!match && rv == 2)
+ goto out;
+
+ if (match && match->devname
+ && strcasecmp(match->devname, "<ignore>") == 0) {
+ if (c->verbose >= 0)
+ pr_err("array containing %s is explicitly"
+ " ignored by mdadm.conf\n",
+ devname);
+ goto out;
+ }
+
+ /* 3a/ if not, check for homehost match. If no match, continue
+ * but don't trust the 'name' in the array. Thus a 'random' minor
+ * number will be assigned, and the device name will be based
+ * on that. */
+ if (match)
+ trustworthy = LOCAL;
+ else if (st->ss->match_home(st, c->homehost) == 1)
+ trustworthy = LOCAL;
+ else if (st->ss->match_home(st, "any") == 1)
+ trustworthy = LOCAL_ANY;
+ else
+ trustworthy = FOREIGN;
+
+ if (!match && !conf_test_metadata(st->ss->name, policy,
+ (trustworthy == LOCAL))) {
+ if (c->verbose >= 1)
+ pr_err("%s has metadata type %s for which "
+ "auto-assembly is disabled\n",
+ devname, st->ss->name);
+ goto out;
+ }
+ if (trustworthy == LOCAL_ANY)
+ trustworthy = LOCAL;
+
+ /* There are three possible sources for 'autof': command line,
+ * ARRAY line in mdadm.conf, or CREATE line in mdadm.conf.
+ * ARRAY takes precedence, then command line, then
+ * CREATE.
+ */
+ if (match && match->autof)
+ c->autof = match->autof;
+ if (c->autof == 0)
+ c->autof = ci->autof;
+
+ name_to_use = info.name;
+ if (name_to_use[0] == 0 &&
+ info.array.level == LEVEL_CONTAINER) {
+ name_to_use = info.text_version;
+ trustworthy = METADATA;
+ }
+ if (name_to_use[0] && trustworthy != LOCAL &&
+ ! c->require_homehost &&
+ conf_name_is_free(name_to_use))
+ trustworthy = LOCAL;
+
+ /* strip "hostname:" prefix from name if we have decided
+ * to treat it as LOCAL
+ */
+ if (trustworthy == LOCAL && strchr(name_to_use, ':') != NULL)
+ name_to_use = strchr(name_to_use, ':')+1;
+
+ /* 4/ Check if array exists.
+ */
+ if (map_lock(&map))
+ pr_err("failed to get exclusive lock on "
+ "mapfile\n");
+ /* Now check we can get O_EXCL. If not, probably "mdadm -A" has
+ * taken over
+ */
+ dfd = dev_open(devname, O_RDONLY|O_EXCL);
+ if (dfd < 0) {
+ if (c->verbose >= 0)
+ pr_err("cannot reopen %s: %s.\n",
+ devname, strerror(errno));
+ goto out_unlock;
+ }
+ /* Cannot hold it open while we add the device to the array,
+ * so we must release the O_EXCL and depend on the map_lock()
+ * So now is the best time to remove any partitions.
+ */
+ remove_partitions(dfd);
+ close(dfd);
+ dfd = -1;
+
+ mp = map_by_uuid(&map, info.uuid);
+ if (mp)
+ mdfd = open_dev(mp->devnm);
+ else
+ mdfd = -1;
+
+ if (mdfd < 0) {
+
+ /* Couldn't find an existing array, maybe make a new one */
+ mdfd = create_mddev(match ? match->devname : NULL,
+ name_to_use, c->autof, trustworthy, chosen_name);
+
+ if (mdfd < 0)
+ goto out_unlock;
+
+ sysfs_init(&info, mdfd, NULL);
+
+ if (set_array_info(mdfd, st, &info) != 0) {
+ pr_err("failed to set array info for %s: %s\n",
+ chosen_name, strerror(errno));
+ rv = 2;
+ goto out_unlock;
+ }
+
+ dinfo = info;
+ dinfo.disk.major = major(stb.st_rdev);
+ dinfo.disk.minor = minor(stb.st_rdev);
+ if (add_disk(mdfd, st, &info, &dinfo) != 0) {
+ pr_err("failed to add %s to new array %s: %s.\n",
+ devname, chosen_name, strerror(errno));
+ ioctl(mdfd, STOP_ARRAY, 0);
+ rv = 2;
+ goto out_unlock;
+ }
+ sra = sysfs_read(mdfd, NULL, (GET_DEVS | GET_STATE |
+ GET_OFFSET | GET_SIZE));
+
+ if (!sra || !sra->devs || sra->devs->disk.raid_disk >= 0) {
+ /* It really should be 'none' - must be old buggy
+ * kernel, and mdadm -I may not be able to complete.
+ * So reject it.
+ */
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ pr_err("You have an old buggy kernel which cannot support\n"
+ " --incremental reliably. Aborting.\n");
+ rv = 2;
+ goto out_unlock;
+ }
+ info.array.working_disks = 1;
+ /* 6/ Make sure /var/run/mdadm.map contains this array. */
+ map_update(&map, fd2devnm(mdfd),
+ info.text_version,
+ info.uuid, chosen_name);
+ } else {
+ /* 5b/ if it does */
+ /* - check one drive in array to make sure metadata is a reasonably */
+ /* close match. Reject if not (e.g. different type) */
+ /* - add the device */
+ char dn[20];
+ int dfd2;
+ int err;
+ struct supertype *st2;
+ struct mdinfo info2, *d;
+
+ sra = sysfs_read(mdfd, NULL, (GET_DEVS | GET_STATE |
+ GET_OFFSET | GET_SIZE));
+
+ if (mp->path)
+ strcpy(chosen_name, mp->path);
+ else
+ strcpy(chosen_name, mp->devnm);
+
+ /* It is generally not OK to add non-spare drives to a
+ * running array as they are probably missing because
+ * they failed. However if runstop is 1, then the
+ * array was possibly started early and our best bet is
+ * to add this anyway.
+ * Also if action policy is re-add or better we allow
+ * re-add.
+ * This doesn't apply to containers as the 'non-spare'
+ * flag has a different meaning. The test has to happen
+ * at the device level there
+ */
+ if (!st->ss->external
+ && (info.disk.state & (1<<MD_DISK_SYNC)) != 0
+ && ! policy_action_allows(policy, st->ss->name,
+ act_re_add)
+ && c->runstop < 1) {
+ if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
+ pr_err("not adding %s to active array (without --run) %s\n",
+ devname, chosen_name);
+ rv = 2;
+ goto out_unlock;
+ }
+ }
+ if (!sra) {
+ rv = 2;
+ goto out_unlock;
+ }
+ if (sra->devs) {
+ sprintf(dn, "%d:%d", sra->devs->disk.major,
+ sra->devs->disk.minor);
+ dfd2 = dev_open(dn, O_RDONLY);
+ if (dfd2 < 0) {
+ pr_err("unable to open %s\n", devname);
+ rv = 2;
+ goto out_unlock;
+ }
+ st2 = dup_super(st);
+ if (st2->ss->load_super(st2, dfd2, NULL) ||
+ st->ss->compare_super(st, st2) != 0) {
+ pr_err("metadata mismatch between %s and "
+ "chosen array %s\n",
+ devname, chosen_name);
+ close(dfd2);
+ rv = 2;
+ goto out_unlock;
+ }
+ close(dfd2);
+ st2->ss->getinfo_super(st2, &info2, NULL);
+ st2->ss->free_super(st2);
+ if (info.array.level != info2.array.level ||
+ memcmp(info.uuid, info2.uuid, 16) != 0 ||
+ info.array.raid_disks != info2.array.raid_disks) {
+ pr_err("unexpected difference between %s and %s.\n",
+ chosen_name, devname);
+ rv = 2;
+ goto out_unlock;
+ }
+ }
+ info.disk.major = major(stb.st_rdev);
+ info.disk.minor = minor(stb.st_rdev);
+ /* add disk needs to know about containers */
+ if (st->ss->external)
+ sra->array.level = LEVEL_CONTAINER;
+ err = add_disk(mdfd, st, sra, &info);
+ if (err < 0 && errno == EBUSY) {
+ /* could be another device present with the same
+ * disk.number. Find and reject any such
+ */
+ find_reject(mdfd, st, sra, info.disk.number,
+ info.events, c->verbose, chosen_name);
+ err = add_disk(mdfd, st, sra, &info);
+ }
+ if (err < 0 && errno == EINVAL &&
+ info.disk.state & (1<<MD_DISK_SYNC)) {
+ /* Maybe it needs to be added as a spare */
+ if (policy_action_allows(policy, st->ss->name,
+ act_force_spare)) {
+ info.disk.state &= ~(1<<MD_DISK_SYNC);
+ err = add_disk(mdfd, st, sra, &info);
+ } else
+ if (c->verbose >= 0)
+ pr_err("can only add %s to %s as a spare, and force-spare is not set.\n",
+ devname, chosen_name);
+ }
+ if (err < 0) {
+ pr_err("failed to add %s to existing array %s: %s.\n",
+ devname, chosen_name, strerror(errno));
+ rv = 2;
+ goto out_unlock;
+ }
+ info.array.working_disks = 0;
+ for (d = sra->devs; d; d=d->next)
+ info.array.working_disks ++;
+
+ }
+ if (strncmp(chosen_name, "/dev/md/", 8) == 0)
+ md_devname = chosen_name+8;
+ else
+ md_devname = chosen_name;
+ if (c->export) {
+ printf("MD_DEVICE=%s\n", fd2devnm(mdfd));
+ printf("MD_DEVNAME=%s\n", md_devname);
+ printf("MD_FOREIGN=%s\n", trustworthy == FOREIGN ? "yes" : "no");
+ }
+
+ /* 7/ Is there enough devices to possibly start the array? */
+ /* 7a/ if not, finish with success. */
+ if (info.array.level == LEVEL_CONTAINER) {
+ char devnm[32];
+ /* Try to assemble within the container */
+ sysfs_uevent(sra, "change");
+ if (!c->export && c->verbose >= 0)
+ pr_err("container %s now has %d device%s\n",
+ chosen_name, info.array.working_disks,
+ info.array.working_disks == 1?"":"s");
+ wait_for(chosen_name, mdfd);
+ if (st->ss->external)
+ strcpy(devnm, fd2devnm(mdfd));
+ if (st->ss->load_container)
+ rv = st->ss->load_container(st, mdfd, NULL);
+ close(mdfd);
+ sysfs_free(sra);
+ if (!rv)
+ rv = Incremental_container(st, chosen_name, c, NULL);
+ map_unlock(&map);
+ /* after spare is added, ping monitor for external metadata
+ * so that it can eg. try to rebuild degraded array */
+ if (st->ss->external)
+ ping_monitor(devnm);
+ return rv;
+ }
+
+ /* We have added something to the array, so need to re-read the
+ * state. Eventually this state should be kept up-to-date as
+ * things change.
+ */
+ sysfs_free(sra);
+ sra = sysfs_read(mdfd, NULL, (GET_DEVS | GET_STATE |
+ GET_OFFSET | GET_SIZE));
+ active_disks = count_active(st, sra, mdfd, &avail, &info);
+ if (enough(info.array.level, info.array.raid_disks,
+ info.array.layout, info.array.state & 1,
+ avail) == 0) {
+ if (c->export) {
+ printf("MD_STARTED=no\n");
+ } else if (c->verbose >= 0)
+ pr_err("%s attached to %s, not enough to start (%d).\n",
+ devname, chosen_name, active_disks);
+ rv = 0;
+ goto out_unlock;
+ }
+
+ /* 7b/ if yes, */
+ /* - if number of OK devices match expected, or -R and there */
+ /* are enough, */
+ /* + add any bitmap file */
+ /* + start the array (auto-readonly). */
+
+ if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
+ if (c->export) {
+ printf("MD_STARTED=already\n");
+ } else if (c->verbose >= 0)
+ pr_err("%s attached to %s which is already active.\n",
+ devname, chosen_name);
+ rv = 0;
+ goto out_unlock;
+ }
+
+ map_unlock(&map);
+ if (c->runstop > 0 || active_disks >= info.array.working_disks) {
+ struct mdinfo *dsk;
+ /* Let's try to start it */
+
+ if (info.reshape_active && !(info.reshape_active & RESHAPE_NO_BACKUP)) {
+ pr_err("%s: This array is being reshaped and cannot be started\n",
+ chosen_name);
+ cont_err("by --incremental. Please use --assemble\n");
+ goto out;
+ }
+ if (match && match->bitmap_file) {
+ int bmfd = open(match->bitmap_file, O_RDWR);
+ if (bmfd < 0) {
+ pr_err("Could not open bitmap file %s.\n",
+ match->bitmap_file);
+ goto out;
+ }
+ if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
+ close(bmfd);
+ pr_err("Failed to set bitmapfile for %s.\n",
+ chosen_name);
+ goto out;
+ }
+ close(bmfd);
+ }
+ /* Need to remove from the array any devices which
+ * 'count_active' discerned were too old or inappropriate
+ */
+ for (d = sra ? sra->devs : NULL ; d ; d = d->next)
+ if (d->disk.state & (1<<MD_DISK_REMOVED))
+ remove_disk(mdfd, st, sra, d);
+
+ if ((sra == NULL || active_disks >= info.array.working_disks)
+ && trustworthy != FOREIGN)
+ rv = ioctl(mdfd, RUN_ARRAY, NULL);
+ else
+ rv = sysfs_set_str(sra, NULL,
+ "array_state", "read-auto");
+ /* Array might be O_EXCL which will interfere with
+ * fsck and mount. So re-open without O_EXCL.
+ */
+ reopen_mddev(mdfd);
+ if (rv == 0) {
+ if (c->export) {
+ printf("MD_STARTED=yes\n");
+ } else if (c->verbose >= 0)
+ pr_err("%s attached to %s, which has been started.\n",
+ devname, chosen_name);
+ rv = 0;
+ wait_for(chosen_name, mdfd);
+ /* We just started the array, so some devices
+ * might have been evicted from the array
+ * because their event counts were too old.
+ * If the action=re-add policy is in-force for
+ * those devices we should re-add them now.
+ */
+ for (dsk = sra->devs; dsk ; dsk = dsk->next) {
+ if (disk_action_allows(dsk, st->ss->name, act_re_add) &&
+ add_disk(mdfd, st, sra, dsk) == 0)
+ pr_err("%s re-added to %s\n",
+ dsk->sys_name, chosen_name);
+ }
+ } else {
+ pr_err("%s attached to %s, but failed to start: %s.\n",
+ devname, chosen_name, strerror(errno));
+ rv = 1;
+ }
+ } else {
+ if (c->export) {
+ printf("MD_STARTED=unsafe\n");
+ } else if (c->verbose >= 0)
+ pr_err("%s attached to %s, not enough to start safely.\n",
+ devname, chosen_name);
+ rv = 0;
+ }
+out:
+ free(avail);
+ if (dfd >= 0)
+ close(dfd);
+ if (mdfd >= 0)
+ close(mdfd);
+ if (policy)
+ dev_policy_free(policy);
+ if (sra)
+ sysfs_free(sra);
+ return rv;
+out_unlock:
+ map_unlock(&map);
+ goto out;
+}
+
+/* Remove stale devices that occupy the slot a new device wants.
+ *
+ * Walks every device listed in 'sra', loads its metadata through 'st',
+ * and asks sysfs to remove any device whose disk.number equals 'number'
+ * and whose event count is lower than 'events'.
+ *
+ * Nothing is done when GET_ARRAY_INFO succeeds on 'mdfd' (the array is
+ * active).  'verbose' >= 0 enables a message for each removed device;
+ * 'array_name' is only used in that message.
+ */
+static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
+			int number, __u64 events, int verbose,
+			char *array_name)
+{
+	struct mdinfo *d;
+	mdu_array_info_t ra;
+
+	if (ioctl(mdfd, GET_ARRAY_INFO, &ra) == 0)
+		return; /* not safe to remove from active arrays
+			 * without thinking more */
+
+	for (d = sra->devs; d ; d = d->next) {
+		/* Room for two decimal ints (sign included), the ':' and
+		 * the terminating NUL.  A 10 byte buffer is too small for
+		 * e.g. "259:1048575".
+		 */
+		char dn[24];
+		int dfd;
+		struct mdinfo info;
+
+		snprintf(dn, sizeof(dn), "%d:%d",
+			 d->disk.major, d->disk.minor);
+		dfd = dev_open(dn, O_RDONLY);
+		if (dfd < 0)
+			continue;
+		if (st->ss->load_super(st, dfd, NULL)) {
+			close(dfd);
+			continue;
+		}
+		st->ss->getinfo_super(st, &info, NULL);
+		st->ss->free_super(st);
+		close(dfd);
+
+		/* Only a device in the same slot with older events is stale */
+		if (info.disk.number != number ||
+		    info.events >= events)
+			continue;
+
+		/* Detach from its slot first, then ask for removal */
+		if (d->disk.raid_disk > -1)
+			sysfs_set_str(sra, d, "slot", "none");
+		if (sysfs_set_str(sra, d, "state", "remove") == 0)
+			if (verbose >= 0)
+				pr_err("removing old device %s from %s\n",
+				       d->sys_name+4, array_name);
+	}
+}
+
+/* Count how many devices in 'sra' think they are active.
+ *
+ * Each device's metadata is loaded through 'st'.  Devices flagged
+ * MD_DISK_SYNC are ranked by event count in 'avail' (indexed by
+ * raid_disk): 2 means events equal the highest seen, 1 means exactly
+ * one behind, 0 means not usable.  Devices flagged
+ * MD_DISK_REPLACEMENT (and not SYNC) are counted separately.
+ *
+ * 'devmap' holds one row of 'raid_disks' bytes per device, indexed by
+ * the device's position in sra->devs, filled in by getinfo_super().
+ * A device whose row has a zero for the slot of the best device is
+ * marked MD_DISK_REMOVED and not counted.  Non-SYNC devices whose
+ * d->events exceeds the highest event count are also marked REMOVED.
+ *
+ * '*availp' is set to the allocated 'avail' array; it is not freed
+ * here.  'bestinfo' receives the info of a device with the highest
+ * event count.  'mdfd' is not used.
+ *
+ * Returns the number of usable slots plus the replacement count, or 0
+ * when 'sra' is NULL or no metadata could be loaded.
+ */
+static int count_active(struct supertype *st, struct mdinfo *sra,
+			int mdfd, char **availp,
+			struct mdinfo *bestinfo)
+{
+	struct mdinfo *d;
+	int cnt = 0;
+	int replcnt = 0;
+	__u64 max_events = 0;
+	char *avail = NULL;
+	int *best = NULL;
+	char *devmap = NULL;
+	int numdevs = 0;
+	int devnum;
+	int b, i;
+	int raid_disks = 0;
+
+	if (!sra)
+		return 0;
+
+	for (d = sra->devs ; d ; d = d->next)
+		numdevs++;
+	for (d = sra->devs, devnum = 0 ; d ; d = d->next, devnum++) {
+		char dn[30];
+		int dfd;
+		int ok;
+		struct mdinfo info;
+
+		sprintf(dn, "%d:%d", d->disk.major, d->disk.minor);
+		dfd = dev_open(dn, O_RDONLY);
+		if (dfd < 0)
+			continue;
+		ok = st->ss->load_super(st, dfd, NULL);
+		close(dfd);
+		if (ok != 0)
+			continue;
+		info.array.raid_disks = raid_disks;
+		st->ss->getinfo_super(st, &info, devmap + raid_disks * devnum);
+		if (!avail) {
+			raid_disks = info.array.raid_disks;
+			avail = xcalloc(raid_disks, 1);
+			*availp = avail;
+
+			best = xcalloc(raid_disks, sizeof(int));
+			devmap = xcalloc(raid_disks, numdevs);
+
+			/* Re-read now that the map exists.  It must go in
+			 * this device's own row: devnum is not 0 here if
+			 * earlier devices were skipped, and the row is
+			 * looked up via best[] further down.
+			 */
+			st->ss->getinfo_super(st, &info,
+					      devmap + raid_disks * devnum);
+		}
+
+		if (info.disk.state & (1<<MD_DISK_SYNC))
+		{
+			if (cnt == 0) {
+				cnt++;
+				max_events = info.events;
+				avail[info.disk.raid_disk] = 2;
+				best[info.disk.raid_disk] = devnum;
+				st->ss->getinfo_super(st, bestinfo, NULL);
+			} else if (info.events == max_events) {
+				avail[info.disk.raid_disk] = 2;
+				best[info.disk.raid_disk] = devnum;
+			} else if (info.events == max_events-1) {
+				if (avail[info.disk.raid_disk] == 0) {
+					avail[info.disk.raid_disk] = 1;
+					best[info.disk.raid_disk] = devnum;
+				}
+			} else if (info.events < max_events - 1)
+				;
+			else if (info.events == max_events+1) {
+				/* New maximum: everything seen so far is
+				 * now one step older.
+				 */
+				max_events = info.events;
+				for (i = 0; i < raid_disks; i++)
+					if (avail[i])
+						avail[i]--;
+				avail[info.disk.raid_disk] = 2;
+				best[info.disk.raid_disk] = devnum;
+				st->ss->getinfo_super(st, bestinfo, NULL);
+			} else { /* info.events much bigger */
+				memset(avail, 0, raid_disks);
+				max_events = info.events;
+				avail[info.disk.raid_disk] = 2;
+				best[info.disk.raid_disk] = devnum;
+				st->ss->getinfo_super(st, bestinfo, NULL);
+			}
+		} else if (info.disk.state & (1<<MD_DISK_REPLACEMENT))
+			replcnt++;
+		st->ss->free_super(st);
+	}
+	if (!avail)
+		return 0;
+	/* We need to reject any device that thinks the best device is
+	 * failed or missing */
+	for (b = 0; b < raid_disks; b++)
+		if (avail[b] == 2)
+			break;
+	cnt = 0;
+	for (i = 0 ; i < raid_disks ; i++) {
+		if (i != b && avail[i])
+			if (devmap[raid_disks * best[i] + b] == 0) {
+				/* This device thinks 'b' is failed -
+				 * don't use it */
+				devnum = best[i];
+				for (d=sra->devs ; devnum; d = d->next)
+					devnum--;
+				d->disk.state |= (1 << MD_DISK_REMOVED);
+				avail[i] = 0;
+			}
+		if (avail[i])
+			cnt++;
+	}
+	/* Also need to reject any spare device with an event count that
+	 * is too high
+	 */
+	for (d = sra->devs; d; d = d->next) {
+		if (!(d->disk.state & (1<<MD_DISK_SYNC)) &&
+		    d->events > max_events)
+			d->disk.state |= (1 << MD_DISK_REMOVED);
+	}
+	free(best);
+	free(devmap);
+	return cnt + replcnt;
+}
+
+/* Report the worst degradation among arrays belonging to a container.
+ *
+ * Every entry of 'map' whose metadata string matches the container
+ * 'me' (per metadata_container_matches()) is opened and queried with
+ * GET_ARRAY_INFO.  Degradation is raid_disks - active_disks -
+ * spare_disks.  Entries that cannot be opened or queried are ignored.
+ *
+ * Returns the largest degradation found, never less than 0.
+ */
+static int container_members_max_degradation(struct map_ent *map, struct map_ent *me)
+{
+	struct map_ent *ent;
+	int worst = 0;
+
+	for (ent = map; ent != NULL; ent = ent->next) {
+		mdu_array_info_t ainfo;
+		int fd;
+
+		if (!metadata_container_matches(ent->metadata, me->devnm))
+			continue;
+
+		fd = open_dev(ent->devnm);
+		if (fd < 0)
+			continue;
+
+		/* the kernel's view is the most accurate one available */
+		if (ioctl(fd, GET_ARRAY_INFO, &ainfo) >= 0) {
+			int missing = ainfo.raid_disks - ainfo.active_disks -
+				ainfo.spare_disks;
+
+			worst = (missing > worst) ? missing : worst;
+		}
+		close(fd);
+	}
+	return worst;
+}
+
+/* Try to attach a metadata-less device to an existing array or
+ * container as a spare.
+ *
+ * devname: name used in messages.
+ * dfdp:    open descriptor for the device; closed and set to -1 once a
+ *          candidate array has been opened for the add.
+ * pol:     policy of the device.
+ * target:  preferred array (may be NULL).
+ * bare:    non-zero when the device was found to be blank.
+ * st:      restrict the search to this metadata type, or NULL for any.
+ * verbose: > 1 explains each rejected candidate, > 0 reports the result.
+ *
+ * Returns 0 when the device was added, non-zero otherwise.
+ */
+static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+ struct map_ent *target, int bare,
+ struct supertype *st, int verbose)
+{
+ /* This device doesn't have any md metadata
+ * The device policy allows 'spare' and if !bare, it allows spare-same-slot.
+ * If 'st' is not set, then we only know that some metadata allows this,
+ * others possibly don't.
+ * So look for a container or array to attach the device to.
+ * Prefer 'target' if that is set and the array is found.
+ *
+ * If st is set, then only arrays of that type are considered
+ * Return 0 on success, or some exit code on failure, probably 1.
+ */
+ int rv = 1;
+ struct stat stb;
+ struct map_ent *mp, *map = NULL;
+ struct mdinfo *chosen = NULL;
+ int dfd = *dfdp;
+
+ if (fstat(dfd, &stb) != 0)
+ return 1;
+
+ /*
+ * Now we need to find a suitable array to add this to.
+ * We only accept arrays that:
+ * - match 'st'
+ * - are in the same domains as the device
+ * - are of an size for which the device will be useful
+ * and we choose the one that is the most degraded
+ */
+
+ if (map_lock(&map)) {
+ pr_err("failed to get exclusive lock on "
+ "mapfile\n");
+ return 1;
+ }
+ for (mp = map ; mp ; mp = mp->next) {
+ struct supertype *st2;
+ struct domainlist *dl = NULL;
+ struct mdinfo *sra;
+ unsigned long long devsize;
+ unsigned long long component_size = 0;
+
+ if (is_subarray(mp->metadata))
+ continue;
+ if (st) {
+ /* st2 is only a probe here; it is freed on both paths */
+ st2 = st->ss->match_metadata_desc(mp->metadata);
+ if (!st2 ||
+ (st->minor_version >= 0 &&
+ st->minor_version != st2->minor_version)) {
+ if (verbose > 1)
+ pr_err("not adding %s to %s as metadata type doesn't match\n",
+ devname, mp->path);
+ free(st2);
+ continue;
+ }
+ free(st2);
+ }
+ sra = sysfs_read(-1, mp->devnm,
+ GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
+ GET_DEGRADED|GET_COMPONENT|GET_VERSION);
+ if (!sra) {
+ /* Probably a container - no degraded info */
+ sra = sysfs_read(-1, mp->devnm,
+ GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
+ GET_COMPONENT|GET_VERSION);
+ /* -1 marks "unknown"; it is replaced further down */
+ if (sra)
+ sra->array.failed_disks = -1;
+ }
+ if (!sra)
+ continue;
+ if (st == NULL) {
+ int i;
+ st2 = NULL;
+ for(i = 0; !st2 && superlist[i]; i++)
+ st2 = superlist[i]->match_metadata_desc(
+ sra->text_version);
+ if (!st2) {
+ if (verbose > 1)
+ pr_err("not adding %s to %s"
+ " as metadata not recognised.\n",
+ devname, mp->path);
+ goto next;
+ }
+ /* Need to double check the 'act_spare' permissions applies
+ * to this metadata.
+ */
+ if (!policy_action_allows(pol, st2->ss->name, act_spare))
+ goto next;
+ if (!bare && !policy_action_allows(pol, st2->ss->name,
+ act_spare_same_slot))
+ goto next;
+ } else
+ st2 = st;
+ /* update number of failed disks for mostly degraded
+ * container member */
+ if (sra->array.failed_disks == -1)
+ sra->array.failed_disks = container_members_max_degradation(map, mp);
+
+ /* NOTE(review): the return value is not checked; confirm devsize
+ * is always written by get_dev_size() even on failure.
+ */
+ get_dev_size(dfd, NULL, &devsize);
+ if (sra->component_size == 0) {
+ /* true for containers, here we must read superblock
+ * to obtain minimum spare size */
+ struct supertype *st3 = dup_super(st2);
+ int mdfd = open_dev(mp->devnm);
+ if (mdfd < 0) {
+ free(st3);
+ goto next;
+ }
+ if (st3->ss->load_container &&
+ !st3->ss->load_container(st3, mdfd, mp->path)) {
+ component_size = st3->ss->min_acceptable_spare_size(st3);
+ st3->ss->free_super(st3);
+ }
+ free(st3);
+ close(mdfd);
+ }
+ /* Reject when the device cannot hold a component: either the
+ * usable size is below the array's component size, or (for
+ * containers) the raw size is below the minimum spare size.
+ */
+ if ((sra->component_size > 0 &&
+ st2->ss->avail_size(st2, devsize,
+ sra->devs
+ ? sra->devs->data_offset
+ : INVALID_SECTORS)
+ < sra->component_size)
+ ||
+ (sra->component_size == 0 && devsize < component_size)) {
+ if (verbose > 1)
+ pr_err("not adding %s to %s as it is too small\n",
+ devname, mp->path);
+ goto next;
+ }
+ /* test against target.
+ * If 'target' is set and 'bare' is false, we only accept
+ * arrays/containers that match 'target'.
+ * If 'target' is set and 'bare' is true, we prefer the
+ * array which matches 'target'.
+ * target is considered only if we deal with degraded array
+ */
+ if (target && policy_action_allows(pol, st2->ss->name,
+ act_spare_same_slot)) {
+ if (strcmp(target->metadata, mp->metadata) == 0 &&
+ memcmp(target->uuid, mp->uuid,
+ sizeof(target->uuid)) == 0 &&
+ sra->array.failed_disks > 0) {
+ /* This is our target!! */
+ if (chosen)
+ sysfs_free(chosen);
+ chosen = sra;
+ sra = NULL;
+ /* skip to end so we don't check any more */
+ while (mp->next)
+ mp = mp->next;
+ goto next;
+ }
+ /* not our target */
+ if (!bare)
+ goto next;
+ }
+
+ dl = domain_from_array(sra, st2->ss->name);
+ if (domain_test(dl, pol, st2->ss->name) != 1) {
+ /* domain test fails */
+ if (verbose > 1)
+ pr_err("not adding %s to %s as"
+ " it is not in a compatible domain\n",
+ devname, mp->path);
+
+ goto next;
+ }
+ /* all tests passed, OK to add to this array */
+ /* Keep the candidate with the most failed disks; ownership of
+ * 'sra' moves to 'chosen' and sra is cleared so that the
+ * cleanup below does not free it.
+ */
+ if (!chosen) {
+ chosen = sra;
+ sra = NULL;
+ } else if (chosen->array.failed_disks < sra->array.failed_disks) {
+ sysfs_free(chosen);
+ chosen = sra;
+ sra = NULL;
+ }
+ next:
+ /* per-candidate cleanup; st2 is only freed when it is not the
+ * caller's 'st'
+ */
+ if (sra)
+ sysfs_free(sra);
+ if (st != st2)
+ free(st2);
+ if (dl)
+ domain_free(dl);
+ }
+ if (chosen) {
+ /* add current device to chosen array as a spare */
+ int mdfd = open_dev(chosen->sys_name);
+ if (mdfd >= 0) {
+ struct mddev_dev devlist;
+ /* shadows the 'devname' parameter inside this block only;
+ * the messages after the block use the parameter again
+ */
+ char devname[20];
+ devlist.next = NULL;
+ devlist.used = 0;
+ devlist.writemostly = 0;
+ devlist.devname = devname;
+ sprintf(devname, "%d:%d", major(stb.st_rdev),
+ minor(stb.st_rdev));
+ devlist.disposition = 'a';
+ /* the descriptor is closed before Manage_subdevs() runs and
+ * the caller's copy is invalidated
+ */
+ close(dfd);
+ *dfdp = -1;
+ rv = Manage_subdevs(chosen->sys_name, mdfd, &devlist,
+ -1, 0, NULL, 0);
+ close(mdfd);
+ }
+ if (verbose > 0) {
+ if (rv == 0)
+ pr_err("added %s as spare for %s\n",
+ devname, chosen->sys_name);
+ else
+ pr_err("failed to add %s as spare for %s\n",
+ devname, chosen->sys_name);
+ }
+ sysfs_free(chosen);
+ }
+ map_unlock(&map);
+ return rv;
+}
+
+/* Give a device the partition metadata of a compatible existing disk.
+ *
+ * devname: path of the device to write to (reopened O_RDWR here).
+ * dfdp:    not used by this function.
+ * pol:     policy of the new device, used for the domain tests.
+ * st:      partition metadata type to use, or NULL to guess per disk.
+ * verbose: not used by this function.
+ *
+ * Returns 1 when /dev/disk/by-path cannot be read or no suitable
+ * source disk is found, 0 otherwise.
+ */
+static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+			       struct supertype *st, int verbose)
+{
+	/* we know that at least one partition virtual-metadata is
+	 * allowed to incorporate spares like this device.  We need to
+	 * find a suitable device to copy partition information from.
+	 *
+	 * Getting a list of all disk (not partition) devices is
+	 * slightly non-trivial.  We could look at /sys/block, but
+	 * that is theoretically due to be removed.  Maybe best to use
+	 * /dev/disk/by-path/?* and ignore names ending '-partNN' as
+	 * we depend on this directory of 'path' info.  But that fails
+	 * to find loop devices and probably others.  Maybe don't
+	 * worry about that, they aren't the real target.
+	 *
+	 * So: check things in /dev/disk/by-path to see if they are in
+	 * a compatible domain, then load the partition table and see
+	 * if it is OK for the new device, and choose the largest
+	 * partition table that fits.
+	 */
+	DIR *dir;
+	struct dirent *de;
+	char *chosen = NULL;
+	unsigned long long chosen_size = 0;
+	struct supertype *chosen_st = NULL;
+	int fd;
+
+	dir = opendir("/dev/disk/by-path");
+	if (!dir)
+		return 1;
+	while ((de = readdir(dir)) != NULL) {
+		char *ep;
+		struct dev_policy *pol2 = NULL;
+		struct domainlist *domlist = NULL;
+		int cand_fd = -1;
+		struct mdinfo info;
+		struct supertype *st2 = NULL;
+		char *cand_path = NULL;
+		unsigned long long devsectors;
+
+		if (de->d_ino == 0 ||
+		    de->d_name[0] == '.' ||
+		    (de->d_type != DT_LNK && de->d_type != DT_UNKNOWN))
+			goto next;
+
+		/* step back over trailing digits to spot "-partNN" */
+		ep = de->d_name + strlen(de->d_name);
+		while (ep > de->d_name &&
+		       isdigit((unsigned char)ep[-1]))
+			ep--;
+		if (ep > de->d_name + 5 &&
+		    strncmp(ep-5, "-part", 5) == 0)
+			/* This is a partition - skip it */
+			goto next;
+
+		pol2 = path_policy(de->d_name, type_disk);
+
+		domain_merge(&domlist, pol2, st ? st->ss->name : NULL);
+		if (domain_test(domlist, pol, st ? st->ss->name : NULL) != 1)
+			/* new device is incompatible with this device. */
+			goto next;
+
+		domain_free(domlist);
+		domlist = NULL;
+
+		/* asprintf() returns the number of bytes printed, or -1
+		 * on failure, so only a negative result is an error.
+		 */
+		if (asprintf(&cand_path, "/dev/disk/by-path/%s",
+			     de->d_name) < 0) {
+			cand_path = NULL;
+			goto next;
+		}
+		cand_fd = open(cand_path, O_RDONLY);
+		if (cand_fd < 0)
+			goto next;
+		if (get_dev_size(cand_fd, cand_path, &devsectors) == 0)
+			goto next;
+		devsectors >>= 9;
+
+		if (st)
+			st2 = dup_super(st);
+		else
+			st2 = guess_super_type(cand_fd, guess_partitions);
+		if (st2 == NULL ||
+		    st2->ss->load_super(st2, cand_fd, NULL) < 0)
+			goto next;
+
+		if (!st) {
+			/* Check domain policy again, this time referring to metadata */
+			domain_merge(&domlist, pol2, st2->ss->name);
+			if (domain_test(domlist, pol, st2->ss->name) != 1)
+				/* Incompatible devices for this metadata type */
+				goto next;
+			if (!policy_action_allows(pol, st2->ss->name, act_spare))
+				/* Some partition types allow sparing, but not
+				 * this one.
+				 */
+				goto next;
+		}
+
+		st2->ss->getinfo_super(st2, &info, NULL);
+		if (info.component_size > devsectors)
+			/* This partitioning doesn't fit in the device */
+			goto next;
+
+		/* This is an acceptable device to copy partition
+		 * metadata from.  We could just stop here, but I
+		 * think I want to keep looking incase a larger
+		 * metadata which makes better use of the device can
+		 * be found.
+		 */
+		if (chosen == NULL ||
+		    chosen_size < info.component_size) {
+			chosen_size = info.component_size;
+			free(chosen);
+			chosen = cand_path;
+			cand_path = NULL;
+			if (chosen_st) {
+				chosen_st->ss->free_super(chosen_st);
+				free(chosen_st);
+			}
+			chosen_st = st2;
+			st2 = NULL;
+		}
+
+	next:
+		free(cand_path);
+		domain_free(domlist);
+		dev_policy_free(pol2);
+		if (st2)
+			st2->ss->free_super(st2);
+		free(st2);
+
+		if (cand_fd >= 0)
+			close(cand_fd);
+	}
+
+	closedir(dir);
+
+	if (!chosen)
+		return 1;
+
+	/* 'chosen' is the best device we can find.  Let's write its
+	 * metadata to devname dfd is read-only so don't use that
+	 */
+	/* NOTE(review): a failed open() or store_super() is not reported;
+	 * 0 is still returned - confirm that is intended.
+	 */
+	fd = open(devname, O_RDWR);
+	if (fd >= 0) {
+		chosen_st->ss->store_super(chosen_st, fd);
+		close(fd);
+	}
+	free(chosen);
+	chosen_st->ss->free_super(chosen_st);
+	free(chosen_st);
+	return 0;
+}
+
+/* Return non-zero when the 4096 bytes at 'blk' are all the same byte
+ * and that byte is 0x00, 0x5a or 0xff.
+ */
+static int is_bare_block(const char *blk)
+{
+	if (blk[0] != '\0' && blk[0] != '\x5a' && blk[0] != '\xff')
+		return 0;
+	/* comparing the block with itself shifted by one byte proves
+	 * every byte equals the first
+	 */
+	return memcmp(blk, blk+1, 4095) == 0;
+}
+
+/* Decide whether the device open on 'dfd' looks blank.
+ *
+ * The first and the last 4096 bytes of the device are read; the device
+ * is "bare" only if both blocks consist of one repeated fill byte
+ * (0x00, 0x5a or 0xff).  Any seek or short read yields 0.
+ *
+ * Returns 1 when bare, 0 otherwise.
+ */
+static int is_bare(int dfd)
+{
+	unsigned long long size = 0;
+	char bufpad[4096 + 4096];
+	/* 4096-aligned window inside bufpad */
+	char *buf = (char*)(((long)bufpad + 4096) & ~4095);
+
+	if (lseek(dfd, 0, SEEK_SET) != 0)
+		return 0;
+	if (read(dfd, buf, 4096) != 4096)
+		return 0;
+	if (!is_bare_block(buf))
+		return 0;
+
+	/* OK, first 4K appear blank, try the end. */
+	get_dev_size(dfd, NULL, &size);
+	if (lseek(dfd, size-4096, SEEK_SET) < 0)
+		return 0;
+	if (read(dfd, buf, 4096) != 4096)
+		return 0;
+
+	return is_bare_block(buf) ? 1 : 0;
+}
+
+/* Adding a spare to a regular array and adding one to a
+ * set-of-partitions virtual array are quite different operations.
+ * This function works out which of the two is worth attempting and
+ * attempts it; arrays take priority over partitions.
+ *
+ * Returns the result of the attempt made, or 1 when policy forbids
+ * using the device as a spare at all.
+ */
+static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+		     struct map_ent *target,
+		     struct supertype *st, int verbose)
+{
+	char *metaname = st ? st->ss->name : NULL;
+	int can_arrays = 0;
+	int can_partitions = 0;
+	int bare;
+	int rv;
+	int idx;
+
+	/* A spare must belong to at least one domain ... */
+	if (pol_find(pol, pol_domain) == NULL)
+		return 1;
+	/* ... and some action must permit spares */
+	if (!policy_action_allows(pol, metaname, act_spare))
+		return 1;
+
+	/* A bare device may always become a spare.  A non-bare device
+	 * may only do so when spare-same-slot is permitted and it is
+	 * replacing an earlier device, in which case 'target' is set.
+	 * (Later - may allow force_spare without target.)
+	 */
+	bare = is_bare(*dfdp) ? 1 : 0;
+	if (!bare &&
+	    (!target ||
+	     !policy_action_allows(pol, metaname, act_spare_same_slot))) {
+		if (verbose > 1)
+			pr_err("%s is not bare, so not considering as a spare\n",
+			       devname);
+		return 1;
+	}
+
+	/* With known metadata there is exactly one thing to try:
+	 * 'array' or 'partition', depending on that metadata.
+	 */
+	if (st && st->ss->add_to_super)
+		return array_try_spare(devname, dfdp, pol, target, bare,
+				       st, verbose);
+	if (st)
+		return partition_try_spare(devname, dfdp, pol,
+					   st, verbose);
+
+	/* No metadata was given or found, so everything is possible.
+	 * See whether any array-style or any partition-style metadata
+	 * permits a spare; this merely avoids a costly scan that is
+	 * certain to fail.
+	 */
+	idx = 0;
+	while (!(can_arrays && can_partitions) && superlist[idx]) {
+		int *flag = superlist[idx]->add_to_super
+			? &can_arrays : &can_partitions;
+
+		if (!*flag &&
+		    policy_action_allows(pol, superlist[idx]->name, act_spare))
+			*flag = 1;
+		idx++;
+	}
+
+	rv = 1;
+	if (can_arrays)
+		rv = array_try_spare(devname, dfdp, pol, target, bare,
+				     st, verbose);
+	if (can_partitions && rv != 0)
+		rv = partition_try_spare(devname, dfdp, pol, st, verbose);
+	return rv;
+}
+
+/* Try to start every array listed in the map file that is not running.
+ *
+ * c:     context; verbose and the settings used by
+ *        Incremental_container() are read from it.
+ * devnm: when non-NULL, only this device is considered.  If it names a
+ *        member array (metadata starts with '/'), the scan is
+ *        restarted on its container and only that member is started.
+ *
+ * Returns 0 on success, 1 if any array or container failed to start.
+ */
+int IncrementalScan(struct context *c, char *devnm)
+{
+	/* look at every device listed in the 'map' file.
+	 * If one is found that is not running then:
+	 *   look in mdadm.conf for bitmap file.
+	 *   if one exists, but array has none, add it.
+	 *   try to start array in auto-readonly mode
+	 */
+	struct map_ent *mapl = NULL;
+	struct map_ent *me;
+	struct mddev_ident *devs, *mddev;
+	int rv = 0;
+	char container[32];
+	char *only = NULL;
+
+	map_read(&mapl);
+	devs = conf_get_ident(NULL);
+
+restart:
+	for (me = mapl ; me ; me = me->next) {
+		mdu_array_info_t array;
+		mdu_bitmap_file_t bmf;
+		struct mdinfo *sra;
+		int mdfd;
+
+		if (devnm && strcmp(devnm, me->devnm) != 0)
+			continue;
+		if (devnm && me->metadata[0] == '/') {
+			char *sl;
+			/* member array, need to work on container */
+			strncpy(container, me->metadata+1, 32);
+			container[31] = 0;
+			sl = strchr(container, '/');
+			if (sl)
+				*sl = 0;
+			only = devnm;
+			devnm = container;
+			goto restart;
+		}
+		mdfd = open_dev(me->devnm);
+
+		if (mdfd < 0)
+			continue;
+		if (!isdigit((unsigned char)me->metadata[0])) {
+			/* must be a container */
+			struct supertype *st = super_by_fd(mdfd, NULL);
+			int ret = 0;
+			struct map_ent *map = NULL;
+			if (st)
+				st->ignore_hw_compat = 1;
+			if (st && st->ss->load_container)
+				ret = st->ss->load_container(st, mdfd, NULL);
+			close(mdfd);
+			/* st is NULL when the metadata was not recognised;
+			 * there is nothing to assemble in that case.
+			 */
+			if (!ret && st && st->ss->container_content) {
+				if (map_lock(&map))
+					pr_err("failed to get exclusive lock on mapfile\n");
+				ret = Incremental_container(st, me->path, c, only);
+				map_unlock(&map);
+			}
+			if (ret)
+				rv = 1;
+			continue;
+		}
+		/* Only arrays that are not running (ENODEV) are candidates */
+		if (ioctl(mdfd, GET_ARRAY_INFO, &array) == 0 ||
+		    errno != ENODEV) {
+			close(mdfd);
+			continue;
+		}
+		/* Ok, we can try this one.   Maybe it needs a bitmap */
+		for (mddev = devs ; mddev ; mddev = mddev->next)
+			if (mddev->devname && me->path
+			    && devname_matches(mddev->devname, me->path))
+				break;
+		if (mddev && mddev->bitmap_file) {
+			/*
+			 * Note: early kernels will wrongly fail this, so it
+			 * is a hint only
+			 */
+			int added = -1;
+			/* NOTE(review): GET_ARRAY_INFO is issued with a
+			 * bitmap-file buffer; GET_BITMAP_FILE may have been
+			 * intended - left unchanged, confirm.
+			 */
+			if (ioctl(mdfd, GET_ARRAY_INFO, &bmf) < 0) {
+				int bmfd = open(mddev->bitmap_file, O_RDWR);
+				if (bmfd >= 0) {
+					added = ioctl(mdfd, SET_BITMAP_FILE,
+						      bmfd);
+					close(bmfd);
+				}
+			}
+			if (c->verbose >= 0) {
+				if (added == 0)
+					pr_err("Added bitmap %s to %s\n",
+					       mddev->bitmap_file, me->path);
+				else if (errno != EEXIST)
+					pr_err("Failed to add bitmap to %s: %s\n",
+					       me->path, strerror(errno));
+			}
+		}
+		/* FIXME check for reshape_active and consider not
+		 * starting array.
+		 */
+		sra = sysfs_read(mdfd, NULL, 0);
+		if (sra) {
+			if (sysfs_set_str(sra, NULL,
+					  "array_state", "read-auto") == 0) {
+				if (c->verbose >= 0)
+					pr_err("started array %s\n",
+					       me->path ?: me->devnm);
+			} else {
+				pr_err("failed to start array %s: %s\n",
+				       me->path ?: me->devnm,
+				       strerror(errno));
+				rv = 1;
+			}
+			sysfs_free(sra);
+		}
+		/* every other path above has already closed mdfd */
+		close(mdfd);
+	}
+	/* release the list built by map_read() */
+	map_free(mapl);
+	return rv;
+}
+
+/* Translate a container reference into an md device name.
+ *
+ * 'devname' is either a path (leading '/'), which is opened and
+ * resolved with fd2devnm(), or a UUID string, which is looked up in
+ * the map file.
+ *
+ * Returns a newly allocated string (from xstrdup()), or NULL when the
+ * path cannot be opened, the UUID does not parse, or no map entry
+ * matches.
+ */
+static char *container2devname(char *devname)
+{
+	struct map_ent *entry, *map = NULL;
+	char *result = NULL;
+	int uuid[4];
+	int fd;
+
+	if (devname[0] == '/') {
+		/* given by path: ask the open descriptor for its name */
+		fd = open(devname, O_RDONLY);
+		if (fd < 0)
+			return NULL;
+		result = xstrdup(fd2devnm(fd));
+		close(fd);
+		return result;
+	}
+
+	/* otherwise it has to be a UUID known to the map file */
+	if (!parse_uuid(devname, uuid))
+		return NULL;
+
+	entry = map_by_uuid(&map, uuid);
+	if (entry)
+		result = xstrdup(entry->devnm);
+	map_free(map);
+
+	return result;
+}
+
+static int Incremental_container(struct supertype *st, char *devname,
+ struct context *c, char *only)
+{
+ /* Collect the contents of this container and for each
+ * array, choose a device name and assemble the array.
+ */
+
+ struct mdinfo *list;
+ struct mdinfo *ra;
+ struct map_ent *map = NULL;
+ struct mdinfo info;
+ int trustworthy;
+ struct mddev_ident *match;
+ int rv = 0;
+ struct domainlist *domains;
+ struct map_ent *smp;
+ int suuid[4];
+ int sfd;
+ int ra_blocked = 0;
+ int ra_all = 0;
+ int result = 0;
+
+ st->ss->getinfo_super(st, &info, NULL);
+
+ if ((c->runstop > 0 && info.container_enough >= 0) ||
+ info.container_enough > 0)
+ /* pass */;
+ else {
+ if (c->export) {
+ printf("MD_STARTED=no\n");
+ } else if (c->verbose)
+ pr_err("not enough devices to start the container\n");
+ return 0;
+ }
+
+ match = conf_match(st, &info, devname, c->verbose, &rv);
+ if (match == NULL && rv == 2)
+ return rv;
+
+ /* Need to compute 'trustworthy' */
+ if (match)
+ trustworthy = LOCAL;
+ else if (st->ss->match_home(st, c->homehost) == 1)
+ trustworthy = LOCAL;
+ else if (st->ss->match_home(st, "any") == 1)
+ trustworthy = LOCAL;
+ else
+ trustworthy = FOREIGN;
+
+ list = st->ss->container_content(st, NULL);
+ /* when nothing to activate - quit */
+ if (list == NULL) {
+ if (c->export) {
+ printf("MD_STARTED=nothing\n");
+ }
+ return 0;
+ }
+ for (ra = list ; ra ; ra = ra->next) {
+ int mdfd;
+ char chosen_name[1024];
+ struct map_ent *mp;
+ struct mddev_ident *match = NULL;
+
+ ra_all++;
+ /* do not activate arrays blocked by metadata handler */
+ if (ra->array.state & (1 << MD_SB_BLOCK_VOLUME)) {
+ pr_err("Cannot activate array %s in %s.\n",
+ ra->text_version, devname);
+ ra_blocked++;
+ continue;
+ }
+ mp = map_by_uuid(&map, ra->uuid);
+
+ if (mp) {
+ mdfd = open_dev(mp->devnm);
+ if (mp->path)
+ strcpy(chosen_name, mp->path);
+ else
+ strcpy(chosen_name, mp->devnm);
+ } else if (!only) {
+
+ /* Check in mdadm.conf for container == devname and
+ * member == ra->text_version after second slash.
+ */
+ char *sub = strchr(ra->text_version+1, '/');
+ struct mddev_ident *array_list;
+ if (sub) {
+ sub++;
+ array_list = conf_get_ident(NULL);
+ } else
+ array_list = NULL;
+ for(; array_list ; array_list = array_list->next) {
+ char *dn;
+ if (array_list->member == NULL ||
+ array_list->container == NULL)
+ continue;
+ if (strcmp(array_list->member, sub) != 0)
+ continue;
+ if (array_list->uuid_set &&
+ !same_uuid(ra->uuid, array_list->uuid, st->ss->swapuuid))
+ continue;
+ dn = container2devname(array_list->container);
+ if (dn == NULL)
+ continue;
+ if (strncmp(dn, ra->text_version+1,
+ strlen(dn)) != 0 ||
+ ra->text_version[strlen(dn)+1] != '/') {
+ free(dn);
+ continue;
+ }
+ free(dn);
+ /* we have a match */
+ match = array_list;
+ if (c->verbose>0)
+ pr_err("match found for member %s\n",
+ array_list->member);
+ break;
+ }
+
+ if (match && match->devname &&
+ strcasecmp(match->devname, "<ignore>") == 0) {
+ if (c->verbose > 0)
+ pr_err("array %s/%s is "
+ "explicitly ignored by mdadm.conf\n",
+ match->container, match->member);
+ continue;
+ }
+ if (match)
+ trustworthy = LOCAL;
+
+ mdfd = create_mddev(match ? match->devname : NULL,
+ ra->name,
+ c->autof,
+ trustworthy,
+ chosen_name);
+ }
+ if (only && (!mp || strcmp(mp->devnm, only) != 0))
+ continue;
+
+ if (mdfd < 0) {
+ pr_err("failed to open %s: %s.\n",
+ chosen_name, strerror(errno));
+ return 2;
+ }
+
+ assemble_container_content(st, mdfd, ra, c,
+ chosen_name, &result);
+ close(mdfd);
+ }
+ if (c->export && result) {
+ char sep = '=';
+ printf("MD_STARTED");
+ if (result & INCR_NO) {
+ printf("%cno", sep);
+ sep = ',';
+ }
+ if (result & INCR_UNSAFE) {
+ printf("%cunsafe", sep);
+ sep = ',';
+ }
+ if (result & INCR_ALREADY) {
+ printf("%calready", sep);
+ sep = ',';
+ }
+ if (result & INCR_YES) {
+ printf("%cyes", sep);
+ sep = ',';
+ }
+ printf("\n");
+ }
+
+ /* don't move spares to container with volume being activated
+ when all volumes are blocked */
+ if (ra_all == ra_blocked)
+ return 0;
+
+ /* Now move all suitable spares from spare container */
+ domains = domain_from_array(list, st->ss->name);
+ memcpy(suuid, uuid_zero, sizeof(int[4]));
+ if (domains &&
+ (smp = map_by_uuid(&map, suuid)) != NULL &&
+ (sfd = open(smp->path, O_RDONLY)) >= 0) {
+ /* spare container found */
+ struct supertype *sst =
+ super_imsm.match_metadata_desc("imsm");
+ struct mdinfo *sinfo;
+ unsigned long long min_size = 0;
+ if (st->ss->min_acceptable_spare_size)
+ min_size = st->ss->min_acceptable_spare_size(st);
+ if (!sst->ss->load_container(sst, sfd, NULL)) {
+ close(sfd);
+ sinfo = container_choose_spares(sst, min_size,
+ domains, NULL,
+ st->ss->name, 0);
+ sst->ss->free_super(sst);
+ if (sinfo){
+ int count = 0;
+ struct mdinfo *disks = sinfo->devs;
+ while (disks) {
+ /* move spare from spare
+ * container to currently
+ * assembled one
+ */
+ if (move_spare(
+ smp->path,
+ devname,
+ makedev(disks->disk.major,
+ disks->disk.minor)))
+ count++;
+ disks = disks->next;
+ }
+ if (count)
+ pr_err("Added %d spare%s to %s\n",
+ count, count>1?"s":"", devname);
+ }
+ sysfs_free(sinfo);
+ } else
+ close(sfd);
+ }
+ domain_free(domains);
+ return 0;
+}
+
+ /*
+  * run_udisks - run "udisks <arg1> <arg2>" and wait for it to finish.
+  *
+  * The child tries /usr/bin/udisks first and /bin/udisks second; if neither
+  * can be executed it exits with status 1.  The child's exit status is
+  * collected but not reported to the caller.
+  */
+ static void run_udisks(char *arg1, char *arg2)
+ {
+ 	int pid = fork();
+ 	int status;
+
+ 	if (pid == 0) {
+ 		execl("/usr/bin/udisks", "udisks", arg1, arg2, NULL);
+ 		execl("/bin/udisks", "udisks", arg1, arg2, NULL);
+ 		exit(1);
+ 	}
+ 	if (pid < 0)
+ 		/* fork failed, so there is no child to wait for */
+ 		return;
+
+ 	for (;;) {
+ 		int reaped = wait(&status);
+
+ 		if (reaped == pid)
+ 			break;
+ 		/* A failing wait() can only be retried usefully after
+ 		 * an interrupted call; any other error (e.g. ECHILD)
+ 		 * would repeat forever, so give up instead of spinning.
+ 		 */
+ 		if (reaped < 0 && errno != EINTR)
+ 			break;
+ 	}
+ }
+
+/*
+ * IncrementalRemove - Attempt to see if the passed in device belongs to any
+ * raid arrays, and if so first fail (if needed) and then remove the device.
+ *
+ * @devname - The device we want to remove
+ * @id_path - name as found in /dev/disk/by-path for this device
+ *
+ * Note: the device name must be a kernel name like "sda", so
+ * that we can find it in /proc/mdstat
+ */
+int IncrementalRemove(char *devname, char *id_path, int verbose)
+{
+ int mdfd;
+ int rv = 0;
+ struct mdstat_ent *ent;
+ struct mddev_dev devlist;
+ struct mdinfo mdi;
+ char buf[32];
+
+ if (!id_path)
+ dprintf(Name ": incremental removal without --path <id_path> "
+ "lacks the possibility to re-add new device in this "
+ "port\n");
+
+ if (strchr(devname, '/')) {
+ pr_err("incremental removal requires a "
+ "kernel device name, not a file: %s\n", devname);
+ return 1;
+ }
+ ent = mdstat_by_component(devname);
+ if (!ent) {
+ if (verbose >= 0)
+ pr_err("%s does not appear to be a component "
+ "of any array\n", devname);
+ return 1;
+ }
+ sysfs_init(&mdi, -1, ent->devnm);
+ if (sysfs_get_str(&mdi, NULL, "array_state",
+ buf, sizeof(buf)) > 0) {
+ if (strncmp(buf, "active", 6) == 0 ||
+ strncmp(buf, "clean", 5) == 0)
+ sysfs_set_str(&mdi, NULL,
+ "array_state", "read-auto");
+ }
+ mdfd = open_dev(ent->devnm);
+ if (mdfd < 0) {
+ if (verbose >= 0)
+ pr_err("Cannot open array %s!!\n", ent->dev);
+ free_mdstat(ent);
+ return 1;
+ }
+
+ if (id_path) {
+ struct map_ent *map = NULL, *me;
+ me = map_by_devnm(&map, ent->devnm);
+ if (me)
+ policy_save_path(id_path, me);
+ map_free(map);
+ }
+
+ memset(&devlist, 0, sizeof(devlist));
+ devlist.devname = devname;
+ devlist.disposition = 'f';
+ /* for a container, we must fail each member array */
+ if (ent->metadata_version &&
+ strncmp(ent->metadata_version, "external:", 9) == 0) {
+ struct mdstat_ent *mdstat = mdstat_read(0, 0);
+ struct mdstat_ent *memb;
+ for (memb = mdstat ; memb ; memb = memb->next)
+ if (is_container_member(memb, ent->dev)) {
+ int subfd = open_dev(memb->devnm);
+ if (subfd >= 0) {
+ rv |= Manage_subdevs(
+ memb->dev, subfd,
+ &devlist, verbose, 0,
+ NULL, 0);
+ close(subfd);
+ }
+ }
+ free_mdstat(mdstat);
+ } else
+ rv |= Manage_subdevs(ent->dev, mdfd, &devlist,
+ verbose, 0, NULL, 0);
+ if (rv & 2) {
+ /* Failed due to EBUSY, try to stop the array.
+ * Give udisks a chance to unmount it first.
+ */
+ int devid = devnm2devid(ent->devnm);
+ run_udisks("--unmount", map_dev(major(devid),minor(devid), 0));
+ rv = Manage_stop(ent->dev, mdfd, verbose, 1);
+ if (rv)
+ /* At least we can try to trigger a 'remove' */
+ sysfs_uevent(&mdi, "remove");
+ if (verbose) {
+ if (rv)
+ pr_err("Fail to stop %s too.\n", ent->devnm);
+ }
+ } else {
+ devlist.disposition = 'r';
+ rv = Manage_subdevs(ent->dev, mdfd, &devlist,
+ verbose, 0, NULL, 0);
+ }
+ close(mdfd);
+ free_mdstat(ent);
+ return rv;
+}
diff --git a/Kill.c b/Kill.c
new file mode 100644
index 00000000..f2fdb856
--- /dev/null
+++ b/Kill.c
@@ -0,0 +1,146 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ *
+ * Added by Dale Stephenson
+ * steph@snapserver.com
+ */
+
+#include "mdadm.h"
+#include "md_u.h"
+#include "md_p.h"
+
+ int Kill(char *dev, struct supertype *st, int force, int verbose, int noexcl)
+ {
+ 	/*
+ 	 * Replace the md superblock on 'dev' with a freshly initialised
+ 	 * one.  This throws the existing metadata away, so it is
+ 	 * definitely not a safe operation.
+ 	 *
+ 	 * The device is opened O_EXCL unless 'noexcl' or 'force' is set.
+ 	 * When 'st' is NULL the metadata handler is guessed from the
+ 	 * device.  With 'force', a load_super() result of 2 or more does
+ 	 * not prevent the superblock from being rewritten.
+ 	 *
+ 	 * Returns:
+ 	 *   0 - a zero superblock was successfully written out
+ 	 *   1 - failed to write the zero superblock
+ 	 *   2 - failed to open the device or find a superblock.
+ 	 */
+ 	int open_flags = O_RDWR;
+ 	int loaded;
+ 	int fd;
+
+ 	if (!force && !noexcl)
+ 		open_flags |= O_EXCL;
+
+ 	fd = open(dev, open_flags);
+ 	if (fd < 0) {
+ 		if (verbose >= 0)
+ 			pr_err("Couldn't open %s for write - not zeroing\n",
+ 			       dev);
+ 		return 2;
+ 	}
+
+ 	if (!st)
+ 		st = guess_super(fd);
+ 	if (!st || !st->ss->init_super) {
+ 		if (verbose >= 0)
+ 			pr_err("Unrecognised md component device - %s\n", dev);
+ 		close(fd);
+ 		return 2;
+ 	}
+
+ 	st->ignore_hw_compat = 1;
+ 	loaded = st->ss->load_super(st, fd, dev);
+ 	if (loaded != 0 && !(force && loaded >= 2)) {
+ 		/* nothing usable was loaded and we may not force it:
+ 		 * hand the load_super() result back unchanged
+ 		 */
+ 		close(fd);
+ 		return loaded;
+ 	}
+
+ 	st->ss->free_super(st);
+ 	st->ss->init_super(st, NULL, 0, "", NULL, NULL,
+ 			   INVALID_SECTORS);
+ 	if (st->ss->store_super(st, fd)) {
+ 		if (verbose >= 0)
+ 			pr_err("Could not zero superblock on %s\n",
+ 			       dev);
+ 		close(fd);
+ 		return 1;
+ 	}
+
+ 	/* written successfully even though the load had failed */
+ 	if (loaded && verbose >= 0)
+ 		pr_err("superblock zeroed anyway\n");
+
+ 	close(fd);
+ 	return 0;
+ }
+
+ int Kill_subarray(char *dev, char *subarray, int verbose)
+ {
+ 	/* Delete a subarray out of a container.  The subarray must be
+ 	 * inactive, and the subarray string must be a subarray index
+ 	 * number.
+ 	 *
+ 	 * Return value:
+ 	 * 0 = successfully deleted subarray from all container members
+ 	 * 1 = failed to sync metadata to one or more devices
+ 	 * 2 = failed to find the container, subarray, or other resource
+ 	 *     issue
+ 	 */
+ 	struct supertype container;
+ 	struct supertype *st = &container;
+ 	int quiet = verbose < 0;
+ 	int result = 2;
+ 	int fd;
+
+ 	memset(&container, 0, sizeof(container));
+
+ 	fd = open_subarray(dev, subarray, st, quiet);
+ 	if (fd < 0)
+ 		return result;
+
+ 	if (st->ss->kill_subarray == NULL) {
+ 		if (!quiet)
+ 			pr_err("Operation not supported for %s metadata\n",
+ 			       st->ss->name);
+ 		goto release;
+ 	}
+
+ 	if (is_subarray_active(subarray, st->devnm)) {
+ 		if (!quiet)
+ 			pr_err("Subarray-%s still active, aborting\n",
+ 			       subarray);
+ 		goto release;
+ 	}
+
+ 	/* with mdmon running, updates are queued on st->updates and
+ 	 * flushed below instead of being synced directly
+ 	 */
+ 	if (mdmon_running(st->devnm))
+ 		st->update_tail = &st->updates;
+
+ 	/* ok we've found our victim, drop the axe */
+ 	result = st->ss->kill_subarray(st);
+ 	if (result != 0) {
+ 		if (!quiet)
+ 			pr_err("Failed to delete subarray-%s from %s\n",
+ 			       subarray, dev);
+ 		goto release;
+ 	}
+
+ 	/* FIXME these routines do not report success/failure */
+ 	if (st->update_tail != NULL)
+ 		flush_metadata_updates(st);
+ 	else
+ 		st->ss->sync_metadata(st);
+
+ 	if (!quiet)
+ 		pr_err("Deleted subarray-%s from %s, UUIDs may have changed\n",
+ 		       subarray, dev);
+
+ 	result = 0;
+
+  release:
+ 	st->ss->free_super(st);
+ 	close(fd);
+
+ 	return result;
+ }
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..2a061c8a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,334 @@
+#
+# mdadm - manage Linux "md" devices aka RAID arrays.
+#
+# Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au>
+# Copyright (C) 2013 Neil Brown <neilb@suse.de>
+#
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Author: Neil Brown
+# Email: <neilb@cse.unsw.edu.au>
+# Paper: Neil Brown
+# School of Computer Science and Engineering
+# The University of New South Wales
+# Sydney, 2052
+# Australia
+#
+
+# define "CXFLAGS" to give extra flags to CC.
+# e.g. make CXFLAGS=-O to optimise
+TCC = tcc
+UCLIBC_GCC = $(shell for nm in i386-uclibc-linux-gcc i386-uclibc-gcc; do which $$nm > /dev/null && { echo $$nm ; exit; } ; done; echo false No uclibc found )
+#DIET_GCC = diet gcc
+# sorry, but diet-libc doesn't know about posix_memalign,
+# so we cannot use it any more.
+DIET_GCC = gcc -DHAVE_STDINT_H
+
+KLIBC=/home/src/klibc/klibc-0.77
+
+KLIBC_GCC = gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32
+
+CC = $(CROSS_COMPILE)gcc
+CXFLAGS ?= -ggdb
+CWFLAGS = -Wall -Wstrict-prototypes -Wextra -Wno-unused-parameter
+ifdef WARN_UNUSED
+CWFLAGS += -Wp,-D_FORTIFY_SOURCE=2 -O3
+endif
+
+ifdef DEBIAN
+CPPFLAGS += -DDEBIAN
+endif
+ifdef DEFAULT_OLD_METADATA
+ CPPFLAGS += -DDEFAULT_OLD_METADATA
+ DEFAULT_METADATA=0.90
+else
+ DEFAULT_METADATA=1.2
+endif
+CPPFLAGS += -DBINDIR=\"$(BINDIR)\"
+
+PKG_CONFIG ?= pkg-config
+
+SYSCONFDIR = /etc
+CONFFILE = $(SYSCONFDIR)/mdadm/mdadm.conf
+CONFFILE2 = $(SYSCONFDIR)/mdadm.conf
+MAILCMD =/usr/sbin/sendmail -t
+CONFFILEFLAGS = -DCONFFILE=\"$(CONFFILE)\" -DCONFFILE2=\"$(CONFFILE2)\"
+ # Both MAP_DIR and MDMON_DIR should be somewhere that persists across the
+ # pivotroot from early boot to late boot.
+ # /run is best, but for distros that don't support that,
+ # /dev can work, in which case you probably want /dev/.mdadm
+RUN_DIR=/run/mdadm
+CHECK_RUN_DIR=1
+MAP_DIR=$(RUN_DIR)
+MAP_FILE = map
+MAP_PATH = $(MAP_DIR)/$(MAP_FILE)
+MDMON_DIR = $(RUN_DIR)
+# place for autoreplace cookies
+FAILED_SLOTS_DIR = $(RUN_DIR)/failed-slots
+SYSTEMD_DIR=/lib/systemd/system
+DIRFLAGS = -DMAP_DIR=\"$(MAP_DIR)\" -DMAP_FILE=\"$(MAP_FILE)\"
+DIRFLAGS += -DMDMON_DIR=\"$(MDMON_DIR)\"
+DIRFLAGS += -DFAILED_SLOTS_DIR=\"$(FAILED_SLOTS_DIR)\"
+CFLAGS = $(CWFLAGS) $(CXFLAGS) -DSendmail=\""$(MAILCMD)"\" $(CONFFILEFLAGS) $(DIRFLAGS)
+
+ VERSION = $(shell [ -d .git ] && git describe HEAD | sed 's/mdadm-//')
+ # VERS_DATE is the date of the last commit with an English ordinal suffix.
+ # 'date' prints "<DD>th"; the sed expressions turn that into 1st/2nd/3rd
+ # where needed and then put 11th/12th/13th back.
+ VERS_DATE = $(shell [ -d .git ] && date --date="`git log -n1 --format=format:%cd --date=short`" '+%0dth %B %Y' | sed -e 's/1th/1st/' -e 's/2th/2nd/' -e 's/3th/3rd/' -e 's/11st/11th/' -e 's/12nd/12th/' -e 's/13rd/13th/')
+ DVERS = $(if $(VERSION),-DVERSION=\"$(VERSION)\",)
+ DDATE = $(if $(VERS_DATE),-DVERS_DATE="\"$(VERS_DATE)\"",)
+ CFLAGS += $(DVERS) $(DDATE)
+
+# The glibc TLS ABI requires applications that call clone(2) to set up
+# TLS data structures, use pthreads until mdmon implements this support
+USE_PTHREADS = 1
+ifdef USE_PTHREADS
+CFLAGS += -DUSE_PTHREADS
+MON_LDFLAGS += -pthread
+endif
+
+# If you want a static binary, you might uncomment these
+# LDFLAGS = -static
+# STRIP = -s
+
+INSTALL = /usr/bin/install
+DESTDIR =
+BINDIR = /sbin
+MANDIR = /usr/share/man
+MAN4DIR = $(MANDIR)/man4
+MAN5DIR = $(MANDIR)/man5
+MAN8DIR = $(MANDIR)/man8
+
+UDEVDIR := $(shell $(PKG_CONFIG) --variable=udevdir udev 2>/dev/null)
+ifndef UDEVDIR
+ UDEVDIR = /lib/udev
+endif
+
+OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o util.o maps.o lib.o \
+ Manage.o Assemble.o Build.o \
+ Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
+ Incremental.o Dump.o \
+ mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
+ super-mbr.o super-gpt.o \
+ restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o xmalloc.o \
+ platform-intel.o probe_roms.o
+
+CHECK_OBJS = restripe.o sysfs.o maps.o lib.o xmalloc.o dlink.o
+
+SRCS = $(patsubst %.o,%.c,$(OBJS))
+
+INCL = mdadm.h part.h bitmap.h
+
+MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \
+ policy.o lib.o \
+ Kill.o sg_io.o dlink.o ReadMe.o super-intel.o \
+ super-mbr.o super-gpt.o \
+ super-ddf.o sha1.o crc32.o msg.o bitmap.o xmalloc.o \
+ platform-intel.o probe_roms.o
+
+MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS))
+
+STATICSRC = pwgr.c
+STATICOBJS = pwgr.o
+
+ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c policy.c dlink.c util.c \
+ maps.c lib.c xmalloc.c \
+ super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \
+ platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c
+ASSEMBLE_AUTO_SRCS := mdopen.c
+ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE
+ifdef MDASSEMBLE_AUTO
+ASSEMBLE_SRCS += $(ASSEMBLE_AUTO_SRCS)
+ASSEMBLE_FLAGS += -DMDASSEMBLE_AUTO
+endif
+
+all : check_rundir mdadm mdmon
+man : mdadm.man md.man mdadm.conf.man mdmon.man raid6check.man
+
+check_rundir:
+ @if [ ! -d "$(dir $(RUN_DIR))" -a "$(CHECK_RUN_DIR)" = 1 ]; then \
+ echo "***** Parent of $(RUN_DIR) does not exist. Maybe set different RUN_DIR="; \
+ echo "***** e.g. make RUN_DIR=/dev/.mdadm" ; \
+ echo "***** or set CHECK_RUN_DIR=0"; exit 1; \
+ fi
+
+everything: all mdadm.static swap_super test_stripe raid6check \
+ mdassemble mdassemble.auto mdassemble.static mdassemble.man \
+ mdadm.Os mdadm.O2 man
+everything-test: all mdadm.static swap_super test_stripe \
+ mdassemble.auto mdassemble.static mdassemble.man \
+ mdadm.Os mdadm.O2 man
+# mdadm.uclibc and mdassemble.uclibc don't work on x86-64
+# mdadm.tcc doesn't work..
+
+mdadm : $(OBJS)
+ $(CC) $(CFLAGS) $(LDFLAGS) -o mdadm $(OBJS) $(LDLIBS)
+
+mdadm.static : $(OBJS) $(STATICOBJS)
+ $(CC) $(CFLAGS) $(LDFLAGS) -static -o mdadm.static $(OBJS) $(STATICOBJS)
+
+mdadm.tcc : $(SRCS) $(INCL)
+ $(TCC) -o mdadm.tcc $(SRCS)
+
+mdadm.klibc : $(SRCS) $(INCL)
+ rm -f $(OBJS)
+ $(CC) -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) $(SRCS)
+
+mdadm.Os : $(SRCS) $(INCL)
+ $(CC) -o mdadm.Os $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -DHAVE_STDINT_H -Os $(SRCS)
+
+mdadm.O2 : $(SRCS) $(INCL) mdmon.O2
+ $(CC) -o mdadm.O2 $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -DHAVE_STDINT_H -O2 -D_FORTIFY_SOURCE=2 $(SRCS)
+
+mdmon.O2 : $(MON_SRCS) $(INCL) mdmon.h
+ $(CC) -o mdmon.O2 $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -DHAVE_STDINT_H -O2 -D_FORTIFY_SOURCE=2 $(MON_SRCS)
+
+# use '-z now' to guarantee no dynamic linker interactions with the monitor thread
+mdmon : $(MON_OBJS)
+ $(CC) $(CFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -Wl,-z,now -o mdmon $(MON_OBJS) $(LDLIBS)
+msg.o: msg.c msg.h
+
+test_stripe : restripe.c xmalloc.o mdadm.h
+ $(CC) $(CXFLAGS) $(LDFLAGS) -o test_stripe xmalloc.o -DMAIN restripe.c
+
+raid6check : raid6check.o mdadm.h $(CHECK_OBJS)
+ $(CC) $(CXFLAGS) $(LDFLAGS) -o raid6check raid6check.o $(CHECK_OBJS)
+
+mdassemble : $(ASSEMBLE_SRCS) $(INCL)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS) $(STATICSRC)
+
+mdassemble.diet : $(ASSEMBLE_SRCS) $(INCL)
+ rm -f $(OBJS)
+ $(DIET_GCC) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS) $(STATICSRC)
+
+mdassemble.static : $(ASSEMBLE_SRCS) $(INCL)
+ rm -f $(OBJS)
+ $(CC) $(LDFLAGS) $(CPPFLAGS) $(ASSEMBLE_FLAGS) -static -DHAVE_STDINT_H -o mdassemble.static $(ASSEMBLE_SRCS) $(STATICSRC)
+
+mdassemble.auto : $(ASSEMBLE_SRCS) $(INCL) $(ASSEMBLE_AUTO_SRCS)
+ rm -f mdassemble.static
+ $(MAKE) MDASSEMBLE_AUTO=1 mdassemble.static
+ mv mdassemble.static mdassemble.auto
+
+ # Static mdassemble built with the uClibc compiler selected by UCLIBC_GCC.
+ # Stale objects are removed first, as the other mdassemble.* rules do.
+ mdassemble.uclibc : $(ASSEMBLE_SRCS) $(INCL)
+ 	rm -f $(OBJS)
+ 	$(UCLIBC_GCC) $(ASSEMBLE_FLAGS) -DUCLIBC -DHAVE_STDINT_H -static -o mdassemble.uclibc $(ASSEMBLE_SRCS) $(STATICSRC)
+
+# This doesn't work
+mdassemble.klibc : $(ASSEMBLE_SRCS) $(INCL)
+ rm -f $(OBJS)
+ $(KLIBC_GCC) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS)
+
+mdadm.8 : mdadm.8.in
+ sed -e 's/{DEFAULT_METADATA}/$(DEFAULT_METADATA)/g' \
+ -e 's,{MAP_PATH},$(MAP_PATH),g' mdadm.8.in > mdadm.8
+
+mdadm.man : mdadm.8
+ man -l mdadm.8 > mdadm.man
+
+mdmon.man : mdmon.8
+ man -l mdmon.8 > mdmon.man
+
+md.man : md.4
+ man -l md.4 > md.man
+
+mdadm.conf.man : mdadm.conf.5
+ man -l mdadm.conf.5 > mdadm.conf.man
+
+mdassemble.man : mdassemble.8
+ man -l mdassemble.8 > mdassemble.man
+
+raid6check.man : raid6check.8
+ man -l raid6check.8 > raid6check.man
+
+$(OBJS) : $(INCL) mdmon.h
+$(MON_OBJS) : $(INCL) mdmon.h
+
+sha1.o : sha1.c sha1.h md5.h
+ $(CC) $(CFLAGS) -DHAVE_STDINT_H -o sha1.o -c sha1.c
+
+install : mdadm mdmon install-man install-udev
+ $(INSTALL) -D $(STRIP) -m 755 mdadm $(DESTDIR)$(BINDIR)/mdadm
+ $(INSTALL) -D $(STRIP) -m 755 mdmon $(DESTDIR)$(BINDIR)/mdmon
+
+install-static : mdadm.static install-man
+ $(INSTALL) -D $(STRIP) -m 755 mdadm.static $(DESTDIR)$(BINDIR)/mdadm
+
+install-tcc : mdadm.tcc install-man
+ $(INSTALL) -D $(STRIP) -m 755 mdadm.tcc $(DESTDIR)$(BINDIR)/mdadm
+
+install-uclibc : mdadm.uclibc install-man
+ $(INSTALL) -D $(STRIP) -m 755 mdadm.uclibc $(DESTDIR)$(BINDIR)/mdadm
+
+install-klibc : mdadm.klibc install-man
+ $(INSTALL) -D $(STRIP) -m 755 mdadm.klibc $(DESTDIR)$(BINDIR)/mdadm
+
+install-man: mdadm.8 md.4 mdadm.conf.5 mdmon.8
+ $(INSTALL) -D -m 644 mdadm.8 $(DESTDIR)$(MAN8DIR)/mdadm.8
+ $(INSTALL) -D -m 644 mdmon.8 $(DESTDIR)$(MAN8DIR)/mdmon.8
+ $(INSTALL) -D -m 644 md.4 $(DESTDIR)$(MAN4DIR)/md.4
+ $(INSTALL) -D -m 644 mdadm.conf.5 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5
+
+install-udev: udev-md-raid-arrays.rules udev-md-raid-assembly.rules
+ @for file in 63-md-raid-arrays.rules 64-md-raid-assembly.rules ; \
+ do sed -e 's,BINDIR,$(BINDIR),g' udev-$${file#??-} > .install.tmp.1 && \
+ echo $(INSTALL) -D -m 644 udev-$${file#??-} $(DESTDIR)$(UDEVDIR)/rules.d/$$file ; \
+ $(INSTALL) -D -m 644 .install.tmp.1 $(DESTDIR)$(UDEVDIR)/rules.d/$$file ; \
+ rm -f .install.tmp.1; \
+ done
+
+install-systemd: systemd/mdmon@.service
+ @for file in mdmon@.service mdmonitor.service mdadm-last-resort@.timer \
+ mdadm-last-resort@.service mdadm-grow-continue@.service; \
+ do sed -e 's,BINDIR,$(BINDIR),g' systemd/$$file > .install.tmp.2 && \
+ echo $(INSTALL) -D -m 644 systemd/$$file $(DESTDIR)$(SYSTEMD_DIR)/$$file ; \
+ $(INSTALL) -D -m 644 .install.tmp.2 $(DESTDIR)$(SYSTEMD_DIR)/$$file ; \
+ rm -f .install.tmp.2; \
+ done
+ @for file in mdadm.shutdown ; \
+ do sed -e 's,BINDIR,$(BINDIR),g' systemd/$$file > .install.tmp.3 && \
+ echo $(INSTALL) -D -m 755 systemd/$$file $(DESTDIR)$(SYSTEMD_DIR)-shutdown/$$file ; \
+ $(INSTALL) -D -m 755 .install.tmp.3 $(DESTDIR)$(SYSTEMD_DIR)-shutdown/$$file ; \
+ rm -f .install.tmp.3; \
+ done
+ if [ -f /etc/SuSE-release -o -n "$(SUSE)" ] ;then $(INSTALL) -D -m 755 systemd/SUSE-mdadm_env.sh $(DESTDIR)$(SYSTEMD_DIR)/../scripts/mdadm_env.sh ;fi
+
+ # Remove the manual pages and the binaries that 'install' puts in place
+ # (mdadm and mdmon).  The udev rules from 'install-udev' are left alone.
+ uninstall:
+ 	rm -f $(DESTDIR)$(MAN8DIR)/mdadm.8 $(DESTDIR)$(MAN8DIR)/mdmon.8 $(DESTDIR)$(MAN4DIR)/md.4 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5 $(DESTDIR)$(BINDIR)/mdadm $(DESTDIR)$(BINDIR)/mdmon
+
+test: mdadm mdmon test_stripe swap_super raid6check
+ @echo "Please run './test' as root"
+
+clean :
+ rm -f mdadm mdmon $(OBJS) $(MON_OBJS) $(STATICOBJS) core *.man \
+ mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt .merge_file_* \
+ mdadm.Os mdadm.O2 mdmon.O2 \
+ mdassemble mdassemble.static mdassemble.auto mdassemble.uclibc \
+ mdassemble.klibc swap_super \
+ init.cpio.gz mdadm.uclibc.static test_stripe raid6check raid6check.o mdmon \
+ mdadm.8
+
+dist : clean
+ ./makedist
+
+testdist : everything-test clean
+ ./makedist test
+
+TAGS :
+ etags *.h *.c
+
+DISTRO_MAKEFILE := $(wildcard distropkg/Makefile)
+ifdef DISTRO_MAKEFILE
+include $(DISTRO_MAKEFILE)
+endif
diff --git a/Manage.c b/Manage.c
new file mode 100644
index 00000000..206f34ef
--- /dev/null
+++ b/Manage.c
@@ -0,0 +1,1691 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include "md_u.h"
+#include "md_p.h"
+#include <ctype.h>
+
+#define REGISTER_DEV _IO (MD_MAJOR, 1)
+#define START_MD _IO (MD_MAJOR, 2)
+#define STOP_MD _IO (MD_MAJOR, 3)
+
+ int Manage_ro(char *devname, int fd, int readonly)
+ {
+ 	/* switch to readonly or rw
+ 	 *
+ 	 * requires >= 0.90.0
+ 	 * first check that array is running
+ 	 * use RESTART_ARRAY_RW or STOP_ARRAY_RO
+ 	 *
+ 	 * readonly > 0 asks for read-only.  For an externally-managed
+ 	 * subarray any other value asks for read-write; for other arrays
+ 	 * only readonly < 0 does, and 0 changes nothing.
+ 	 *
+ 	 * Returns 0 on success, 1 on failure.
+ 	 */
+ 	mdu_array_info_t array;
+ #ifndef MDASSEMBLE
+ 	struct mdinfo *mdi;
+ #endif
+ 	int rv = 0;
+
+ 	if (md_get_version(fd) < 9000) {
+ 		pr_err("need md driver version 0.90.0 or later\n");
+ 		return 1;
+ 	}
+ #ifndef MDASSEMBLE
+ 	/* If this is an externally-managed array, we need to modify the
+ 	 * metadata_version so that mdmon doesn't undo our change.
+ 	 */
+ 	mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION);
+ 	if (mdi &&
+ 	    mdi->array.major_version == -1 &&
+ 	    is_subarray(mdi->text_version)) {
+ 		char vers[64];
+ 		strcpy(vers, "external:");
+ 		strcat(vers, mdi->text_version);
+ 		if (readonly > 0) {
+ 			/* We set readonly ourselves. */
+ 			vers[9] = '-';
+ 			sysfs_set_str(mdi, NULL, "metadata_version", vers);
+
+ 			close(fd);
+ 			if (sysfs_set_str(mdi, NULL, "array_state",
+ 					  "readonly") < 0) {
+ 				pr_err("failed to set readonly for %s: %s\n",
+ 				       devname, strerror(errno));
+
+ 				/* put the original metadata_version back */
+ 				vers[9] = mdi->text_version[0];
+ 				sysfs_set_str(mdi, NULL, "metadata_version", vers);
+ 				/* This must reach the function-level rv:
+ 				 * a block-local 'rv' here would shadow it
+ 				 * and the failure would be returned as 0.
+ 				 */
+ 				rv = 1;
+ 				goto out;
+ 			}
+ 		} else {
+ 			char *cp;
+ 			/* We cannot set read/write - must signal mdmon */
+ 			vers[9] = '/';
+ 			sysfs_set_str(mdi, NULL, "metadata_version", vers);
+
+ 			/* cut vers+10 at the next '/' before pinging */
+ 			cp = strchr(vers+10, '/');
+ 			if (cp)
+ 				*cp = 0;
+ 			ping_monitor(vers+10);
+ 			if (mdi->array.level <= 0)
+ 				sysfs_set_str(mdi, NULL, "array_state", "active");
+ 		}
+ 		goto out;
+ 	}
+ #endif
+ 	if (ioctl(fd, GET_ARRAY_INFO, &array)) {
+ 		pr_err("%s does not appear to be active.\n",
+ 		       devname);
+ 		rv = 1;
+ 		goto out;
+ 	}
+
+ 	if (readonly > 0) {
+ 		if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
+ 			pr_err("failed to set readonly for %s: %s\n",
+ 			       devname, strerror(errno));
+ 			rv = 1;
+ 			goto out;
+ 		}
+ 	} else if (readonly < 0) {
+ 		if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
+ 			pr_err("failed to set writable for %s: %s\n",
+ 			       devname, strerror(errno));
+ 			rv = 1;
+ 			goto out;
+ 		}
+ 	}
+ out:
+ #ifndef MDASSEMBLE
+ 	if (mdi)
+ 		sysfs_free(mdi);
+ #endif
+ 	return rv;
+ }
+
+#ifndef MDASSEMBLE
+
+ static void remove_devices(char *devnm, char *path)
+ {
+ 	/*
+ 	 * Remove names at 'path' - possibly with
+ 	 * partition suffixes - which link to the 'standard'
+ 	 * name for devnm.  These were probably created
+ 	 * by mdadm when the array was assembled.
+ 	 *
+ 	 * The bare name and partition numbers 1 to 15 are tried.  A name
+ 	 * is only unlinked when it is a symlink whose target is exactly
+ 	 * "/dev/<devnm>" (with "p<N>" appended for partitions).
+ 	 * Nothing is done when 'path' is NULL.
+ 	 */
+ 	char base[40];
+ 	char *path2;
+ 	char link[1024];
+ 	int n;
+ 	int part;
+ 	int ends_in_digit;
+ 	char *be;
+ 	char *pe;
+
+ 	if (!path)
+ 		return;
+
+ 	sprintf(base, "/dev/%s", devnm);
+ 	be = base + strlen(base);
+
+ 	/* room for the longest suffix written below ("p15") */
+ 	path2 = xmalloc(strlen(path)+20);
+ 	strcpy(path2, path);
+ 	pe = path2 + strlen(path2);
+
+ 	/* The last character of 'path' never changes, so decide once
+ 	 * whether partition names need a 'p' separator.  An empty path
+ 	 * has no last character to look at, and isdigit() must not be
+ 	 * handed a negative char value.
+ 	 */
+ 	ends_in_digit = pe > path2 && isdigit((unsigned char)pe[-1]);
+
+ 	for (part = 0; part < 16; part++) {
+ 		if (part) {
+ 			sprintf(be, "p%d", part);
+
+ 			if (ends_in_digit)
+ 				sprintf(pe, "p%d", part);
+ 			else
+ 				sprintf(pe, "%d", part);
+ 		}
+ 		/* readlink() does not NUL-terminate, hence the
+ 		 * length check plus strncmp()
+ 		 */
+ 		n = readlink(path2, link, sizeof(link));
+ 		if (n > 0 && (int)strlen(base) == n &&
+ 		    strncmp(link, base, n) == 0)
+ 			unlink(path2);
+ 	}
+ 	free(path2);
+ }
+
+ int Manage_run(char *devname, int fd, struct context *c)
+ {
+ 	/* Start an array that has already been configured by handing
+ 	 * its device name to IncrementalScan().
+ 	 * Requires md driver >= 0.90.0.
+ 	 * Returns 1 if the driver is too old or the name cannot be
+ 	 * found, otherwise whatever IncrementalScan() returns.
+ 	 */
+ 	char name_copy[32];
+ 	char *found;
+
+ 	if (md_get_version(fd) < 9000) {
+ 		pr_err("need md driver version 0.90.0 or later\n");
+ 		return 1;
+ 	}
+
+ 	found = fd2devnm(fd);
+ 	if (found == NULL) {
+ 		pr_err("Cannot find %s in sysfs!!\n", devname);
+ 		return 1;
+ 	}
+
+ 	/* work on a private copy of the name */
+ 	strcpy(name_copy, found);
+ 	return IncrementalScan(c, name_copy);
+ }
+
+int Manage_stop(char *devname, int fd, int verbose, int will_retry)
+{
+ /* Stop the array. Array must already be configured
+ * 'will_retry' means that error messages are not wanted.
+ */
+ int rv = 0;
+ struct map_ent *map = NULL;
+ struct mdinfo *mdi;
+ char devnm[32];
+ char container[32];
+ int err;
+ int count;
+ char buf[32];
+ unsigned long long rd1, rd2;
+
+ if (will_retry && verbose == 0)
+ verbose = -1;
+
+ if (md_get_version(fd) < 9000) {
+ if (ioctl(fd, STOP_MD, 0) == 0)
+ return 0;
+ pr_err("stopping device %s "
+ "failed: %s\n",
+ devname, strerror(errno));
+ return 1;
+ }
+
+ /* If this is an mdmon managed array, just write 'inactive'
+ * to the array state and let mdmon clear up.
+ */
+ strcpy(devnm, fd2devnm(fd));
+ /* Get EXCL access first. If this fails, then attempting
+ * to stop is probably a bad idea.
+ */
+ mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION);
+ if (mdi && is_subarray(mdi->text_version)) {
+ char *sl;
+ strncpy(container, mdi->text_version+1, sizeof(container));
+ container[sizeof(container)-1] = 0;
+ sl = strchr(container, '/');
+ if (sl)
+ *sl = 0;
+ } else
+ container[0] = 0;
+ close(fd);
+ count = 5;
+ while (((fd = ((devnm[0] == '/')
+ ?open(devname, O_RDONLY|O_EXCL)
+ :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0
+ || strcmp(fd2devnm(fd), devnm) != 0)
+ && container[0]
+ && mdmon_running(container)
+ && count) {
+ if (fd >= 0)
+ close(fd);
+ flush_mdmon(container);
+ count--;
+ }
+ if (fd < 0 || strcmp(fd2devnm(fd), devnm) != 0) {
+ if (fd >= 0)
+ close(fd);
+ if (verbose >= 0)
+ pr_err("Cannot get exclusive access to %s:"
+ "Perhaps a running "
+ "process, mounted filesystem "
+ "or active volume group?\n",
+ devname);
+ return 1;
+ }
+ if (mdi &&
+ mdi->array.level > 0 &&
+ is_subarray(mdi->text_version)) {
+ int err;
+ /* This is mdmon managed. */
+ close(fd);
+
+ /* As we have an O_EXCL open, any use of the device
+ * which blocks STOP_ARRAY is probably a transient use,
+ * so it is reasonable to retry for a while - 5 seconds.
+ */
+ count = 25;
+ while (count &&
+ (err = sysfs_set_str(mdi, NULL,
+ "array_state",
+ "inactive")) < 0
+ && errno == EBUSY) {
+ usleep(200000);
+ count--;
+ }
+ if (err) {
+ if (verbose >= 0)
+ pr_err("failed to stop array %s: %s\n",
+ devname, strerror(errno));
+ rv = 1;
+ goto out;
+ }
+
+ /* Give monitor a chance to act */
+ ping_monitor(mdi->text_version);
+
+ fd = open_dev_excl(devnm);
+ if (fd < 0) {
+ if (verbose >= 0)
+ pr_err("failed to completely stop %s"
+ ": Device is busy\n",
+ devname);
+ rv = 1;
+ goto out;
+ }
+ } else if (mdi &&
+ mdi->array.major_version == -1 &&
+ mdi->array.minor_version == -2 &&
+ !is_subarray(mdi->text_version)) {
+ struct mdstat_ent *mds, *m;
+ /* container, possibly mdmon-managed.
+ * Make sure mdmon isn't opening it, which
+ * would interfere with the 'stop'
+ */
+ ping_monitor(mdi->sys_name);
+
+ /* now check that there are no existing arrays
+ * which are members of this array
+ */
+ mds = mdstat_read(0, 0);
+ for (m = mds; m; m = m->next)
+ if (m->metadata_version &&
+ strncmp(m->metadata_version, "external:", 9)==0 &&
+ metadata_container_matches(m->metadata_version+9,
+ devnm)) {
+ if (verbose >= 0)
+ pr_err("Cannot stop container %s: "
+ "member %s still active\n",
+ devname, m->dev);
+ free_mdstat(mds);
+ rv = 1;
+ goto out;
+ }
+ }
+
+ /* If the array is undergoing a reshape which changes the number
+ * of devices, then it would be nice to stop it at a point where
+ * it has completed a full number of stripes in both old and
+ * new layouts as this will allow the reshape to be reverted.
+ * So if 'sync_action' is "reshape" and 'raid_disks' shows two
+ * different numbers, then
+ * - freeze reshape
+ * - set sync_max to next multiple of both data_disks and
+ * chunk sizes (or next but one)
+ * - unfreeze reshape
+ * - wait on 'sync_completed' for that point to be reached.
+ */
+ if (mdi && (mdi->array.level >= 4 && mdi->array.level <= 6) &&
+ sysfs_attribute_available(mdi, NULL, "sync_action") &&
+ sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
+ sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
+ strcmp(buf, "reshape\n") == 0 &&
+ sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2 &&
+ sysfs_set_str(mdi, NULL, "sync_action", "frozen") == 0) {
+ /* Array is frozen */
+ unsigned long long position, curr;
+ unsigned long long chunk1, chunk2;
+ unsigned long long rddiv, chunkdiv;
+ unsigned long long sectors;
+ unsigned long long sync_max, old_sync_max;
+ unsigned long long completed;
+ int backwards = 0;
+ int delay;
+ int scfd;
+
+ rd1 -= mdi->array.level == 6 ? 2 : 1;
+ rd2 -= mdi->array.level == 6 ? 2 : 1;
+ sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
+ if (strncmp(buf, "back", 4) == 0)
+ backwards = 1;
+ sysfs_get_ll(mdi, NULL, "reshape_position", &position);
+ sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
+ chunk1 /= 512;
+ chunk2 /= 512;
+ rddiv = GCD(rd1, rd2);
+ chunkdiv = GCD(chunk1, chunk2);
+ sectors = (chunk1/chunkdiv) * chunk2 * (rd1/rddiv) * rd2;
+
+ if (backwards) {
+ /* Need to subtract 'reshape_position' from
+ * array size to get equivalent of sync_max.
+ * Size calculation based on raid5_size in kernel.
+ */
+ unsigned long long size = mdi->component_size;
+ size &= ~(chunk1-1);
+ size &= ~(chunk2-1);
+ /* rd1 must be smaller */
+ position = (position / sectors - 1) * sectors;
+ sync_max = size - position/rd1;
+ } else {
+ position = (position / sectors + 2) * sectors;
+ sync_max = position/rd1;
+ }
+ if (sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) < 0)
+ old_sync_max = mdi->component_size;
+ /* Must not advance sync_max as that could confuse
+ * the reshape monitor */
+ if (sync_max < old_sync_max)
+ sysfs_set_num(mdi, NULL, "sync_max", sync_max);
+ sysfs_set_str(mdi, NULL, "sync_action", "idle");
+
+ /* That should have set things going again. Now we
+ * wait a little while (3 second max) for sync_completed
+ * to reach the target.
+ * The reshape process can block for 500msec if
+ * the sync speed limit is hit, so we need to wait
+ * a lot longer than that. 1 second is usually
+ * enough. 3 is safe.
+ */
+ delay = 3000;
+ scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
+ while (scfd >= 0 && delay > 0 && old_sync_max > 0) {
+ sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
+ sysfs_fd_get_str(scfd, buf, sizeof(buf));
+ if (strncmp(buf, "none", 4) == 0) {
+ /* Either reshape has aborted, or hasn't
+ * quite started yet. Wait a bit and
+ * check 'sync_action' to see.
+ */
+ usleep(10000);
+ sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf));
+ if (strncmp(buf, "reshape", 7) != 0)
+ break;
+ }
+
+ if (sysfs_fd_get_ll(scfd, &completed) == 0 &&
+ (completed > sync_max ||
+ (completed == sync_max && curr != position))) {
+ while (completed > sync_max) {
+ sync_max += sectors / rd1;
+ if (backwards)
+ position -= sectors;
+ else
+ position += sectors;
+ }
+ if (sync_max < old_sync_max)
+ sysfs_set_num(mdi, NULL, "sync_max", sync_max);
+ }
+
+ if (!backwards && curr >= position)
+ break;
+ if (backwards && curr <= position)
+ break;
+ sysfs_wait(scfd, &delay);
+ }
+ if (scfd >= 0)
+ close(scfd);
+
+ }
+
+ /* As we have an O_EXCL open, any use of the device
+ * which blocks STOP_ARRAY is probably a transient use,
+ * so it is reasonable to retry for a while - 5 seconds.
+ */
+ count = 25; err = 0;
+ while (count && fd >= 0
+ && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
+ && errno == EBUSY) {
+ usleep(200000);
+ count --;
+ }
+ if (fd >= 0 && err) {
+ if (verbose >= 0) {
+ pr_err("failed to stop array %s: %s\n",
+ devname, strerror(errno));
+ if (errno == EBUSY)
+ cont_err("Perhaps a running "
+ "process, mounted filesystem "
+ "or active volume group?\n");
+ }
+ rv = 1;
+ goto out;
+ }
+ /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
+ * was stopped, so We'll do it here just to be sure. Drop any
+ * partitions as well...
+ */
+ if (fd >= 0)
+ ioctl(fd, BLKRRPART, 0);
+ if (mdi)
+ sysfs_uevent(mdi, "change");
+
+ if (devnm[0] && use_udev()) {
+ struct map_ent *mp = map_by_devnm(&map, devnm);
+ remove_devices(devnm, mp ? mp->path : NULL);
+ }
+
+ if (verbose >= 0)
+ pr_err("stopped %s\n", devname);
+ map_lock(&map);
+ map_remove(&map, devnm);
+ map_unlock(&map);
+out:
+ if (mdi)
+ sysfs_free(mdi);
+
+ return rv;
+}
+
+/* Create a zero-filled list entry for device 'name' with disposition
+ * 'disp' and link it into the list immediately after 'dv'.
+ * Returns the new entry, so repeated calls can extend the list in order.
+ */
+static struct mddev_dev *add_one(struct mddev_dev *dv, char *name, char disp)
+{
+	struct mddev_dev *entry = xmalloc(sizeof(*entry));
+
+	memset(entry, 0, sizeof(*entry));
+	entry->disposition = disp;
+	entry->devname = xstrdup(name);
+
+	/* splice in right behind 'dv' */
+	entry->next = dv->next;
+	dv->next = entry;
+	return entry;
+}
+
+/* Append an entry with disposition 'disp' after 'dv' for every member
+ * of the array open on 'fd' whose state has bit 0 (faulty) set.
+ * Entries are named "major:minor".  Does nothing if GET_ARRAY_INFO fails.
+ */
+static void add_faulty(struct mddev_dev *dv, int fd, char disp)
+{
+	mdu_array_info_t array;
+	mdu_disk_info_t disk;
+	int todo;
+	int slot;
+
+	if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+		return;
+
+	/* Stop scanning once every disk the kernel reported has been seen */
+	todo = array.nr_disks;
+	for (slot = 0; slot < MAX_DISKS && todo > 0; slot++) {
+		char name[40];
+
+		disk.number = slot;
+		if (ioctl(fd, GET_DISK_INFO, &disk) != 0 ||
+		    (disk.major == 0 && disk.minor == 0))
+			continue; /* nothing in this slot */
+		todo--;
+		if (disk.state & 1) {
+			/* faulty */
+			sprintf(name, "%d:%d", disk.major, disk.minor);
+			dv = add_one(dv, name, disp);
+		}
+	}
+}
+
+/* Append an entry with disposition 'disp' after 'dv' for every member
+ * of the array open on 'fd' that can no longer be opened, with the open
+ * failing with ENXIO.  When 'disp' is 'f', members that are already
+ * faulty (state bit 0) are skipped.  Entries are named "major:minor".
+ */
+static void add_detached(struct mddev_dev *dv, int fd, char disp)
+{
+	mdu_array_info_t array;
+	mdu_disk_info_t disk;
+	int todo;
+	int slot;
+
+	if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+		return;
+
+	/* Stop scanning once every disk the kernel reported has been seen */
+	todo = array.nr_disks;
+	for (slot = 0; slot < MAX_DISKS && todo > 0; slot++) {
+		char name[40];
+		int probe;
+
+		disk.number = slot;
+		if (ioctl(fd, GET_DISK_INFO, &disk) != 0 ||
+		    (disk.major == 0 && disk.minor == 0))
+			continue; /* nothing in this slot */
+		todo--;
+		if (disp == 'f' && (disk.state & 1))
+			continue; /* already faulty */
+
+		sprintf(name, "%d:%d", disk.major, disk.minor);
+		probe = dev_open(name, O_RDONLY);
+		if (probe >= 0) {
+			/* Not detached */
+			close(probe);
+			continue;
+		}
+		/* Any errno other than ENXIO: probably not detached */
+		if (errno == ENXIO)
+			dv = add_one(dv, name, disp);
+	}
+}
+
+/* Append, after 'dv', one entry for every member of the RAID10 array
+ * open on 'fd' that belongs to the set named by 'set_char' ('A', 'B', ...).
+ * A member's set is its raid_disk number modulo the number of copies,
+ * where copies is the product of the two low bytes of the layout.
+ * New entries inherit the disposition of the entry they are added after.
+ * Does nothing if the array info cannot be read, the array is not
+ * RAID10, or raid_disks is not a multiple of the copy count.
+ */
+static void add_set(struct mddev_dev *dv, int fd, char set_char)
+{
+	mdu_array_info_t array;
+	mdu_disk_info_t disk;
+	int remaining_disks;
+	int copies, set;
+	int i;
+
+	if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+		return;
+	if (array.level != 10)
+		return;
+	copies = ((array.layout & 0xff) *
+		  ((array.layout >> 8) & 0xff));
+	/* A layout with a zero copy count would make the modulo
+	 * operations below divide by zero, so refuse it up front.
+	 */
+	if (copies == 0 || array.raid_disks % copies)
+		return;
+
+	/* Stop scanning once every disk the kernel reported has been seen */
+	remaining_disks = array.nr_disks;
+	for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
+		char buf[40];
+		disk.number = i;
+		if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+			continue;
+		if (disk.major == 0 && disk.minor == 0)
+			continue;
+		remaining_disks--;
+		set = disk.raid_disk % copies;
+		if (set_char != set + 'A')
+			continue;
+		sprintf(buf, "%d:%d", disk.major, disk.minor);
+		dv = add_one(dv, buf, dv->disposition);
+	}
+}
+
+/* Try to "re-add" the device 'dv' (device number 'rdev', superblock
+ * already loaded in 'dev_st') to the array open on 'fd'.
+ *
+ * A re-add is only attempted when the device's superblock marks it as
+ * active and not faulty, its UUID matches the array's (taken from 'tst'
+ * when that has a superblock loaded), and the kernel reports the
+ * device's old slot as empty.  If 'update' is given, or write-mostly
+ * is to be changed, the superblock is updated and stored first.
+ *
+ * Returns 1 if the device was re-added, -1 on an error that has been
+ * reported, and 0 if re-add was not possible and the caller may try
+ * something else.
+ */
+int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
+		   struct supertype *dev_st, struct supertype *tst,
+		   unsigned long rdev,
+		   char *update, char *devname, int verbose,
+		   mdu_array_info_t *array)
+{
+	struct mdinfo mdi;
+	mdu_disk_info_t disc;
+	int duuid[4];
+	int ouuid[4];
+
+	dev_st->ss->getinfo_super(dev_st, &mdi, NULL);
+	dev_st->ss->uuid_from_super(dev_st, ouuid);
+	if (!tst->sb)
+		/* Assume uuid matches: kernel will check */
+		memcpy(duuid, ouuid, sizeof(ouuid));
+	else
+		tst->ss->uuid_from_super(tst, duuid);
+
+	/* Only an active, non-faulty former member of this very array
+	 * is worth a try.
+	 */
+	if (!(mdi.disk.state & (1<<MD_DISK_ACTIVE)))
+		return 0;
+	if (mdi.disk.state & (1<<MD_DISK_FAULTY))
+		return 0;
+	if (memcmp(duuid, ouuid, sizeof(ouuid)) != 0)
+		return 0;
+
+	/* Need to make sure kernel will accept it though.
+	 * re-add doesn't work for version-1 superblocks
+	 * before 2.6.18 :-(
+	 */
+	if (array->major_version == 1 &&
+	    get_linux_version() <= 2006018)
+		return 0;
+
+	/* The slot the device used to occupy must currently be empty */
+	disc.number = mdi.disk.number;
+	if (ioctl(fd, GET_DISK_INFO, &disc) != 0)
+		return 0;
+	if (disc.major != 0 || disc.minor != 0)
+		return 0;
+
+	disc.major = major(rdev);
+	disc.minor = minor(rdev);
+	disc.number = mdi.disk.number;
+	disc.raid_disk = mdi.disk.raid_disk;
+	disc.state = mdi.disk.state;
+	if (dv->writemostly == 1)
+		disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+	if (dv->writemostly == 2)
+		disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
+	remove_partitions(tfd);
+
+	if (update || dv->writemostly > 0) {
+		/* The superblock on the device has to change before
+		 * the kernel gets to see it.
+		 */
+		int rv = -1;
+
+		tfd = dev_open(dv->devname, O_RDWR);
+		if (tfd < 0) {
+			pr_err("failed to open %s for"
+			       " superblock update during re-add\n", dv->devname);
+			return -1;
+		}
+
+		if (dv->writemostly == 1)
+			rv = dev_st->ss->update_super(
+				dev_st, NULL, "writemostly",
+				devname, verbose, 0, NULL);
+		if (dv->writemostly == 2)
+			rv = dev_st->ss->update_super(
+				dev_st, NULL, "readwrite",
+				devname, verbose, 0, NULL);
+		if (update)
+			rv = dev_st->ss->update_super(
+				dev_st, NULL, update,
+				devname, verbose, 0, NULL);
+		if (rv == 0)
+			rv = dev_st->ss->store_super(dev_st, tfd);
+		close(tfd);
+		if (rv != 0) {
+			pr_err("failed to update"
+			       " superblock during re-add\n");
+			return -1;
+		}
+	}
+
+	errno = 0;
+	if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
+		if (verbose >= 0)
+			pr_err("re-added %s\n", dv->devname);
+		return 1;
+	}
+	if (errno == ENOMEM || errno == EROFS) {
+		pr_err("add new device failed for %s: %s\n",
+		       dv->devname, strerror(errno));
+		return dv->disposition == 'M' ? 0 : -1;
+	}
+	/* Any other failure: let the caller fall back to a plain add */
+	return 0;
+}
+
+/* Add the device 'dv' (open read-only on 'tfd', device number 'rdev')
+ * to the array open on 'fd'.
+ *
+ * The device must be at least 'array_size' sectors big.  For arrays
+ * with persistent metadata a re-add is attempted first (unless the
+ * disposition is 'S'); otherwise the device is written with fresh
+ * metadata and added.  External-metadata containers are handled
+ * through sysfs rather than ADD_NEW_DISK.
+ *
+ * Returns 1 if the device was added or re-added, -1 on an error that
+ * has been reported, and 0 when a 'missing' ('M') candidate was
+ * silently skipped.
+ */
+int Manage_add(int fd, int tfd, struct mddev_dev *dv,
+	       struct supertype *tst, mdu_array_info_t *array,
+	       int force, int verbose, char *devname,
+	       char *update, unsigned long rdev, unsigned long long array_size)
+{
+	unsigned long long ldsize;
+	struct supertype *dev_st = NULL;
+	int j;
+	mdu_disk_info_t disc;
+
+	if (!get_dev_size(tfd, dv->devname, &ldsize)) {
+		if (dv->disposition == 'M')
+			return 0;
+		else
+			return -1;
+	}
+
+	if (tst->ss == &super0 && ldsize > 4ULL*1024*1024*1024*1024) {
+		/* More than 4TB is wasted on v0.90 */
+		if (!force) {
+			pr_err("%s is larger than %s can "
+			       "effectively use.\n"
+			       " Add --force if you "
+			       "really want to add this device.\n",
+			       dv->devname, devname);
+			return -1;
+		}
+		pr_err("%s is larger than %s can "
+		       "effectively use.\n"
+		       " Adding anyway as --force "
+		       "was given.\n",
+		       dv->devname, devname);
+	}
+	if (!tst->ss->external &&
+	    array->major_version == 0 &&
+	    md_get_version(fd)%100 < 2) {
+		if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
+			if (verbose >= 0)
+				pr_err("hot added %s\n",
+				       dv->devname);
+			return 1;
+		}
+
+		pr_err("hot add failed for %s: %s\n",
+		       dv->devname, strerror(errno));
+		return -1;
+	}
+
+	if (array->not_persistent == 0 || tst->ss->external) {
+
+		/* need to find a sample superblock to copy, and
+		 * a spare slot to use.
+		 * For 'external' array (well, container based),
+		 * We can just load the metadata for the array.
+		 */
+		int array_failed;
+		if (tst->sb)
+			/* already loaded */;
+		else if (tst->ss->external) {
+			tst->ss->load_container(tst, fd, NULL);
+		} else for (j = 0; j < tst->max_devs; j++) {
+			char *dev;
+			int dfd;
+			disc.number = j;
+			if (ioctl(fd, GET_DISK_INFO, &disc))
+				continue;
+			if (disc.major==0 && disc.minor==0)
+				continue;
+			if ((disc.state & 4)==0) /* sync */
+				continue;
+			/* Looks like a good device to try */
+			dev = map_dev(disc.major, disc.minor, 1);
+			if (!dev)
+				continue;
+			dfd = dev_open(dev, O_RDONLY);
+			if (dfd < 0)
+				continue;
+			if (tst->ss->load_super(tst, dfd,
+						NULL)) {
+				close(dfd);
+				continue;
+			}
+			close(dfd);
+			break;
+		}
+		/* FIXME this is a bad test to be using */
+		if (!tst->sb && (dv->disposition != 'a'
+				 && dv->disposition != 'S')) {
+			/* we are re-adding a device to a
+			 * completely dead array - have to depend
+			 * on kernel to check
+			 */
+		} else if (!tst->sb) {
+			pr_err("cannot load array metadata from %s\n", devname);
+			return -1;
+		}
+
+		/* Make sure device is large enough */
+		if (tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
+		    array_size) {
+			if (dv->disposition == 'M')
+				return 0;
+			pr_err("%s not large enough to join array\n",
+			       dv->devname);
+			return -1;
+		}
+
+		/* Possibly this device was recently part of
+		 * the array and was temporarily removed, and
+		 * is now being re-added.  If so, we can
+		 * simply re-add it.
+		 */
+
+		if (array->not_persistent==0) {
+			dev_st = dup_super(tst);
+			dev_st->ss->load_super(dev_st, tfd, NULL);
+		}
+		if (dev_st && dev_st->sb && dv->disposition != 'S') {
+			int rv = attempt_re_add(fd, tfd, dv,
+						dev_st, tst,
+						rdev,
+						update, devname,
+						verbose,
+						array);
+			dev_st->ss->free_super(dev_st);
+			if (rv)
+				return rv;
+		}
+		if (dv->disposition == 'M') {
+			if (verbose > 0)
+				pr_err("--re-add for %s to %s is not possible\n",
+				       dv->devname, devname);
+			return 0;
+		}
+		if (dv->disposition == 'A') {
+			pr_err("--re-add for %s to %s is not possible\n",
+			       dv->devname, devname);
+			return -1;
+		}
+		if (array->active_disks < array->raid_disks) {
+			/* Degraded: work out whether enough in-sync
+			 * members remain for the array to be usable.
+			 */
+			char *avail = xcalloc(array->raid_disks, 1);
+			int d;
+			int found = 0;
+
+			for (d = 0; d < MAX_DISKS && found < array->active_disks; d++) {
+				disc.number = d;
+				if (ioctl(fd, GET_DISK_INFO, &disc))
+					continue;
+				if (disc.major == 0 && disc.minor == 0)
+					continue;
+				found++;
+				if (!(disc.state & (1<<MD_DISK_SYNC)))
+					continue;
+				avail[disc.raid_disk] = 1;
+			}
+			array_failed = !enough(array->level, array->raid_disks,
+					       array->layout, 1, avail);
+			free(avail);
+		} else
+			array_failed = 0;
+		if (array_failed) {
+			pr_err("%s has failed so using --add cannot work and might destroy\n",
+			       devname);
+			pr_err("data on %s.  You should stop the array and re-assemble it.\n",
+			       dv->devname);
+			return -1;
+		}
+	} else {
+		/* non-persistent. Must ensure that new drive
+		 * is at least array->size big.
+		 */
+		if (ldsize/512 < array_size) {
+			pr_err("%s not large enough to join array\n",
+			       dv->devname);
+			return -1;
+		}
+	}
+	/* committed to really trying this device now */
+	remove_partitions(tfd);
+
+	/* in 2.6.17 and earlier, version-1 superblocks won't
+	 * use the number we write, but will choose a free number.
+	 * we must choose the same free number, which requires
+	 * starting at 'raid_disks' and counting up
+	 */
+	for (j = array->raid_disks; j < tst->max_devs; j++) {
+		disc.number = j;
+		if (ioctl(fd, GET_DISK_INFO, &disc))
+			break;
+		if (disc.major==0 && disc.minor==0)
+			break;
+		if (disc.state & 8) /* removed */
+			break;
+	}
+	disc.major = major(rdev);
+	disc.minor = minor(rdev);
+	disc.number =j;
+	disc.state = 0;
+	if (array->not_persistent==0) {
+		int dfd;
+		if (dv->writemostly == 1)
+			disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+		dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+		if (tst->ss->add_to_super(tst, &disc, dfd,
+					  dv->devname, INVALID_SECTORS))
+			return -1;
+		if (tst->ss->write_init_super(tst))
+			return -1;
+	} else if (dv->disposition == 'A') {
+		/*  this had better be raid1.
+		 * As we are "--re-add"ing we must find a spare slot
+		 * to fill.
+		 */
+		char *used = xcalloc(array->raid_disks, 1);
+		for (j = 0; j < tst->max_devs; j++) {
+			mdu_disk_info_t disc2;
+			disc2.number = j;
+			if (ioctl(fd, GET_DISK_INFO, &disc2))
+				continue;
+			if (disc2.major==0 && disc2.minor==0)
+				continue;
+			if (disc2.state & 8) /* removed */
+				continue;
+			if (disc2.raid_disk < 0)
+				continue;
+			/* 'used' has exactly raid_disks entries, so
+			 * raid_disk == raid_disks is out of range too.
+			 */
+			if (disc2.raid_disk >= array->raid_disks)
+				continue;
+			used[disc2.raid_disk] = 1;
+		}
+		for (j = 0 ; j < array->raid_disks; j++)
+			if (!used[j]) {
+				disc.raid_disk = j;
+				disc.state |= (1<<MD_DISK_SYNC);
+				break;
+			}
+		free(used);
+	}
+	if (dv->writemostly == 1)
+		disc.state |= (1 << MD_DISK_WRITEMOSTLY);
+	if (tst->ss->external) {
+		/* add a disk
+		 * to an external metadata container */
+		struct mdinfo new_mdi;
+		struct mdinfo *sra;
+		int container_fd;
+		char devnm[32];
+		int dfd;
+
+		strcpy(devnm, fd2devnm(fd));
+
+		container_fd = open_dev_excl(devnm);
+		if (container_fd < 0) {
+			pr_err("add failed for %s:"
+			       " could not get exclusive access to container\n",
+			       dv->devname);
+			tst->ss->free_super(tst);
+			return -1;
+		}
+
+		Kill(dv->devname, NULL, 0, -1, 0);
+		dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+		if (mdmon_running(tst->container_devnm))
+			tst->update_tail = &tst->updates;
+		if (tst->ss->add_to_super(tst, &disc, dfd,
+					  dv->devname, INVALID_SECTORS)) {
+			close(dfd);
+			close(container_fd);
+			return -1;
+		}
+		if (tst->update_tail)
+			flush_metadata_updates(tst);
+		else
+			tst->ss->sync_metadata(tst);
+
+		sra = sysfs_read(container_fd, NULL, 0);
+		if (!sra) {
+			pr_err("add failed for %s: sysfs_read failed\n",
+			       dv->devname);
+			close(container_fd);
+			tst->ss->free_super(tst);
+			return -1;
+		}
+		sra->array.level = LEVEL_CONTAINER;
+		/* Need to set data_offset and component_size */
+		tst->ss->getinfo_super(tst, &new_mdi, NULL);
+		new_mdi.disk.major = disc.major;
+		new_mdi.disk.minor = disc.minor;
+		new_mdi.recovery_start = 0;
+		/* Make sure fds are closed as they are O_EXCL which
+		 * would block add_disk */
+		tst->ss->free_super(tst);
+		if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
+			pr_err("add new device to external metadata"
+			       " failed for %s\n", dv->devname);
+			close(container_fd);
+			sysfs_free(sra);
+			return -1;
+		}
+		ping_monitor(devnm);
+		sysfs_free(sra);
+		close(container_fd);
+	} else {
+		tst->ss->free_super(tst);
+		if (ioctl(fd, ADD_NEW_DISK, &disc)) {
+			pr_err("add new device failed for %s as %d: %s\n",
+			       dv->devname, j, strerror(errno));
+			return -1;
+		}
+	}
+	if (verbose >= 0)
+		pr_err("added %s\n", dv->devname);
+	return 1;
+}
+
+/* Hot-remove the device 'dv' from the array open on 'fd'.
+ *
+ * 'rdev' is the device number, or 0 when it is unknown; in that case
+ * 'sysfd' is an open sysfs 'state' file for the device and the removal
+ * is requested by writing "remove" to it.  For external metadata the
+ * container is opened exclusively for the duration, and the device
+ * must not be held by any member array.
+ *
+ * Returns 1 on success and -1 on an error that has been reported.
+ */
+int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
+		  int sysfd, unsigned long rdev, int verbose, char *devname)
+{
+	int lfd = -1;
+	int err;
+
+	if (tst->ss->external) {
+		/* To remove a device from a container, we must
+		 * check that it isn't in use in an array.
+		 * This involves looking in the 'holders'
+		 * directory - there must be just one entry,
+		 * the container.
+		 * To ensure that it doesn't get used as a
+		 * hot spare while we are checking, we
+		 * get an O_EXCL open on the container
+		 */
+		int ret;
+		char devnm[32];
+		strcpy(devnm, fd2devnm(fd));
+		lfd = open_dev_excl(devnm);
+		if (lfd < 0) {
+			pr_err("Cannot get exclusive access "
+			       " to container - odd\n");
+			return -1;
+		}
+		/* We may not be able to check on holders in
+		 * sysfs, either because we don't have the dev num
+		 * (rdev == 0) or because the device has been detached
+		 * and the 'holders' directory no longer exists
+		 * (ret == -1).  In that case, assume it is OK to
+		 * remove.
+		 */
+		if (rdev == 0)
+			ret = -1;
+		else
+			ret = sysfs_unique_holder(devnm, rdev);
+		if (ret == 0) {
+			pr_err("%s is not a member, cannot remove.\n",
+			       dv->devname);
+			close(lfd);
+			return -1;
+		}
+		if (ret >= 2) {
+			pr_err("%s is still in use, cannot remove.\n",
+			       dv->devname);
+			close(lfd);
+			return -1;
+		}
+	}
+	/* FIXME check that it is a current member */
+	if (sysfd >= 0) {
+		/* device has been removed and we don't know
+		 * the major:minor number
+		 */
+		int n = write(sysfd, "remove", 6);
+		if (n != 6)
+			err = -1;
+		else
+			err = 0;
+	} else {
+		err = ioctl(fd, HOT_REMOVE_DISK, rdev);
+		if (err && errno == ENODEV) {
+			/* Old kernels rejected this if no personality
+			 * is registered */
+			struct mdinfo *sra = sysfs_read(fd, NULL, GET_DEVS);
+			/* named 'sd' so it does not hide the 'dv' parameter */
+			struct mdinfo *sd = NULL;
+			if (sra)
+				sd = sra->devs;
+			for ( ; sd ; sd = sd->next)
+				if (sd->disk.major == (int)major(rdev) &&
+				    sd->disk.minor == (int)minor(rdev))
+					break;
+			if (sd)
+				err = sysfs_set_str(sra, sd,
+						    "state", "remove");
+			else
+				err = -1;
+			if (sra)
+				sysfs_free(sra);
+		}
+	}
+	if (err) {
+		pr_err("hot remove failed "
+		       "for %s: %s\n",	dv->devname,
+		       strerror(errno));
+		if (lfd >= 0)
+			close(lfd);
+		return -1;
+	}
+	if (tst->ss->external) {
+		/*
+		 * Before dropping our exclusive open we make an
+		 * attempt at preventing mdmon from seeing an
+		 * 'add' event before reconciling this 'remove'
+		 * event.
+		 */
+		char *devnm = fd2devnm(fd);
+
+		if (!devnm) {
+			pr_err("unable to get container name\n");
+			/* don't leak the exclusive container open */
+			if (lfd >= 0)
+				close(lfd);
+			return -1;
+		}
+
+		ping_manager(devnm);
+	}
+	if (lfd >= 0)
+		close(lfd);
+	if (verbose >= 0)
+		pr_err("hot removed %s from %s\n",
+		       dv->devname, devname);
+	return 1;
+}
+
+/* Mark the device 'dv' (device number 'rdev') in the array open on 'fd'
+ * as wanting replacement, by writing "want_replacement" to its sysfs
+ * 'state'.  Only supported for native (non-external) metadata, and the
+ * device must currently hold a raid slot.
+ *
+ * On success the next 'W' (--with) entry following 'dv' in the device
+ * list, if any, is paired with it: its disposition becomes 'w' and its
+ * 'used' field records the raid slot being replaced.
+ *
+ * Returns 1 on success and -1 on an error that has been reported.
+ */
+int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
+		   unsigned long rdev, int verbose, char *devname)
+{
+	struct mdinfo *mdi, *di;
+	if (tst->ss->external) {
+		pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
+		return -1;
+	}
+	/* Need to find the device in sysfs and add 'want_replacement' to the
+	 * status.
+	 */
+	mdi = sysfs_read(fd, NULL, GET_DEVS);
+	if (!mdi || !mdi->devs) {
+		pr_err("Cannot find status of %s to enable replacement - strange\n",
+		       devname);
+		if (mdi)
+			sysfs_free(mdi);
+		return -1;
+	}
+	for (di = mdi->devs; di; di = di->next)
+		if (di->disk.major == (int)major(rdev) &&
+		    di->disk.minor == (int)minor(rdev))
+			break;
+	if (di) {
+		int rv;
+		int slot;
+		if (di->disk.raid_disk < 0) {
+			pr_err("%s is not active and so cannot be replaced.\n",
+			       dv->devname);
+			sysfs_free(mdi);
+			return -1;
+		}
+		rv = sysfs_set_str(mdi, di,
+				   "state", "want_replacement");
+		if (rv) {
+			sysfs_free(mdi);
+			pr_err("Failed to request replacement for %s\n",
+			       dv->devname);
+			return -1;
+		}
+		/* Remember the slot so that 'mdi' (which owns 'di')
+		 * can be released before returning.
+		 */
+		slot = di->disk.raid_disk;
+		sysfs_free(mdi);
+		if (verbose >= 0)
+			pr_err("Marked %s (device %d in %s) for replacement\n",
+			       dv->devname, slot, devname);
+		/* If there is a matching 'with', we need to tell it which
+		 * raid disk
+		 */
+		while (dv && dv->disposition != 'W')
+			dv = dv->next;
+		if (dv) {
+			dv->disposition = 'w';
+			dv->used = slot;
+		}
+		return 1;
+	}
+	sysfs_free(mdi);
+	pr_err("%s not found in %s so cannot --replace it\n",
+	       dv->devname, devname);
+	return -1;
+}
+
+/* Make the device 'dv' (device number 'rdev') the preferred replacement
+ * for raid slot 'dv->used' in the array open on 'fd', by writing that
+ * slot number to the device's sysfs 'slot' attribute.  The device must
+ * be neither faulty nor already active in a slot.
+ *
+ * Returns 1 on success and -1 on an error that has been reported.
+ */
+int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
+		unsigned long rdev, int verbose, char *devname)
+{
+	struct mdinfo *mdi, *di;
+	/* try to set 'slot' for 'rdev' in 'fd' to 'dv->used' */
+	mdi = sysfs_read(fd, NULL, GET_DEVS|GET_STATE);
+	if (!mdi || !mdi->devs) {
+		pr_err("Cannot find status of %s to enable replacement - strange\n",
+		       devname);
+		if (mdi)
+			sysfs_free(mdi);
+		return -1;
+	}
+	for (di = mdi->devs; di; di = di->next)
+		if (di->disk.major == (int)major(rdev) &&
+		    di->disk.minor == (int)minor(rdev))
+			break;
+	if (di) {
+		int rv;
+		if (di->disk.state & (1<<MD_DISK_FAULTY)) {
+			pr_err("%s is faulty and cannot be a replacement\n",
+			       dv->devname);
+			sysfs_free(mdi);
+			return -1;
+		}
+		if (di->disk.raid_disk >= 0) {
+			pr_err("%s is active and cannot be a replacement\n",
+			       dv->devname);
+			sysfs_free(mdi);
+			return -1;
+		}
+		rv = sysfs_set_num(mdi, di,
+				   "slot", dv->used);
+		/* 'di' is not needed past this point, so release the
+		 * sysfs data on the success path as well.
+		 */
+		sysfs_free(mdi);
+		if (rv) {
+			pr_err("Failed to set %s as preferred replacement.\n",
+			       dv->devname);
+			return -1;
+		}
+		if (verbose >= 0)
+			pr_err("Marked %s in %s as replacement for device %d\n",
+			       dv->devname, devname, dv->used);
+		return 1;
+	}
+	sysfs_free(mdi);
+	pr_err("%s not found in %s so cannot make it preferred replacement\n",
+	       dv->devname, devname);
+	return -1;
+}
+
+/* Apply the action recorded in each entry of 'devlist' to the array
+ * 'devname', which is open on 'fd'.  The per-entry dispositions are
+ * described in the comment at the top of the body.
+ *
+ * 'update' and 'force' are passed on to Manage_add().
+ *
+ * Returns 0 on success (or 2 if 'test' is set and no action was
+ * taken).  On failure returns 1, or 2 if 'test' is not set and
+ * marking a device faulty failed with EBUSY.
+ */
+int Manage_subdevs(char *devname, int fd,
+		   struct mddev_dev *devlist, int verbose, int test,
+		   char *update, int force)
+{
+	/* Do something to each dev.
+	 * devmode can be
+	 *  'a' - add the device
+	 *	   try HOT_ADD_DISK
+	 *         If that fails EINVAL, try ADD_NEW_DISK
+	 *  'S' - add the device as a spare - don't try re-add
+	 *  'A' - re-add the device
+	 *  'r' - remove the device: HOT_REMOVE_DISK
+	 *        device can be 'faulty' or 'detached' in which case all
+	 *	  matching devices are removed.
+	 *  'f' - set the device faulty SET_DISK_FAULTY
+	 *        device can be 'detached' in which case any device that
+	 *	  is inaccessible will be marked faulty.
+	 *  'R' - mark this device as wanting replacement.
+	 *  'W' - this device is added if necessary and activated as
+	 *        a replacement for a previous 'R' device.
+	 * -----
+	 *  'w' - 'W' will be changed to 'w' when it is paired with
+	 *        a 'R' device.  If a 'W' is found while walking the list
+	 *        it must be unpaired, and is an error.
+	 *  'M' - this is created by a 'missing' target.  It is a slight
+	 *        variant on 'A'
+	 *  'F' - Another variant of 'A', where the device was faulty
+	 *        so must be removed from the array first.
+	 *
+	 * For 'f' and 'r', the device can also be a kernel-internal
+	 * name such as 'sdb'.
+	 */
+	mdu_array_info_t array;
+	unsigned long long array_size;
+	struct mddev_dev *dv;
+	int tfd = -1;
+	struct supertype *tst;
+	char *subarray = NULL;
+	int sysfd = -1;
+	int count = 0; /* number of actions taken */
+	struct mdinfo info;
+	int frozen = 0;
+	int busy = 0;
+
+	if (ioctl(fd, GET_ARRAY_INFO, &array)) {
+		pr_err("Cannot get array info for %s\n",
+		       devname);
+		goto abort;
+	}
+	sysfs_init(&info, fd, NULL);
+
+	/* array.size is only 32 bits and may be truncated.
+	 * So read from sysfs if possible, and record number of sectors
+	 */
+
+	array_size = get_component_size(fd);
+	if (array_size <= 0)
+		array_size = array.size * 2;
+
+	tst = super_by_fd(fd, &subarray);
+	if (!tst) {
+		pr_err("unsupported array - version %d.%d\n",
+		       array.major_version, array.minor_version);
+		goto abort;
+	}
+
+	for (dv = devlist; dv; dv = dv->next) {
+		unsigned long rdev = 0; /* device to add/remove etc */
+		int rv;
+		int mj,mn;
+
+		if (strcmp(dv->devname, "failed") == 0 ||
+		    strcmp(dv->devname, "faulty") == 0) {
+			if (dv->disposition != 'A'
+			    && dv->disposition != 'r') {
+				pr_err("%s only meaningful "
+				       "with -r or --re-add, not -%c\n",
+				       dv->devname, dv->disposition);
+				goto abort;
+			}
+			/* Expand the keyword into one entry per faulty
+			 * member, inserted right after this one.
+			 */
+			add_faulty(dv, fd, (dv->disposition == 'A'
+					    ? 'F' : 'r'));
+			continue;
+		}
+		if (strcmp(dv->devname, "detached") == 0) {
+			if (dv->disposition != 'r' && dv->disposition != 'f') {
+				pr_err("%s only meaningful "
+				       "with -r or -f, not -%c\n",
+				       dv->devname, dv->disposition);
+				goto abort;
+			}
+			add_detached(dv, fd, dv->disposition);
+			continue;
+		}
+
+		if (strcmp(dv->devname, "missing") == 0) {
+			struct mddev_dev *add_devlist = NULL;
+			struct mddev_dev **dp;
+			if (dv->disposition != 'A') {
+				pr_err("'missing' only meaningful "
+				       "with --re-add\n");
+				goto abort;
+			}
+			add_devlist = conf_get_devs();
+			if (add_devlist == NULL) {
+				pr_err("no devices to scan for missing members.\n");
+				continue;
+			}
+			for (dp = &add_devlist; *dp; dp = & (*dp)->next)
+				/* 'M' (for 'missing') is like 'A' without errors */
+				(*dp)->disposition = 'M';
+			/* splice the candidates in right after this entry */
+			*dp = dv->next;
+			dv->next = add_devlist;
+			continue;
+		}
+
+		if (strncmp(dv->devname, "set-", 4) == 0 &&
+		    strlen(dv->devname) == 5) {
+			int copies;
+
+			if (dv->disposition != 'r' &&
+			    dv->disposition != 'f') {
+				pr_err("'%s' only meaningful with -r or -f\n",
+				       dv->devname);
+				goto abort;
+			}
+			if (array.level != 10) {
+				pr_err("'%s' only meaningful with RAID10 arrays\n",
+				       dv->devname);
+				goto abort;
+			}
+			copies = ((array.layout & 0xff) *
+				  ((array.layout >> 8) & 0xff));
+			if (array.raid_disks % copies != 0 ||
+			    dv->devname[4] < 'A' ||
+			    dv->devname[4] >= 'A' + copies ||
+			    copies > 26) {
+				pr_err("'%s' not meaningful with this array\n",
+				       dv->devname);
+				goto abort;
+			}
+			add_set(dv, fd, dv->devname[4]);
+			continue;
+		}
+
+		if (strchr(dv->devname, '/') == NULL &&
+		    strchr(dv->devname, ':') == NULL &&
+		    strlen(dv->devname) < 50) {
+			/* Assume this is a kernel-internal name like 'sda1' */
+			int found = 0;
+			char dname[55];
+			if (dv->disposition != 'r' && dv->disposition != 'f') {
+				pr_err("%s only meaningful "
+				       "with -r or -f, not -%c\n",
+				       dv->devname, dv->disposition);
+				goto abort;
+			}
+
+			sprintf(dname, "dev-%s", dv->devname);
+			sysfd = sysfs_open(fd2devnm(fd), dname, "block/dev");
+			if (sysfd >= 0) {
+				char dn[20];
+				if (sysfs_fd_get_str(sysfd, dn, 20) > 0 &&
+				    sscanf(dn, "%d:%d", &mj,&mn) == 2) {
+					rdev = makedev(mj,mn);
+					found = 1;
+				}
+				close(sysfd);
+				sysfd = -1;
+			}
+			if (!found) {
+				/* No device number available: keep the
+				 * 'state' file open and act through it.
+				 */
+				sysfd = sysfs_open(fd2devnm(fd), dname, "state");
+				if (sysfd < 0) {
+					pr_err("%s does not appear "
+					       "to be a component of %s\n",
+					       dv->devname, devname);
+					goto abort;
+				}
+			}
+		} else if ((dv->disposition == 'r' || dv->disposition == 'f')
+			   && get_maj_min(dv->devname, &mj, &mn)) {
+			/* for 'fail' and 'remove', the device might
+			 * not exist.
+			 */
+			rdev = makedev(mj, mn);
+		} else {
+			struct stat stb;
+			tfd = dev_open(dv->devname, O_RDONLY);
+			if (tfd >= 0) {
+				fstat(tfd, &stb);
+				/* Only the device number is needed from
+				 * this open; the add path opens the
+				 * device again itself, so close it now
+				 * rather than leak it.
+				 */
+				close(tfd);
+				tfd = -1;
+			} else {
+				int open_err = errno;
+				if (stat(dv->devname, &stb) != 0) {
+					pr_err("Cannot find %s: %s\n",
+					       dv->devname, strerror(errno));
+					goto abort;
+				}
+				if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+					if (dv->disposition == 'M')
+						/* non-fatal. Also improbable */
+						continue;
+					pr_err("%s is not a block device.\n",
+					       dv->devname);
+					goto abort;
+				}
+				if (dv->disposition == 'r')
+					/* Be happy, the stat worked, that is
+					 * enough for --remove
+					 */
+					;
+				else {
+					if (dv->disposition == 'M')
+						/* non-fatal */
+						continue;
+					pr_err("Cannot open %s: %s\n",
+					       dv->devname, strerror(open_err));
+					goto abort;
+				}
+			}
+			rdev = stb.st_rdev;
+		}
+		switch(dv->disposition){
+		default:
+			pr_err("internal error - devmode[%s]=%d\n",
+			       dv->devname, dv->disposition);
+			goto abort;
+		case 'a':
+		case 'S': /* --add-spare */
+		case 'A':
+		case 'M': /* --re-add missing */
+		case 'F': /* --re-add faulty  */
+			/* add the device */
+			if (subarray) {
+				pr_err("Cannot add disks to a"
+				       " \'member\' array, perform this"
+				       " operation on the parent container\n");
+				goto abort;
+			}
+			if (dv->disposition == 'F')
+				/* Need to remove first */
+				ioctl(fd, HOT_REMOVE_DISK, rdev);
+			/* Make sure it isn't in use (in 2.6 or later) */
+			tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
+			if (tfd >= 0) {
+				/* We know no-one else is using it.  We'll
+				 * need non-exclusive access to add it, so
+				 * do that now.
+				 */
+				close(tfd);
+				tfd = dev_open(dv->devname, O_RDONLY);
+			}
+			if (tfd < 0) {
+				if (dv->disposition == 'M')
+					continue;
+				pr_err("Cannot open %s: %s\n",
+				       dv->devname, strerror(errno));
+				goto abort;
+			}
+			if (!frozen) {
+				if (sysfs_freeze_array(&info) == 1)
+					frozen = 1;
+				else
+					frozen = -1;
+			}
+			rv = Manage_add(fd, tfd, dv, tst, &array,
+					force, verbose, devname, update,
+					rdev, array_size);
+			close(tfd);
+			tfd = -1;
+			if (rv < 0)
+				goto abort;
+			if (rv > 0)
+				count++;
+			break;
+
+		case 'r':
+			/* hot remove */
+			if (subarray) {
+				pr_err("Cannot remove disks from a"
+				       " \'member\' array, perform this"
+				       " operation on the parent container\n");
+				rv = -1;
+			} else
+				rv = Manage_remove(tst, fd, dv, sysfd,
+						   rdev, verbose,
+						   devname);
+			if (sysfd >= 0)
+				close(sysfd);
+			sysfd = -1;
+			if (rv < 0)
+				goto abort;
+			if (rv > 0)
+				count++;
+			break;
+
+		case 'f': /* set faulty */
+			/* FIXME check current member */
+			if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
+			    (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
+						rdev))) {
+				if (errno == EBUSY)
+					busy = 1;
+				pr_err("set device faulty failed for %s:  %s\n",
+				       dv->devname, strerror(errno));
+				if (sysfd >= 0)
+					close(sysfd);
+				goto abort;
+			}
+			if (sysfd >= 0)
+				close(sysfd);
+			sysfd = -1;
+			count++;
+			if (verbose >= 0)
+				pr_err("set %s faulty in %s\n",
+				       dv->devname, devname);
+			break;
+		case 'R': /* Mark as replaceable */
+			if (subarray) {
+				pr_err("Cannot replace disks in a"
+				       " \'member\' array, perform this"
+				       " operation on the parent container\n");
+				rv = -1;
+			} else {
+				if (!frozen) {
+					if (sysfs_freeze_array(&info) == 1)
+						frozen = 1;
+					else
+						frozen = -1;
+				}
+				rv = Manage_replace(tst, fd, dv,
+						    rdev, verbose,
+						    devname);
+			}
+			if (rv < 0)
+				goto abort;
+			if (rv > 0)
+				count++;
+			break;
+		case 'W': /* --with device that doesn't match */
+			pr_err("No matching --replace device for --with %s\n",
+			       dv->devname);
+			goto abort;
+		case 'w': /* --with device which was matched */
+			rv = Manage_with(tst, fd, dv,
+					 rdev, verbose, devname);
+			if (rv < 0)
+				goto abort;
+			break;
+		}
+	}
+	/* Let any sync activity we froze carry on again */
+	if (frozen > 0)
+		sysfs_set_str(&info, NULL, "sync_action","idle");
+	if (test && count == 0)
+		return 2;
+	return 0;
+
+abort:
+	if (frozen > 0)
+		sysfs_set_str(&info, NULL, "sync_action","idle");
+	return !test && busy ? 2 : 1;
+}
+
+/* Open an md device ("9:0") and issue the RAID_AUTORUN ioctl on it.
+ * Returns 0 if the ioctl succeeded, 1 if the device could not be
+ * opened or the ioctl failed.
+ */
+int autodetect(void)
+{
+	int failed;
+	int fd = dev_open("9:0", O_RDONLY);
+
+	if (fd < 0)
+		return 1;
+
+	failed = (ioctl(fd, RAID_AUTORUN, 0) != 0);
+	close(fd);
+	return failed ? 1 : 0;
+}
+
+/* Apply 'update' (with the values in 'ident') to subarray 'subarray' of
+ * the container 'dev', using the metadata handler's update_subarray
+ * method, then push the change out either through mdmon (when it is
+ * running for the container) or by syncing the metadata directly.
+ *
+ * Returns 0 on success, 2 if the subarray could not be opened or the
+ * metadata format has no update_subarray method, and otherwise the
+ * non-zero result of that method.
+ */
+int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int verbose)
+{
+	struct supertype supertype;
+	struct supertype *st = &supertype;
+	int rv = 2;
+	int fd;
+
+	memset(st, 0, sizeof(*st));
+
+	fd = open_subarray(dev, subarray, st, verbose < 0);
+	if (fd < 0)
+		return 2;
+
+	if (st->ss->update_subarray == NULL) {
+		if (verbose >= 0)
+			pr_err("Operation not supported for %s metadata\n",
+			       st->ss->name);
+	} else {
+		/* With mdmon running, updates are queued for it
+		 * rather than written directly.
+		 */
+		if (mdmon_running(st->devnm))
+			st->update_tail = &st->updates;
+
+		rv = st->ss->update_subarray(st, subarray, update, ident);
+		if (rv != 0) {
+			if (verbose >= 0)
+				pr_err("Failed to update %s of subarray-%s in %s\n",
+				       update, subarray, dev);
+		} else {
+			if (st->update_tail)
+				flush_metadata_updates(st);
+			else
+				st->ss->sync_metadata(st);
+
+			if (strcmp(update, "name") == 0 && verbose >= 0)
+				pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
+				       subarray, dev);
+		}
+	}
+
+	st->ss->free_super(st);
+	close(fd);
+
+	return rv;
+}
+
+/* Move the spare with device number 'devid' from array 'from_devname'
+ * to array 'to_devname': remove it from the first, then add it to the
+ * second.  If the add fails, the device is added back to the array it
+ * came from.
+ * Returns 1 on success, 0 on failure.
+ */
+int move_spare(char *from_devname, char *to_devname, dev_t devid)
+{
+	struct mddev_dev devlist;
+	char devname[20];
+	int moved = 0;
+
+	int fd1 = open(to_devname, O_RDONLY);
+	int fd2 = open(from_devname, O_RDONLY);
+
+	if (fd1 >= 0 && fd2 >= 0) {
+		/* A one-entry device list naming the spare as "major:minor" */
+		sprintf(devname, "%d:%d", major(devid), minor(devid));
+		devlist.devname = devname;
+		devlist.next = NULL;
+		devlist.used = 0;
+		devlist.writemostly = 0;
+
+		/* try to remove and add */
+		devlist.disposition = 'r';
+		if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
+			devlist.disposition = 'a';
+			if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
+				/* make sure manager is aware of changes */
+				ping_manager(to_devname);
+				ping_manager(from_devname);
+				moved = 1;
+			} else {
+				/* put it back where it came from */
+				Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
+			}
+		}
+	}
+
+	if (fd1 >= 0)
+		close(fd1);
+	if (fd2 >= 0)
+		close(fd2);
+	return moved;
+}
+#endif
diff --git a/Monitor.c b/Monitor.c
new file mode 100644
index 00000000..5cb24fab
--- /dev/null
+++ b/Monitor.c
@@ -0,0 +1,1109 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include "md_p.h"
+#include "md_u.h"
+#include <sys/wait.h>
+#include <signal.h>
+#include <limits.h>
+#include <syslog.h>
+
+/* Per-array monitoring record.  Monitor() keeps these on a singly linked
+ * list (through 'next') and check_array() refreshes one on every pass.
+ */
+struct state {
+ char *devname; /* path that is open()ed to examine the array */
+ char devnm[32]; /* to sync with mdstat info */
+ long utime; /* array.utime seen on the last check; 0 is treated as "new array" */
+ int err; /* bumped each time check_array() gives up on the array,
+ * cleared when a check succeeds */
+ char *spare_group; /* spare-group taken from the config entry, or NULL */
+ int active, working, failed, spare, raid; /* disk counts copied from GET_ARRAY_INFO */
+ int from_config; /* set when the entry was built from the config file list */
+ int from_auto; /* set when the entry was added by add_new_arrays() */
+ int expected_spares; /* spare_disks from the config entry, or -1 */
+ int devstate[MAX_DISKS]; /* per-slot disk state recorded by the last check */
+ dev_t devid[MAX_DISKS]; /* per-slot device number recorded by the last check */
+ int percent; /* resync percentage copied from the mdstat entry */
+ char parent_devnm[32]; /* For subarray, devnm of parent.
+ * For others, ""
+ */
+ struct supertype *metadata; /* result of super_by_fd(); only filled in for
+ * entries that are not subarrays */
+ struct state *subarray;/* for a container it is a link to first subarray
+ * for a subarray it is a link to next subarray
+ * in the same container */
+ struct state *parent; /* for a subarray it is a link to its container
+ */
+ struct state *next; /* next entry in the monitored list */
+};
+
+/* Where alert() should deliver an event; filled in once by Monitor(). */
+struct alert_info {
+ char *mailaddr; /* "To:" address for mailed alerts, or NULL */
+ char *mailfrom; /* "From:" address for mailed alerts, or NULL for the default */
+ char *alert_cmd; /* program exec'ed for every event, or NULL */
+ int dosyslog; /* non-zero: also report events through syslog() */
+};
+/* Forward declarations for the helpers defined further down this file. */
+static int make_daemon(char *pidfile);
+static int check_one_sharer(int scan);
+static void alert(char *event, char *dev, char *disc, struct alert_info *info);
+static int check_array(struct state *st, struct mdstat_ent *mdstat,
+ int test, struct alert_info *info,
+ int increments, char *prefer);
+static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
+ int test, struct alert_info *info);
+static void try_spare_migration(struct state *statelist, struct alert_info *info);
+static void link_containers_with_subarrays(struct state *list);
+
+int Monitor(struct mddev_dev *devlist,
+	    char *mailaddr, char *alert_cmd,
+	    struct context *c,
+	    int daemonise, int oneshot,
+	    int dosyslog, char *pidfile, int increments,
+	    int share)
+{
+	/*
+	 * Every few seconds, scan every md device looking for changes
+	 * When a change is found, log it, possibly run the alert command,
+	 * and possibly send Email
+	 *
+	 * For each array, we record:
+	 *   Update time
+	 *   active/working/failed/spare drives
+	 *   State of each device.
+	 *   %rebuilt if rebuilding
+	 *
+	 * If the update time changes, check out all the data again
+	 * It is possible that we cannot get the state of each device
+	 * due to bugs in the md kernel module.
+	 * We also read /proc/mdstat to get rebuild percent,
+	 * and to get state on all active devices in case of kernel bug.
+	 *
+	 * Events are:
+	 *    Fail
+	 *	An active device had Faulty set or Active/Sync removed
+	 *    FailSpare
+	 *      A spare device had Faulty set
+	 *    SpareActive
+	 *      An active device had a reverse transition
+	 *    RebuildStarted
+	 *      percent went from -1 to +ve
+	 *    RebuildNN
+	 *      percent went from below to not-below NN%
+	 *    DeviceDisappeared
+	 *      Couldn't access a device which was previously visible
+	 *
+	 * if we detect an array with active<raid and spare==0
+	 * we look at other arrays that have same spare-group
+	 * If we find one with active==raid and spare>0,
+	 *  and if we can get_disk_info and find a name
+	 *  Then we hot-remove and hot-add to the other array
+	 *
+	 * If devlist is NULL, then we can monitor everything because --scan
+	 * was given.  We get an initial list from config file and add anything
+	 * that appears in /proc/mdstat
+	 *
+	 * Returns 1 when there is nothing to report to or another sharer
+	 * blocks us, the make_daemon() result in the parent after
+	 * daemonising, and 0 once the monitoring loop has finished.
+	 */
+
+	struct state *statelist = NULL;
+	struct state *st2;
+	int finished = 0;
+	struct mdstat_ent *mdstat = NULL;
+	char *mailfrom = NULL;
+	struct alert_info info;
+
+	if (!mailaddr) {
+		mailaddr = conf_get_mailaddr();
+		if (mailaddr && ! c->scan)
+			pr_err("Monitor using email address \"%s\" from config file\n",
+			       mailaddr);
+	}
+	mailfrom = conf_get_mailfrom();
+
+	if (!alert_cmd) {
+		alert_cmd = conf_get_program();
+		if (alert_cmd && ! c->scan)
+			pr_err("Monitor using program \"%s\" from config file\n",
+			       alert_cmd);
+	}
+	if (c->scan && !mailaddr && !alert_cmd && !dosyslog) {
+		pr_err("No mail address or alert command - not monitoring.\n");
+		return 1;
+	}
+	info.alert_cmd = alert_cmd;
+	info.mailaddr = mailaddr;
+	info.mailfrom = mailfrom;
+	info.dosyslog = dosyslog;
+
+	if (daemonise) {
+		/* only the forked child (rv < 0) carries on monitoring */
+		int rv = make_daemon(pidfile);
+		if (rv >= 0)
+			return rv;
+	}
+
+	if (share)
+		if (check_one_sharer(c->scan))
+			return 1;
+
+	if (devlist == NULL) {
+		struct mddev_ident *mdlist = conf_get_ident(NULL);
+		for (; mdlist; mdlist=mdlist->next) {
+			struct state *st;
+			if (mdlist->devname == NULL)
+				continue;
+			if (strcasecmp(mdlist->devname, "<ignore>") == 0)
+				continue;
+			st = xcalloc(1, sizeof *st);
+			if (mdlist->devname[0] == '/')
+				st->devname = xstrdup(mdlist->devname);
+			else {
+				/* strcpy() returns its destination, so nesting
+				 * two strcpy() calls would overwrite the prefix;
+				 * append the name after it instead.
+				 */
+				st->devname = xmalloc(8+strlen(mdlist->devname)+1);
+				strcpy(st->devname, "/dev/md/");
+				strcat(st->devname, mdlist->devname);
+			}
+			st->next = statelist;
+			st->devnm[0] = 0;
+			st->percent = RESYNC_UNKNOWN;
+			st->from_config = 1;
+			st->expected_spares = mdlist->spare_disks;
+			if (mdlist->spare_group)
+				st->spare_group = xstrdup(mdlist->spare_group);
+			statelist = st;
+		}
+	} else {
+		struct mddev_dev *dv;
+		for (dv=devlist ; dv; dv=dv->next) {
+			struct mddev_ident *mdlist = conf_get_ident(dv->devname);
+			struct state *st = xcalloc(1, sizeof *st);
+			st->devname = xstrdup(dv->devname);
+			st->next = statelist;
+			st->devnm[0] = 0;
+			st->percent = RESYNC_UNKNOWN;
+			st->expected_spares = -1;
+			if (mdlist) {
+				st->expected_spares = mdlist->spare_disks;
+				if (mdlist->spare_group)
+					st->spare_group = xstrdup(mdlist->spare_group);
+			}
+			statelist = st;
+		}
+	}
+
+	while (! finished) {
+		int new_found = 0;
+		struct state *st, **stp;
+		int anydegraded = 0;
+
+		if (mdstat)
+			free_mdstat(mdstat);
+		mdstat = mdstat_read(oneshot?0:1, 0);
+
+		for (st=statelist; st; st=st->next)
+			if (check_array(st, mdstat, c->test, &info,
+					increments, c->prefer))
+				anydegraded = 1;
+
+		/* now check if there are any new devices found in mdstat */
+		if (c->scan)
+			new_found = add_new_arrays(mdstat, &statelist, c->test,
+						   &info);
+
+		/* If an array has active < raid && spare == 0 && spare_group != NULL
+		 * Look for another array with spare > 0 and active == raid and same spare_group
+		 *  if found, choose a device and hotremove/hotadd
+		 */
+		if (share && anydegraded)
+			try_spare_migration(statelist, &info);
+		if (!new_found) {
+			if (oneshot)
+				break;
+			else
+				mdstat_wait(c->delay);
+		}
+		/* test messages are only wanted on the first pass */
+		c->test = 0;
+
+		/* drop auto-discovered arrays that keep failing */
+		for (stp = &statelist; (st = *stp) != NULL; ) {
+			if (st->from_auto && st->err > 5) {
+				*stp = st->next;
+				free(st->devname);
+				free(st->spare_group);
+				free(st);
+			} else
+				stp = &st->next;
+		}
+	}
+	/* release the list, including the strings each entry owns */
+	for (st2 = statelist; st2; st2 = statelist) {
+		statelist = st2->next;
+		free(st2->devname);
+		free(st2->spare_group);
+		free(st2);
+	}
+
+	if (pidfile)
+		unlink(pidfile);
+	return 0;
+}
+
+static int make_daemon(char *pidfile)
+{
+	/* Fork so that monitoring can carry on in the background.
+	 * The result tells the caller which side of the fork it is on:
+	 *   -1  we are the forked daemon
+	 *    0  we are the parent and the fork worked
+	 *    1  the fork failed
+	 * so any non-negative value can be used directly as the exit code.
+	 */
+	FILE *out;
+	int child = fork();
+
+	if (child < 0) {
+		perror("daemonise");
+		return 1;
+	}
+	if (child == 0) {
+		/* daemon side: reopen fd 0 on /dev/null (open() is expected
+		 * to hand back the descriptor just closed), copy it onto
+		 * 1 and 2, then start a new session
+		 */
+		close(0);
+		open("/dev/null", O_RDWR);
+		dup2(0,1);
+		dup2(0,2);
+		setsid();
+		return -1;
+	}
+
+	/* parent side: publish the daemon's pid */
+	if (!pidfile) {
+		printf("%d\n", child);
+		return 0;
+	}
+	out = fopen(pidfile, "w");
+	if (out) {
+		fprintf(out,"%d\n", child);
+		fclose(out);
+	} else
+		perror("cannot create pid file");
+	return 0;
+}
+
+static int check_one_sharer(int scan)
+{
+	/* Look for another spare-sharing monitor by reading the pid stored in
+	 * MDMON_DIR/autorebuild.pid and testing whether /proc/<pid> exists.
+	 *
+	 * In scan mode a live sharer is fatal (returns 1); otherwise it only
+	 * produces a warning.  In scan mode with no live sharer our own pid
+	 * is written to the file.  Failing to write it is reported but is
+	 * not treated as an error.
+	 *
+	 * Returns 1 if monitoring must be abandoned, 0 otherwise.
+	 */
+	int pid, rv;
+	FILE *fp;
+	char dir[20];
+	char path[100];
+	struct stat buf;
+
+	snprintf(path, sizeof(path), "%s/autorebuild.pid", MDMON_DIR);
+	fp = fopen(path, "r");
+	if (fp) {
+		if (fscanf(fp, "%d", &pid) != 1)
+			pid = -1;
+		fclose(fp);
+		snprintf(dir, sizeof(dir), "/proc/%d", pid);
+		rv = stat(dir, &buf);
+		if (rv != -1) {
+			if (scan) {
+				pr_err("Only one "
+				       "autorebuild process allowed"
+				       " in scan mode, aborting\n");
+				return 1;
+			}
+			pr_err("Warning: One"
+			       " autorebuild process already"
+			       " running.\n");
+		}
+	}
+	if (scan) {
+		if (mkdir(MDMON_DIR, S_IRWXU) < 0 &&
+		    errno != EEXIST) {
+			pr_err("Can't create "
+			       "autorebuild.pid file\n");
+		} else {
+			fp = fopen(path, "w");
+			if (!fp)
+				pr_err("Cannot create"
+				       " autorebuild.pid"
+				       " file\n");
+			else {
+				pid = getpid();
+				fprintf(fp, "%d\n", pid);
+				fclose(fp);
+			}
+		}
+	}
+	return 0;
+}
+
+/* Report one monitoring event through every configured channel.
+ *
+ * event: event name, e.g. "Fail", "DegradedArray", "RebuildStarted"
+ * dev:   the md device the event relates to
+ * disc:  component device name, or NULL; a string starting with a space
+ *        is treated as extra information rather than a device name
+ * info:  where to deliver the alert
+ *
+ * With no program, mail address or syslog configured the event is printed
+ * on stdout.  The alert program is run for every event; mail is only sent
+ * for Fail*, Test*, Spares* and Degrade* events.
+ */
+static void alert(char *event, char *dev, char *disc, struct alert_info *info)
+{
+	int priority;
+
+	if (!info->alert_cmd && !info->mailaddr && !info->dosyslog) {
+		time_t now = time(0);
+
+		printf("%1.15s: %s on %s %s\n", ctime(&now)+4, event, dev, disc?disc:"unknown device");
+	}
+	if (info->alert_cmd) {
+		int pid = fork();
+		switch(pid) {
+		default:
+			/* parent: wait for the alert program to finish */
+			waitpid(pid, NULL, 0);
+			break;
+		case -1:
+			break;
+		case 0:
+			execl(info->alert_cmd, info->alert_cmd,
+			      event, dev, disc, NULL);
+			/* only reached if the exec failed */
+			exit(2);
+		}
+	}
+	if (info->mailaddr &&
+	    (strncmp(event, "Fail", 4)==0 ||
+	     strncmp(event, "Test", 4)==0 ||
+	     strncmp(event, "Spares", 6)==0 ||
+	     strncmp(event, "Degrade", 7)==0)) {
+		FILE *mp = popen(Sendmail, "w");
+		if (mp) {
+			FILE *mdstat;
+			char hname[256];
+			gethostname(hname, sizeof(hname));
+			signal(SIGPIPE, SIG_IGN);
+			if (info->mailfrom)
+				fprintf(mp, "From: %s\n", info->mailfrom);
+			else
+				fprintf(mp, "From: " Name " monitoring <root>\n");
+			fprintf(mp, "To: %s\n", info->mailaddr);
+			fprintf(mp, "Subject: %s event on %s:%s\n\n",
+				event, dev, hname);
+
+			fprintf(mp,
+				"This is an automatically generated"
+				" mail message from " Name "\n");
+			fprintf(mp, "running on %s\n\n", hname);
+
+			fprintf(mp,
+				"A %s event had been detected on"
+				" md device %s.\n\n", event, dev);
+
+			if (disc && disc[0] != ' ')
+				fprintf(mp,
+					"It could be related to"
+					" component device %s.\n\n", disc);
+			if (disc && disc[0] == ' ')
+				fprintf(mp, "Extra information:%s.\n\n", disc);
+
+			fprintf(mp, "Faithfully yours, etc.\n");
+
+			mdstat = fopen("/proc/mdstat", "r");
+			if (mdstat) {
+				char buf[8192];
+				int n;
+				fprintf(mp,
+					"\nP.S. The /proc/mdstat file"
+					" currently contains the following:\n\n");
+				while ( (n=fread(buf, 1, sizeof(buf), mdstat)) > 0)
+					n=fwrite(buf, 1, n, mp);
+				fclose(mdstat);
+			}
+			pclose(mp);
+		}
+	}
+
+	/* log the event to syslog maybe */
+	if (info->dosyslog) {
+		/* Log at a different severity depending on the event.
+		 *
+		 * These are the critical events:  */
+		if (strncmp(event, "Fail", 4)==0 ||
+		    strncmp(event, "Degrade", 7)==0 ||
+		    strncmp(event, "DeviceDisappeared", 17)==0)
+			priority = LOG_CRIT;
+		/* Good to know about, but are not failures: */
+		else if (strncmp(event, "Rebuild", 7)==0 ||
+			 strncmp(event, "MoveSpare", 9)==0 ||
+			 strncmp(event, "Spares", 6)==0)
+			priority = LOG_WARNING;
+		/* Everything else: */
+		else
+			priority = LOG_INFO;
+
+		if (disc)
+			syslog(priority,
+			       "%s event detected on md device %s,"
+			       " component device %s", event, dev, disc);
+		else
+			syslog(priority,
+			       "%s event detected on md device %s",
+			       event, dev);
+	}
+}
+
+/* Examine one monitored array and raise alerts for whatever changed.
+ *
+ * st:         record for the array; updated in place
+ * mdstat:     entries read from /proc/mdstat for this pass; the matching
+ *             entry has its devnm cleared to mark it as used
+ * test:       non-zero to send a "TestMessage" alert first
+ * ainfo:      where alerts are delivered
+ * increments: step, in percent, between RebuildNN alerts
+ * prefer:     passed through to map_dev_preferred() when naming components
+ */
+static int check_array(struct state *st, struct mdstat_ent *mdstat,
+ int test, struct alert_info *ainfo,
+ int increments, char *prefer)
+{
+ /* Update the state 'st' to reflect any changes shown in mdstat,
+ * or found by directly examining the array, and return
+ * '1' if the array is degraded, or '0' if it is optimal (or dead).
+ */
+ struct { int state, major, minor; } info[MAX_DISKS];
+ mdu_array_info_t array;
+ struct mdstat_ent *mse = NULL, *mse2;
+ char *dev = st->devname;
+ int fd;
+ int i;
+ int remaining_disks;
+ int last_disk;
+ int new_array = 0;
+
+ if (test)
+ alert("TestMessage", dev, NULL, ainfo);
+ fd = open(dev, O_RDONLY);
+ if (fd < 0) {
+ /* only the first failure in a row raises an alert */
+ if (!st->err)
+ alert("DeviceDisappeared", dev, NULL, ainfo);
+ st->err++;
+ return 0;
+ }
+ fcntl(fd, F_SETFD, FD_CLOEXEC);
+ if (ioctl(fd, GET_ARRAY_INFO, &array)<0) {
+ if (!st->err)
+ alert("DeviceDisappeared", dev, NULL, ainfo);
+ st->err++;
+ close(fd);
+ return 0;
+ }
+ /* It's much easier to list what array levels can't
+ * have a device disappear than all of them that can
+ */
+ if (array.level == 0 || array.level == -1) {
+ if (!st->err && !st->from_config)
+ alert("DeviceDisappeared", dev, "Wrong-Level", ainfo);
+ st->err++;
+ close(fd);
+ return 0;
+ }
+ /* NOTE(review): fd2devnm() result is copied unchecked into the
+ * 32-byte devnm - confirm it can neither be NULL nor longer */
+ if (st->devnm[0] == 0)
+ strcpy(st->devnm, fd2devnm(fd));
+
+ /* find this array's mdstat entry */
+ for (mse2 = mdstat ; mse2 ; mse2=mse2->next)
+ if (strcmp(mse2->devnm, st->devnm) == 0) {
+ mse2->devnm[0] = 0; /* flag it as "used" */
+ mse = mse2;
+ }
+
+ if (!mse) {
+ /* duplicated array in statelist
+ * or re-created after reading mdstat*/
+ st->err++;
+ close(fd);
+ return 0;
+ }
+ /* this array is in /proc/mdstat */
+ if (array.utime == 0)
+ /* external arrays don't update utime, so
+ * just make sure it is always different. */
+ array.utime = st->utime + 1;;
+
+ if (st->err) {
+ /* New array appeared where previously had an error */
+ st->err = 0;
+ st->percent = RESYNC_NONE;
+ new_array = 1;
+ alert("NewArray", st->devname, NULL, ainfo);
+ }
+
+ /* nothing changed since the last pass: just report the degraded
+ * status recorded earlier (mse cannot be NULL at this point) */
+ if (st->utime == array.utime &&
+ st->failed == array.failed_disks &&
+ st->working == array.working_disks &&
+ st->spare == array.spare_disks &&
+ (mse == NULL || (
+ mse->percent == st->percent
+ ))) {
+ close(fd);
+ if ((st->active < st->raid) && st->spare == 0)
+ return 1;
+ else
+ return 0;
+ }
+ if (st->utime == 0 && /* new array */
+ mse->pattern && strchr(mse->pattern, '_') /* degraded */
+ )
+ alert("DegradedArray", dev, NULL, ainfo);
+
+ if (st->utime == 0 && /* new array */
+ st->expected_spares > 0 &&
+ array.spare_disks < st->expected_spares)
+ alert("SparesMissing", dev, NULL, ainfo);
+ if (st->percent < 0 && st->percent != RESYNC_UNKNOWN &&
+ mse->percent >= 0)
+ alert("RebuildStarted", dev, NULL, ainfo);
+ /* progress crossed into a higher 'increments'-sized step */
+ if (st->percent >= 0 &&
+ mse->percent >= 0 &&
+ (mse->percent / increments) > (st->percent / increments)) {
+ char percentalert[15]; // "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
+
+ if((mse->percent / increments) == 0)
+ snprintf(percentalert, sizeof(percentalert), "RebuildStarted");
+ else
+ snprintf(percentalert, sizeof(percentalert), "Rebuild%02d", mse->percent);
+
+ alert(percentalert, dev, NULL, ainfo);
+ }
+
+ if (mse->percent == RESYNC_NONE &&
+ st->percent >= 0) {
+ /* Rebuild/sync/whatever just finished.
+ * If there is a number in /mismatch_cnt,
+ * we should report that.
+ */
+ struct mdinfo *sra =
+ sysfs_read(-1, st->devnm, GET_MISMATCH);
+ if (sra && sra->mismatch_cnt > 0) {
+ char cnt[80];
+ snprintf(cnt, sizeof(cnt),
+ " mismatches found: %d (on raid level %d)",
+ sra->mismatch_cnt, array.level);
+ alert("RebuildFinished", dev, cnt, ainfo);
+ } else
+ alert("RebuildFinished", dev, NULL, ainfo);
+ /* NOTE(review): other sysfs_read() results in this file are
+ * released with sysfs_free() - confirm free() is enough here */
+ if (sra)
+ free(sra);
+ }
+ st->percent = mse->percent;
+
+ /* collect per-slot disk info, stopping once nr_disks real
+ * devices have been seen */
+ remaining_disks = array.nr_disks;
+ for (i=0; i<MAX_DISKS && remaining_disks > 0;
+ i++) {
+ mdu_disk_info_t disc;
+ disc.number = i;
+ if (ioctl(fd, GET_DISK_INFO, &disc) >= 0) {
+ info[i].state = disc.state;
+ info[i].major = disc.major;
+ info[i].minor = disc.minor;
+ if (disc.major || disc.minor)
+ remaining_disks --;
+ } else
+ info[i].major = info[i].minor = 0;
+ }
+ /* slots at or beyond last_disk were never filled in */
+ last_disk = i;
+
+ /* for a subarray, remember the devnm of its container: the text
+ * after "external:" and its first character, up to the next '/' */
+ /* NOTE(review): the copy into the 32-byte parent_devnm is
+ * unbounded - confirm metadata_version cannot be longer */
+ if (mse->metadata_version &&
+ strncmp(mse->metadata_version, "external:", 9) == 0 &&
+ is_subarray(mse->metadata_version+9)) {
+ char *sl;
+ strcpy(st->parent_devnm,
+ mse->metadata_version+10);
+ sl = strchr(st->parent_devnm, '/');
+ if (sl)
+ *sl = 0;
+ } else
+ st->parent_devnm[0] = 0;
+ if (st->metadata == NULL &&
+ st->parent_devnm[0] == 0)
+ st->metadata = super_by_fd(fd, NULL);
+
+ close(fd);
+
+ /* compare every slot with what was recorded last time */
+ for (i=0; i<MAX_DISKS; i++) {
+ mdu_disk_info_t disc = {0,0,0,0,0};
+ int newstate=0;
+ int change;
+ char *dv = NULL;
+ disc.number = i;
+ if (i < last_disk &&
+ (info[i].major || info[i].minor)) {
+ newstate = info[i].state;
+ dv = map_dev_preferred(
+ info[i].major, info[i].minor, 1,
+ prefer);
+ disc.state = newstate;
+ disc.major = info[i].major;
+ disc.minor = info[i].minor;
+ } else
+ newstate = (1 << MD_DISK_REMOVED);
+
+ /* slot is empty now: name the device that used to be there */
+ if (dv == NULL && st->devid[i])
+ dv = map_dev_preferred(
+ major(st->devid[i]),
+ minor(st->devid[i]), 1, prefer);
+ change = newstate ^ st->devstate[i];
+ /* no per-device alerts on the first look at an array */
+ if (st->utime && change && !st->err && !new_array) {
+ if ((st->devstate[i]&change)&(1<<MD_DISK_SYNC))
+ alert("Fail", dev, dv, ainfo);
+ else if ((newstate & (1<<MD_DISK_FAULTY)) &&
+ (disc.major || disc.minor) &&
+ st->devid[i] == makedev(disc.major, disc.minor))
+ alert("FailSpare", dev, dv, ainfo);
+ else if ((newstate&change)&(1<<MD_DISK_SYNC))
+ alert("SpareActive", dev, dv, ainfo);
+ }
+ st->devstate[i] = newstate;
+ st->devid[i] = makedev(disc.major, disc.minor);
+ }
+ st->active = array.active_disks;
+ st->working = array.working_disks;
+ st->spare = array.spare_disks;
+ st->failed = array.failed_disks;
+ st->utime = array.utime;
+ st->raid = array.raid_disks;
+ st->err = 0;
+ /* degraded with no spare of its own */
+ if ((st->active < st->raid) && st->spare == 0)
+ return 1;
+ return 0;
+}
+
+/* Add a state entry for every array in mdstat that no monitored array has
+ * claimed yet (check_array() clears devnm on the entries it used).
+ * raid0 and linear arrays are skipped; entries without a level
+ * (containers) are kept.
+ *
+ * mdstat:    entries read from /proc/mdstat for this pass
+ * statelist: head of the monitored list; new entries are pushed on front
+ * test:      non-zero to send a "TestMessage" alert for each new array
+ * info:      where alerts are delivered
+ *
+ * Returns 1 if at least one array was added, 0 otherwise.
+ */
+static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
+			  int test, struct alert_info *info)
+{
+	struct mdstat_ent *mse;
+	int new_found = 0;
+
+	for (mse=mdstat; mse; mse=mse->next)
+		if (mse->devnm[0] &&
+		    (!mse->level  || /* retrieve containers */
+		     (strcmp(mse->level, "raid0") != 0 &&
+		      strcmp(mse->level, "linear") != 0))
+			) {
+			struct state *st = xcalloc(1, sizeof *st);
+			mdu_array_info_t array;
+			int fd;
+			st->devname = xstrdup(get_md_name(mse->devnm));
+			if ((fd = open(st->devname, O_RDONLY)) < 0 ||
+			    ioctl(fd, GET_ARRAY_INFO, &array)< 0) {
+				/* no such array */
+				if (fd >=0) close(fd);
+				put_md_name(st->devname);
+				free(st->devname);
+				if (st->metadata) {
+					st->metadata->ss->free_super(st->metadata);
+					free(st->metadata);
+				}
+				free(st);
+				continue;
+			}
+			close(fd);
+			st->next = *statelist;
+			/* start in the error state so that the first
+			 * check_array() pass reports it as a new array */
+			st->err = 1;
+			st->from_auto = 1;
+			strcpy(st->devnm, mse->devnm);
+			st->percent = RESYNC_UNKNOWN;
+			st->expected_spares = -1;
+			if (mse->metadata_version &&
+			    strncmp(mse->metadata_version, "external:", 9) == 0 &&
+			    is_subarray(mse->metadata_version+9)) {
+				char *sl;
+				strcpy(st->parent_devnm,
+					mse->metadata_version+10);
+				/* cut at the '/' after the container name,
+				 * if there is one, as check_array() does */
+				sl = strchr(st->parent_devnm, '/');
+				if (sl)
+					*sl = 0;
+			} else
+				st->parent_devnm[0] = 0;
+			*statelist = st;
+			if (test)
+				alert("TestMessage", st->devname, NULL, info);
+			new_found = 1;
+		}
+	return new_found;
+}
+
+/* Work out the smallest spare that the array behind 'st' will accept.
+ *
+ * When the array has no metadata handler, or the handler offers no
+ * min_acceptable_spare_size method, *sizep is set to 0 and 0 is returned.
+ * Otherwise the metadata is loaded from the device, queried and released.
+ *
+ * Returns 0 with *sizep filled in, or 1 if the device cannot be opened or
+ * no superblock was loaded.
+ */
+static int get_min_spare_size_required(struct state *st, unsigned long long *sizep)
+{
+	struct supertype *meta = st->metadata;
+	int dfd;
+
+	if (meta == NULL || meta->ss->min_acceptable_spare_size == NULL) {
+		*sizep = 0;
+		return 0;
+	}
+
+	dfd = open(st->devname, O_RDONLY);
+	if (dfd < 0)
+		return 1;
+	/* external metadata is loaded as a whole container */
+	if (meta->ss->external)
+		meta->ss->load_container(meta, dfd, st->devname);
+	else
+		meta->ss->load_super(meta, dfd, st->devname);
+	close(dfd);
+
+	/* success of the load is judged by whether a superblock appeared */
+	if (!meta->sb)
+		return 1;
+
+	*sizep = meta->ss->min_acceptable_spare_size(meta);
+	meta->ss->free_super(meta);
+	return 0;
+}
+
+/* Decide whether 'from' may give up a spare for the benefit of 'to'.
+ * Returns 1 if it is an acceptable donor, 0 otherwise.
+ */
+static int check_donor(struct state *from, struct state *to)
+{
+	struct state *sub;
+
+	if (from == to)
+		return 0;
+	if (from->parent)
+		/* Cannot move from a member */
+		return 0;
+	if (from->err)
+		return 0;
+	if (from->metadata == NULL)
+		/* check_array() leaves metadata unset for subarrays and
+		 * when super_by_fd() finds nothing; without it we cannot
+		 * tell how spares may be taken from this array
+		 */
+		return 0;
+	for (sub = from->subarray; sub; sub = sub->subarray)
+		/* If source array has degraded subarrays, don't
+		 * remove anything
+		 */
+		if (sub->active < sub->raid)
+			return 0;
+	if (from->metadata->ss->external == 0)
+		if (from->active < from->raid)
+			return 0;
+	if (from->spare <= 0)
+		return 0;
+	return 1;
+}
+
+/* Pick a spare of 'from' that could be handed to 'to', using the per-slot
+ * state recorded by check_array().  Only slots from from->raid upwards are
+ * looked at, and only devices whose recorded state is 0.
+ *
+ * domlist:  domains the spare must be acceptable in (see domain_test())
+ * min_size: smallest usable device size, or 0 for no limit
+ *
+ * Returns the device number of the chosen spare, or 0 if none fits.
+ */
+static dev_t choose_spare(struct state *from, struct state *to,
+			struct domainlist *domlist, unsigned long long min_size)
+{
+	dev_t chosen = 0;
+	int slot;
+
+	for (slot = from->raid; slot < MAX_DISKS && !chosen; slot++) {
+		dev_t cand = from->devid[slot];
+		struct dev_policy *pol;
+		unsigned long long cand_size;
+
+		if (!(cand > 0) || from->devstate[slot] != 0)
+			continue;
+
+		/* external-metadata destinations do not get devices that
+		 * test_partition_from_id() flags */
+		if (to->metadata->ss->external &&
+		    test_partition_from_id(cand))
+			continue;
+
+		/* too small to be of use */
+		if (min_size &&
+		    dev_size_from_id(cand, &cand_size) &&
+		    cand_size < min_size)
+			continue;
+
+		pol = devid_policy(cand);
+		if (from->spare_group)
+			pol_add(&pol, pol_domain,
+				from->spare_group, NULL);
+		if (domain_test(domlist, pol, to->metadata->ss->name) == 1)
+			chosen = cand;
+		dev_policy_free(pol);
+	}
+	return chosen;
+}
+
+/* Container flavour of choose_spare(): the recorded devstate cannot be
+ * trusted for containers, so the metadata is loaded and consulted instead.
+ *
+ * When from == to the call is a probe of the destination itself.  It then
+ * returns non-zero either because the container already holds a suitable
+ * spare or because more disks are in sync than 'active' (a spare has been
+ * activated since the array was examined).
+ *
+ * Returns the device number of a usable spare (or 1 in the probe cases
+ * above), and 0 when nothing was found or the metadata could not be read.
+ */
+static dev_t container_choose_spare(struct state *from, struct state *to,
+				    struct domainlist *domlist,
+				    unsigned long long min_size, int active)
+{
+	struct supertype *meta = from->metadata;
+	struct mdinfo *found;
+	dev_t spare = 0;
+	int load_err;
+	int cfd;
+
+	cfd = open(from->devname, O_RDONLY);
+	if (cfd < 0)
+		return 0;
+	if (!meta->ss->getinfo_super_disks) {
+		close(cfd);
+		return 0;
+	}
+
+	load_err = meta->ss->load_container(meta, cfd, NULL);
+	close(cfd);
+	if (load_err)
+		return 0;
+
+	if (from == to) {
+		/* We must check if number of active disks has not increased
+		 * since ioctl in main loop. mdmon may have added spare
+		 * to subarray. If so we do not need to look for more spares
+		 * so return non zero value */
+		struct mdinfo *disks = meta->ss->getinfo_super_disks(meta);
+		struct mdinfo *d;
+		int in_sync = 0;
+
+		if (!disks) {
+			meta->ss->free_super(meta);
+			return 1;
+		}
+		for (d = disks->devs; d; d = d->next)
+			if ((d->disk.state & (1<<MD_DISK_SYNC)) &&
+			    !(d->disk.state & (1<<MD_DISK_FAULTY)))
+				in_sync++;
+		sysfs_free(disks);
+		if (active < in_sync) {
+			/* Spare just activated.*/
+			meta->ss->free_super(meta);
+			return 1;
+		}
+	}
+
+	/* We only need one spare so full list not needed */
+	found = container_choose_spares(meta, min_size, domlist,
+					from->spare_group,
+					to->metadata->ss->name, 1);
+	if (found) {
+		struct mdinfo *first = found->devs;
+		if (first)
+			spare = makedev(first->disk.major, first->disk.minor);
+		sysfs_free(found);
+	}
+	meta->ss->free_super(meta);
+	return spare;
+}
+
+/* For every degraded array that has no spare of its own, look through the
+ * other monitored arrays for one that can donate a spare and move it over.
+ * A "MoveSpare" alert is raised for each successful move.
+ */
+static void try_spare_migration(struct state *statelist, struct alert_info *info)
+{
+	struct state *from;
+	struct state *st;
+
+	link_containers_with_subarrays(statelist);
+	for (st = statelist; st; st = st->next)
+		if (st->active < st->raid &&
+		    st->spare == 0 && !st->err) {
+			struct domainlist *domlist = NULL;
+			int d;
+			struct state *to = st;
+			unsigned long long min_size;
+
+			if (to->parent_devnm[0] && !to->parent)
+				/* subarray monitored without parent container
+				 * we can't move spares here */
+				continue;
+
+			if (to->parent)
+				/* member of a container */
+				to = to->parent;
+
+			if (!to->metadata)
+				/* get_min_spare_size_required() accepts a
+				 * missing metadata handler, but everything
+				 * below dereferences it */
+				continue;
+
+			if (get_min_spare_size_required(to, &min_size))
+				continue;
+			if (to->metadata->ss->external) {
+				/* We must make sure there is
+				 * no suitable spare in container already.
+				 * If there is we don't add more */
+				dev_t devid = container_choose_spare(
+					to, to, NULL, min_size, st->active);
+				if (devid > 0)
+					continue;
+			}
+			for (d = 0; d < MAX_DISKS; d++)
+				if (to->devid[d])
+					domainlist_add_dev(&domlist,
+							   to->devid[d],
+							   to->metadata->ss->name);
+			if (to->spare_group)
+				domain_add(&domlist, to->spare_group);
+			/*
+			 * No spare migration if the destination
+			 * has no domain. Skip this array.
+			 */
+			if (!domlist)
+				continue;
+			for (from=statelist ; from ; from=from->next) {
+				dev_t devid;
+				if (!check_donor(from, to))
+					continue;
+				if (from->metadata->ss->external)
+					devid = container_choose_spare(
+						from, to, domlist, min_size, 0);
+				else
+					devid = choose_spare(from, to, domlist,
+							     min_size);
+				if (devid > 0
+				    && move_spare(from->devname, to->devname, devid)) {
+					alert("MoveSpare", to->devname, from->devname, info);
+					break;
+				}
+			}
+			domain_free(domlist);
+		}
+}
+
+/* Rebuild the container/subarray links across the whole state list.
+ * All links are wiped and recreated on every call, which is the safe
+ * choice given that entries can vanish or change between passes.
+ * A subarray is tied to the first error-free, non-subarray entry whose
+ * devnm equals the subarray's parent_devnm.
+ */
+static void link_containers_with_subarrays(struct state *list)
+{
+	struct state *member;
+	struct state *owner;
+
+	for (member = list; member; member = member->next) {
+		member->parent = NULL;
+		member->subarray = NULL;
+	}
+
+	for (member = list; member; member = member->next) {
+		if (!member->parent_devnm[0])
+			continue;
+		for (owner = list; owner; owner = owner->next) {
+			if (owner->err || owner->parent_devnm[0] != 0)
+				continue;
+			if (strcmp(owner->devnm, member->parent_devnm) != 0)
+				continue;
+			/* push the member on the front of the owner's chain */
+			member->parent = owner;
+			member->subarray = owner->subarray;
+			owner->subarray = member;
+			break;
+		}
+	}
+}
+
+/* Not really Monitor but ... */
+/* Block until any resync/recovery shown for 'dev' in /proc/mdstat is over.
+ *
+ * Returns 2 if 'dev' cannot be found, 1 if there was nothing to wait for,
+ * and 0 if at least one wait was needed.
+ */
+int Wait(char *dev)
+{
+	struct stat stb;
+	char devnm[32];
+	int rv = 1;
+
+	if (stat(dev, &stb) != 0) {
+		pr_err("Cannot find %s: %s\n", dev,
+			strerror(errno));
+		return 2;
+	}
+	strcpy(devnm, stat2devnm(&stb));
+
+	while(1) {
+		struct mdstat_ent *ms = mdstat_read(1, 0);
+		struct mdstat_ent *e;
+
+		for (e=ms ; e; e=e->next)
+			if (strcmp(e->devnm, devnm) == 0)
+				break;
+
+		/* e is NULL when the array is not listed in mdstat */
+		if (e && e->percent == RESYNC_NONE) {
+			/* We could be in the brief pause before something
+			 * starts. /proc/mdstat doesn't show that, but
+			 * sync_action does.
+			 */
+			struct mdinfo mdi;
+			char buf[21];
+			sysfs_init(&mdi, -1, devnm);
+			if (sysfs_get_str(&mdi, NULL, "sync_action",
+					  buf, 20) > 0 &&
+			    strcmp(buf,"idle\n") != 0)
+				e->percent = RESYNC_UNKNOWN;
+		}
+		if (!e || e->percent == RESYNC_NONE) {
+			if (e && e->metadata_version &&
+			    strncmp(e->metadata_version, "external:", 9) == 0) {
+				if (is_subarray(&e->metadata_version[9]))
+					ping_monitor(&e->metadata_version[9]);
+				else
+					ping_monitor(devnm);
+			}
+			free_mdstat(ms);
+			return rv;
+		}
+		free_mdstat(ms);
+		/* from here on we have waited at least once */
+		rv = 0;
+		mdstat_wait(5);
+	}
+}
+
+#ifndef MDASSEMBLE
+
+/* array_state words that WaitClean() accepts as "no longer dirty" */
+static char *clean_states[] = {
+ "clear", "inactive", "readonly", "read-auto", "clean", NULL };
+
+/* Wait for the array behind 'dev' to reach a clean array_state and then
+ * ping the monitor so that the metadata gets marked clean as well.
+ *
+ * dev:     path of the md device
+ * sock:    passed to fping_monitor()
+ * verbose: non-zero to report errors with pr_err()
+ *
+ * Returns 0 on success or when no wait is needed, 1 on failure.  Note that
+ * failing to read the sysfs attributes also returns 0.
+ */
+int WaitClean(char *dev, int sock, int verbose)
+{
+ int fd;
+ struct mdinfo *mdi;
+ int rv = 1;
+ char devnm[32];
+
+ fd = open(dev, O_RDONLY);
+ if (fd < 0) {
+ if (verbose)
+ pr_err("Couldn't open %s: %s\n", dev, strerror(errno));
+ return 1;
+ }
+
+ strcpy(devnm, fd2devnm(fd));
+ mdi = sysfs_read(fd, devnm, GET_VERSION|GET_LEVEL|GET_SAFEMODE);
+ if (!mdi) {
+ if (verbose)
+ pr_err("Failed to read sysfs attributes for "
+ "%s\n", dev);
+ close(fd);
+ return 0;
+ }
+
+ /* from here on rv == 0 means "nothing to wait for" */
+ switch(mdi->array.level) {
+ case LEVEL_LINEAR:
+ case LEVEL_MULTIPATH:
+ case 0:
+ /* safemode delay is irrelevant for these levels */
+ rv = 0;
+ }
+
+ /* for internal metadata the kernel handles the final clean
+ * transition, containers can never be dirty
+ */
+ if (!is_subarray(mdi->text_version))
+ rv = 0;
+
+ /* safemode disabled ? */
+ if (mdi->safe_mode_delay == 0)
+ rv = 0;
+
+ if (rv) {
+ int state_fd = sysfs_open(fd2devnm(fd), NULL, "array_state");
+ char buf[20];
+ int delay = 5000;
+
+ /* minimize the safe_mode_delay and prepare to wait up to 5s
+ * for writes to quiesce
+ */
+ sysfs_set_safemode(mdi, 1);
+
+ /* wait for array_state to be clean */
+ /* NOTE(review): presumably sysfs_match_word() returns the index
+ * of the matching word, so <= 4 means "one of clean_states" -
+ * confirm against its definition */
+ while (1) {
+ rv = read(state_fd, buf, sizeof(buf));
+ if (rv < 0)
+ break;
+ if (sysfs_match_word(buf, clean_states) <= 4)
+ break;
+ rv = sysfs_wait(state_fd, &delay);
+ if (rv < 0 && errno != EINTR)
+ break;
+ lseek(state_fd, 0, SEEK_SET);
+ }
+ /* a negative rv here means the read or the wait failed */
+ if (rv < 0)
+ rv = 1;
+ else if (fping_monitor(sock) == 0 ||
+ ping_monitor(mdi->text_version) == 0) {
+ /* we need to ping to close the window between array
+ * state transitioning to clean and the metadata being
+ * marked clean
+ */
+ rv = 0;
+ } else
+ rv = 1;
+ if (rv && verbose)
+ pr_err("Error waiting for %s to be clean\n",
+ dev);
+
+ /* restore the original safe_mode_delay */
+ sysfs_set_safemode(mdi, mdi->safe_mode_delay);
+ close(state_fd);
+ }
+
+ sysfs_free(mdi);
+ close(fd);
+
+ return rv;
+}
+#endif /* MDASSEMBLE */
diff --git a/Query.c b/Query.c
new file mode 100644
index 00000000..fbc1d103
--- /dev/null
+++ b/Query.c
@@ -0,0 +1,126 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2002-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include "md_p.h"
+#include "md_u.h"
+
+int Query(char *dev)
+{
+	/* Print a short description of 'dev': first whether it is an md
+	 * array (and, if it is active, its size, level and device counts),
+	 * then whether it carries a superblock that makes it a member of
+	 * some array.
+	 * Returns 1 if the device cannot be opened, 0 otherwise.
+	 */
+	struct supertype *st = NULL;
+	struct map_ent *me, *map = NULL;
+	struct mdinfo info;
+	struct stat stb;
+	mdu_array_info_t array;
+	mdu_disk_info_t disc;
+	unsigned long long larray_size;
+	char *activity;
+	char *mddev;
+	int uuid[4];
+	int ioctlerr;
+	int superror;
+	int vers;
+	int fd;
+
+	fd = open(dev, O_RDONLY);
+	if (fd < 0){
+		pr_err("cannot open %s: %s\n",
+			dev, strerror(errno));
+		return 1;
+	}
+
+	vers = md_get_version(fd);
+	ioctlerr = (ioctl(fd, GET_ARRAY_INFO, &array) < 0) ? errno : 0;
+
+	fstat(fd, &stb);
+
+	/* the size is only looked up when it is going to be printed */
+	if (vers>=9000 && !ioctlerr &&
+	    !get_dev_size(fd, NULL, &larray_size))
+		larray_size = 0;
+
+	if (vers < 0) {
+		printf("%s: is not an md array\n", dev);
+	} else if (vers < 9000) {
+		printf("%s: is an md device, but kernel cannot provide details\n", dev);
+	} else if (ioctlerr == ENODEV) {
+		printf("%s: is an md device which is not active\n", dev);
+	} else if (ioctlerr) {
+		printf("%s: is an md device, but gives \"%s\" when queried\n",
+		       dev, strerror(ioctlerr));
+	} else {
+		printf("%s: %s %s %d devices, %d spare%s. Use mdadm --detail for more detail.\n",
+		       dev,
+		       human_size_brief(larray_size,IEC),
+		       map_num(pers, array.level),
+		       array.raid_disks,
+		       array.spare_disks, array.spare_disks==1?"":"s");
+	}
+
+	/* second part: does the device itself hold a superblock? */
+	superror = -1;
+	st = guess_super(fd);
+	if (st && st->ss->compare_super != NULL)
+		superror = st->ss->load_super(st, fd, dev);
+	close(fd);
+	if (superror != 0)
+		return 0;
+
+	/* array might be active... */
+	st->ss->getinfo_super(st, &info, NULL);
+	st->ss->uuid_from_super(st, uuid);
+	me = map_by_uuid(&map, uuid);
+	if (!me) {
+		activity = "inactive";
+		mddev = "array";
+	} else {
+		mddev = me->path;
+		disc.number = info.disk.number;
+		activity = "undetected";
+		if (mddev && (fd = open(mddev, O_RDONLY))>=0) {
+			if (md_get_version(fd) >= 9000 &&
+			    ioctl(fd, GET_ARRAY_INFO, &array)>= 0) {
+				/* active only if the array's slot really
+				 * holds this very device */
+				if (ioctl(fd, GET_DISK_INFO, &disc) >= 0 &&
+				    makedev((unsigned)disc.major,(unsigned)disc.minor) == stb.st_rdev)
+					activity = "active";
+				else
+					activity = "mismatch";
+			}
+			close(fd);
+		}
+	}
+	printf("%s: device %d in %d device %s %s %s. Use mdadm --examine for more detail.\n",
+	       dev,
+	       info.disk.number, info.array.raid_disks,
+	       activity,
+	       map_num(pers, info.array.level),
+	       mddev);
+	if (st->ss == &super0)
+		put_md_name(mddev);
+	return 0;
+}
diff --git a/README.initramfs b/README.initramfs
new file mode 100644
index 00000000..8f9b8ddf
--- /dev/null
+++ b/README.initramfs
@@ -0,0 +1,123 @@
+Assembling md arrays at boot time.
+---------------------------------
+December 2005
+
+These notes apply to 2.6 kernels only and, in some cases,
+to 2.6.15 or later.
+
+Md arrays can be assembled at boot time using the 'autodetect' functionality
+which is triggered by storing components of an array in partitions of type
+'fd' - Linux Raid Autodetect.
+They can also be assembled by specifying the component devices in a
+kernel parameter such as
+ md=0,/dev/sda,/dev/sdb
+In this case, /dev/md0 will be assembled (because of the 0) from the listed
+devices.
+
+These mechanisms, while useful, do not provide complete functionality
+and are unlikely to be extended. The preferred way to assemble md
+arrays at boot time is using 'mdadm' or 'mdassemble' (which is a
+trimmed-down mdadm). To assemble an array which contains the root
+filesystem, mdadm needs to be run before that filesystem is mounted,
+and so needs to be run from an initial-ram-fs. It is how this can
+work that is the primary focus of this document.
+
+It should be noted up front that only the array containing the root
+filesystem should be assembled from the initramfs. Any other arrays
+should be assembled under the control of files on the main filesystem
+as this enhances flexibility and maintainability.
+
+A minimal initramfs for assembling md arrays can be created using 3
+files and one directory. These are:
+
+/bin Directory
+/bin/mdadm statically linked mdadm binary
+/bin/busybox statically linked busybox binary
+/bin/sh hard link to /bin/busybox
+/init a shell script which calls mdadm appropriately.
+
+An example init script is:
+
+==============================================
+#!/bin/sh
+
+echo 'Auto-assembling boot md array'
+mkdir /proc
+mount -t proc proc /proc
+if [ -n "$rootuuid" ]
+then arg=--uuid=$rootuuid
+elif [ -n "$mdminor" ]
+then arg=--super-minor=$mdminor
+else arg=--super-minor=0
+fi
+echo "Using $arg"
+mdadm -Acpartitions $arg --auto=part /dev/mda
+cd /
+mount /dev/mda1 /root || mount /dev/mda /root
+umount /proc
+cd /root
+exec chroot . /sbin/init < /dev/console > /dev/console 2>&1
+=============================================
+
+This could certainly be extended, or merged into a larger init script.
+Though tested and in production use, it is not presented here as
+"The Right Way" to do it, but as a useful example.
+Some key points are:
+
+ /proc needs to be mounted so that /proc/partitions can be accessed
+ by mdadm, and so that /proc/filesystems can be accessed by mount.
+
+ The uuid of the array can be passed in as a kernel parameter
+ (rootuuid). As the kernel doesn't use this value, it is made available
+ in the environment for /init
+
+ If no uuid is given, we default to md0, (--super-minor=0) which is
+ commonly used to store the root filesystem. This may not work in
+ all situations.
+
+ We assemble the array as a partitionable array (/dev/mda) even if we
+ end up using the whole array. There is no cost in using the partitionable
+ interface, and in this context it is simpler.
+
+ We try mounting both /dev/mda1 and /dev/mda as they are the most likely
+ parts of the array to contain the root filesystem.
+
+ The --auto flag is given to mdadm so that it will create /dev/md*
+ files automatically. This is needed as /dev will not contain
+ any md files, and udev will not create them (as udev only creates device
+ files after the device exists, and mdadm needs the device file to create
+ the device). Note that the created md files may not exist in /dev
+ of the mounted root filesystem. This needs to be dealt with separately
+ from mdadm - possibly using udev.
+
+ We do not need to create device files for the components which will
+ be assembled into /dev/mda. mdadm finds the major/minor numbers from
+ /proc/partitions and creates a temporary /dev file if one doesn't already
+ exist.
+
+The script "mkinitramfs" which is included with the mdadm distribution
+can be used to create a minimal initramfs. It creates a file called
+'init.cpio.gz' which can be specified as an 'initrd' to lilo or grub
+(or whatever boot loader is being used).
+
+
+
+
+Resume from an md array
+-----------------------
+
+If you want to make use of the suspend-to-disk/resume functionality in Linux,
+and want to have swap on an md array, you will need to assemble the array
+before resume is possible.
+However, because the array is active in the resumed image, you do not want
+anything written to any drives during the resume process, such as superblock
+updates or array resync.
+
+This can be achieved in 2.6.15-rc1 and later kernels using the
+'start_ro' module parameter.
+Simply include the command
+ echo 1 > /sys/module/md_mod/parameters/start_ro
+before assembling the array with 'mdadm'.
+You can then echo
+ 9:0
+or whatever is appropriate to /sys/power/resume to trigger the resume.
diff --git a/ReadMe.c b/ReadMe.c
new file mode 100644
index 00000000..ec8e3487
--- /dev/null
+++ b/ReadMe.c
@@ -0,0 +1,636 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2014 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+
+/*
+ * Version banner: program name, version number and release date joined
+ * by compile-time string concatenation. VERSION and VERS_DATE only get
+ * the defaults below when they are not already defined (e.g. by the build).
+ * Name is not defined in this file - presumably it comes from mdadm.h.
+ */
+#ifndef VERSION
+#define VERSION "3.3.2"
+#endif
+#ifndef VERS_DATE
+#define VERS_DATE "21st August 2014"
+#endif
+char Version[] = Name " - v" VERSION " - " VERS_DATE "\n";
+
+/*
+ * File: ReadMe.c
+ *
+ * This file contains general comments about the implementation
+ * and the various usage messages that can be displayed by mdadm
+ *
+ */
+
+/*
+ * mdadm has 8 major modes of operation:
+ * 1/ Create
+ * This mode is used to create a new array with a superblock
+ * 2/ Assemble
+ * This mode is used to assemble the parts of a previously created
+ * array into an active array. Components can be explicitly given
+ * or can be searched for. mdadm (optionally) checks that the components
+ * do form a bona-fide array, and can, on request, fiddle superblock
+ * version numbers so as to assemble a faulty array.
+ * 3/ Build
+ * This is for building legacy arrays without superblocks
+ * 4/ Manage
+ * This is for doing something to one or more devices
+ * in an array, such as add,remove,fail.
+ * run/stop/readonly/readwrite are also available
+ * 5/ Misc
+ * This is for doing things to individual devices.
+ * They might be parts of an array so
+ * zero-superblock, examine might be appropriate
+ * They might be md arrays so
+ * run,stop,rw,ro,detail might be appropriate
+ * Also query will treat it as either
+ * 6/ Monitor
+ * This mode never exits but just monitors arrays and reports changes.
+ * 7/ Grow
+ * This mode allows for changing of key attributes of a raid array, such
+ * as size, number of devices, and possibly even layout.
+ * 8/ Incremental
+ * It assembles an array incrementally instead of all at once.
+ * As devices are discovered they can be passed to "mdadm --incremental"
+ * which will collect them. When enough devices to form an array are
+ * found, it is started.
+ */
+
+/*
+ * Short-option strings in getopt format (a letter followed by ':' takes
+ * an argument). The three variants differ only in 'b' and 'a':
+ * short_options - 'b' and 'a' take no argument
+ * short_bitmap_options - 'b' takes an argument ("b:")
+ * short_bitmap_auto_options - both 'b' and 'a' take an argument
+ * Which string is used in which mode is decided by the caller, not here.
+ * NOTE(review): "c:" appears twice in each string; the duplicate looks
+ * redundant - confirm before removing.
+ */
+char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:";
+char short_bitmap_options[]=
+ "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:";
+char short_bitmap_auto_options[]=
+ "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sa:rfRSow1tye:";
+
+/*
+ * Long-option table in getopt_long() format; each entry is
+ * {name, has_arg, flag, val} with has_arg 0 = no argument,
+ * 1 = required argument, 2 = optional argument. flag is 0 in every
+ * entry, so val is what identifies a matched option: either the
+ * character of the equivalent short option or a named constant
+ * (ManageOpt, KillOpt, ... - not defined in this file) for options
+ * that have no short form. Several names deliberately share one val
+ * (e.g. "follow"/"monitor", "program"/"alert") and act as synonyms.
+ * The all-zero entry terminates the table.
+ */
+struct option long_options[] = {
+ {"manage", 0, 0, ManageOpt},
+ {"misc", 0, 0, MiscOpt},
+ {"assemble", 0, 0, 'A'},
+ {"build", 0, 0, 'B'},
+ {"create", 0, 0, 'C'},
+ {"detail", 0, 0, 'D'},
+ {"examine", 0, 0, 'E'},
+ {"follow", 0, 0, 'F'},
+ {"grow", 0, 0, 'G'},
+ {"incremental",0,0, 'I'},
+ {"zero-superblock", 0, 0, KillOpt}, /* deliberately not a short_option */
+ {"query", 0, 0, 'Q'},
+ {"examine-bitmap", 0, 0, 'X'},
+ {"auto-detect", 0, 0, AutoDetect},
+ {"detail-platform", 0, 0, DetailPlatform},
+ {"kill-subarray", 1, 0, KillSubarray},
+ {"update-subarray", 1, 0, UpdateSubarray},
+ {"udev-rules", 2, 0, UdevRules},
+ {"offroot", 0, 0, OffRootOpt},
+ {"examine-badblocks", 0, 0, ExamineBB},
+
+ {"dump", 1, 0, Dump},
+ {"restore", 1, 0, Restore},
+
+ /* synonyms */
+ {"monitor", 0, 0, 'F'},
+
+ /* after those will normally come the name of the md device */
+
+ {"help", 0, 0, 'h'},
+ {"help-options",0,0, HelpOptions},
+ {"version", 0, 0, 'V'},
+ {"verbose", 0, 0, 'v'},
+ {"quiet", 0, 0, 'q'},
+
+ /* For create or build: */
+ {"chunk", 1, 0, ChunkSize},
+ {"rounding", 1, 0, ChunkSize}, /* for linear, chunk is really a
+ * rounding number */
+ {"level", 1, 0, 'l'}, /* 0,1,4,5,6,linear */
+ {"parity", 1, 0, Layout}, /* {left,right}-{a,}symmetric */
+ {"layout", 1, 0, Layout},
+ {"raid-disks",1, 0, 'n'},
+ {"raid-devices",1, 0, 'n'},
+ {"spare-disks",1,0, 'x'},
+ {"spare-devices",1,0, 'x'},
+ {"size", 1, 0, 'z'},
+ {"auto", 1, 0, Auto}, /* also for --assemble */
+ {"assume-clean",0,0, AssumeClean },
+ {"metadata", 1, 0, 'e'}, /* superblock format */
+ {"bitmap", 1, 0, Bitmap},
+ {"bitmap-chunk", 1, 0, BitmapChunk},
+ {"write-behind", 2, 0, WriteBehind},
+ {"write-mostly",0, 0, WriteMostly},
+ {"re-add", 0, 0, ReAdd},
+ {"homehost", 1, 0, HomeHost},
+ {"symlinks", 1, 0, Symlinks},
+ {"data-offset",1, 0, DataOffset},
+
+ /* For assemble */
+ {"uuid", 1, 0, 'u'},
+ {"super-minor",1,0, SuperMinor},
+ {"name", 1, 0, 'N'},
+ {"config", 1, 0, ConfigFile},
+ {"scan", 0, 0, 's'},
+ {"force", 0, 0, Force},
+ {"update", 1, 0, 'U'},
+ {"freeze-reshape", 0, 0, FreezeReshape},
+
+ /* Management */
+ {"add", 0, 0, Add},
+ {"add-spare", 0, 0, AddSpare},
+ {"remove", 0, 0, Remove},
+ {"fail", 0, 0, Fail},
+ {"set-faulty",0, 0, Fail},
+ {"replace", 0, 0, Replace},
+ {"with", 0, 0, With},
+ {"run", 0, 0, 'R'},
+ {"stop", 0, 0, 'S'},
+ {"readonly", 0, 0, 'o'},
+ {"readwrite", 0, 0, 'w'},
+ {"no-degraded",0,0, NoDegraded },
+ {"wait", 0, 0, WaitOpt},
+ {"wait-clean", 0, 0, Waitclean },
+ {"action", 1, 0, Action },
+
+ /* For Detail/Examine */
+ {"brief", 0, 0, Brief},
+ {"export", 0, 0, 'Y'},
+ {"sparc2.2", 0, 0, Sparc22},
+ {"test", 0, 0, 't'},
+ {"prefer", 1, 0, Prefer},
+
+ /* For Follow/monitor */
+ {"mail", 1, 0, EMail},
+ {"program", 1, 0, ProgramOpt},
+ {"alert", 1, 0, ProgramOpt},
+ {"increment", 1, 0, Increment},
+ {"delay", 1, 0, 'd'},
+ {"daemonise", 0, 0, Fork},
+ {"daemonize", 0, 0, Fork},
+ {"oneshot", 0, 0, '1'},
+ {"pid-file", 1, 0, 'i'},
+ {"syslog", 0, 0, 'y'},
+ {"no-sharing", 0, 0, NoSharing},
+
+ /* For Grow */
+ {"backup-file", 1,0, BackupFile},
+ {"invalid-backup",0,0,InvalidBackup},
+ {"array-size", 1, 0, 'Z'},
+ {"continue", 0, 0, Continue},
+
+ /* For Incremental */
+ {"rebuild-map", 0, 0, RebuildMapOpt},
+ {"path", 1, 0, IncrementalPath},
+
+ {0, 0, 0, 0}
+};
+
+/* Minimal usage hint: it only points the user at "mdadm --help". */
+char Usage[] =
+"Usage: mdadm --help\n"
+" for help\n"
+;
+
+/*
+ * General help text: one usage line and a one-line summary for each
+ * major mode, followed by pointers to the per-mode help and to the
+ * option summary. This is also the entry stored at mode_help[0].
+ */
+char Help[] =
+"mdadm is used for building, managing, and monitoring\n"
+"Linux md devices (aka RAID arrays)\n"
+"Usage: mdadm --create device options...\n"
+" Create a new array from unused devices.\n"
+" mdadm --assemble device options...\n"
+" Assemble a previously created array.\n"
+" mdadm --build device options...\n"
+" Create or assemble an array without metadata.\n"
+" mdadm --manage device options...\n"
+" make changes to an existing array.\n"
+" mdadm --misc options... devices\n"
+" report on or modify various md related devices.\n"
+" mdadm --grow options device\n"
+" resize/reshape an active array\n"
+" mdadm --incremental device\n"
+" add/remove a device to/from an array as appropriate\n"
+" mdadm --monitor options...\n"
+" Monitor one or more arrays for significant changes.\n"
+" mdadm device options...\n"
+" Shorthand for --manage.\n"
+"Any parameter that does not start with '-' is treated as a device name\n"
+"or, for --examine-bitmap, a file name.\n"
+"The first such name is often the name of an md device. Subsequent\n"
+"names are often names of component devices.\n"
+"\n"
+" For detailed help on the above major modes use --help after the mode\n"
+" e.g.\n"
+" mdadm --assemble --help\n"
+" For general help on options use\n"
+" mdadm --help-options\n"
+;
+
+/*
+ * Option summary text (it describes itself as the --help-options
+ * message): the common options plus the option that selects each
+ * major mode.
+ */
+char OptionHelp[] =
+"Any parameter that does not start with '-' is treated as a device name\n"
+"or, for --examine-bitmap, a file name.\n"
+"The first such name is often the name of an md device. Subsequent\n"
+"names are often names of component devices.\n"
+"\n"
+"Some common options are:\n"
+" --help -h : General help message or, after above option,\n"
+" mode specific help message\n"
+" --help-options : This help message\n"
+" --version -V : Print version information for mdadm\n"
+" --verbose -v : Be more verbose about what is happening\n"
+" --quiet -q : Don't print un-necessary messages\n"
+" --brief -b : Be less verbose, more brief\n"
+" --export -Y : With --detail, --detail-platform or --examine use\n"
+" key=value format for easy import into environment\n"
+" --force -f : Override normal checks and be more forceful\n"
+"\n"
+" --assemble -A : Assemble an array\n"
+" --build -B : Build an array without metadata\n"
+" --create -C : Create a new array\n"
+" --detail -D : Display details of an array\n"
+" --examine -E : Examine superblock on an array component\n"
+" --examine-bitmap -X: Display the detail of a bitmap file\n"
+" --examine-badblocks: Display list of known bad blocks on device\n"
+" --monitor -F : monitor (follow) some arrays\n"
+" --grow -G : resize/reshape an array\n"
+" --incremental -I : add/remove a single device to/from an array as appropriate\n"
+" --query -Q : Display general information about how a\n"
+" device relates to the md driver\n"
+" --auto-detect : Start arrays auto-detected by the kernel\n"
+;
+/*
+"\n"
+" For create or build:\n"
+" --bitmap= -b : File to store bitmap in - may pre-exist for --build\n"
+" --chunk= -c : chunk size of kibibytes\n"
+" --rounding= : rounding factor for linear array (==chunk size)\n"
+" --level= -l : raid level: 0,1,4,5,6,10,linear, or mp for create.\n"
+" : 0,1,10,mp,faulty or linear for build.\n"
+" --parity= -p : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n"
+" --layout= : same as --parity, for RAID10: [fno]NN \n"
+" --raid-devices= -n : number of active devices in array\n"
+" --spare-devices= -x: number of spare (eXtra) devices in initial array\n"
+" --size= -z : Size (in K) of each drive in RAID1/4/5/6/10 - optional\n"
+" --force -f : Honour devices as listed on command line. Don't\n"
+" : insert a missing drive for RAID5.\n"
+" --assume-clean : Assume the array is already in-sync. This is dangerous for RAID5.\n"
+" --bitmap-chunk= : chunksize of bitmap in bitmap file (Kilobytes)\n"
+" --delay= -d : seconds between bitmap updates\n"
+" --write-behind= : number of simultaneous write-behind requests to allow (requires bitmap)\n"
+" --name= -N : Textual name for array - max 32 characters\n"
+"\n"
+" For assemble:\n"
+" --bitmap= -b : File to find bitmap information in\n"
+" --uuid= -u : uuid of array to assemble. Devices which don't\n"
+" have this uuid are excluded\n"
+" --super-minor= -m : minor number to look for in super-block when\n"
+" choosing devices to use.\n"
+" --name= -N : Array name to look for in super-block.\n"
+" --config= -c : config file\n"
+" --scan -s : scan config file for missing information\n"
+" --force -f : Assemble the array even if some superblocks appear out-of-date\n"
+" --update= -U : Update superblock: try '-A --update=?' for list of options.\n"
+" --no-degraded : Do not start any degraded arrays - default unless --scan.\n"
+"\n"
+" For detail or examine:\n"
+" --brief -b : Just print device name and UUID\n"
+"\n"
+" For follow/monitor:\n"
+" --mail= -m : Address to mail alerts of failure to\n"
+" --program= -p : Program to run when an event is detected\n"
+" --alert= : same as --program\n"
+" --delay= -d : seconds of delay between polling state. default=60\n"
+"\n"
+" General management:\n"
+" --add -a : add, or hotadd subsequent devices\n"
+" --re-add : re-add a recently removed device\n"
+" --remove -r : remove subsequent devices\n"
+" --fail -f : mark subsequent devices as faulty\n"
+" --set-faulty : same as --fail\n"
+" --replace : mark a device for replacement\n"
+" --run -R : start a partially built array\n"
+" --stop -S : deactivate array, releasing all resources\n"
+" --readonly -o : mark array as readonly\n"
+" --readwrite -w : mark array as readwrite\n"
+" --zero-superblock : erase the MD superblock from a device.\n"
+" --wait -W : wait for recovery/resync/reshape to finish.\n"
+;
+*/
+
+/*
+ * Help text for create mode (stored at mode_help[CREATE]): a usage
+ * line, a description of the pre-creation checks and size selection,
+ * and the list of options accepted with --create.
+ */
+char Help_create[] =
+"Usage: mdadm --create device --chunk=X --level=Y --raid-devices=Z devices\n"
+"\n"
+" This usage will initialise a new md array, associate some\n"
+" devices with it, and activate the array. In order to create an\n"
+" array with some devices missing, use the special word 'missing' in\n"
+" place of the relevant device name.\n"
+"\n"
+" Before devices are added, they are checked to see if they already contain\n"
+" raid superblocks or filesystems. They are also checked to see if\n"
+" the variance in device size exceeds 1%.\n"
+" If any discrepancy is found, the user will be prompted for confirmation\n"
+" before the array is created. The presence of a '--run' can override this\n"
+" caution.\n"
+"\n"
+" If the --size option is given then only that many kilobytes of each\n"
+" device is used, no matter how big each device is.\n"
+" If no --size is given, the apparent size of the smallest drive given\n"
+" is used for raid level 1 and greater, and the full device is used for\n"
+" other levels.\n"
+"\n"
+" Options that are valid with --create (-C) are:\n"
+" --bitmap= : Create a bitmap for the array with the given filename\n"
+" : or an internal bitmap if 'internal' is given\n"
+" --chunk= -c : chunk size in kibibytes\n"
+" --rounding= : rounding factor for linear array (==chunk size)\n"
+" --level= -l : raid level: 0,1,4,5,6,10,linear,multipath and synonyms\n"
+" --parity= -p : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n"
+" --layout= : same as --parity, for RAID10: [fno]NN \n"
+" --raid-devices= -n : number of active devices in array\n"
+" --spare-devices= -x: number of spare (eXtra) devices in initial array\n"
+" --size= -z : Size (in K) of each drive in RAID1/4/5/6/10 - optional\n"
+" --data-offset= : Space to leave between start of device and start\n"
+" : of array data.\n"
+" --force -f : Honour devices as listed on command line. Don't\n"
+" : insert a missing drive for RAID5.\n"
+" --run -R : insist on running the array even if not all\n"
+" : devices are present or some look odd.\n"
+" --readonly -o : start the array readonly - not supported yet.\n"
+" --name= -N : Textual name for array - max 32 characters\n"
+" --bitmap-chunk= : bitmap chunksize in Kilobytes.\n"
+" --delay= -d : bitmap update delay in seconds.\n"
+"\n"
+;
+
+/*
+ * Help text for build mode (stored at mode_help[BUILD]): explains how
+ * --build differs from --create and lists the options it accepts.
+ */
+char Help_build[] =
+"Usage: mdadm --build device --chunk=X --level=Y --raid-devices=Z devices\n"
+"\n"
+" This usage is similar to --create. The difference is that it creates\n"
+" a legacy array without a superblock. With these arrays there is no\n"
+" difference between initially creating the array and subsequently\n"
+" assembling the array, except that hopefully there is useful data\n"
+" there in the second case.\n"
+"\n"
+" The level may only be 0, 1, 10, linear, multipath, or faulty.\n"
+" All devices must be listed and the array will be started once complete.\n"
+" Options that are valid with --build (-B) are:\n"
+" --bitmap= : file to store/find bitmap information in.\n"
+" --chunk= -c : chunk size in kibibytes\n"
+" --rounding= : rounding factor for linear array (==chunk size)\n"
+" --level= -l : 0, 1, 10, linear, multipath, faulty\n"
+" --raid-devices= -n : number of active devices in array\n"
+" --bitmap-chunk= : bitmap chunksize in Kilobytes.\n"
+" --delay= -d : bitmap update delay in seconds.\n"
+;
+
+/*
+ * Help text for assemble mode (stored at mode_help[ASSEMBLE]): how the
+ * md device, array identity and component devices are determined with
+ * and without --scan, followed by the options accepted with --assemble.
+ */
+char Help_assemble[] =
+"Usage: mdadm --assemble device options...\n"
+" mdadm --assemble --scan options...\n"
+"\n"
+"This usage assembles one or more raid arrays from pre-existing\n"
+"components.\n"
+"For each array, mdadm needs to know the md device, the identity of\n"
+"the array, and a number of sub devices. These can be found in a number\n"
+"of ways.\n"
+"\n"
+"The md device is given on the command line, is found listed in the\n"
+"config file, or can be deduced from the array identity.\n"
+"The array identity is determined either from the --uuid, --name, or\n"
+"--super-minor commandline arguments, from the config file,\n"
+"or from the first component device on the command line.\n"
+"\n"
+"The different combinations of these are as follows:\n"
+" If the --scan option is not given, then only devices and identities\n"
+" listed on the command line are considered.\n"
+" The first device will be the array device, and the remainder will be\n"
+" examined when looking for components.\n"
+" If an explicit identity is given with --uuid or --super-minor, then\n"
+" only devices with a superblock which matches that identity are considered,\n"
+" otherwise every device listed is considered.\n"
+"\n"
+" If the --scan option is given, and no devices are listed, then\n"
+" every array listed in the config file is considered for assembly.\n"
+" The identity of candidate devices is determined from the config file.\n"
+" After these arrays are assembled, mdadm will look for other devices\n"
+" that could form further arrays and tries to assemble them. This can\n"
+" be disabled using the 'AUTO' option in the config file.\n"
+"\n"
+" If the --scan option is given as well as one or more devices, then\n"
+" those devices are md devices that are to be assembled. Their identity\n"
+" and components are determined from the config file.\n"
+"\n"
+" If mdadm can not find all of the components for an array, it will assemble\n"
+" it but not activate it unless --run or --scan is given. To preserve this\n"
+" behaviour even with --scan, add --no-degraded. Note that \"all of the\n"
+" components\" means as many as were present the last time the array was running\n"
+" as recorded in the superblock. If the array was already degraded, and\n"
+" the missing device is not a new problem, it will still be assembled. It\n"
+" is only newly missing devices that cause the array not to be started.\n"
+"\n"
+"Options that are valid with --assemble (-A) are:\n"
+" --bitmap= : bitmap file to use with the array\n"
+" --uuid= -u : uuid of array to assemble. Devices which don't\n"
+" have this uuid are excluded\n"
+" --super-minor= -m : minor number to look for in super-block when\n"
+" choosing devices to use.\n"
+" --name= -N : Array name to look for in super-block.\n"
+" --config= -c : config file\n"
+" --scan -s : scan config file for missing information\n"
+" --run -R : Try to start the array even if not enough devices\n"
+" for a full array are present\n"
+" --force -f : Assemble the array even if some superblocks appear\n"
+" : out-of-date. This involves modifying the superblocks.\n"
+" --update= -U : Update superblock: try '-A --update=?' for option list.\n"
+" --no-degraded : Assemble but do not start degraded arrays.\n"
+" --readonly -o : Mark the array as read-only. No resync will start.\n"
+;
+
+/*
+ * Help text for manage mode (stored at mode_help[MANAGE]): describes
+ * how the array and component devices are taken from the command line
+ * and lists the management options.
+ */
+char Help_manage[] =
+"Usage: mdadm arraydevice options component devices...\n"
+"\n"
+"This usage is for managing the component devices within an array.\n"
+"The --manage option is not needed and is assumed if the first argument\n"
+"is a device name or a management option.\n"
+"The first device listed will be taken to be an md array device, any\n"
+"subsequent devices are (potential) components of that array.\n"
+"\n"
+"Options that are valid with management mode are:\n"
+" --add -a : hotadd subsequent devices to the array\n"
+" --re-add : subsequent devices are re-added if they were\n"
+" : recent members of the array\n"
+" --remove -r : remove subsequent devices, which must not be active\n"
+" --fail -f : mark subsequent devices as faulty\n"
+" --set-faulty : same as --fail\n"
+" --replace : mark device(s) to be replaced by spares. Once\n"
+" : replacement completes, device will be marked faulty\n"
+" --with : Indicate which spare a previous '--replace' should\n"
+" : prefer to use\n"
+" --run -R : start a partially built array\n"
+" --stop -S : deactivate array, releasing all resources\n"
+" --readonly -o : mark array as readonly\n"
+" --readwrite -w : mark array as readwrite\n"
+;
+
+/*
+ * Help text for misc mode (stored at mode_help[MISC]): options that act
+ * on individual devices, which may be arrays or array components.
+ */
+char Help_misc[] =
+"Usage: mdadm misc_option devices...\n"
+"\n"
+"This usage is for performing some task on one or more devices, which\n"
+"may be arrays or components, depending on the task.\n"
+"The --misc option is not needed (though it is allowed) and is assumed\n"
+"if the first argument is a misc option.\n"
+"\n"
+"Options that are valid with the miscellaneous mode are:\n"
+" --query -Q : Display general information about how a\n"
+" device relates to the md driver\n"
+" --detail -D : Display details of an array\n"
+" --detail-platform : Display hardware/firmware details\n"
+" --examine -E : Examine superblock on an array component\n"
+" --examine-bitmap -X: Display contents of a bitmap file\n"
+" --examine-badblocks: Display list of known bad blocks on device\n"
+" --zero-superblock : erase the MD superblock from a device.\n"
+" --run -R : start a partially built array\n"
+" --stop -S : deactivate array, releasing all resources\n"
+" --readonly -o : mark array as readonly\n"
+" --readwrite -w : mark array as readwrite\n"
+" --test -t : exit status 0 if ok, 1 if degraded, 2 if dead, 4 if missing\n"
+" --wait -W : wait for resync/rebuild/recovery to finish\n"
+" --action= : initiate or abort ('idle' or 'frozen') a 'check' or 'repair'.\n"
+;
+
+/*
+ * Help text for monitor mode (stored at mode_help[MONITOR]): describes
+ * which arrays are monitored, where alerts can be sent, and the options
+ * accepted with --monitor / --follow.
+ */
+char Help_monitor[] =
+"Usage: mdadm --monitor options devices\n"
+"\n"
+"This usage causes mdadm to monitor a number of md arrays by periodically\n"
+"polling their status and acting on any changes.\n"
+"If any devices are listed then those devices are monitored, otherwise\n"
+"all devices listed in the config file are monitored.\n"
+"The address for mailing advisories to, and the program to handle\n"
+"each change can be specified in the config file or on the command line.\n"
+"There must be at least one destination for advisories, whether\n"
+"an email address, a program, or --syslog\n"
+"\n"
+"Options that are valid with the monitor (-F --follow) mode are:\n"
+" --mail= -m : Address to mail alerts of failure to\n"
+" --program= -p : Program to run when an event is detected\n"
+" --alert= : same as --program\n"
+" --syslog -y : Report alerts via syslog\n"
+" --increment= -r : Report RebuildNN events in the given increment. default=20\n"
+" --delay= -d : seconds of delay between polling state. default=60\n"
+" --config= -c : specify a different config file\n"
+" --scan -s : find mail-address/program in config file\n"
+" --daemonise -f : Fork and continue in child, parent exits\n"
+" --pid-file= -i : In daemon mode write pid to specified file instead of stdout\n"
+" --oneshot -1 : Check for degraded arrays, then exit\n"
+" --test -t : Generate a TestMessage event against each array at startup\n"
+;
+
+/*
+ * Help text for grow mode (stored at mode_help[GROW]): the options that
+ * can be used to reconfigure a running array.
+ */
+char Help_grow[] =
+"Usage: mdadm --grow device options\n"
+"\n"
+"This usage causes mdadm to attempt to reconfigure a running array.\n"
+"This is only possible if the kernel being used supports a particular\n"
+"reconfiguration.\n"
+"\n"
+"Options that are valid with the grow (-G --grow) mode are:\n"
+" --level= -l : Tell mdadm what level to convert the array to.\n"
+" --layout= -p : For a FAULTY array, set/change the error mode.\n"
+" : for other arrays, update the layout\n"
+" --size= -z : Change the active size of devices in an array.\n"
+" : This is useful if all devices have been replaced\n"
+" : with larger devices. Value is in Kilobytes, or\n"
+" : the special word 'max' meaning 'as large as possible'.\n"
+" --assume-clean : When increasing the --size, this flag will avoid\n"
+" : a resync of the new space\n"
+" --chunk= -c : Change the chunksize of the array\n"
+" --raid-devices= -n : Change the number of active devices in an array.\n"
+" --add= -a : Add listed devices as part of reshape. This is\n"
+" : needed for resizing a RAID0 which cannot have\n"
+" : spares already present.\n"
+" --bitmap= -b : Add or remove a write-intent bitmap.\n"
+" --backup-file= file : A file on a different device to store data for a\n"
+" : short time while increasing raid-devices on a\n"
+" : RAID4/5/6 array. Also needed throughout a reshape\n"
+" : when changing parameters other than raid-devices\n"
+" --array-size= -Z : Change visible size of array. This does not change\n"
+" : any data on the device, and is not stable across restarts.\n"
+" --data-offset= : Location on device to move start of data to.\n"
+;
+
+/*
+ * Help text for incremental mode (stored at mode_help[INCREMENTAL]):
+ * incremental assembly, its reverse via the fail option, and the options
+ * accepted with --incremental.
+ */
+char Help_incr[] =
+"Usage: mdadm --incremental [-Rqrsf] device\n"
+"\n"
+"This usage allows for incremental assembly of md arrays. Devices can be\n"
+"added one at a time as they are discovered. Once an array has all expected\n"
+"devices, it will be started.\n"
+"\n"
+"Optionally, the process can be reversed by using the fail option.\n"
+"When fail mode is invoked, mdadm will see if the device belongs to an array\n"
+"and then both fail (if needed) and remove the device from that array.\n"
+"\n"
+"Options that are valid with incremental assembly (-I --incremental) are:\n"
+" --run -R : Run arrays as soon as a minimal number of devices are\n"
+" : present rather than waiting for all expected.\n"
+" --quiet -q : Don't print any information messages, just errors.\n"
+" --rebuild-map -r : Rebuild the 'map' file that mdadm uses for tracking\n"
+" : partial arrays.\n"
+" --scan -s : Use with -R to start any arrays that have the minimal\n"
+" : required number of devices, but are not yet started.\n"
+" --fail -f : First fail (if needed) and then remove device from\n"
+" : any array that it is a member of.\n"
+;
+
+/*
+ * Help text describing the mdadm.conf config file: line syntax, device
+ * lines, array lines and the other configuration keywords. It is not
+ * part of the mode_help table in this file.
+ */
+char Help_config[] =
+"The /etc/mdadm/mdadm.conf config file:\n\n"
+" The config file contains, apart from blank lines and comment lines that\n"
+" start with a hash(#), array lines, device lines, and various\n"
+" configuration lines.\n"
+" Each line is constructed of a number of space separated words, and can\n"
+" be continued on subsequent physical lines by indenting those lines.\n"
+"\n"
+" A device line starts with the word 'device' and then has a number of words\n"
+" which identify devices. These words should be names of devices in the\n"
+" filesystem, and can contain wildcards. There can be multiple words on each\n"
+" device line, and multiple device lines. All devices so listed are checked\n"
+" for relevant super blocks when assembling arrays.\n"
+"\n"
+" An array line starts with the word 'array'. This is followed by the name of\n"
+" the array device in the filesystem, e.g. '/dev/md2'. Subsequent words\n"
+" describe the identity of the array, used to recognise devices to include in the\n"
+" array. The identity can be given as a UUID with a word starting 'uuid=', or\n"
+" as a minor-number stored in the superblock using 'super-minor=', or as a list\n"
+" of devices. This is given as a comma separated list of names, possibly\n"
+" containing wildcards, preceded by 'devices='. If multiple criteria are given,\n"
+" then a device must match all of them to be considered.\n"
+"\n"
+" Other configuration lines include:\n"
+" mailaddr, mailfrom, program used for --monitor mode\n"
+" create, auto used when creating device names in /dev\n"
+" homehost, policy, part-policy used to guide policy in various\n"
+" situations\n"
+"\n"
+;
+
+/*
+ * Help text for each major mode, indexed by mode number. Entry 0 holds
+ * the general Help text; any index below mode_count that is not named
+ * here is left NULL by the designated initialisers.
+ */
+char *mode_help[mode_count] = {
+ [0] = Help,
+ [ASSEMBLE] = Help_assemble,
+ [BUILD] = Help_build,
+ [CREATE] = Help_create,
+ [MANAGE] = Help_manage,
+ [MISC] = Help_misc,
+ [MONITOR] = Help_monitor,
+ [GROW] = Help_grow,
+ [INCREMENTAL] = Help_incr,
+};
diff --git a/TODO b/TODO
new file mode 100644
index 00000000..279d20db
--- /dev/null
+++ b/TODO
@@ -0,0 +1,213 @@
+ - add 'name' field to metadata type and use it.
+ - use validate_geometry more
+ - metadata should be able to check/reject bitmap stuff.
+
+DDF:
+ Three new metadata types:
+ ddf - used only to create a container.
+ ddf-bvd - used to create an array in a container
+ ddf-svd - used to create a secondary array from bvds.
+
+ Usage:
+ mdadm -C /dev/ddf1 /dev/sd[abcdef]
+ mdadm -C /dev/md1 -e ddf /dev/sd[a-f]
+ mdadm -C /dev/md1 -l container /dev/sd[a-f]
+
+ Each of these creates a new ddf container using all those
+ devices. The name 'ddf*' signals that ddf metadata should be used.
+ '-e ddf' only supports one level - 'container'. 'container' is only
+ supported by ddf.
+
+ mdadm -C /dev/md1 -l0 -n4 /dev/ddf1 # or maybe not ???
+ mdadm -C /dev/md1 -l1 -n2 /dev/sda /dev/sdb
+ If exactly one device is given, and it is a container, we select
+ devices from that container.
+ If devices are given that are already in use, they must be in use by
+ a container, and the array is created in the container.
+ If devices given are bvds, we slip under the hood to make
+ the svd arrays.
+
+ mdadm -A /dev/ddf ......
+ base drives make a container. Anything in that container is started
+ auto-read-only.
+ if /dev/ddf is already assembled, we assemble bvds and svds inside it.
+
+
+2005-dec-20
+ Want an incremental assembly mode to work nicely with udev.
+ Core usage would be something like
+ mdadm --incr-assemble /dev/newdevice
+ This would
+ - examine the device to determine uuid etc.
+ - look for a match in /etc/mdadm.conf, abort if not found
+ - find that device and collect current contents
+ - perform an 'assemble' analysis to make sure we have the best set of devices.
+ - remove or add devices as appropriate
+ - possibly start the array if it was complete
+
+ Other usages could involve
+ - specify which array to auto-add to.
+ This requires an existing array for uuid matching... is there any point?
+
+ -
+
+
+2004-june-02
+ * Don't print 'errors' flag, it is meaningless. DONE
+ * Handle new superblock format
+ * create device file on demand, particularly partitionable devices. DONE
+ BUT figure a way to create the partition devices.
+ auto=partN
+ * Use Event: interface to listen for events. DONE, untested
+ * Make sure mdadm -As can assemble multi-level RAIDs ok.
+ * --build to build raid1 or multipath arrays
+ clean or not ???
+
+----------------------------------------------------------------------------
+* mdadm --monitor to monitor failed multipath paths and re-instate them.
+
+* Maybe make "--help" fit in 80x24 and have a --long-help with more info. DONE
+
+
+* maybe "missing" instead of <bold>missing</> in doco DONE
+* possibly wait for resync to start, or even finish while assembling.- NO
+
+* -Db should have a devices= entry if possible. - DONE
+* when assembling multipath arrays, ignore any error indicators. - DONE
+* rationalise --monitor usage:
+ mdadm --monitor
+ doesn't do as expected. DONE
+
+* --assemble could have a --update option. - DONE
+ following word can be:
+ sparc2.2
+ super-minor
+
+* mdadm /dev/md11, where md11 is raid0 can segfault, particularly when looking in the
+ [UU_UUU] string ... which doesn't exist !
+It should be more sensible. DONE
+
+Example:
+
+from Raimund Sacherer <raimund.sacherer@ngit.at>
+
+mke2fs -m0 -q /dev/ram1 300
+mount -n -t ext2 /dev/ram1 /tmp
+echo DEVICE /dev/[sh]* >> /tmp/mdadm.conf
+mdadm -Esb /dev/[sh]* 2>/dev/null >> /tmp/mdadm.conf
+mdadm -ARsc /tmp/mdadm.conf
+umount /tmp
+
+
+?? Allow -S /dev/md? - current complains subsequent not a/d/r - DONE
+
+* new "Query" mode to subsume --detail and --examine.
+ --query or -Q, takes a device and tells if it is an MD device,
+ and also tells if a raid superblock is found.
+ DONE
+
+* write mdstat.c to parse /proc/mdstat file
+ Build list of arrays: name, rebuild-percent
+ DONE
+
+* parse /proc/partitions and map major/minor into /dev/* names,
+ and use that for default DEVICE list ????
+
+* --detail --scan to read /proc/mdstat, and then iterate over these,
+ but assume --brief. --verbose can override
+ check each subdevice to see if it is in conf_get_devs.
+ Warn if not.
+ DONE, but don't warn yet...
+
+* Support multipath ... maybe...
+ maybe DONE
+
+* --follow to syslog
+
+* --follow to move spares around DONE
+
+* --follow to notice other events: DONE
+ rebuild started
+ spare activated
+ spare removed
+ spare added
+
+------------------------------------
+- --examine --scan scans all drives and build an mdadm.conf file DONE
+
+- check superblock checksum in examine DONE
+- report "chunk" or "rounding" depending on raid level DONE
+- report "linear" instead of "-1" for raid level DONE
+- decode layout depending on raid level DONE
+- --verbose and --force flags. DONE
+
+- set md_minor, *_disks for Create - DONE
+- for create raid5, how to choose between
+ all working, but not insync
+ one missing, one spare, insync DONE (--force)
+- and for raid1 - some failed drives... (missing)
+
+- when RUN_ARRAY, make sure *_disks counts are right
+
+- get --detail to extract extra stuff from superblock,
+ like uuid DONE
+- --detail --brief to give a config file line DONE
+- parse config file. DONE
+- test...
+
+- when --assemble --scan, if an underlying device is an md device,
+ then try to assemble that device first.
+
+
+- mdadm -S /dev/md0 /dev/md1 gives internal error FIXED
+
+- mdadm --detail --scan print summary of what it can find? DONE
+
+
+---------
+Assemble doesn't add spares. - DONE
+Create to allow "missing" name for devices.
+Create to accept "--force" to do exactly what is requested
+- get Assemble to upgrade devices if force flag.
+ARRAY lines in config file to have super_minor=n
+ARRAY lines in config file to have device=pattern, and only accept
+ those devices
+ If UUID given, insist on that
+ If not, but super_minor given, require all found with that minor
+ to have same uuid
+ If only device given, all valid supers on those devices must have
+ same uuid
+allow /dev/mdX as first argument before any options
+Possible --dry-run option for create and assemble--force
+
+Assemble to check that all devices mentioned in superblock
+ are present.
+
+New mode: --Monitor (or --Follow)
+ Periodically check status of all arrays (listed in config file).
+ Log every event and apparent cause - or differences
+ Email and alert - or run a program - for important events
+ Move spares around if necessary.
+
+ An Array line can have a spare-group= field that indicates that
+ the array shares spares with other arrays with the same
+ spare-group name.
+ If an array has a failed device and no spares, then check all other
+ arrays in the spare group. If one has no failures and a spare,
+ then consider that spare.
+ Choose the smallest considered spare that is large enough.
+ If there is one, then hot-remove it from its home, and
+ hot-add it to the array in question.
+
+ --mail-to address
+ --alert-handler program
+
+ Will also extract information from /proc/mdstat if present,
+ and consider 20% marks in rebuild as events.
+
+ Events are:
+ drive fails - causes mail to be sent
+ rebuild started
+ spare activated
+ spare removed
+ spare added
diff --git a/bitmap.c b/bitmap.c
new file mode 100644
index 00000000..020f10d9
--- /dev/null
+++ b/bitmap.c
@@ -0,0 +1,448 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2004 Paul Clements, SteelEye Technology, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "mdadm.h"
+
+/*
+ * Convert the multi-byte fields of a bitmap superblock from little-endian
+ * to CPU byte order, in place.  The uuid bytes are left untouched.
+ *
+ * This is 'static inline' rather than plain 'inline': under C99/C11 a
+ * plain inline definition provides no external definition, so any call
+ * the compiler decides not to inline would fail to link.
+ */
+static inline void sb_le_to_cpu(bitmap_super_t *sb)
+{
+	sb->magic = __le32_to_cpu(sb->magic);
+	sb->version = __le32_to_cpu(sb->version);
+	/* uuid gets no translation */
+	sb->events = __le64_to_cpu(sb->events);
+	sb->events_cleared = __le64_to_cpu(sb->events_cleared);
+	sb->state = __le32_to_cpu(sb->state);
+	sb->chunksize = __le32_to_cpu(sb->chunksize);
+	sb->daemon_sleep = __le32_to_cpu(sb->daemon_sleep);
+	sb->sync_size = __le64_to_cpu(sb->sync_size);
+	sb->write_behind = __le32_to_cpu(sb->write_behind);
+}
+
+/*
+ * Convert a superblock from CPU byte order to little-endian, in place.
+ * Implemented by calling sb_le_to_cpu(), since the same transformation
+ * serves both directions.
+ */
+static inline void sb_cpu_to_le(bitmap_super_t *sb)
+{
+	sb_le_to_cpu(sb); /* these are really the same thing */
+}
+
+mapping_t bitmap_states[] = { /* state number -> display text, looked up by bitmap_state() */
+ { "OK", 0 },
+ { "Out of date", 2 }, /* 2 is the value bitmap.h gives BITMAP_STALE */
+ { NULL, -1 } /* presumably the end-of-table marker map_num() expects -- confirm */
+};
+
+/*
+ * Return the display text for a bitmap superblock state number, or
+ * "Unknown" when bitmap_states has no entry for it.
+ */
+const char *bitmap_state(int state_num)
+{
+	char *name = map_num(bitmap_states, state_num);
+
+	if (name == NULL)
+		return "Unknown";
+	return name;
+}
+
+/*
+ * Format a byte count as a whole number followed by a binary unit suffix
+ * ("B", "KB", "MB", "GB" or "TB"), rounding down.
+ *
+ * Returns a pointer to a static buffer that is overwritten by the next
+ * call.  Scaling stops at "TB": without that bound a large 64-bit value
+ * would index past the end of the suffix table.
+ */
+const char *human_chunksize(unsigned long bytes)
+{
+	static char buf[16];
+	static const char *const suffixes[] = { "B", "KB", "MB", "GB", "TB" };
+	const unsigned int last = sizeof(suffixes) / sizeof(suffixes[0]) - 1;
+	unsigned int i = 0;
+
+	while (i < last && (bytes >> 10)) {
+		bytes >>= 10;
+		i++;
+	}
+
+	snprintf(buf, sizeof(buf), "%lu %s", bytes, suffixes[i]);
+
+	return buf;
+}
+
+typedef struct bitmap_info_s { /* what bitmap_fd_read() returns */
+ bitmap_super_t sb; /* superblock, converted to CPU byte order by bitmap_fd_read() */
+ unsigned long long total_bits; /* bits covered by the scan; stays 0 when the scan is skipped */
+ unsigned long long dirty_bits; /* number of set bits found among total_bits */
+} bitmap_info_t;
+
+/*
+ * Count the set ("dirty") bits among the low num_bits bits of byte.
+ * A num_bits outside 1..8 yields 0.
+ *
+ * 'static inline' rather than plain 'inline': a plain inline definition
+ * provides no external definition under C99/C11, so a call that is not
+ * inlined would fail to link.
+ */
+static inline int count_dirty_bits_byte(char byte, int num_bits)
+{
+	unsigned int bits;
+	int num = 0;
+
+	if (num_bits < 1 || num_bits > 8)
+		return 0;
+
+	/* keep only the bits we were asked about */
+	bits = (unsigned char)byte & ((1u << num_bits) - 1);
+	while (bits) {
+		bits &= bits - 1;	/* clear the lowest set bit */
+		num++;
+	}
+
+	return num;
+}
+
+/*
+ * Count the set bits in the first num_bits bits of buf: every whole byte
+ * first, then the low bits of the following byte when num_bits is not a
+ * multiple of 8.
+ */
+int count_dirty_bits(char *buf, int num_bits)
+{
+	int whole_bytes = num_bits / 8;
+	int tail_bits = num_bits % 8;
+	int total = 0;
+	int idx;
+
+	for (idx = 0; idx < whole_bytes; idx++)
+		total += count_dirty_bits_byte(buf[idx], 8);
+
+	/* idx now sits on the first byte after the whole ones */
+	if (tail_bits != 0)
+		total += count_dirty_bits_byte(buf[idx], tail_bits);
+
+	return total;
+}
+
+/*
+ * Number of bitmap bits (chunks) needed for an array of array_size
+ * 512-byte sectors when each bit covers chunksize bytes, rounding up.
+ */
+unsigned long long bitmap_bits(unsigned long long array_size,
+			       unsigned long chunksize)
+{
+	unsigned long long array_bytes = array_size * 512;
+	unsigned long long rounded = array_bytes + chunksize - 1;
+
+	return rounded / chunksize;
+}
+
+/*
+ * Number of 512-byte sectors needed to hold the bitmap described by a
+ * superblock whose fields are still little-endian, rounding up.
+ */
+unsigned long bitmap_sectors(struct bitmap_super_s *bsb)
+{
+	int bits_per_sector = 8*512;
+	unsigned long long nbits;
+
+	nbits = bitmap_bits(__le64_to_cpu(bsb->sync_size),
+			    __le32_to_cpu(bsb->chunksize));
+
+	return (nbits + bits_per_sector - 1) / bits_per_sector;
+}
+
+/*
+ * Read a bitmap superblock from fd and, unless 'brief' is set or the
+ * superblock reports a zero sync_size or chunksize, scan the bitmap that
+ * follows it, counting total and dirty bits.
+ *
+ * Returns a newly allocated bitmap_info_t (the caller frees it) with the
+ * superblock converted to CPU byte order, or NULL if the aligned buffer
+ * cannot be allocated or the superblock cannot be read.
+ *
+ * The result of read() is kept in a signed ssize_t so a failed read (-1)
+ * is seen as a failure instead of being taken for a huge byte count.
+ */
+bitmap_info_t *bitmap_fd_read(int fd, int brief)
+{
+	/* Note: fd might be open O_DIRECT, so we must be
+	 * careful to align reads properly
+	 */
+	unsigned long long total_bits = 0, read_bits = 0, dirty_bits = 0;
+	bitmap_info_t *info;
+	void *buf;
+	ssize_t n;
+	size_t skip;
+
+	if (posix_memalign(&buf, 4096, 8192) != 0) {
+		pr_err("failed to allocate 8192 bytes\n");
+		return NULL;
+	}
+	n = read(fd, buf, 8192);
+
+	/* check before anything else can disturb errno */
+	if (n < 0 || (size_t)n < sizeof(info->sb)) {
+		pr_err("failed to read superblock of bitmap "
+			"file: %s\n", strerror(errno));
+		free(buf);
+		return NULL;
+	}
+
+	info = xmalloc(sizeof(*info));
+	memcpy(&info->sb, buf, sizeof(info->sb));
+	skip = sizeof(info->sb);
+
+	sb_le_to_cpu(&info->sb); /* convert superblock to CPU byte ordering */
+
+	if (brief || info->sb.sync_size == 0 || info->sb.chunksize == 0)
+		goto out;
+
+	/* read the rest of the file counting total bits and dirty bits --
+	 * we stop when either:
+	 * 1) we hit EOF, in which case we assume the rest of the bits (if any)
+	 *    are dirty
+	 * 2) we've read the full bitmap, in which case we ignore any trailing
+	 *    data in the file
+	 */
+	total_bits = bitmap_bits(info->sb.sync_size, info->sb.chunksize);
+
+	while (read_bits < total_bits) {
+		unsigned long long remaining = total_bits - read_bits;
+		unsigned long long avail;
+
+		if (n == 0) {
+			n = read(fd, buf, 8192);
+			skip = 0;
+			if (n <= 0)	/* EOF or read error */
+				break;
+		}
+		/* bits available in the buffer past the skipped prefix */
+		avail = ((unsigned long long)n - skip) * 8;
+		if (remaining > avail) /* we want the full buffer */
+			remaining = avail;
+
+		dirty_bits += count_dirty_bits((char *)buf + skip, remaining);
+
+		read_bits += remaining;
+		n = 0;
+	}
+
+	if (read_bits < total_bits) { /* file truncated... */
+		pr_err("WARNING: bitmap file is not large "
+			"enough for array size %llu!\n\n",
+			(unsigned long long)info->sb.sync_size);
+		total_bits = read_bits;
+	}
+out:
+	free(buf);
+	info->total_bits = total_bits;
+	info->dirty_bits = dirty_bits;
+	return info;
+}
+
+/*
+ * Open a bitmap file, or a block device carrying an internal bitmap,
+ * read-only with O_DIRECT.
+ *
+ * For a block device the metadata handler (*stp, or whatever
+ * guess_super() finds when *stp is NULL) is asked to position fd at the
+ * bitmap; if no handler is found the fd is rewound to offset 0.  *stp is
+ * updated with the handler that was used.
+ *
+ * Returns the open fd, or -1 on failure.  The fd is closed on every
+ * failure path, including the one where the metadata cannot hold a
+ * bitmap.
+ */
+int bitmap_file_open(char *filename, struct supertype **stp)
+{
+	int fd;
+	struct stat stb;
+	struct supertype *st = *stp;
+
+	if (stat(filename, &stb) < 0) {
+		pr_err("failed to find file %s: %s\n",
+			filename, strerror(errno));
+		return -1;
+	}
+
+	fd = open(filename, O_RDONLY|O_DIRECT);
+	if (fd < 0) {
+		pr_err("failed to open bitmap file %s: %s\n",
+			filename, strerror(errno));
+		return -1;
+	}
+
+	/* a regular file needs no further positioning */
+	if ((S_IFMT & stb.st_mode) != S_IFBLK)
+		return fd;
+
+	/* block device, so we are probably after an internal bitmap */
+	if (!st)
+		st = guess_super(fd);
+	if (!st) {
+		/* just look at device... */
+		lseek(fd, 0, 0);
+	} else if (!st->ss->locate_bitmap) {
+		pr_err("No bitmap possible with %s metadata\n",
+			st->ss->name);
+		close(fd);
+		return -1;
+	} else
+		st->ss->locate_bitmap(st, fd);
+
+	*stp = st;
+	return fd;
+}
+
+/* Return l with the order of its four bytes reversed. */
+__u32 swapl(__u32 l)
+{
+	return ((l >> 24) & 0x000000ff) |
+	       ((l >>  8) & 0x0000ff00) |
+	       ((l <<  8) & 0x00ff0000) |
+	       ((l << 24) & 0xff000000);
+}
+/*
+ * Read the bitmap file (or the internal bitmap of a member device) named
+ * by filename and print its superblock fields; unless 'brief' is set,
+ * also print the total and dirty bit counts.
+ *
+ * Returns 0 once a superblock with a supported version has been printed,
+ * and 1 if the file cannot be opened or read or the version is unknown.
+ * The fd is closed on every path, including when the read fails.
+ */
+int ExamineBitmap(char *filename, int brief, struct supertype *st)
+{
+	bitmap_super_t *sb;
+	bitmap_info_t *info;
+	int rv = 1;
+	char buf[64];
+	int swap;
+	int fd;
+	int i;
+	__u32 uuid32[4];
+
+	fd = bitmap_file_open(filename, &st);
+	if (fd < 0)
+		return rv;
+
+	info = bitmap_fd_read(fd, brief);
+	if (!info) {
+		close(fd);
+		return rv;
+	}
+	sb = &info->sb;
+	if (sb->magic != BITMAP_MAGIC && md_get_version(fd) > 0) {
+		pr_err("This is an md array. To view a bitmap you need to examine\n");
+		pr_err("a member device, not the array.\n");
+		pr_err("Reporting bitmap that would be used if this array were used\n");
+		pr_err("as a member of some other array\n");
+	}
+	close(fd);
+	printf(" Filename : %s\n", filename);
+	printf(" Magic : %08x\n", sb->magic);
+	if (sb->magic != BITMAP_MAGIC) {
+		pr_err("invalid bitmap magic 0x%x, the bitmap file appears\n",
+			sb->magic);
+		pr_err("to be corrupted or missing.\n");
+	}
+	printf(" Version : %d\n", sb->version);
+	if (sb->version < BITMAP_MAJOR_LO ||
+	    sb->version > BITMAP_MAJOR_HI) {
+		pr_err("unknown bitmap version %d, either the bitmap file\n",
+			sb->version);
+		pr_err("is corrupted or you need to upgrade your tools\n");
+		goto free_info;
+	}
+
+	rv = 0;
+	/* the metadata handler decides whether the uuid words are byte-swapped
+	 * for display; with no handler, swap everywhere except big-endian hosts
+	 */
+	if (st)
+		swap = st->ss->swapuuid;
+	else
+#if __BYTE_ORDER == BIG_ENDIAN
+		swap = 0;
+#else
+		swap = 1;
+#endif
+	memcpy(uuid32, sb->uuid, 16);
+	if (swap)
+		for (i = 0; i < 4; i++)
+			uuid32[i] = swapl(uuid32[i]);
+	printf(" UUID : %08x:%08x:%08x:%08x\n",
+	       uuid32[0],
+	       uuid32[1],
+	       uuid32[2],
+	       uuid32[3]);
+
+	printf(" Events : %llu\n", (unsigned long long)sb->events);
+	printf(" Events Cleared : %llu\n", (unsigned long long)sb->events_cleared);
+	printf(" State : %s\n", bitmap_state(sb->state));
+	printf(" Chunksize : %s\n", human_chunksize(sb->chunksize));
+	printf(" Daemon : %ds flush period\n", sb->daemon_sleep);
+	if (sb->write_behind)
+		snprintf(buf, sizeof(buf), "Allow write behind, max %d", sb->write_behind);
+	else
+		snprintf(buf, sizeof(buf), "Normal");
+	printf(" Write Mode : %s\n", buf);
+	printf(" Sync Size : %llu%s\n", (unsigned long long)sb->sync_size/2,
+	       human_size(sb->sync_size * 512));
+	if (brief)
+		goto free_info;
+	printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n",
+	       info->total_bits, info->dirty_bits,
+	       100.0 * info->dirty_bits / (info->total_bits?:1));
+free_info:
+	free(info);
+	return rv;
+}
+
+/*
+ * Create a bitmap file holding a superblock followed by a bitmap with
+ * every bit set.
+ *
+ * filename:     file to create; an existing file is refused unless force
+ * uuid:         16-byte uuid copied into the superblock (may be NULL)
+ * chunksize:    bytes per bitmap bit, or UnSet to pick a default that is
+ *               doubled until the array fits in 2^20 chunks
+ * daemon_sleep, write_behind, major: stored in the superblock as given
+ * array_size:   size in sectors, stored as sync_size
+ *
+ * Returns 0 on success and 1 on failure.  On failure after the file has
+ * been opened, the file is removed.  A failed flush of the buffered data
+ * counts as a failure.
+ */
+int CreateBitmap(char *filename, int force, char uuid[16],
+		 unsigned long chunksize, unsigned long daemon_sleep,
+		 unsigned long write_behind,
+		 unsigned long long array_size /* sectors */,
+		 int major)
+{
+	FILE *fp;
+	int rv = 1;
+	char block[512];
+	bitmap_super_t sb;
+	long long bytes, filesize;
+
+	if (!force && access(filename, F_OK) == 0) {
+		pr_err("bitmap file %s already exists, use --force to overwrite\n", filename);
+		return rv;
+	}
+
+	fp = fopen(filename, "w");
+	if (fp == NULL) {
+		pr_err("failed to open bitmap file %s: %s\n",
+			filename, strerror(errno));
+		return rv;
+	}
+
+	if (chunksize == UnSet) {
+		/* We don't want more than 2^21 chunks, as 2^11 fill up one
+		 * 4K page (2 bytes per chunk), and 2^10 address of those
+		 * fill up a 4K indexing page. 2^20 might be safer, especially
+		 * on 64bit hosts, so use that.
+		 */
+		chunksize = DEFAULT_BITMAP_CHUNK;
+		/* <<20 for 2^20 chunks, >>9 to convert bytes to sectors */
+		while (array_size > ((unsigned long long)chunksize << (20-9)))
+			chunksize <<= 1;
+	}
+
+	memset(&sb, 0, sizeof(sb));
+	sb.magic = BITMAP_MAGIC;
+	sb.version = major;
+	if (uuid != NULL)
+		memcpy(sb.uuid, uuid, 16);
+	sb.chunksize = chunksize;
+	sb.daemon_sleep = daemon_sleep;
+	sb.write_behind = write_behind;
+	sb.sync_size = array_size;
+
+	sb_cpu_to_le(&sb); /* convert to on-disk byte ordering */
+
+	if (fwrite(&sb, sizeof(sb), 1, fp) != 1) {
+		pr_err("failed to write superblock to bitmap file %s: %s\n", filename, strerror(errno));
+		goto out;
+	}
+
+	/* calculate the size of the bitmap and write it to disk */
+	bytes = (bitmap_bits(array_size, chunksize) + 7) / 8;
+	if (!bytes) {
+		rv = 0;
+		goto out;
+	}
+
+	filesize = bytes + sizeof(sb);
+
+	/* every bit starts out set */
+	memset(block, 0xff, sizeof(block));
+
+	/* whole blocks are written; the excess is trimmed off below */
+	while (bytes > 0) {
+		if (fwrite(block, sizeof(block), 1, fp) != 1) {
+			pr_err("failed to write bitmap file %s: %s\n", filename, strerror(errno));
+			goto out;
+		}
+		bytes -= sizeof(block);
+	}
+
+	/* buffered data may only reach the file here, so check it */
+	if (fflush(fp) != 0) {
+		pr_err("failed to write bitmap file %s: %s\n", filename, strerror(errno));
+		goto out;
+	}
+	/* make the file be the right size (well, to the nearest byte) */
+	if (ftruncate(fileno(fp), filesize))
+		perror("ftruncate");
+	rv = 0;
+out:
+	fclose(fp);
+	if (rv)
+		unlink(filename); /* possibly corrupted, better get rid of it */
+	return rv;
+}
+
+/*
+ * Rewrite the uuid in the bitmap superblock at the start of fd.
+ *
+ * Returns 0 on success, 1 if the superblock cannot be read or its magic
+ * does not match, and 2 if seeking back or writing it fails.  After a
+ * write attempt the file offset is returned to 0.
+ */
+int bitmap_update_uuid(int fd, int *uuid, int swap)
+{
+	struct bitmap_super_s bm;
+	int rv;
+
+	if (lseek(fd, 0, 0) != 0 ||
+	    read(fd, &bm, sizeof(bm)) != sizeof(bm) ||
+	    bm.magic != __cpu_to_le32(BITMAP_MAGIC))
+		return 1;
+
+	copy_uuid(bm.uuid, uuid, swap);
+
+	if (lseek(fd, 0, 0) != 0)
+		return 2;
+
+	rv = (write(fd, &bm, sizeof(bm)) != sizeof(bm)) ? 2 : 0;
+	lseek(fd, 0, 0);
+	return rv;
+}
diff --git a/bitmap.h b/bitmap.h
new file mode 100644
index 00000000..c8725a30
--- /dev/null
+++ b/bitmap.h
@@ -0,0 +1,287 @@
+/*
+ * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
+ *
+ * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
+ */
+#ifndef BITMAP_H
+#define BITMAP_H 1
+
+#define BITMAP_MAJOR_LO 3
+/* version 4 insists the bitmap is in little-endian order
+ * with version 3, it is host-endian which is non-portable
+ */
+#define BITMAP_MAJOR_HI 4
+#define BITMAP_MAJOR_HOSTENDIAN 3
+
+#define BITMAP_MINOR 39
+
+/*
+ * in-memory bitmap:
+ *
+ * Use 16 bit block counters to track pending writes to each "chunk".
+ * The 2 high order bits are special-purpose, the first is a flag indicating
+ * whether a resync is needed. The second is a flag indicating whether a
+ * resync is active.
+ * This means that the counter is actually 14 bits:
+ *
+ * +--------+--------+------------------------------------------------+
+ * | resync | resync | counter |
+ * | needed | active | |
+ * | (0-1) | (0-1) | (0-16383) |
+ * +--------+--------+------------------------------------------------+
+ *
+ * The "resync needed" bit is set when:
+ * a '1' bit is read from storage at startup.
+ * a write request fails on some drives
+ * a resync is aborted on a chunk with 'resync active' set
+ * It is cleared (and resync-active set) when a resync starts across all drives
+ * of the chunk.
+ *
+ *
+ * The "resync active" bit is set when:
+ * a resync is started on all drives, and resync_needed is set.
+ * resync_needed will be cleared (as long as resync_active wasn't already set).
+ * It is cleared when a resync completes.
+ *
+ * The counter counts pending write requests, plus the on-disk bit.
+ * When the counter is '1' and the resync bits are clear, the on-disk
+ * bit can be cleared as well, thus setting the counter to 0.
+ * When we set a bit, or in the counter (to start a write), if the field is
+ * 0, we first set the disk bit and set the counter to 1.
+ *
+ * If the counter is 0, the on-disk bit is clear and the stripe is clean
+ * Anything that dirties the stripe pushes the counter to 2 (at least)
+ * and sets the on-disk bit (lazily).
+ * If a periodic sweep finds the counter at 2, it is decremented to 1.
+ * If the sweep finds the counter at 1, the on-disk bit is cleared and the
+ * counter goes to zero.
+ *
+ * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
+ * counters as a fallback when "page" memory cannot be allocated:
+ *
+ * Normal case (page memory allocated):
+ *
+ * page pointer (32-bit)
+ *
+ * [ ] ------+
+ * |
+ * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters)
+ * c1 c2 c2048
+ *
+ * Hijacked case (page memory allocation failed):
+ *
+ * hijacked page pointer (32-bit)
+ *
+ * [ ][ ] (no page memory allocated)
+ * counter #1 (16-bit) counter #2 (16-bit)
+ *
+ */
+
+#ifdef __KERNEL__
+
+#define PAGE_BITS (PAGE_SIZE << 3)
+#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
+
+/* one 16-bit counter per chunk: two flag bits plus a 14-bit count */
+typedef __u16 bitmap_counter_t;
+#define COUNTER_BITS 16
+#define COUNTER_BIT_SHIFT 4
+#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8)
+#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3)
+
+#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
+#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
+#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)
+/* the argument is parenthesized so the cast applies to the whole
+ * expression passed in, not just its first operand
+ */
+#define NEEDED(x) (((bitmap_counter_t) (x)) & NEEDED_MASK)
+#define RESYNC(x) (((bitmap_counter_t) (x)) & RESYNC_MASK)
+#define COUNTER(x) (((bitmap_counter_t) (x)) & COUNTER_MAX)
+
+/* how many counters per page? */
+#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
+/* same, except a shift value for more efficient bitops */
+#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
+/* same, except a mask value for more efficient bitops */
+#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1)
+
+#define BITMAP_BLOCK_SIZE 512
+#define BITMAP_BLOCK_SHIFT 9
+
+/* how many blocks per chunk? (this is variable) */
+#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT)
+#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT)
+#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1)
+
+/* when hijacked, the counters and bits represent even larger "chunks" */
+/* there will be 1024 chunks represented by each counter in the page pointers */
+#define PAGEPTR_BLOCK_RATIO(bitmap) \
+ (CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1)
+#define PAGEPTR_BLOCK_SHIFT(bitmap) \
+ (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
+#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
+
+/*
+ * on-disk bitmap:
+ *
+ * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
+ * file a page at a time. There's a superblock at the start of the file.
+ */
+
+/* map chunks (bits) to file pages - offset by the size of the superblock */
+#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
+
+#endif
+
+/*
+ * bitmap structures:
+ */
+
+#define BITMAP_MAGIC 0x6d746962
+
+/* use these for bitmap->flags and bitmap->sb->state bit-fields;
+ * bitmap.c's bitmap_states table labels state value 2 as "Out of date"
+ */
+enum bitmap_state {
+ BITMAP_ACTIVE = 0x001, /* the bitmap is in use */
+ BITMAP_STALE = 0x002 /* the bitmap file is out of date or had -EIO */
+};
+
+/* the superblock at the front of the bitmap file -- little endian;
+ * the number opening each field comment is the field's byte offset
+ */
+typedef struct bitmap_super_s {
+ __u32 magic; /* 0 BITMAP_MAGIC */
+ __u32 version; /* 4 the bitmap major for now, could change... */
+ __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */
+ __u64 events; /* 24 event counter for the bitmap (1)*/
+ __u64 events_cleared;/*32 event counter when last bit cleared (2) */
+ __u64 sync_size; /* 40 the size of the md device's sync range(3) */
+ __u32 state; /* 48 bitmap state information */
+ __u32 chunksize; /* 52 the bitmap chunk size in bytes */
+ __u32 daemon_sleep; /* 56 seconds between disk flushes */
+ __u32 write_behind; /* 60 number of outstanding write-behind writes */
+
+ __u8 pad[256 - 64]; /* set to zero; follows the 64 bytes of fields above, for 256 bytes in total */
+} bitmap_super_t;
+
+/* notes:
+ * (1) This event counter is updated before the eventcounter in the md superblock
+ * When a bitmap is loaded, it is only accepted if this event counter is equal
+ * to, or one greater than, the event counter in the superblock.
+ * (2) This event counter is updated when the other one is *if*and*only*if* the
+ * array is not degraded. As bits are not cleared when the array is degraded,
+ * this represents the last time that any bits were cleared.
+ * If a device is being added that has an event count with this value or
+ * higher, it is accepted as conforming to the bitmap.
+ * (3) This is the number of sectors represented by the bitmap, and is the range that
+ * resync happens across. For raid1 and raid5/6 it is the size of individual
+ * devices. For raid10 it is the size of the array.
+ */
+
+#ifdef __KERNEL__
+
+/* the in-memory bitmap is represented by bitmap_pages
+ * (declared only when __KERNEL__ is defined)
+ */
+struct bitmap_page {
+ /*
+ * map points to the actual memory page
+ */
+ char *map;
+ /*
+ * in emergencies (when map cannot be alloced), hijack the map
+ * pointer and use it as two counters itself
+ */
+ unsigned int hijacked;
+ /*
+ * count of dirty bits on the page
+ */
+ int count;
+};
+
+/* keep track of bitmap file pages that have pending writes on them
+ * (declared only when __KERNEL__ is defined)
+ */
+struct page_list {
+ struct list_head list; /* list linkage -- presumably onto write_pages/complete_pages in struct bitmap; confirm */
+ struct page *page;
+};
+
+/* the main bitmap structure - one per mddev
+ * (declared only when __KERNEL__ is defined)
+ */
+struct bitmap {
+ struct bitmap_page *bp;
+ unsigned long pages; /* total number of pages in the bitmap */
+ unsigned long missing_pages; /* number of pages not yet allocated */
+
+ mddev_t *mddev; /* the md device that the bitmap is for */
+
+ int counter_bits; /* how many bits per block counter */
+
+ /* bitmap chunksize -- how much data does each bit represent? */
+ unsigned long chunksize;
+ unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */
+ unsigned long chunks; /* total number of data chunks for the array */
+
+ /* We hold a count on the chunk currently being synced, and drop
+ * it when the last block is started. If the resync is aborted
+ * midway, we need to be able to drop that count, so we remember
+ * the counted chunk..
+ */
+ unsigned long syncchunk;
+
+ __u64 events_cleared;
+
+ /* bitmap spinlock */
+ spinlock_t lock;
+
+ struct file *file; /* backing disk file */
+ struct page *sb_page; /* cached copy of the bitmap file superblock */
+ struct page **filemap; /* list of cache pages for the file */
+ unsigned long *filemap_attr; /* attributes associated w/ filemap pages */
+ unsigned long file_pages; /* number of pages in the file */
+
+ unsigned long flags;
+
+ /*
+ * the bitmap daemon - periodically wakes up and sweeps the bitmap
+ * file, cleaning up bits and flushing out pages to disk as necessary
+ */
+ mdk_thread_t *daemon;
+ unsigned long daemon_sleep; /* how many seconds between updates? */
+
+ /*
+ * bitmap write daemon - this daemon performs writes to the bitmap file
+ * this thread is only needed because of a limitation in ext3 (jbd)
+ * that does not allow a task to have two journal transactions ongoing
+ * simultaneously (even if the transactions are for two different
+ * filesystems) -- in the case of bitmap, that would be the filesystem
+ * that the bitmap file resides on and the filesystem that is mounted
+ * on the md device -- see current->journal_info in jbd/transaction.c
+ */
+ mdk_thread_t *write_daemon;
+ mdk_thread_t *writeback_daemon;
+ spinlock_t write_lock;
+ struct semaphore write_ready;
+ struct semaphore write_done;
+ unsigned long writes_pending;
+ wait_queue_head_t write_wait;
+ struct list_head write_pages;
+ struct list_head complete_pages;
+ mempool_t *write_pool;
+};
+
+/* the bitmap API */
+
+/* these are used only by md/bitmap */
+int bitmap_create(mddev_t *mddev);
+void bitmap_destroy(mddev_t *mddev);
+int bitmap_active(struct bitmap *bitmap);
+
+char *file_path(struct file *file, char *buf, int count);
+void bitmap_print_sb(struct bitmap *bitmap);
+int bitmap_update_sb(struct bitmap *bitmap);
+
+int bitmap_setallbits(struct bitmap *bitmap);
+
+/* these are exported */
+void bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors);
+void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
+ int success);
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks);
+void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted);
+void bitmap_close_sync(struct bitmap *bitmap);
+
+int bitmap_unplug(struct bitmap *bitmap);
+#endif
+
+#endif
diff --git a/config.c b/config.c
new file mode 100644
index 00000000..909f83f4
--- /dev/null
+++ b/config.c
@@ -0,0 +1,1184 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include "dlink.h"
+#include <dirent.h>
+#include <glob.h>
+#include <fnmatch.h>
+#include <ctype.h>
+#include <pwd.h>
+#include <grp.h>
+
+/*
+ * Read the config file
+ *
+ * conf_get_uuids gets a list of devicename+uuid pairs
+ * conf_get_devs gets device names after expanding wildcards
+ *
+ * Each keeps the returned list and frees it when asked to make
+ * a new list.
+ *
+ * The format of the config file needs to be fairly extensible.
+ * Now, arrays only have names and uuids and devices merely are.
+ * But later arrays might want names, and devices might want superblock
+ * versions, and who knows what else.
+ * I like free format, abhor backslash line continuation, adore
+ * indentation for structure and am ok about # comments.
+ *
+ * So, each line that isn't blank or a #comment must either start
+ * with a key word, and not be indented, or must start with a
+ * non-key-word and must be indented.
+ *
+ * Keywords are DEVICE and ARRAY ... and several others.
+ * DEV{ICE} introduces some devices that might contain raid components.
+ * e.g.
+ * DEV style=0 /dev/sda* /dev/hd*
+ * DEV style=1 /dev/sd[b-f]*
+ * ARR{AY} describes an array giving md device and attributes like uuid=whatever
+ * e.g.
+ * ARRAY /dev/md0 uuid=whatever name=something
+ * Spaces separate words on each line. Quoting, with "" or '' protects them,
+ * but may not wrap over lines
+ *
+ */
+
+/*
+ * Locations of the configuration file and of its companion ".d"
+ * directory.  CONFFILE and CONFFILE2 are only defined here when the
+ * build has not already supplied them.
+ */
+#ifndef CONFFILE
+#define CONFFILE "/etc/mdadm.conf"
+#endif
+#ifndef CONFFILE2
+/* for Debian compatibility .... */
+#define CONFFILE2 "/etc/mdadm/mdadm.conf"
+#endif
+/* Primary config file, and the directory named by appending ".d" to it. */
+char DefaultConfFile[] = CONFFILE;
+char DefaultConfDir[] = CONFFILE ".d";
+/* Alternate pair, used by load_conffile() when the primary file cannot be opened. */
+char DefaultAltConfFile[] = CONFFILE2;
+char DefaultAltConfDir[] = CONFFILE2 ".d";
+
+/*
+ * Kinds of config-file line.  Each value is also the index of its
+ * keyword in keywords[] below; LTEnd indexes the terminating NULL.
+ */
+enum linetype { Devices, Array, Mailaddr, Mailfrom, Program, CreateDev,
+ Homehost, AutoMode, Policy, PartPolicy, LTEnd };
+/* Lower-case keyword for every linetype; NULL terminated for match_keyword(). */
+char *keywords[] = {
+ [Devices] = "devices",
+ [Array] = "array",
+ [Mailaddr] = "mailaddr",
+ [Mailfrom] = "mailfrom",
+ [Program] = "program",
+ [CreateDev]= "create",
+ [Homehost] = "homehost",
+ [AutoMode] = "auto",
+ [Policy] = "policy",
+ [PartPolicy]="part-policy",
+ [LTEnd] = NULL
+};
+
+/*
+ * match_keyword - find the keyword that a config word abbreviates.
+ *
+ * 'word' matches when it is at least three characters long and is a
+ * case-insensitive prefix of a keyword.  The table is scanned in order
+ * and the first hit wins.
+ *
+ * Returns the index into keywords[], or -1 when nothing matches.
+ */
+
+int match_keyword(char *word)
+{
+	int wlen = strlen(word);
+	int idx = 0;
+
+	if (wlen < 3)
+		return -1;
+	while (keywords[idx] != NULL) {
+		if (strncasecmp(word, keywords[idx], wlen) == 0)
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
+/*
+ * One accepted word from a DEVICE line, as recorded by devline(): a
+ * path/glob pattern starting with '/', or the literal "partitions" or
+ * "containers".  cdevlist is the list head; devline() pushes new
+ * entries at the front.
+ */
+struct conf_dev {
+ struct conf_dev *next;
+ char *name;
+} *cdevlist = NULL;
+
+/*
+ * load_partitions - build a device list from /proc/partitions.
+ *
+ * Only lines that begin with a space are considered.  The first two
+ * numbers on such a line are taken as major and minor and handed to
+ * map_dev() to obtain a device name; lines that do not parse, or for
+ * which map_dev() returns nothing, are skipped.
+ *
+ * Returns a freshly allocated list (last line read comes first), or
+ * NULL when the file cannot be opened or yields no devices.
+ */
+struct mddev_dev *load_partitions(void)
+{
+	char line[1024];
+	struct mddev_dev *head = NULL;
+	FILE *fp = fopen("/proc/partitions", "r");
+
+	if (fp == NULL) {
+		pr_err("cannot open /proc/partitions\n");
+		return NULL;
+	}
+	while (fgets(line, sizeof(line), fp) != NULL) {
+		struct mddev_dev *dev;
+		char *after_major;
+		char *devname;
+		int major, minor;
+
+		line[sizeof(line) - 1] = '\0';
+		if (line[0] != ' ')
+			continue;
+		major = strtoul(line, &after_major, 10);
+		if (after_major == line || *after_major != ' ')
+			continue;
+		minor = strtoul(after_major, NULL, 10);
+
+		devname = map_dev(major, minor, 1);
+		if (devname == NULL)
+			continue;
+		dev = xmalloc(sizeof(*dev));
+		memset(dev, 0, sizeof(*dev));
+		dev->devname = xstrdup(devname);
+		dev->next = head;
+		head = dev;
+	}
+	fclose(fp);
+	return head;
+}
+
+/*
+ * load_containers - build a device list of the containers in mdstat.
+ *
+ * An mdstat entry is taken when its metadata_version starts with
+ * "external:" and the remainder is not a subarray reference.  The
+ * device name comes from the map file when map_by_devnm() finds the
+ * entry, otherwise it is "/dev/" followed by the mdstat device name.
+ *
+ * Returns a freshly allocated list (last entry seen comes first), or
+ * NULL when mdstat cannot be read or holds no containers.
+ */
+struct mddev_dev *load_containers(void)
+{
+	struct mdstat_ent *stat_list = mdstat_read(0, 0);
+	struct mdstat_ent *e;
+	struct mddev_dev *head = NULL;
+	struct map_ent *map = NULL;
+
+	if (stat_list == NULL)
+		return NULL;
+
+	for (e = stat_list; e != NULL; e = e->next) {
+		struct mddev_dev *dev;
+		struct map_ent *me;
+
+		if (e->metadata_version == NULL)
+			continue;
+		if (strncmp(e->metadata_version, "external:", 9) != 0)
+			continue;
+		if (is_subarray(&e->metadata_version[9]))
+			continue;
+
+		dev = xmalloc(sizeof(*dev));
+		memset(dev, 0, sizeof(*dev));
+		me = map_by_devnm(&map, e->dev);
+		if (me != NULL) {
+			dev->devname = xstrdup(me->path);
+		} else if (asprintf(&dev->devname, "/dev/%s", e->dev) < 0) {
+			/* could not build a name: drop this entry */
+			free(dev);
+			continue;
+		}
+		dev->next = head;
+		head = dev;
+	}
+	free_mdstat(stat_list);
+	map_free(map);
+
+	return head;
+}
+
+/*
+ * Defaults used when creating device nodes; individual members are
+ * overridden by createline() when the config file has a CREATE line.
+ * Members not named here are zero-initialised.
+ */
+struct createinfo createinfo = {
+ .autof = 2, /* by default, create devices with standard names */
+ .symlinks = 1,
+ .names = 0, /* By default, stick with numbered md devices. */
+ .bblist = 1, /* Use a bad block list by default */
+#ifdef DEBIAN
+ .gid = 6, /* disk */
+ .mode = 0660,
+#else
+ .mode = 0600,
+#endif
+};
+
+/*
+ * parse_auto - decode the value of an "auto=" setting.
+ *
+ * str:    text following "auto=" (may be NULL or empty)
+ * msg:    label used in the error message for an unrecognised value
+ * config: non-zero selects 5/6 instead of 3/4 for "md"/"mdp"
+ *
+ * Result by keyword (case is ignored):
+ *   NULL, "", "yes"   -> 2
+ *   "no"              -> 1
+ *   "md"              -> config ? 5 : 3
+ *   "mdp"             -> config ? 6 : 4
+ *   "p", "part..."    -> 6
+ *
+ * Any value other than exactly "no", "yes" or "md" may end in an
+ * optional '-' and a number.  That number (4 when absent, 1 when it
+ * is not positive) is shifted left by 3 bits and OR-ed into the result.
+ *
+ * An unrecognised value is reported and the program exits with status 2.
+ */
+int parse_auto(char *str, char *msg, int config)
+{
+	int autof;
+	if (str == NULL || *str == 0)
+		autof = 2;
+	else if (strcasecmp(str,"no")==0)
+		autof = 1;
+	else if (strcasecmp(str,"yes")==0)
+		autof = 2;
+	else if (strcasecmp(str,"md")==0)
+		autof = config?5:3;
+	else {
+		/* There might be digits, and maybe a hyphen, at the end */
+		char *e = str + strlen(str);
+		int num = 4;
+		int len;
+		/* <ctype.h> needs an unsigned char value; plain char may be negative */
+		while (e > str && isdigit((unsigned char)e[-1]))
+			e--;
+		if (*e) {
+			num = atoi(e);
+			if (num <= 0) num = 1;
+		}
+		if (e > str && e[-1] == '-')
+			e--;
+		len = e - str;
+		if ((len == 2 && strncasecmp(str,"md",2)==0)) {
+			autof = config ? 5 : 3;
+		} else if ((len == 3 && strncasecmp(str,"yes",3)==0)) {
+			autof = 2;
+		} else if ((len == 3 && strncasecmp(str,"mdp",3)==0)) {
+			autof = config ? 6 : 4;
+		} else if ((len == 1 && strncasecmp(str,"p",1)==0) ||
+			   (len >= 4 && strncasecmp(str,"part",4)==0)) {
+			autof = 6;
+		} else {
+			pr_err("%s arg of \"%s\" unrecognised: use no,yes,md,mdp,part\n"
+				"        optionally followed by a number.\n",
+				msg, str);
+			exit(2);
+		}
+		autof |= num << 3;
+	}
+	return autof;
+}
+
+/*
+ * createline - apply the settings on a CREATE line to 'createinfo'.
+ *
+ * 'line' is the head of a word list as produced by conf_line(); each
+ * following word is one "key=value" setting:
+ *   auto=      decoded by parse_auto()
+ *   owner=     numeric uid, or a user name looked up with getpwnam()
+ *   group=     numeric gid, or a group name looked up with getgrnam()
+ *   mode=      octal permission bits; an unparsable value resets to 0600
+ *   metadata=  first superlist[] handler that accepts the description
+ *              (only tried while no supertype has been chosen yet)
+ *   symlinks=yes|no, names=yes|no, bbl=yes|no
+ *
+ * Problems are reported with pr_err() and the offending word is
+ * skipped.  A user or group name that cannot be resolved leaves the
+ * previous uid/gid untouched.
+ */
+static void createline(char *line)
+{
+	char *w;
+	char *ep;
+
+	for (w=dl_next(line); w!=line; w=dl_next(w)) {
+		if (strncasecmp(w, "auto=", 5) == 0)
+			createinfo.autof = parse_auto(w+5, "auto=", 1);
+		else if (strncasecmp(w, "owner=", 6) == 0) {
+			unsigned long id;
+
+			if (w[6] == 0) {
+				pr_err("missing owner name\n");
+				continue;
+			}
+			/* only commit the number when the whole value is numeric */
+			id = strtoul(w+6, &ep, 10);
+			if (*ep == 0)
+				createinfo.uid = id;
+			else {
+				struct passwd *pw;
+				/* must be a name */
+				pw = getpwnam(w+6);
+				if (pw)
+					createinfo.uid = pw->pw_uid;
+				else
+					pr_err("CREATE user %s not found\n", w+6);
+			}
+		} else if (strncasecmp(w, "group=", 6) == 0) {
+			unsigned long id;
+
+			if (w[6] == 0) {
+				pr_err("missing group name\n");
+				continue;
+			}
+			/* only commit the number when the whole value is numeric */
+			id = strtoul(w+6, &ep, 10);
+			if (*ep == 0)
+				createinfo.gid = id;
+			else {
+				struct group *gr;
+				/* must be a name */
+				gr = getgrnam(w+6);
+				if (gr)
+					createinfo.gid = gr->gr_gid;
+				else
+					pr_err("CREATE group %s not found\n", w+6);
+			}
+		} else if (strncasecmp(w, "mode=", 5) == 0) {
+			if (w[5] == 0) {
+				pr_err("missing CREATE mode\n");
+				continue;
+			}
+			createinfo.mode = strtoul(w+5, &ep, 8);
+			if (*ep != 0) {
+				createinfo.mode = 0600;
+				pr_err("unrecognised CREATE mode %s\n",
+					w+5);
+			}
+		} else if (strncasecmp(w, "metadata=", 9) == 0) {
+			/* style of metadata to use by default */
+			int i;
+			for (i=0; superlist[i] && !createinfo.supertype; i++)
+				createinfo.supertype =
+					superlist[i]->match_metadata_desc(w+9);
+			if (!createinfo.supertype)
+				pr_err("metadata format %s unknown, ignoring\n",
+					w+9);
+		} else if (strncasecmp(w, "symlinks=yes", 12) == 0)
+			createinfo.symlinks = 1;
+		else if (strncasecmp(w, "symlinks=no", 11) == 0)
+			createinfo.symlinks = 0;
+		/*
+		 * The comparisons below used a length longer than the
+		 * literal, which made them whole-word matches; spell that
+		 * out with strcasecmp() so the behaviour is unchanged.
+		 */
+		else if (strcasecmp(w, "names=yes") == 0)
+			createinfo.names = 1;
+		else if (strcasecmp(w, "names=no") == 0)
+			createinfo.names = 0;
+		else if (strcasecmp(w, "bbl=no") == 0)
+			createinfo.bblist = 0;
+		else if (strcasecmp(w, "bbl=yes") == 0)
+			createinfo.bblist = 1;
+		else {
+			pr_err("unrecognised word on CREATE line: %s\n",
+				w);
+		}
+	}
+}
+
+/*
+ * devline - record the words of a DEVICE line on cdevlist.
+ *
+ * A word is accepted when it starts with '/' or is "partitions" or
+ * "containers" (case ignored); each accepted word is copied and pushed
+ * on the front of cdevlist.  Anything else is reported and skipped.
+ */
+void devline(char *line)
+{
+	char *word = dl_next(line);
+
+	while (word != line) {
+		int accepted = word[0] == '/' ||
+			strcasecmp(word, "partitions") == 0 ||
+			strcasecmp(word, "containers") == 0;
+
+		if (!accepted) {
+			pr_err("unreconised word on DEVICE line: %s\n",
+				word);
+		} else {
+			struct conf_dev *ent = xmalloc(sizeof(*ent));
+
+			ent->name = xstrdup(word);
+			ent->next = cdevlist;
+			cdevlist = ent;
+		}
+		word = dl_next(word);
+	}
+}
+
+/*
+ * ARRAY entries read from the config, in file order.  mddevlp always
+ * points at the 'next' slot where arrayline() links the following entry.
+ */
+struct mddev_ident *mddevlist = NULL;
+struct mddev_ident **mddevlp = &mddevlist;
+
+/*
+ * is_number - test whether a string consists only of decimal digits.
+ *
+ * Returns 1 when 'w' holds one or more digits and nothing else,
+ * 0 otherwise (including for the empty string).
+ */
+static int is_number(char *w)
+{
+	int digits = 0;
+
+	/* <ctype.h> needs an unsigned char value; plain char may be negative */
+	while (*w && isdigit((unsigned char)*w)) {
+		digits++;
+		w++;
+	}
+	return (digits && ! *w);
+}
+
+/*
+ * arrayline - parse one ARRAY line and append it to mddevlist.
+ *
+ * 'line' is the head of a word list as produced by conf_line().  A word
+ * that starts with '/' or contains no '=' names the md device (or is
+ * "<ignore>"); every other word is a "key=value" attribute: uuid,
+ * super-minor, name, bitmap, devices, spare-group, level, disks,
+ * num-devices, spares, metadata, auto, member, container.
+ *
+ * Bad or repeated words are reported with pr_err() and skipped.  A line
+ * that ends up with no identity information (no uuid, devices,
+ * super-minor or name, and not both container and member) is reported
+ * and dropped; the strings allocated while parsing it are released.
+ * Otherwise a copy of the entry is linked at the tail of mddevlist.
+ */
+void arrayline(char *line)
+{
+	char *w;
+
+	struct mddev_ident mis;
+	struct mddev_ident *mi;
+
+	/*
+	 * Start from all-zero so that any member not set explicitly
+	 * below is not copied into the list entry uninitialised.
+	 */
+	memset(&mis, 0, sizeof(mis));
+	mis.uuid_set = 0;
+	mis.super_minor = UnSet;
+	mis.level = UnSet;
+	mis.raid_disks = UnSet;
+	mis.spare_disks = 0;
+	mis.devices = NULL;
+	mis.devname = NULL;
+	mis.spare_group = NULL;
+	mis.autof = 0;
+	mis.next = NULL;
+	mis.st = NULL;
+	mis.bitmap_fd = -1;
+	mis.bitmap_file = NULL;
+	mis.name[0] = 0;
+	mis.container = NULL;
+	mis.member = NULL;
+
+	for (w=dl_next(line); w!=line; w=dl_next(w)) {
+		if (w[0] == '/' || strchr(w, '=') == NULL) {
+			/* This names the device, or is '<ignore>'.
+			 * The rules match those in create_mddev.
+			 * 'w' must be:
+			 *  /dev/md/{anything}
+			 *  /dev/mdNN
+			 *  /dev/md_dNN
+			 *  <ignore>
+			 *  or anything that doesn't start '/' or '<'
+			 */
+			if (strcasecmp(w, "<ignore>") == 0 ||
+			    strncmp(w, "/dev/md/", 8) == 0 ||
+			    (w[0] != '/' && w[0] != '<') ||
+			    (strncmp(w, "/dev/md", 7) == 0 &&
+			     is_number(w+7)) ||
+			    (strncmp(w, "/dev/md_d", 9) == 0 &&
+			     is_number(w+9))
+				) {
+				/* This is acceptable */;
+				if (mis.devname)
+					pr_err("only give one "
+						"device per ARRAY line: %s and %s\n",
+						mis.devname, w);
+				else
+					mis.devname = w;
+			}else {
+				pr_err("%s is an invalid name for "
+					"an md device - ignored.\n", w);
+			}
+		} else if (strncasecmp(w, "uuid=", 5)==0 ) {
+			if (mis.uuid_set)
+				pr_err("only specify uuid once, %s ignored.\n",
+					w);
+			else {
+				if (parse_uuid(w+5, mis.uuid))
+					mis.uuid_set = 1;
+				else
+					pr_err("bad uuid: %s\n", w);
+			}
+		} else if (strncasecmp(w, "super-minor=", 12)==0 ) {
+			if (mis.super_minor != UnSet)
+				pr_err("only specify super-minor once, %s ignored.\n",
+					w);
+			else {
+				char *endptr;
+				int minor = strtol(w+12, &endptr, 10);
+
+				if (w[12]==0 || endptr[0]!=0 || minor < 0)
+					pr_err("invalid super-minor number: %s\n",
+						w);
+				else
+					mis.super_minor = minor;
+			}
+		} else if (strncasecmp(w, "name=", 5)==0) {
+			/* NOTE(review): the 32 limit presumably matches
+			 * sizeof(mis.name) - 1; confirm against mdadm.h */
+			if (mis.name[0])
+				pr_err("only specify name once, %s ignored.\n",
+					w);
+			else if (strlen(w+5) > 32)
+				pr_err("name too long, ignoring %s\n", w);
+			else
+				strcpy(mis.name, w+5);
+
+		} else if (strncasecmp(w, "bitmap=", 7) == 0) {
+			if (mis.bitmap_file)
+				pr_err("only specify bitmap file once. %s ignored\n",
+					w);
+			else
+				mis.bitmap_file = xstrdup(w+7);
+
+		} else if (strncasecmp(w, "devices=", 8 ) == 0 ) {
+			if (mis.devices)
+				pr_err("only specify devices once (use a comma separated list). %s ignored\n",
+					w);
+			else
+				mis.devices = xstrdup(w+8);
+		} else if (strncasecmp(w, "spare-group=", 12) == 0 ) {
+			if (mis.spare_group)
+				pr_err("only specify one spare group per array. %s ignored.\n",
+					w);
+			else
+				mis.spare_group = xstrdup(w+12);
+		} else if (strncasecmp(w, "level=", 6) == 0 ) {
+			/* this is mainly for compatibility with --brief output */
+			mis.level = map_name(pers, w+6);
+		} else if (strncasecmp(w, "disks=", 6) == 0 ) {
+			/* again, for compat */
+			mis.raid_disks = atoi(w+6);
+		} else if (strncasecmp(w, "num-devices=", 12) == 0 ) {
+			/* again, for compat */
+			mis.raid_disks = atoi(w+12);
+		} else if (strncasecmp(w, "spares=", 7) == 0 ) {
+			/* for warning if not all spares present */
+			mis.spare_disks = atoi(w+7);
+		} else if (strncasecmp(w, "metadata=", 9) == 0) {
+			/* style of metadata on the devices. */
+			int i;
+
+			for(i=0; superlist[i] && !mis.st; i++)
+				mis.st = superlist[i]->match_metadata_desc(w+9);
+
+			if (!mis.st)
+				pr_err("metadata format %s unknown, ignored.\n", w+9);
+		} else if (strncasecmp(w, "auto=", 5) == 0 ) {
+			/* whether to create device special files as needed */
+			mis.autof = parse_auto(w+5, "auto type", 0);
+		} else if (strncasecmp(w, "member=", 7) == 0) {
+			/* subarray within a container */
+			mis.member = xstrdup(w+7);
+		} else if (strncasecmp(w, "container=", 10) == 0) {
+			/* the container holding this subarray.  Either a device name
+			 * or a uuid */
+			mis.container = xstrdup(w+10);
+		} else {
+			pr_err("unrecognised word on ARRAY line: %s\n",
+				w);
+		}
+	}
+	if (mis.uuid_set == 0 && mis.devices == NULL &&
+	    mis.super_minor == UnSet && mis.name[0] == 0 &&
+	    (mis.container == NULL || mis.member == NULL)) {
+		/* devname may never have been given: do not hand NULL to %s */
+		pr_err("ARRAY line %s has no identity information.\n",
+			mis.devname ? mis.devname : "(unnamed)");
+		/* the entry is dropped, so release what was duplicated for it */
+		free(mis.bitmap_file);
+		free(mis.spare_group);
+		free(mis.member);
+		free(mis.container);
+	} else {
+		mi = xmalloc(sizeof(*mi));
+		*mi = mis;
+		/* mis.devname points into 'line', which the caller frees */
+		mi->devname = mis.devname ? xstrdup(mis.devname) : NULL;
+		mi->next = NULL;
+		*mddevlp = mi;
+		mddevlp = &mi->next;
+	}
+}
+
+/* Address for alert mail: the first word of the first MAILADDR line seen. */
+static char *alert_email = NULL;
+
+/*
+ * mailline - handle a MAILADDR line.
+ * Keeps a copy of the first word if no address has been stored yet;
+ * all further words are ignored.
+ */
+void mailline(char *line)
+{
+	char *word = dl_next(line);
+
+	while (word != line) {
+		if (alert_email == NULL)
+			alert_email = xstrdup(word);
+		word = dl_next(word);
+	}
+}
+
+/* "From" text for alert mail: all MAILFROM words joined by single spaces. */
+static char *alert_mail_from = NULL;
+
+/*
+ * mailfromline - handle a MAILFROM line.
+ * The first word starts the stored string; every later word is appended
+ * after a space.  If building the longer string fails, the previous
+ * value is kept.
+ */
+void mailfromline(char *line)
+{
+	char *word = dl_next(line);
+
+	while (word != line) {
+		if (alert_mail_from != NULL) {
+			char *joined = NULL;
+
+			if (xasprintf(&joined, "%s %s", alert_mail_from, word) > 0) {
+				free(alert_mail_from);
+				alert_mail_from = joined;
+			}
+		} else {
+			alert_mail_from = xstrdup(word);
+		}
+		word = dl_next(word);
+	}
+}
+
+/* Alert program: the first word of the first PROGRAM line seen. */
+static char *alert_program = NULL;
+
+/*
+ * programline - handle a PROGRAM line.
+ * Keeps a copy of the first word if no program has been stored yet;
+ * all further words are ignored.
+ */
+void programline(char *line)
+{
+	char *word = dl_next(line);
+
+	while (word != line) {
+		if (alert_program == NULL)
+			alert_program = xstrdup(word);
+		word = dl_next(word);
+	}
+}
+
+/* Host name from the HOMEHOST line; "" when the line said <none>. */
+static char *home_host = NULL;
+/* Cleared when a HOMEHOST line contains <ignore>. */
+static int require_homehost = 1;
+
+/*
+ * homehostline - handle a HOMEHOST line.
+ * "<ignore>" clears require_homehost.  Otherwise the first word seen
+ * while home_host is still unset becomes the home host, with "<none>"
+ * stored as the empty string.  Case is ignored for both markers.
+ */
+void homehostline(char *line)
+{
+	char *word;
+
+	for (word = dl_next(line); word != line; word = dl_next(word)) {
+		if (strcasecmp(word, "<ignore>") == 0) {
+			require_homehost = 0;
+			continue;
+		}
+		if (home_host != NULL)
+			continue;
+		home_host = xstrdup(strcasecmp(word, "<none>") == 0 ? "" : word);
+	}
+}
+
+/* The three values autoline() stores in 'auto' policy entries. */
+char auto_yes[] = "yes";
+char auto_no[] = "no";
+char auto_homehost[] = "homehost";
+
+/* Set once an AUTO line has been processed; later AUTO lines are ignored. */
+static int auto_seen = 0;
+/*
+ * autoline - turn an AUTO line into per-metadata 'auto' policy entries.
+ *
+ * 'line' is the head of a word list as produced by conf_line().  Words
+ * from the MDADM_CONF_AUTO environment variable, if any, are inserted
+ * at the front of the list before it is processed.  Only the first
+ * call has any effect.
+ */
+void autoline(char *line)
+{
+ char *w;
+ char *seen;
+ int super_cnt;
+ char *dflt = auto_yes;
+ int homehost = 0;
+ int i;
+
+ if (auto_seen)
+ return;
+ auto_seen = 1;
+
+ /* Parse the 'auto' line creating policy statements for the 'auto' policy.
+ *
+ * The default is 'yes' but the 'auto' line might over-ride that.
+ * Words in the line are processed in order with the first
+ * match winning.
+ * word can be:
+ * +version - that version can be assembled
+ * -version - that version cannot be auto-assembled
+ * yes or +all - any other version can be assembled
+ * no or -all - no other version can be assembled.
+ * homehost - any array associated by 'homehost' to this
+ * host can be assembled.
+ *
+ * Thus:
+ * +ddf -0.90 homehost -all
+ * will auto-assemble any ddf array, no 0.90 array, and
+ * any other array (imsm, 1.x) if and only if it is identified
+ * as belonging to this host.
+ *
+ * We translate that to policy by creating 'auto=yes' when we see
+ * a '+version' line, 'auto=no' if we see '-version' before 'homehost',
+ * or 'auto=homehost' if we see '-version' after 'homehost'.
+ * When we see yes, no, +all or -all we stop and any version that hasn't
+ * been seen gets an appropriate auto= entry.
+ */
+
+ /* If environment variable MDADM_CONF_AUTO is defined, then
+ * it is prepended to the auto line. This allow a script
+ * to easily disable some metadata types.
+ */
+ w = getenv("MDADM_CONF_AUTO");
+ if (w && *w) {
+ char *l = xstrdup(w);
+ char *head = line;
+ w = strtok(l, " \t");
+ while (w) {
+ char *nw = dl_strdup(w);
+ /* insert after the previously inserted word so the env order is kept */
+ dl_insert(head, nw);
+ head = nw;
+ w = strtok(NULL, " \t");
+ }
+ free(l);
+ }
+
+ /* one 'seen' flag per superlist[] entry */
+ for (super_cnt = 0; superlist[super_cnt]; super_cnt++)
+ ;
+ seen = xcalloc(super_cnt, 1);
+
+ for (w = dl_next(line); w != line ; w = dl_next(w)) {
+ char *val;
+
+ if (strcasecmp(w, "yes") == 0) {
+ dflt = auto_yes;
+ break;
+ }
+ if (strcasecmp(w, "no") == 0) {
+ if (homehost)
+ dflt = auto_homehost;
+ else
+ dflt = auto_no;
+ break;
+ }
+ if (strcasecmp(w, "homehost") == 0) {
+ homehost = 1;
+ continue;
+ }
+ if (w[0] == '+')
+ val = auto_yes;
+ else if (w[0] == '-') {
+ if (homehost)
+ val = auto_homehost;
+ else
+ val = auto_no;
+ } else
+ /* words without a leading '+' or '-' are silently skipped */
+ continue;
+
+ if (strcasecmp(w+1, "all") == 0) {
+ dflt = val;
+ break;
+ }
+ /* find the superlist[] entry that this version word names */
+ for (i = 0; superlist[i]; i++) {
+ const char *version = superlist[i]->name;
+ if (strcasecmp(w+1, version) == 0)
+ break;
+ /* 1 matches 1.x, 0 matches 0.90 */
+ if (version[1] == '.' &&
+ strlen(w+1) == 1 &&
+ w[1] == version[0])
+ break;
+ /* 1.anything matches 1.x */
+ if (strcmp(version, "1.x") == 0 &&
+ strncmp(w+1, "1.", 2) == 0)
+ break;
+ }
+ if (superlist[i] == NULL)
+ /* ignore this word */
+ continue;
+ if (seen[i])
+ /* already know about this metadata */
+ continue;
+ policy_add(rule_policy, pol_auto, val, pol_metadata, superlist[i]->name, NULL);
+ seen[i] = 1;
+ }
+ /* every metadata type not named explicitly gets the default */
+ for (i = 0; i < super_cnt; i++)
+ if (!seen[i])
+ policy_add(rule_policy, pol_auto, dflt, pol_metadata, superlist[i]->name, NULL);
+
+ free(seen);
+}
+
+/* Non-zero once load_conffile() has run; it does nothing on later calls. */
+int loaded = 0;
+
+/* Config file chosen with set_conffile(); NULL selects the built-in default. */
+static char *conffile = NULL;
+/*
+ * set_conffile - choose the config file that load_conffile() will read.
+ * The pointer is stored as given, not copied.  load_conffile() treats
+ * the names "partitions" and "none" specially.
+ */
+void set_conffile(char *file)
+{
+ conffile = file;
+}
+
+/*
+ * conf_file - read every logical line from an open config file and
+ * hand it to the parser for its leading keyword.
+ *
+ * Lines whose first word is not a known keyword are reported with
+ * pr_err().  Each line is released with free_line() once handled.
+ */
+void conf_file(FILE *f)
+{
+	char *ln;
+
+	for (ln = conf_line(f); ln != NULL; ln = conf_line(f)) {
+		int kw = match_keyword(ln);
+
+		if (kw == Devices)
+			devline(ln);
+		else if (kw == Array)
+			arrayline(ln);
+		else if (kw == Mailaddr)
+			mailline(ln);
+		else if (kw == Mailfrom)
+			mailfromline(ln);
+		else if (kw == Program)
+			programline(ln);
+		else if (kw == CreateDev)
+			createline(ln);
+		else if (kw == Homehost)
+			homehostline(ln);
+		else if (kw == AutoMode)
+			autoline(ln);
+		else if (kw == Policy)
+			policyline(ln, rule_policy);
+		else if (kw == PartPolicy)
+			policyline(ln, rule_part);
+		else
+			pr_err("Unknown keyword %s\n", ln);
+
+		free_line(ln);
+	}
+}
+
+/*
+ * Node of the name-sorted list of "*.conf" files that
+ * conf_file_or_dir() builds while scanning a directory.  The name is
+ * stored inline in the flexible array member.
+ */
+struct fname {
+ struct fname *next;
+ char name[];
+};
+
+/*
+ * conf_file_or_dir - read configuration from an open file or directory.
+ *
+ * A regular file is parsed directly with conf_file().  For a directory,
+ * every entry whose name does not start with '.' and ends in ".conf"
+ * (with at least one character before the suffix) is parsed, in
+ * strcmp() order of the names.  Anything else is ignored.  Directory
+ * handling is only compiled in when fdopendir()/openat() are available.
+ *
+ * 'f' stays open and owned by the caller: the directory is scanned
+ * through a duplicate descriptor so that closedir() does not close the
+ * descriptor the caller will later pass to fclose().
+ */
+void conf_file_or_dir(FILE *f)
+{
+	struct stat st;
+	DIR *dir;
+	struct dirent *dp;
+	struct fname *list = NULL;
+	int dfd;
+
+	if (fstat(fileno(f), &st) != 0)
+		return;
+	if (S_ISREG(st.st_mode)) {
+		conf_file(f);
+		return;
+	}
+	if (!S_ISDIR(st.st_mode))
+		return;
+#if _XOPEN_SOURCE >= 700 || _POSIX_C_SOURCE >= 200809L
+	/* fdopendir() takes ownership of its descriptor, so give it a copy */
+	dfd = dup(fileno(f));
+	if (dfd < 0)
+		return;
+	dir = fdopendir(dfd);
+	if (!dir) {
+		close(dfd);
+		return;
+	}
+	while ((dp = readdir(dir)) != NULL) {
+		int l;
+		struct fname *fn, **p;
+		if (dp->d_ino == 0)
+			continue;
+		if (dp->d_name[0] == '.')
+			continue;
+		l = strlen(dp->d_name);
+		if (l < 6 || strcmp(dp->d_name+l-5, ".conf") != 0)
+			continue;
+		fn = xmalloc(sizeof(*fn)+l+1);
+		strcpy(fn->name, dp->d_name);
+		/* insertion sort keeps the list ordered by name */
+		for (p = &list;
+		     *p && strcmp((*p)->name, fn->name) < 0;
+		     p = & (*p)->next)
+			;
+		fn->next = *p;
+		*p = fn;
+	}
+	while (list) {
+		int fd;
+		FILE *f2;
+		struct fname *fn = list;
+		list = list->next;
+		/* names are relative to the directory the caller opened */
+		fd = openat(fileno(f), fn->name, O_RDONLY);
+		free(fn);
+		if (fd < 0)
+			continue;
+		f2 = fdopen(fd, "r");
+		if (!f2) {
+			close(fd);
+			continue;
+		}
+		conf_file(f2);
+		fclose(f2);
+	}
+	closedir(dir);
+#endif
+}
+
+/*
+ * load_conffile - read the configuration once.
+ *
+ * With no file chosen via set_conffile(), the default file and its
+ * ".d" directory are used.  The name "partitions" stands for a config
+ * consisting of "DEV partitions"; "none" reads nothing.  When the
+ * default file cannot be opened, the alternate file (and its ".d"
+ * directory) is tried instead.
+ *
+ * An empty AUTO line is always processed afterwards so that the
+ * MDADM_CONF_AUTO environment variable is honoured even when the
+ * config has no AUTO line.  Calls after the first return immediately.
+ */
+void load_conffile(void)
+{
+	char *confdir = NULL;
+	char *auto_head;
+	FILE *fp;
+
+	if (loaded)
+		return;
+	if (conffile == NULL) {
+		conffile = DefaultConfFile;
+		confdir = DefaultConfDir;
+	}
+
+	if (strcmp(conffile, "partitions") == 0) {
+		char *devs = dl_strdup("DEV");
+
+		dl_init(devs);
+		dl_add(devs, dl_strdup("partitions"));
+		devline(devs);
+		free_line(devs);
+	} else if (strcmp(conffile, "none") != 0) {
+		fp = fopen(conffile, "r");
+		/* Debian keeps mdadm.conf in /etc/mdadm/, so fall back to
+		 * the alternate location when the default file is absent;
+		 * this is only done for the built-in default name.
+		 */
+		if (fp == NULL && conffile == DefaultConfFile) {
+			fp = fopen(DefaultAltConfFile, "r");
+			if (fp != NULL) {
+				conffile = DefaultAltConfFile;
+				confdir = DefaultAltConfDir;
+			}
+		}
+		if (fp != NULL) {
+			conf_file_or_dir(fp);
+			fclose(fp);
+		}
+		if (confdir != NULL) {
+			fp = fopen(confdir, "r");
+			if (fp != NULL) {
+				conf_file_or_dir(fp);
+				fclose(fp);
+			}
+		}
+	}
+	/* Feed autoline() an empty AUTO line; it is a no-op if the
+	 * config already contained one.
+	 */
+	auto_head = dl_strdup("AUTO");
+	dl_init(auto_head);
+	autoline(auto_head);
+	free_line(auto_head);
+
+	loaded = 1;
+}
+
+/*
+ * conf_get_mailaddr - return the address stored by mailline(), loading
+ * the config first if needed.  NULL when no MAILADDR word was stored.
+ * The string belongs to this file.
+ */
+char *conf_get_mailaddr(void)
+{
+ load_conffile();
+ return alert_email;
+}
+
+/*
+ * conf_get_mailfrom - return the text stored by mailfromline(), loading
+ * the config first if needed.  NULL when no MAILFROM word was stored.
+ * The string belongs to this file.
+ */
+char *conf_get_mailfrom(void)
+{
+ load_conffile();
+ return alert_mail_from;
+}
+
+/*
+ * conf_get_program - return the program stored by programline(), loading
+ * the config first if needed.  NULL when no PROGRAM word was stored.
+ * The string belongs to this file.
+ */
+char *conf_get_program(void)
+{
+ load_conffile();
+ return alert_program;
+}
+
+/*
+ * conf_get_homehost - return the host stored by homehostline(), loading
+ * the config first if needed.  NULL when none was stored; "" when the
+ * config said <none>.  If require_homehostp is not NULL it receives the
+ * require_homehost flag (0 after a HOMEHOST <ignore>, otherwise 1).
+ */
+char *conf_get_homehost(int *require_homehostp)
+{
+ load_conffile();
+ if (require_homehostp)
+ *require_homehostp = require_homehost;
+ return home_host;
+}
+
+/*
+ * conf_get_create_info - return the global createinfo after making sure
+ * the config (and so any CREATE line) has been loaded.
+ */
+struct createinfo *conf_get_create_info(void)
+{
+ load_conffile();
+ return &createinfo;
+}
+
+/*
+ * conf_get_ident - look up ARRAY entries from the config.
+ *
+ * With dev == NULL the whole list is returned.  Otherwise the result
+ * is the first entry whose devname matches 'dev' according to
+ * devname_matches(), or NULL when there is none.  Entries without a
+ * devname never match.
+ */
+struct mddev_ident *conf_get_ident(char *dev)
+{
+	struct mddev_ident *id;
+
+	load_conffile();
+	if (dev == NULL)
+		return mddevlist;
+	for (id = mddevlist; id != NULL; id = id->next)
+		if (id->devname != NULL && devname_matches(dev, id->devname))
+			break;
+	return id;
+}
+
+/* Link 'list' onto the end of the list whose head pointer is *dlp. */
+static void append_dlist(struct mddev_dev **dlp, struct mddev_dev *list)
+{
+	struct mddev_dev **tail;
+
+	for (tail = dlp; *tail != NULL; tail = &(*tail)->next)
+		;
+	*tail = list;
+}
+
+/*
+ * conf_get_devs - expand the DEVICE lines into a list of device names.
+ *
+ * "partitions" and "containers" are expanded with load_partitions()
+ * and load_containers(); every other entry is a glob pattern.  With no
+ * DEVICE line at all, partitions followed by containers are used.
+ * Glob results are pushed on the front of the list after all patterns
+ * have been expanded.
+ *
+ * The list is kept in a static variable: it is freed and rebuilt on
+ * every call, so the caller must not free it or keep it across calls.
+ */
+struct mddev_dev *conf_get_devs()
+{
+	static struct mddev_dev *dlist = NULL;
+	glob_t globbuf;
+	struct conf_dev *cd;
+	struct mddev_dev *dev;
+	int globflags = 0;
+	unsigned int idx;
+
+	/* release the list handed out by the previous call */
+	while ((dev = dlist) != NULL) {
+		dlist = dev->next;
+		free(dev->devname);
+		free(dev);
+	}
+
+	load_conffile();
+
+	if (cdevlist == NULL) {
+		/* default to 'partitions' and 'containers' */
+		dlist = load_partitions();
+		append_dlist(&dlist, load_containers());
+	}
+
+	for (cd = cdevlist; cd != NULL; cd = cd->next) {
+		if (strcasecmp(cd->name, "partitions") == 0) {
+			append_dlist(&dlist, load_partitions());
+			continue;
+		}
+		if (strcasecmp(cd->name, "containers") == 0) {
+			append_dlist(&dlist, load_containers());
+			continue;
+		}
+		/* GLOB_APPEND is set from the second pattern onwards */
+		glob(cd->name, globflags, NULL, &globbuf);
+		globflags |= GLOB_APPEND;
+	}
+	if ((globflags & GLOB_APPEND) == 0)
+		return dlist;
+
+	for (idx = 0; idx < globbuf.gl_pathc; idx++) {
+		dev = xmalloc(sizeof(*dev));
+		memset(dev, 0, sizeof(*dev));
+		dev->devname = xstrdup(globbuf.gl_pathv[idx]);
+		dev->next = dlist;
+		dlist = dev;
+	}
+	globfree(&globbuf);
+
+	return dlist;
+}
+
+/*
+ * conf_test_dev - check a device name against the DEVICE lines.
+ *
+ * Returns 1 when there are no DEVICE entries, when one of them is
+ * "partitions", or when 'devname' matches one of them as an fnmatch()
+ * pattern with FNM_PATHNAME; returns 0 otherwise.
+ *
+ * NOTE(review): unlike its neighbours this does not call
+ * load_conffile() itself - presumably callers have loaded the config
+ * already; confirm.
+ */
+int conf_test_dev(char *devname)
+{
+	struct conf_dev *pat = cdevlist;
+
+	if (pat == NULL)
+		/* allow anything by default */
+		return 1;
+	while (pat != NULL) {
+		if (strcasecmp(pat->name, "partitions") == 0 ||
+		    fnmatch(pat->name, devname, FNM_PATHNAME) == 0)
+			return 1;
+		pat = pat->next;
+	}
+	return 0;
+}
+
+/*
+ * conf_test_metadata - decide whether arrays with the given metadata
+ * version may be auto-assembled under policy 'pol'.
+ *
+ * Looks at every 'auto' policy entry that applies to 'version':
+ *   - any "yes" gives 1 immediately;
+ *   - otherwise a "homehost" entry gives 1 when is_homehost is set;
+ *   - otherwise any "no" gives 0;
+ *   - with none of the above the answer is 1.
+ */
+int conf_test_metadata(const char *version, struct dev_policy *pol, int is_homehost)
+{
+	struct dev_policy *p;
+	int saw_no = 0, saw_homehost = 0;
+
+	load_conffile();
+
+	pol = pol_find(pol, pol_auto);
+	pol_for_each(p, pol, version) {
+		if (strcmp(p->value, "yes") == 0)
+			return 1;
+		else if (strcmp(p->value, "homehost") == 0)
+			saw_homehost = 1;
+		else if (strcmp(p->value, "no") == 0)
+			saw_no = 1;
+	}
+	if (is_homehost && saw_homehost)
+		return 1;
+	return !saw_no;
+}
+
+/*
+ * match_oneof - test a device name against a comma separated list of
+ * fnmatch() patterns (matched with FNM_PATHNAME).
+ *
+ * Returns 1 as soon as one pattern matches 'devname', 0 when none
+ * does or when 'devices' is NULL or empty.  Patterns of 1024
+ * characters or more are skipped.
+ */
+int match_oneof(char *devices, char *devname)
+{
+	char patn[1024];
+	char *start = devices;
+
+	while (start != NULL && *start != '\0') {
+		char *end = strchr(start, ',');
+		size_t len;
+
+		if (end == NULL)
+			end = start + strlen(start);
+		len = end - start;
+		if (len < sizeof(patn)) {
+			memcpy(patn, start, len);
+			patn[len] = '\0';
+			if (fnmatch(patn, devname, FNM_PATHNAME) == 0)
+				return 1;
+		}
+		/* step over the separator, or stop on the terminator */
+		start = (*end == ',') ? end + 1 : end;
+	}
+	return 0;
+}
+
+/*
+ * devname_matches - compare an array name with a name from the config.
+ *
+ * Both strings are normalised the same way before an exact strcmp():
+ * a leading "/dev/md/" or else a leading "/dev/" is removed, and then
+ * a leading "md" is removed when a digit follows it, so that "mdNN"
+ * and "NN" compare equal.
+ *
+ * Returns 1 when the normalised names are identical, 0 otherwise.
+ */
+int devname_matches(char *name, char *match)
+{
+	if (strncmp(name, "/dev/md/", 8) == 0)
+		name += 8;
+	else if (strncmp(name, "/dev/", 5) == 0)
+		name += 5;
+
+	if (strncmp(match, "/dev/md/", 8) == 0)
+		match += 8;
+	else if (strncmp(match, "/dev/", 5) == 0)
+		match += 5;
+
+	/* <ctype.h> needs an unsigned char value; plain char may be negative */
+	if (strncmp(name, "md", 2) == 0 &&
+	    isdigit((unsigned char)name[2]))
+		name += 2;
+	if (strncmp(match, "md", 2) == 0 &&
+	    isdigit((unsigned char)match[2]))
+		match += 2;
+
+	return (strcmp(name, match) == 0);
+}
+
+/*
+ * conf_name_is_free - check that no ARRAY entry in the config already
+ * claims 'name'.
+ *
+ * An entry claims the name when devname_matches() succeeds against its
+ * devname, its name= value, or its super-minor number written in
+ * decimal.  Returns 0 when the name is taken, 1 when it is free.
+ */
+int conf_name_is_free(char *name)
+{
+	struct mddev_ident *ent;
+
+	load_conffile();
+	for (ent = mddevlist; ent != NULL; ent = ent->next) {
+		char minor_str[100];
+
+		if (ent->devname && devname_matches(name, ent->devname))
+			return 0;
+		if (ent->name[0] && devname_matches(name, ent->name))
+			return 0;
+		if (ent->super_minor == UnSet)
+			continue;
+		sprintf(minor_str, "%d", ent->super_minor);
+		if (devname_matches(name, minor_str))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * conf_match - find the single ARRAY entry that matches an array.
+ *
+ * Every entry from the config is tested against 'info' (and against
+ * 'devname' when one is given).  An entry is rejected when any
+ * identifier it specifies - uuid, name, devices, super-minor - does
+ * not match, or when it specifies none of those four.  With
+ * verbose >= 2 the reason for each rejection is reported, provided
+ * the entry has a devname.
+ *
+ * Returns the matching entry, or NULL when nothing matches.  When two
+ * entries match, this is reported (unless verbose < 0), *rvp is set
+ * to 2 if rvp is not NULL, and NULL is returned.
+ */
+struct mddev_ident *conf_match(struct supertype *st,
+ struct mdinfo *info,
+ char *devname,
+ int verbose, int *rvp)
+{
+ struct mddev_ident *array_list, *match;
+ array_list = conf_get_ident(NULL);
+ match = NULL;
+ for (; array_list; array_list = array_list->next) {
+ /* same_uuid() returning 0 is treated as "different" */
+ if (array_list->uuid_set &&
+ same_uuid(array_list->uuid, info->uuid, st->ss->swapuuid)
+ == 0) {
+ if (verbose >= 2 && array_list->devname)
+ pr_err("UUID differs from %s.\n",
+ array_list->devname);
+ continue;
+ }
+ if (array_list->name[0] &&
+ strcasecmp(array_list->name, info->name) != 0) {
+ if (verbose >= 2 && array_list->devname)
+ pr_err("Name differs from %s.\n",
+ array_list->devname);
+ continue;
+ }
+ if (array_list->devices && devname &&
+ !match_oneof(array_list->devices, devname)) {
+ if (verbose >= 2 && array_list->devname)
+ pr_err("Not a listed device for %s.\n",
+ array_list->devname);
+ continue;
+ }
+ if (array_list->super_minor != UnSet &&
+ array_list->super_minor != info->array.md_minor) {
+ if (verbose >= 2 && array_list->devname)
+ pr_err("Different super-minor to %s.\n",
+ array_list->devname);
+ continue;
+ }
+ /*
+ * NOTE(review): arrayline() also accepts an entry identified only
+ * by container= plus member=; such an entry is rejected here.
+ * Confirm that this is intended.
+ */
+ if (!array_list->uuid_set &&
+ !array_list->name[0] &&
+ !array_list->devices &&
+ array_list->super_minor == UnSet) {
+ if (verbose >= 2 && array_list->devname)
+ pr_err("%s doesn't have any identifying"
+ " information.\n",
+ array_list->devname);
+ continue;
+ }
+ /* FIXME, should I check raid_disks and level too?? */
+
+ /* a second match makes the result ambiguous: give up */
+ if (match) {
+ if (verbose >= 0) {
+ if (match->devname && array_list->devname)
+ pr_err("we match both %s and %s - "
+ "cannot decide which to use.\n",
+ match->devname,
+ array_list->devname);
+ else
+ pr_err("multiple lines in mdadm.conf"
+ " match\n");
+ }
+ if (rvp)
+ *rvp = 2;
+ match = NULL;
+ break;
+ }
+ match = array_list;
+ }
+ return match;
+}
+
+/*
+ * conf_verify_devnames - detect ARRAY entries that share a device name.
+ *
+ * Entries without a devname and entries named "<ignore>" are not
+ * checked.  The first duplicate found is reported - by UUID when both
+ * entries have one, by name otherwise - and 1 is returned.  Returns 0
+ * when every device name is unique.
+ */
+int conf_verify_devnames(struct mddev_ident *array_list)
+{
+	struct mddev_ident *first, *second;
+
+	for (first = array_list; first; first = first->next) {
+		if (first->devname == NULL ||
+		    strcmp(first->devname, "<ignore>") == 0)
+			continue;
+		for (second = first->next; second; second = second->next) {
+			char uuidbuf[64];
+
+			if (second->devname == NULL ||
+			    strcmp(first->devname, second->devname) != 0)
+				continue;
+
+			if (!(first->uuid_set && second->uuid_set)) {
+				pr_err("Device %s given twice"
+					" in config file\n", first->devname);
+				return 1;
+			}
+			/* one message, emitted in two parts so the
+			 * buffer can be reused for the second UUID */
+			__fname_from_uuid(first->uuid, 0, uuidbuf, ':');
+			pr_err("Devices %s and ",
+				uuidbuf);
+			__fname_from_uuid(second->uuid, 0, uuidbuf, ':');
+			fprintf(stderr,
+				"%s have the same name: %s\n",
+				uuidbuf, first->devname);
+			return 1;
+		}
+	}
+
+	return 0;
+}
diff --git a/crc32.c b/crc32.c
new file mode 100644
index 00000000..94fda06a
--- /dev/null
+++ b/crc32.c
@@ -0,0 +1,360 @@
+/* crc32.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-2003 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Note: zlib license from zlib.h added explicitly as mdadm does
+ * not include zlib.h. License from v1.2.2 of zlib:
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ *
+ * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
+ * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
+ * tables for updating the shift register in one step with three exclusive-ors
+ * instead of four steps with four exclusive-ors. This results in about a factor
+ * of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
+ */
+
+/* @(#) $Id$ */
+
+/*
+ Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
+ protection on the static variables used to control the first-use generation
+ of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
+ first call get_crc_table() to initialize the tables before allowing more than
+ one thread to use crc32().
+ */
+
+#ifdef MAKECRCH /* table-generator build: pulls in stdio and forces the dynamic table code */
+# include <stdio.h>
+# ifndef DYNAMIC_CRC_TABLE
+# define DYNAMIC_CRC_TABLE
+# endif /* !DYNAMIC_CRC_TABLE */
+#endif /* MAKECRCH */
+
+/* #include "zutil.h" / * for STDC and FAR definitions
+ *
+ * zutil.h is not included; the names this file would have taken from it
+ * are defined by hand on the following lines instead.
+ */
+#define STDC
+#define FAR /* expands to nothing */
+#define Z_NULL ((void*)0)
+#define OF(X) X /* OF((args)) expands to (args), so prototypes are kept */
+#define ZEXPORT /* expands to nothing */
+typedef long ptrdiff_t; /* only referenced inside the BYFOUR-only code of this file */
+#define NOBYFOUR /* keeps BYFOUR from being defined below, so TBLS ends up as 1 */
+
+#define local static
+
+/* Find a four-byte integer type for crc32_little() and crc32_big(). */
+#ifndef NOBYFOUR /* NOBYFOUR is defined earlier in this file, so this search is skipped */
+# ifdef STDC /* need ANSI C limits.h to determine sizes */
+# include <limits.h>
+# define BYFOUR
+# if (UINT_MAX == 0xffffffffUL)
+ typedef unsigned int u4;
+# else
+# if (ULONG_MAX == 0xffffffffUL)
+ typedef unsigned long u4;
+# else
+# if (USHRT_MAX == 0xffffffffUL)
+ typedef unsigned short u4;
+# else
+# undef BYFOUR /* can't find a four-byte integer type! */
+# endif
+# endif
+# endif
+# endif /* STDC */
+#endif /* !NOBYFOUR */
+
+/* Definitions for doing the crc four data bytes at a time.
+ *
+ * REV(w) reverses the byte order of a 32-bit value.  TBLS is the number of
+ * 256-entry tables in crc_table: 8 when BYFOUR is defined (tables 0-3 are
+ * read by crc32_little(), tables 4-7 by crc32_big()), otherwise 1.
+ */
+#ifdef BYFOUR
+# define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \
+ (((w)&0xff00)<<8)+(((w)&0xff)<<24))
+ local unsigned long crc32_little OF((unsigned long,
+ const unsigned char FAR *, unsigned));
+ local unsigned long crc32_big OF((unsigned long,
+ const unsigned char FAR *, unsigned));
+# define TBLS 8
+#else
+# define TBLS 1
+#endif /* BYFOUR */
+
+#ifdef DYNAMIC_CRC_TABLE
+
+local volatile int crc_table_empty = 1; /* stays 1 until make_crc_table() has filled crc_table */
+local unsigned long FAR crc_table[TBLS][256]; /* written at run time by make_crc_table() */
+local void make_crc_table OF((void));
+#ifdef MAKECRCH
+ local void write_table OF((FILE *, const unsigned long FAR *)); /* called while writing crc32.h */
+#endif /* MAKECRCH */
+
+/*
+ Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
+ x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
+
+ Polynomials over GF(2) are represented in binary, one bit per coefficient,
+ with the lowest powers in the most significant bit. Then adding polynomials
+ is just exclusive-or, and multiplying a polynomial by x is a right shift by
+ one. If we call the above polynomial p, and represent a byte as the
+ polynomial q, also with the lowest power in the most significant bit (so the
+ byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
+ where a mod b means the remainder after dividing a by b.
+
+ This calculation is done using the shift-register method of multiplying and
+ taking the remainder. The register is initialized to zero, and for each
+ incoming bit, x^32 is added mod p to the register if the bit is a one (where
+ x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
+ x (which is shifting right by one and adding x^32 mod p if the bit shifted
+ out is a one). We start with the highest power (least significant bit) of
+ q and repeat for all eight bits of q.
+
+ The first table is simply the CRC of all possible eight bit values. This is
+ all the information needed to generate CRCs on data a byte at a time for all
+ combinations of CRC register values and incoming bytes. The remaining tables
+ allow for word-at-a-time CRC calculation for both big-endian and little-
+ endian machines, where a word is four bytes.
+*/
+local void make_crc_table()
+{ /* Fill crc_table[] the first time this is called, then clear
+   * crc_table_empty.  A call that arrives after `first` has been cleared
+   * builds nothing itself; it only spins until crc_table_empty reads 0.
+   * With MAKECRCH defined, every call additionally rewrites the file
+   * "crc32.h" with the table contents as C source.
+   */
+ unsigned long c;
+ int n, k;
+ unsigned long poly; /* polynomial exclusive-or pattern */
+ /* terms of polynomial defining this crc (except x^32): */
+ static volatile int first = 1; /* flag to limit concurrent making */
+ static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
+
+ /* See if another task is already doing this (not thread-safe, but better
+ than nothing -- significantly reduces duration of vulnerability in
+ case the advice about DYNAMIC_CRC_TABLE is ignored) */
+ if (first) {
+ first = 0; /* claim the build before starting it */
+
+ /* make exclusive-or pattern from polynomial (0xedb88320UL) */
+ poly = 0UL;
+ for (n = 0; n < sizeof(p)/sizeof(unsigned char); n++)
+ poly |= 1UL << (31 - p[n]); /* the term x^k is stored in bit 31-k */
+
+ /* generate a crc for every 8-bit value */
+ for (n = 0; n < 256; n++) {
+ c = (unsigned long)n;
+ for (k = 0; k < 8; k++)
+ c = c & 1 ? poly ^ (c >> 1) : c >> 1;
+ crc_table[0][n] = c;
+ }
+
+#ifdef BYFOUR
+ /* generate crc for each value followed by one, two, and three zeros,
+ and then the byte reversal of those as well as the first table */
+ for (n = 0; n < 256; n++) {
+ c = crc_table[0][n];
+ crc_table[4][n] = REV(c);
+ for (k = 1; k < 4; k++) {
+ c = crc_table[0][c & 0xff] ^ (c >> 8);
+ crc_table[k][n] = c;
+ crc_table[k + 4][n] = REV(c);
+ }
+ }
+#endif /* BYFOUR */
+
+ crc_table_empty = 0; /* cleared only after every table entry is written */
+ }
+ else { /* not first */
+ /* wait for the other guy to finish (not efficient, but rare) */
+ while (crc_table_empty)
+ ;
+ }
+
+#ifdef MAKECRCH
+ /* write out CRC tables to crc32.h
+    (this section sits outside the first-use test above, so it runs on
+    every call; the return values of fprintf/fclose are not checked) */
+ {
+ FILE *out;
+
+ out = fopen("crc32.h", "w");
+ if (out == NULL) return; /* nothing is written and the failure is not reported */
+ fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
+ fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
+ fprintf(out, "local const unsigned long FAR ");
+ fprintf(out, "crc_table[TBLS][256] =\n{\n {\n");
+ write_table(out, crc_table[0]);
+# ifdef BYFOUR
+ fprintf(out, "#ifdef BYFOUR\n");
+ for (k = 1; k < 8; k++) {
+ fprintf(out, " },\n {\n");
+ write_table(out, crc_table[k]);
+ }
+ fprintf(out, "#endif\n");
+# endif /* BYFOUR */
+ fprintf(out, " }\n};\n");
+ fclose(out);
+ }
+#endif /* MAKECRCH */
+}
+
+#ifdef MAKECRCH
+/*
+ * Print the 256 entries of one CRC table to `out` as C initializer text.
+ *
+ * Entries are written as "0x%08lxUL", five to a line.  The first entry of
+ * each line is preceded by the indent string, entries are separated by
+ * ", ", a full line ends in ",\n", and the final entry (index 255) ends in
+ * a bare "\n" with no comma.
+ *
+ * out:   stream to write to; must already be open for writing.
+ * table: pointer to 256 table entries.
+ *
+ * Nothing is returned and fprintf() failures are not detected.
+ *
+ * Written as a prototype-style definition so that it agrees with the
+ * OF((FILE *, const unsigned long FAR *)) declaration earlier in this file
+ * and with the other prototype-style definitions here (crc32(),
+ * get_crc_table()).
+ */
+local void write_table(FILE *out, const unsigned long FAR *table)
+{
+    int n;
+
+    for (n = 0; n < 256; n++)
+        fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", table[n],
+                n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", "));
+}
+#endif /* MAKECRCH */
+
+#else /* !DYNAMIC_CRC_TABLE */
+/* ========================================================================
+ * Tables of CRC-32s of all single-byte values, made by make_crc_table().
+ */
+#include "crc32.h"
+#endif /* DYNAMIC_CRC_TABLE */
+
+/* =========================================================================
+ * Return a read-only pointer to the first entry of crc_table.  The original
+ * note for this function says it can be used by asm versions of crc32().
+ * In a DYNAMIC_CRC_TABLE build the table is generated first if it is still
+ * marked empty.
+ */
+const unsigned long FAR * ZEXPORT get_crc_table(void)
+{
+#ifdef DYNAMIC_CRC_TABLE
+    if (crc_table_empty) {
+        make_crc_table();
+    }
+#endif /* DYNAMIC_CRC_TABLE */
+
+    return (const unsigned long FAR *)&crc_table[0][0];
+}
+
+/* =========================================================================
+ * DO1 folds one input byte into `crc` through table 0 and advances `buf`;
+ * DO8 is eight of those in a row.  Both expect `crc` and `buf` in scope.
+ */
+#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
+#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
+
+/* =========================================================================
+ * Run `len` bytes starting at `buf` through the CRC register `crc` and
+ * return the updated register.  A null `buf` yields 0.
+ *
+ * On the byte-wise path below the register is used exactly as passed in and
+ * returned exactly as computed: no xor with 0xffffffff is applied on entry
+ * or on exit.  The BYFOUR helpers do invert on entry and exit, but BYFOUR
+ * is only defined when NOBYFOUR is not.
+ */
+unsigned long ZEXPORT crc32(
+    unsigned long crc,
+    const unsigned char FAR *buf,
+    unsigned len)
+{
+    if (buf == Z_NULL) {
+        return 0UL;
+    }
+
+#ifdef DYNAMIC_CRC_TABLE
+    if (crc_table_empty) {
+        make_crc_table();
+    }
+#endif /* DYNAMIC_CRC_TABLE */
+
+#ifdef BYFOUR
+    if (sizeof(void *) == sizeof(ptrdiff_t)) {
+        u4 endian = 1;
+
+        /* first byte of the value 1 is non-zero on little-endian hosts */
+        if (*((unsigned char *)(&endian)))
+            return crc32_little(crc, buf, len);
+        return crc32_big(crc, buf, len);
+    }
+#endif /* BYFOUR */
+
+    /* whole groups of eight bytes first, then whatever is left */
+    for (; len >= 8; len -= 8) {
+        DO8;
+    }
+    for (; len != 0; len--) {
+        DO1;
+    }
+
+    return crc;
+}
+
+#ifdef BYFOUR
+
+/* =========================================================================
+ * DOLIT4 folds one aligned 32-bit word from `buf4` into `c` using tables
+ * 0-3 and advances `buf4`; DOLIT32 is eight of those (32 bytes).
+ */
+#define DOLIT4 c ^= *buf4++; \
+        c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
+            crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
+#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
+
+/* =========================================================================
+ * Word-at-a-time CRC for little-endian hosts (only built under BYFOUR).
+ *
+ * crc: starting register value.
+ * buf: input bytes; must not be null (crc32() checks before calling).
+ * len: number of bytes at buf.
+ *
+ * Returns the updated register.  The register is inverted on entry and
+ * again on exit.
+ *
+ * NOTE(review): the byte-wise path in crc32() does not invert, so a build
+ * with BYFOUR enabled would return different values from the NOBYFOUR
+ * build this file currently selects -- confirm before enabling BYFOUR.
+ *
+ * Defined in prototype form to agree with the OF((...)) declaration near
+ * the top of the file.
+ */
+local unsigned long crc32_little(
+    unsigned long crc,
+    const unsigned char FAR *buf,
+    unsigned len)
+{
+    register u4 c;
+    register const u4 FAR *buf4;
+
+    c = (u4)crc;
+    c = ~c;
+
+    /* single bytes until buf reaches a four-byte boundary */
+    while (len && ((ptrdiff_t)buf & 3)) {
+        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+        len--;
+    }
+
+    /* aligned words: 32 bytes per round, then 4 bytes per round */
+    buf4 = (const u4 FAR *)buf;
+    while (len >= 32) {
+        DOLIT32;
+        len -= 32;
+    }
+    while (len >= 4) {
+        DOLIT4;
+        len -= 4;
+    }
+    buf = (const unsigned char FAR *)buf4;
+
+    /* at most three trailing bytes */
+    if (len) do {
+        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+    } while (--len);
+
+    c = ~c;
+    return (unsigned long)c;
+}
+
+/* =========================================================================
+ * DOBIG4 folds one aligned 32-bit word from `buf4` into `c` using the
+ * byte-reversed tables 4-7 and advances `buf4`; DOBIG32 is eight of those.
+ * The word is read with a post-increment so that `buf4` never has to be
+ * moved to a position before the start of the caller's buffer.
+ */
+#define DOBIG4 c ^= *buf4++; \
+        c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
+            crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
+#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
+
+/* =========================================================================
+ * Word-at-a-time CRC for big-endian hosts (only built under BYFOUR).
+ *
+ * crc: starting register value.
+ * buf: input bytes; must not be null (crc32() checks before calling).
+ * len: number of bytes at buf.
+ *
+ * Returns the updated register.  Internally the register is kept
+ * byte-reversed (REV) and is inverted on entry and again on exit.
+ *
+ * NOTE(review): the byte-wise path in crc32() does not invert, so a build
+ * with BYFOUR enabled would return different values from the NOBYFOUR
+ * build this file currently selects -- confirm before enabling BYFOUR.
+ *
+ * Defined in prototype form to agree with the OF((...)) declaration near
+ * the top of the file.
+ */
+local unsigned long crc32_big(
+    unsigned long crc,
+    const unsigned char FAR *buf,
+    unsigned len)
+{
+    register u4 c;
+    register const u4 FAR *buf4;
+
+    c = REV((u4)crc);
+    c = ~c;
+
+    /* single bytes until buf reaches a four-byte boundary */
+    while (len && ((ptrdiff_t)buf & 3)) {
+        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
+        len--;
+    }
+
+    /* aligned words: 32 bytes per round, then 4 bytes per round */
+    buf4 = (const u4 FAR *)buf;
+    while (len >= 32) {
+        DOBIG32;
+        len -= 32;
+    }
+    while (len >= 4) {
+        DOBIG4;
+        len -= 4;
+    }
+    buf = (const unsigned char FAR *)buf4;
+
+    /* at most three trailing bytes */
+    if (len) do {
+        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
+    } while (--len);
+
+    c = ~c;
+    return (unsigned long)(REV(c));
+}
+
+#endif /* BYFOUR */
diff --git a/crc32.h b/crc32.h
new file mode 100644
index 00000000..8053b611
--- /dev/null
+++ b/crc32.h
@@ -0,0 +1,441 @@
+/* crc32.h -- tables for rapid CRC calculation
+ * Generated automatically by crc32.c
+ */
+
+local const unsigned long FAR crc_table[TBLS][256] =
+{
+ {
+ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+ 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+ 0x2d02ef8dUL
+#ifdef BYFOUR
+ },
+ {
+ 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL,
+ 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL,
+ 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL,
+ 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL,
+ 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL,
+ 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL,
+ 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL,
+ 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL,
+ 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL,
+ 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL,
+ 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL,
+ 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL,
+ 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL,
+ 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL,
+ 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL,
+ 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL,
+ 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL,
+ 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL,
+ 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL,
+ 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL,
+ 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL,
+ 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL,
+ 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL,
+ 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL,
+ 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL,
+ 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL,
+ 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL,
+ 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL,
+ 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL,
+ 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL,
+ 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL,
+ 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL,
+ 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL,
+ 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL,
+ 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL,
+ 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL,
+ 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL,
+ 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL,
+ 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL,
+ 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL,
+ 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL,
+ 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL,
+ 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL,
+ 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL,
+ 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL,
+ 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL,
+ 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL,
+ 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL,
+ 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL,
+ 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL,
+ 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL,
+ 0x9324fd72UL
+ },
+ {
+ 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL,
+ 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL,
+ 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL,
+ 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL,
+ 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL,
+ 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL,
+ 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL,
+ 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL,
+ 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL,
+ 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL,
+ 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL,
+ 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL,
+ 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL,
+ 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL,
+ 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL,
+ 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL,
+ 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL,
+ 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL,
+ 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL,
+ 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL,
+ 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL,
+ 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL,
+ 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL,
+ 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL,
+ 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL,
+ 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL,
+ 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL,
+ 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL,
+ 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL,
+ 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL,
+ 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL,
+ 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL,
+ 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL,
+ 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL,
+ 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL,
+ 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL,
+ 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL,
+ 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL,
+ 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL,
+ 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL,
+ 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL,
+ 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL,
+ 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL,
+ 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL,
+ 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL,
+ 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL,
+ 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL,
+ 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL,
+ 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL,
+ 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL,
+ 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL,
+ 0xbe9834edUL
+ },
+ {
+ 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL,
+ 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL,
+ 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL,
+ 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL,
+ 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL,
+ 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL,
+ 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL,
+ 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL,
+ 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL,
+ 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL,
+ 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL,
+ 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL,
+ 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL,
+ 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL,
+ 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL,
+ 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL,
+ 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL,
+ 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL,
+ 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL,
+ 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL,
+ 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL,
+ 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL,
+ 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL,
+ 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL,
+ 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL,
+ 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL,
+ 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL,
+ 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL,
+ 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL,
+ 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL,
+ 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL,
+ 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL,
+ 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL,
+ 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL,
+ 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL,
+ 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL,
+ 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL,
+ 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL,
+ 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL,
+ 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL,
+ 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL,
+ 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL,
+ 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL,
+ 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL,
+ 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL,
+ 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL,
+ 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL,
+ 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL,
+ 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL,
+ 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
+ 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
+ 0xde0506f1UL
+ },
+ {
+ 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL,
+ 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL,
+ 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL,
+ 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL,
+ 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL,
+ 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL,
+ 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL,
+ 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL,
+ 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL,
+ 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL,
+ 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL,
+ 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL,
+ 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL,
+ 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL,
+ 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL,
+ 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL,
+ 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL,
+ 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL,
+ 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL,
+ 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL,
+ 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL,
+ 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL,
+ 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL,
+ 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL,
+ 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL,
+ 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL,
+ 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL,
+ 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL,
+ 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL,
+ 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL,
+ 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL,
+ 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL,
+ 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL,
+ 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL,
+ 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL,
+ 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL,
+ 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL,
+ 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL,
+ 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL,
+ 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL,
+ 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL,
+ 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL,
+ 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL,
+ 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL,
+ 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL,
+ 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL,
+ 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL,
+ 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL,
+ 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL,
+ 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL,
+ 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL,
+ 0x8def022dUL
+ },
+ {
+ 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL,
+ 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL,
+ 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL,
+ 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL,
+ 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL,
+ 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL,
+ 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL,
+ 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL,
+ 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL,
+ 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL,
+ 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL,
+ 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL,
+ 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL,
+ 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL,
+ 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL,
+ 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL,
+ 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL,
+ 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL,
+ 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL,
+ 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL,
+ 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL,
+ 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL,
+ 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL,
+ 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL,
+ 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL,
+ 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL,
+ 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL,
+ 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL,
+ 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL,
+ 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL,
+ 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL,
+ 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL,
+ 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL,
+ 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL,
+ 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL,
+ 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL,
+ 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL,
+ 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL,
+ 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL,
+ 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL,
+ 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL,
+ 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL,
+ 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL,
+ 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL,
+ 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL,
+ 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL,
+ 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL,
+ 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL,
+ 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL,
+ 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL,
+ 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL,
+ 0x72fd2493UL
+ },
+ {
+ 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL,
+ 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL,
+ 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL,
+ 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL,
+ 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL,
+ 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL,
+ 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL,
+ 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL,
+ 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL,
+ 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL,
+ 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL,
+ 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL,
+ 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL,
+ 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL,
+ 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL,
+ 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL,
+ 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL,
+ 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL,
+ 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL,
+ 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL,
+ 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL,
+ 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL,
+ 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL,
+ 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL,
+ 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL,
+ 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL,
+ 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL,
+ 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL,
+ 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL,
+ 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL,
+ 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL,
+ 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL,
+ 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL,
+ 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL,
+ 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL,
+ 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL,
+ 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL,
+ 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL,
+ 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL,
+ 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL,
+ 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL,
+ 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL,
+ 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL,
+ 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL,
+ 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL,
+ 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL,
+ 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL,
+ 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL,
+ 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL,
+ 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL,
+ 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL,
+ 0xed3498beUL
+ },
+ {
+ 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL,
+ 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL,
+ 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL,
+ 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL,
+ 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL,
+ 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL,
+ 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL,
+ 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL,
+ 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL,
+ 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL,
+ 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL,
+ 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL,
+ 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL,
+ 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL,
+ 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL,
+ 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL,
+ 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL,
+ 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL,
+ 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL,
+ 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL,
+ 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL,
+ 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL,
+ 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL,
+ 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL,
+ 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL,
+ 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL,
+ 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL,
+ 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL,
+ 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL,
+ 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL,
+ 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL,
+ 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL,
+ 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL,
+ 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL,
+ 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL,
+ 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL,
+ 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL,
+ 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL,
+ 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL,
+ 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL,
+ 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL,
+ 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL,
+ 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL,
+ 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL,
+ 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL,
+ 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL,
+ 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL,
+ 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL,
+ 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL,
+ 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL,
+ 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL,
+ 0xf10605deUL
+#endif
+ }
+};
diff --git a/debian/FAQ b/debian/FAQ
new file mode 100644
index 00000000..325fcb59
--- /dev/null
+++ b/debian/FAQ
@@ -0,0 +1,581 @@
+Frequently asked questions -- Debian mdadm
+==========================================
+
+Also see /usr/share/doc/mdadm/README.recipes.gz .
+
+The latest version of this FAQ is available here:
+ http://git.debian.org/?p=pkg-mdadm/mdadm.git;a=blob;f=debian/FAQ;hb=HEAD
+
+0. What does MD stand for?
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+ MD is an abbreviation for "multiple device" (also often called "multi-
+ disk"). The Linux MD implementation implements various strategies for
+ combining multiple physical devices into single logical ones. The most
+ common use case is commonly known as "Software RAID". Linux supports RAID
+ levels 1, 4, 5, 6, and 10, as well as the "pseudo-redundant" RAID level 0.
+ In addition, the MD implementation covers linear and multipath
+ configurations.
+
+ Most people refer to MD as RAID. Since the original name of the RAID
+ configuration software is "md"adm, I chose to use MD consistently instead.
+
+1. How do I overwrite ("zero") the superblock?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdadm --zero-superblock /dev/mdX
+
+ Note that this is a destructive operation. It does not actually delete any
+ data, but the device will have lost its "authority". You cannot assemble the
+ array with it anymore, and if you add the device to another array, the
+ synchronisation process *will* *overwrite* all data on the device.
+
+ Nevertheless, sometimes it is necessary to zero the superblock:
+
+ - If you are reusing a disk that has been part of an array with a different
+ superblock version and/or location. In this case you zero the superblock
+ before you assemble the array, or add the device to an array.
+
+ - If you are trying to prevent a device from being recognised as part of an
+ array. Say for instance you are trying to change an array spanning sd[ab]1
+ to sd[bc]1 (maybe because sda is failing or too slow), then automatic
+ (scan) assembly will still recognise sda1 as a valid device. You can limit
+ the devices to scan with the DEVICE keyword in the configuration file, but
+ this may not be what you want. Instead, zeroing the superblock will
+ (permanently) prevent a device from being considered as part of an array.
+
+2. How do I change the preferred minor of an MD array (RAID)?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ See item 12 in /usr/share/doc/mdadm/README.recipes.gz and read the mdadm
+ manpage (search for 'preferred').
+
+3. How does mdadm determine which /dev/mdX or /dev/md/X to use?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The logic used by mdadm to determine the device node name in the mdadm
+ --examine output (which is used to generate mdadm.conf) depends on several
+ factors. Here's how mdadm determines it:
+
+ It first checks the superblock version of a given array (or each array in
+ turn when iterating all of them). Run
+
+ mdadm --detail /dev/mdX | sed -ne 's,.*Version : ,,p'
+
+ to determine the superblock version of a running array, or
+
+ mdadm --examine /dev/sdXY | sed -ne 's,.*Version : ,,p'
+
+ to determine the superblock version from a component device of an array.
+
+ Version 0 superblocks (00.90.XX)
+ ''''''''''''''''''''''''''''''''
+ You need to know the preferred minor number stored in the superblock,
+ so run either of
+
+ mdadm --detail /dev/mdX | sed -ne 's,.*Preferred Minor : ,,p'
+ mdadm --examine /dev/sdXY | sed -ne 's,.*Preferred Minor : ,,p'
+
+ Let's call the resulting number MINOR. Also see FAQ 2 further up.
+
+ Given MINOR, mdadm will output /dev/md<MINOR> if the device node
+ /dev/md<MINOR> exists.
+ Otherwise, it outputs /dev/md/<MINOR>
+
+ Version 1 superblocks (01.XX.XX)
+ ''''''''''''''''''''''''''''''''
+ Version 1 superblocks actually seem to ignore preferred minors and instead
+ use the value of the name field in the superblock. Unless specified
+ explicitly during creation (-N|--name) the name is determined from the
+ device name used, using the following regexp: 's,/dev/md/?(.*),$1,', thus:
+
+ /dev/md0 -> 0
+ /dev/md/0 -> 0
+ /dev/md_d0 -> _d0 (d0 in later versions)
+ /dev/md/d0 -> d0
+ /dev/md/name -> name
+ (/dev/name does not seem to work)
+
+ mdadm will append the name to '/dev/md/', so it will always output device
+ names under the /dev/md/ directory. Newer versions can create a symlink
+ from /dev/mdX. See the symlinks option in mdadm.conf(5) and mdadm(8).
+
+ If you want to change the name, you can do so during assembly:
+
+ mdadm -A -U name -N newname /dev/mdX /dev/sd[abc]X
+
+ I know this all sounds inconsistent and upstream has some work to do.
+ We're on it.
+
+4. Which RAID level should I use?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Many people seem to prefer RAID4/5/6 because it makes more efficient use of
+ space. For example, if you have disks of size X, then in order to get 2X
+ storage, you need 3 disks for RAID5, but 4 if you use RAID10 or RAID1+0 (or
+ RAID6).
+
+ This gain in usable space comes at a price: performance; RAID1/10 can be up
+ to four times faster than RAID4/5/6.
+
+ At the same time, however, RAID4/5/6 provide somewhat better redundancy in
+ the event of two failing disks. In a RAID10 configuration, if one disk is
+ already dead, the RAID can only survive if any of the two disks in the other
+ RAID1 array fails, but not if the second disk in the degraded RAID1 array
+ fails (see next item, 4b). A RAID6 across four disks can cope with any two
+ disks failing. However, RAID6 is noticeably slower than RAID5. RAID5 and
+ RAID4 do not differ much, but can only handle single-disk failures.
+
+ If you can afford the extra disks (storage *is* cheap these days), I suggest
+ RAID1/10 over RAID4/5/6. If you don't care about performance but need as
+ much space as possible, go with RAID4/5/6, but make sure to have backups.
+ Heck, make sure to have backups whatever you do.
+
+ Let it be said, however, that I thoroughly regret putting my primary
+ workstation on RAID5. Anything disk-intensive brings the system to its
+ knees; I will have to migrate to RAID10 at one point.
+
+4b. Can a 4-disk RAID10 survive two disk failures?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ I am assuming that you are talking about a setup with two copies of each
+ block, so --layout=near2/far2/offset2:
+
+ In two thirds of the cases, yes[0], and it does not matter which layout you
+ use. When you assemble 4 disks into a RAID10, you essentially stripe a RAID0
+ across two RAID1, so the four disks A,B,C,D become two pairs: A,B and C,D.
+ If A fails, the RAID10 can only survive if the second failing disk is either
+ C or D; if B fails, your array is dead.
+
+ Thus, if you see a disk failing, replace it as soon as possible!
+
+ If you need to handle two failing disks out of a set of four, you have to
+ use RAID6, or store more than two copies of each block (see the --layout
+ option in the mdadm(8) manpage).
+
+ See also question 18 further down.
+
+ 0. it's actually (n-2)/(n-1), where n is the number of disks. I am not
+ a mathematician, see http://aput.net/~jheiss/raid10/, which gives the
+ chance of *failure* as 1/(n-1), so the chance of success is 1-1/(n-1), or
+ (n-2)/(n-1), or 2/3 in the four disk example.
+ (Thanks to Per Olofsson for clarifying this in #493577).
+
+5. How to convert RAID5 to RAID10?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ To convert RAID5 to RAID10, you need a spare disk (either a spare, fourth
+ disk in the array, or a new one). Then you remove the spare and one of the
+ three disks from the RAID5, create a degraded RAID10 across them, create
+ the filesystem and copy the data (or do a raw copy), then add the other two
+ disks to the new RAID10. However, mdadm cannot assemble a RAID10 with 50%
+ missing devices the way you might like it:
+
+ mdadm --create -l 10 -n4 -pn2 /dev/md1 /dev/sd[cd] missing missing
+
+ For reasons that may be answered by question 20 further down, mdadm actually
+ cares about the order of devices you give it. If you intersperse the missing
+ keywords with the physical drives, it should work:
+
+ mdadm --create -l 10 -n4 -pn2 /dev/md1 /dev/sdc missing /dev/sdd missing
+
+ or even
+
+ mdadm --create -l 10 -n4 -pn2 /dev/md1 missing /dev/sd[cd] missing
+
+ Also see item (4b) further up, and this thread:
+ http://marc.theaimsgroup.com/?l=linux-raid&m=116004333406395&w=2
+
+6. What is the difference between RAID1+0 and RAID10?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ RAID1+0 is a form of RAID in which a RAID0 is striped across two RAID1
+ arrays. To assemble it, you create two RAID1 arrays and then create a RAID0
+ array with the two md arrays.
+
+ The Linux kernel provides the RAID10 level to do pretty much exactly the
+ same for you, but with greater flexibility (and somewhat improved
+ performance). While RAID1+0 makes sense with 4 disks, RAID10 can be
+ configured to work with only 3 disks. Also, RAID10 has a little less
+ overhead than RAID1+0, which has data pass the md layer twice.
+
+ I prefer RAID10 over RAID1+0.
+
+6b. What's the difference between RAID1+0 and RAID0+1?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ In short: RAID1+0 concatenates two mirrored arrays while RAID0+1 mirrors two
+ concatenated arrays. However, the two are also often switched.
+
+ The linux MD driver supports RAID10, which is equivalent to the above
+ RAID1+0 definition.
+
+ RAID1+0/10 has a greater chance to survive two disk failures, its
+ performance suffers less when in degraded state, and it resyncs faster after
+ replacing a failed disk.
+
+ See http://aput.net/~jheiss/raid10/ for more details.
+
+7. Which RAID10 layout scheme should I use?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ RAID10 gives you the choice between three ways of laying out the blocks on
+ the disk. Assuming a simple 4 drive setup with 2 copies of each block, then
+ if A,B,C are data blocks, a,b their parts, and 1,2 denote their copies, the
+ following would be a classic RAID1+0 where 1,2 and 3,4 are RAID1 pairs
+ combined into a RAID0:
+
+ near=2 would be (this is the classic RAID1+0)
+
+ hdd1 Aa1 Ba1 Ca1
+ hdd2 Aa2 Ba2 Ca2
+ hdd3 Ab1 Bb1 Cb1
+ hdd4 Ab2 Bb2 Cb2
+
+ offset=2 would be
+
+ hdd1 Aa1 Bb2 Ca1 Db2
+ hdd2 Ab1 Aa2 Cb1 Ca2
+ hdd3 Ba1 Ab2 Da1 Cb2
+ hdd4 Bb1 Ba2 Db1 Da2
+
+ far=2 would be
+
+ hdd1 Aa1 Ca1 .... Bb2 Db2
+ hdd2 Ab1 Cb1 .... Aa2 Ca2
+ hdd3 Ba1 Da1 .... Ab2 Cb2
+ hdd4 Bb1 Db1 .... Ba2 Da2
+
+ Where the second set start half-way through the drives.
+
+ The advantage of far= is that you can easily spread a long sequential read
+ across the drives. The cost is more seeking for writes. offset= can
+ possibly get similar benefits with large enough chunk size. Neither upstream
+ nor the package maintainer have tried to understand all the implications of
+ that layout. It was added simply because it is a supported layout in DDF and
+ DDF support is a goal.
+
+8. (One of) my RAID arrays is busy and cannot be stopped. What gives?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ It is perfectly normal for mdadm to report the array with the root
+ filesystem to be busy on shutdown. The reason for this is that the root
+ filesystem must be mounted to be able to stop the array (or otherwise
+ /sbin/mdadm does not exist), but to stop the array, the root filesystem
+ cannot be mounted. Catch 22. The kernel actually stops the array just before
+ halting, so it's all well.
+
+ If mdadm cannot stop other arrays on your system, check that these arrays
+ aren't used anymore. Common causes for busy/locked arrays are:
+
+ * The array contains a mounted filesystem (check the `mount' output)
+ * The array is used as a swap backend (check /proc/swaps)
+ * The array is used by the device-mapper (check with `dmsetup')
+ * LVM
+ * dm-crypt
+ * EVMS
+ * The array contains a swap partition used for suspend-to-disk
+ (check /etc/initramfs-tools/conf.d/resume)
+ * The array is used by a process (check with `lsof')
+
+9. Should I use RAID0 (or linear)?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ No. Unless you know what you're doing and keep backups, or use it for data
+ that can be lost.
+
+9b. Why not?
+~~~~~~~~~~~~
+ RAID0 has zero redundancy. If you stripe a RAID0 across X disks, you
+ increase the likelihood of complete loss of the filesystem by a factor of X.
+
+ The same applies to LVM by the way.
+
+ If you want to (or must) use LVM or RAID0, stripe it across RAID1 arrays
+ (RAID10/RAID1+0, or LVM on RAID1), and keep backups!
+
+10. Can I cancel a running array check (checkarray)?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ See the -x option in the `/usr/share/mdadm/checkarray --help` output.
+
+11. mdadm warns about duplicate/similar superblocks; what gives?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ In certain configurations, especially if your last partition extends all the
+ way to the end of the disk, mdadm may display a warning like:
+
+ mdadm: WARNING /dev/hdc3 and /dev/hdc appear to have very similar
+ superblocks. If they are really different, please --zero the superblock on
+ one. If they are the same or overlap, please remove one from the DEVICE
+ list in mdadm.conf.
+
+ There are two ways to solve this:
+
+ (a) recreate the arrays with version-1 superblocks, which is not always an
+ option -- you cannot yet upgrade version-0 to version-1 superblocks for
+ existing arrays.
+
+ (b) instead of 'DEVICE partitions', list exactly those devices that are
+ components of MD arrays on your system. So in the above example:
+
+ - DEVICE partitions
+ + DEVICE /dev/hd[ab]* /dev/hdc[123]
+
+12. mdadm -E / mkconf report different arrays with the same device
+ name / minor number. What gives?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ In almost all cases, mdadm updates the super-minor field in an array's
+ superblock when assembling the array. It does *not* do this for RAID0
+ arrays. Thus, you may end up seeing something like this when you run
+ mdadm -E or mkconf:
+
+ ARRAY /dev/md0 level=raid0 num-devices=2 UUID=abcd...
+ ARRAY /dev/md0 level=raid1 num-devices=2 UUID=dcba...
+
+ Note how the two arrays have different UUIDs but both appear as /dev/md0.
+
+ The solution in this case is to explicitly tell mdadm to update the
+ superblock of the RAID0 array. Assuming that the RAID0 array in the above
+ example should really be /dev/md1:
+
+ mdadm --stop /dev/md1
+ mdadm --assemble --update=super-minor --uuid=abcd... /dev/md1
+
+ See question 2 of this FAQ, and also http://bugs.debian.org/386315 and
+ recipe #12 in README.recipes .
+
+13. Can a MD array be partitioned?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Since kernel 2.6.28, MD arrays can be partitioned like any other block
+ device.
+
+ Prior to 2.6.28, for a MD array to be able to hold partitions, it must be
+ created as a "partitionable array", using the configuration auto=part on the
+ command line or in the configuration file, or by using the standard naming
+ scheme (md_d* or md/d*) for partitionable arrays:
+
+ mdadm --create --auto=yes ... /dev/md_d0 ...
+ # see mdadm(8) manpage about the values of the --auto keyword
+
+14. When would I partition an array?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ This answer by Doug Ledford is shamelessly adapted from [0] (with
+ permission):
+
+ First, not all MD types make sense to be split up, e.g. multipath. For
+ those types, when a disk fails, the *entire* disk is considered to have
+ failed, but with different arrays you won't switch over to the next path
+ until each MD array has attempted to access the bad path. This can have
+ obvious bad consequences for certain array types that do automatic
+ failover from one port to another (you can end up getting the array in
+ a loop of switching ports repeatedly to satisfy the fact that one array
+ failed over during a path down, then the path came back up, and another
+ array stayed on the old path because it didn't send any commands during
+ the path down time period).
+
+ Second, convenience. Assume you have a 6 disk RAID5 array. If a disk
+ fails and you are using a partitioned MD array, then all the partitions on
+ the disk will already be handled without using that disk. No need to
+ manually fail any still active array members from other arrays.
+
+ Third, safety. Again with the raid5 array. If you use multiple arrays on
+ a single disk, and that disk fails, but it only failed on one array, then
+ you now need to manually fail that disk from the other arrays before
+ shutting down or hot swapping the disk. Generally speaking, that's not
+ a big deal, but people do occasionally have fat finger syndrome and this
+ is a good opportunity for someone to accidentally fail the wrong disk, and
+ when you then go to remove the disk you create a two disk failure instead
+ of one and now you are in real trouble.
+
+ Fourth, to respond to what you wrote about independent of each other --
+ part of the reason why you partition. I would argue that's not true. If
+ your goal is to salvage as much use from a failing disk as possible, then
+ OK. But, generally speaking, people that have something of value on their
+ disks don't want to salvage any part of a failing disk, they want that
+ disk gone and replaced immediately. There simply is little to no value in
+ an already malfunctioning disk. They're too cheap and the data stored on
+ them too valuable to risk losing something in an effort to further
+ utilize broken hardware. This of course is written with the understanding
+ that the latest MD RAID code will do read error rewrites to compensate for
+ minor disk issues, so anything that will throw a disk out of an array is
+ more than just a minor sector glitch.
+
+ 0. http://marc.theaimsgroup.com/?l=linux-raid&m=116117813315590&w=2
+
+15. How can I start a dirty degraded array?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ A degraded array (e.g. a RAID5 with only two disks) that has not been
+ properly stopped cannot be assembled just like that; mdadm will refuse and
+ complain about a "dirty degraded array", for good reasons.
+
+ The solution might be to force-assemble it, and then to start it. Please see
+ recipes 4 and 4b of /usr/share/doc/mdadm/README.recipes.gz and make sure you
+ know what you're doing.
+
+16. How can I influence the speed with which an array is resynchronised?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ For each array, the MD subsystem exports parameters governing the
+ synchronisation speed via sysfs. The values are in kB/sec.
+
+ /sys/block/mdX/md/sync_speed -- the current speed
+ /sys/block/mdX/md/sync_speed_max -- the maximum speed
+ /sys/block/mdX/md/sync_speed_min -- the guaranteed minimum speed
+
+17. When I create a new array, why does it resynchronise at first?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ See the mdadm(8) manpage:
+ When creating a RAID5 array, mdadm will automatically create a degraded
+ array with an extra spare drive. This is because building the spare into
+ a degraded array is in general faster than resyncing the parity on
+ a non-degraded, but not clean, array. This feature can be over-ridden with
+ the --force option.
+
+ This also applies to RAID levels 4 and 6.
+
+ It does not make much sense for RAID levels 1 and 10 and can thus be
+ overridden with the --force and --assume-clean options, but it is not
+ recommended. Read the manpage.
+
+18. How many failed disks can a RAID10 handle?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ (see also question 4b)
+
+ The following table shows how many disks you can lose and still have an
+ operational array. In some cases, you *can* lose more than the given number
+ of disks, but there is no guarantee that the array survives. Thus, the
+ following is the guaranteed number of failed disks a RAID10 array survives
+ and the maximum number of failed disks the array can (but is not guaranteed
+ to) handle, given the number of disks used and the number of data block
+ copies. Note that 2 copies means original + 1 copy. Thus, if you only have
+ one copy (the original), you cannot handle any failures.
+
+ 1 2 3 4 (# of copies)
+ 1 0/0 0/0 0/0 0/0
+ 2 0/0 1/1 1/1 1/1
+ 3 0/0 1/1 2/2 2/2
+ 4 0/0 1/2 2/2 3/3
+ 5 0/0 1/2 2/2 3/3
+ 6 0/0 1/3 2/3 3/3
+ 7 0/0 1/3 2/3 3/3
+ 8 0/0 1/4 2/3 3/4
+ (# of disks)
+
+ Note: I have not really verified the above information. Please don't count
+ on it. If a disk fails, replace it as soon as possible. Corrections welcome.
+
+19. What should I do if a disk fails?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Replace it as soon as possible:
+
+ mdadm --remove /dev/md0 /dev/sda1
+ halt
+ <replace disk and start the machine>
+ mdadm --add /dev/md0 /dev/sda1
+
+20. So how do I find out which other disk(s) can fail without killing the
+ array?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Did you read the previous question and its answer?
+
+ For cases when you have two copies of each block, the question is easily
+ answered by looking at the output of /proc/mdstat. For instance on a four
+ disk array:
+
+ md3 : active raid10 sdg7[3] sde7[0] sdh7[2] sdf7[1]
+
+ you know that sde7/sdf7 form one pair and sdg7/sdh7 the other.
+
+ If sdh now fails, this will become
+
+ md3 : active raid10 sdg7[3] sde7[0] sdh7[4](F) sdf7[1]
+
+ So now the second pair is broken; the array could take another failure in
+ the first pair, but if sdg now also fails, you're history.
+
+ Now go and read question 19.
+
+ For cases with more copies per block, it becomes more complicated. Let's
+ think of a seven disk array with three copies:
+
+ md5 : active raid10 sdg7[6] sde7[4] sdb7[5] sdf7[2] sda7[3] sdc7[1] sdd7[0]
+
+ Each mirror now has 7/3 = 2.33 disks to it, so in order to determine groups,
+ you need to round up. Note how the disks are arranged in increasing order of
+ their indices (the number in brackets in /proc/mdstat):
+
+ disk: -sdd7- -sdc7- -sdf7- -sda7- -sde7- -sdb7- -sdg7-
+ group: [ one ][ two ][ three ]
+
+ Basically this means that after two disks have failed, you need to make sure that
+ the third failed disk doesn't destroy all copies of any given block. And
+ that's not always easy as it depends on the layout chosen: whether the
+ blocks are near (same offset within each group), far (spread apart in a way
+ to maximise the mean distance), or offset (offset by size/n within each
+ block).
+
+ I'll leave it up to you to figure things out. Now go read question 19.
+
+21. Why does the kernel speak of 'resync' when using checkarray?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Please see README.checkarray and
+ http://www.mail-archive.com/linux-raid@vger.kernel.org/msg04835.html .
+
+ In short: it's a bug. checkarray is actually not a resync, but the kernel
+ does not distinguish between them.
+
+22. Can I prioritise the sync process and sync certain arrays before others?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Upon start, md will resynchronise any unclean arrays, starting in somewhat
+ random order. Sometimes it's desirable to sync e.g. /dev/md3 first (because
+ it's the most important), but while /dev/md1 is synchronising, /dev/md3 will
+ be DELAYED (see /proc/mdstat), but only if they share the same physical
+ components.
+
+ It is possible to delay the synchronisation via /sys:
+
+ echo idle >/sys/block/md1/md/sync_action
+
+ This will cause md1 to go idle and md to synchronise md3 (or whatever is
+ queued next; repeat the above for other devices if necessary). md will also
+ realise that md1 is still not in sync and queue it for resynchronisation,
+ so it will sync automatically when its turn has come.
+
+23. mdadm's init script fails because it cannot find any arrays. What gives?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ This does not happen anymore: if no arrays are present in the config file,
+ no arrays will be started.
+
+24. What happened to mdrun? How do I replace it?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdrun used to be the sledgehammer approach to assembling arrays. It has
+ accumulated several problems over the years (e.g. #354705) and thus has been
+ deprecated and removed with the 2.6.7-2 version of this package.
+
+ If you are still using mdrun, please ensure that you have a valid
+ /etc/mdadm/mdadm.conf file (run /usr/share/mdadm/mkconf --generate to get
+ one), and run
+
+ mdadm --assemble --scan --auto=yes
+
+ instead of mdrun.
+
+25. Why are my arrays marked auto-read-only in /proc/mdstat?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Arrays are kept read-only until the first write occurs. This allows md to
+ skip lengthy resynchronisation for arrays that have not been properly shut
+ down, but which also have not changed.
+
+26. Why doesn't mdadm find arrays specified in the config file and causes the
+ boot to fail?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ My boot process dies at an early stage and drops me into the busybox shell.
+ The last relevant output seems to be from mdadm and is something like
+
+ "/dev/md2 does not exist"
+
+ or
+
+ "No devices listed in conf file found"
+
+ Why does mdadm break my system?
+
+ Short answer: It doesn't, the underlying devices aren't available yet
+ when mdadm runs during the early boot process.
+
+ Long answer: It doesn't, but the drivers of those devices incorrectly
+ communicate to the kernel that the devices are ready, when in fact they are
+ not. I consider this a bug in those drivers. Please consider reporting it.
+
+ Workaround: there is nothing mdadm can or will do against this. Fortunately
+ though, initramfs provides a method, documented at
+ http://wiki.debian.org/InitramfsDebug. Please append rootdelay=10 to the
+ kernel command line and try if the boot now works.
+
+ -- martin f. krafft <madduck@debian.org> Wed, 13 May 2009 09:59:53 +0200
diff --git a/debian/NEWS b/debian/NEWS
new file mode 100644
index 00000000..691f7171
--- /dev/null
+++ b/debian/NEWS
@@ -0,0 +1,107 @@
+mdadm (3.2.2-1) unstable; urgency=low
+
+ Metadata format change requires recent Grub
+ The following only applies to users who want to let the grub-pc bootloader
+ load the kernel directly off a RAID device created with mdadm 3.x and
+ default values, or when the metadata version is explicitly set using -e.
+
+ Specifically, this includes all arrays created during or after the
+ installation of Debian squeeze (mdadm-3.1.4+8efb9d1). Arrays created with
+ older mdadm versions, and RAIDs created with the command-line option
+ -e 0.9 are not affected.
+
+ Versions of grub-pc older than 1.98+20100720-1 will not be able to boot
+ directly off a RAID with the 1.x metadata formats (the new default is 1.2).
+ To ensure a bootable system, please make sure to use grub-pc 1.98+20100720-1
+ or later, which is provided by Debian squeeze. An unbootable system may be
+ rescued with Super Grub2 Disk (http://www.supergrubdisk.org/super-grub2-disk/)
+ or grml (http://grml.org/).
+
+ -- Scott Schaefer <saschaefer@neurodiverse.org> Wed, 27 Jul 2011 20:21:50 -0400
+
+mdadm (3.1.4-1+8efb9d1) unstable; urgency=low
+
+ Default metadata format for newly created arrays has changed from
+ 0.90 to 1.2. Location of the superblock is now 4Kb from the start
+ of the device, instead of at the end of the device for 0.90.
+ The change from 0.9 to 1.x lifted many restrictions of the old
+ metadata format, and change in location (from end to 4k after
+ start for 1.2) reduced chances to confuse a raid array with
+ filesystem inside it. It is now less easy to mount a component
+ device as separate filesystem by accident, thus destroying the
+ array.
+
+ Also, chunk size by default is 512K (was 64K) and bitmap chunk size
+ is 64Mb.
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Sat, 10 Sep 2011 13:35:12 +0400
+
+mdadm (2.6.7-2) unstable; urgency=low
+
+ /dev/disk symlinks:
+ mdadm now creates symlinks in /dev/disk/by-id, using the template
+ md-uuid-* for the array UUIDs and md-name-* for any names assigned to
+ arrays (version-1 superblocks only). Thanks to Suse for the udev rules
+ file.
+
+ mdrun removed:
+ This version also removes mdrun once and for all. If you are still using
+ mdrun, please ensure that you have a valid /etc/mdadm/mdadm.conf file (run
+ /usr/share/mdadm/mkconf --generate to get one), and run
+
+ mdadm --assemble --scan --auto=yes
+
+ -- martin f. krafft <madduck@debian.org> Wed, 02 Jul 2008 10:57:32 +0200
+
+mdadm (2.5.3.git200608201206-1) unstable; urgency=low
+
+ This version makes mdadm.conf mandatory. If you do not have such a file, it
+ will be created for you.
+
+ You must verify the contents of this file and ensure that it represents your
+ local configuration. See /usr/share/doc/mdadm/README.upgrading-2.5.3.gz for
+ more information.
+
+ -- martin f. krafft <madduck@debian.org> Sun, 20 Aug 2006 21:58:43 +0100
+
+mdadm (2.5-1) unstable; urgency=low
+
+ mdrun has been (finally) obsoleted, and an appropriate warning message is
+ written to the console if you (or a script) attempt to run it. If you
+ cannot live without mdrun, you can disable the warning by setting
+ USE_DEPRECATED_MDRUN=1 in /etc/default/mdadm. Note that mdrun will *not* be
+ supported. Please also see /usr/share/doc/mdadm/README.mdrun .
+
+ -- martin f. krafft <madduck@debian.org> Tue, 30 May 2006 23:25:13 +0200
+
+mdadm (2.4.1-5) unstable; urgency=low
+
+ This version drops the automatic generation of the /etc/mdadm/mdadm.conf
+ file on every boot (if it was missing). This means that you need to ensure
+ that you have a valid configuration file. If none is present during package
+ configuration, mdadm *will* try to generate one, but it will only contain
+ information about arrays that were running at the time of package
+ configuration. Arrays not listed in the configuration file will *not* be
+ started automatically after boot (with the exception of the root partition).
+
+ If you want to recreate your configuration file, either figure out what it
+ should contain from the mdadm.conf(5) manpage, or simply assemble and run
+ all the arrays the way you like it, then run
+ /usr/share/mdadm/mkconf force-generate /etc/mdadm/mdadm.conf
+
+ -- martin f. krafft <madduck@debian.org> Sat, 03 Jun 2006 17:45:47 +0200
+
+mdadm (2.4.1-1) unstable; urgency=low
+
+ As of version 2.3, mdadm uses /etc/mdadm.conf as its main configuration
+ file, and falls back to /etc/mdadm/mdadm.conf if the former is not found.
+ Since Debian uses /etc/mdadm/mdadm.conf as the configuration file path, this
+ order was reversed: Debian's mdadm reads /etc/mdadm/mdadm.conf as its main
+ file and falls back to /etc/mdadm.conf if the former is not found.
+
+ An incompatible change in the reshaping of RAID 5 arrays was made in this
+ upstream release. If you want to reshape a RAID 5 array with a version-1
+ superblock, please make sure to use mdadm 2.4.1 and at least a 2.6.17-rc2
+ kernel.
+
+ -- martin f. krafft <madduck@debian.org> Tue, 16 May 2006 13:07:49 -0500
diff --git a/debian/README.Debian b/debian/README.Debian
new file mode 100644
index 00000000..bfca8cb3
--- /dev/null
+++ b/debian/README.Debian
@@ -0,0 +1,148 @@
+mdadm for Debian
+================
+
+Please make sure you read /usr/share/doc/mdadm/NEWS.Debian.gz and the
+documents listed under "further reading" a little later in this file.
+
+The latest version of this document is available here:
+ http://git.debian.org/?p=pkg-mdadm/mdadm.git;a=blob;f=debian/README.Debian;hb=HEAD
+
+Autostarting devices
+~~~~~~~~~~~~~~~~~~~~
+The mdadm.conf file controls which devices are to be started automatically by
+mdadm during boot, and various other parameters about how they are to be started.
+The file can also contain some control parameters for the mdadm monitor daemon.
+See mdadm.conf(5) for more information.
+
+Note: this only applies to modular kernels. If you use a monolithic kernel,
+you can control which devices are started automatically by changing the
+partition type: 0xfd for autostart, 0x83 to prevent autostart. mdadm does not
+actually care about the partition type, only the kernel does.
+
+Common recipes
+~~~~~~~~~~~~~~
+Check /usr/share/doc/mdadm/README.recipes.gz for some simple examples of how
+to do the most common stuff with mdadm.
+
+To RAID5 or not to RAID5
+~~~~~~~~~~~~~~~~~~~~~~~~
+See http://www.miracleas.com/BAARF/BAARF2.html . The package maintainer could
+not possibly come up with so much emotion over such a technical topic.
+
+Further reading
+~~~~~~~~~~~~~~~
+The documentation for the kernel md driver is included in
+/usr/share/doc/mdadm/md.txt.gz. In addition, the md(4) manpage provides
+valuable information about the applicable concepts. Do read those!
+
+Further documents of interest:
+ - Linux-RAID reference Wiki:
+ http://linux-raid.osdl.org
+ - Linux software RAID HOWTO:
+ http://tldp.org/HOWTO/Software-RAID-HOWTO.html
+ - linux-raid mailing list info:
+ http://vger.kernel.org/vger-lists.html#linux-raid
+ - linux-raid mailing list FAQ:
+ http://www.faqs.org/contrib/linux-raid/
+
+Upstream
+~~~~~~~~
+For completeness: The upstream repository is available from
+ git clone git://neil.brown.name/mdadm
+
+You can browse Neil's repository here:
+ http://neil.brown.name/git?p=mdadm
+
+You can also clone from Debian's Git repository, where upstream's code is in
+the 'upstream' branch:
+ git://git.debian.org/git/pkg-mdadm/mdadm
+
+Reporting bugs
+~~~~~~~~~~~~~~
+For reporting bugs, please use the reportbug tool, as it collects useful
+information about the system where you're experiencing the problem.
+
+If you are reporting from a different system, please include the output of
+/usr/share/bug/mdadm/script, run on the affected system, with your report.
+
+If you are turning to the linux-raid@vger.kernel.org mailing list because you
+already know that the issue is with the md kernel driver and certainly not
+Debian-specific, please also include the output of
+/usr/share/bug/mdadm/script.
+
+In general, report bugs against the mdadm Debian package, using reportbug.
+I am happy to route reports to where they belong.
+
+Debian package maintenance
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+The package is maintained with Git and published on git.debian.org. To obtain
+the source:
+
+ git clone git://git.debian.org/git/pkg-mdadm/mdadm
+
+You can browse the repository here:
+ http://git.debian.org/?p=pkg-mdadm/mdadm
+
+If you want to join the mdadm effort, please send me an email. I'll be very
+glad for any help I get.
+
+There are things to do listed in debian/TODO.
+
+You might also be interested in the following document, which explains how
+package maintenance of mdadm was migrated from SVN to Git:
+ http://blog.madduck.net/debian/2007.10.07_converting-a-package-to-git
+
+Patches
+~~~~~~~
+The best way to submit patches is with git-format-patch, as outlined in the
+following. If this is too complicated for you, please feel free to make
+normal diffs, or contact me for assistance if you'd like to learn how to use
+Git.
+
+Please try to follow the guidelines outlined in
+ http://repo.or.cz/w/git.git?a=blob;f=Documentation/SubmittingPatches;hb=HEAD
+
+First, the setup, which you only have to do once on each machine you work with:
+
+# leave out --global if you want to set your identity only for mdadm
+git config --global user.name 'your name'
+git config --global user.email 'your@email.address'
+git clone git://git.debian.org/git/pkg-mdadm/mdadm.git
+
+To prepare the actual patch, do the following:
+
+git pull
+git checkout -b some-name-identifying-my-work
+while not finished:
+ // if resuming after a while, maybe update your branch:
+ git rebase master
+ // edit files
+ git add files
+ git commit
+ ...
+end
+
+After you've brought your change to a state where you want to submit it, please
+squash it into logical single commits. If you only made one change, then this
+will do:
+
+git checkout -b temp-squash master
+git merge --squash some-name-identifying-my-work
+git commit // ... remove the "Squashed commit of the following:" leader
+git format-patch -M -s master
+// now inspect the files this created in $PWD
+// when you're ready to submit, do:
+git send-email --to your@email.address
+// check that it's okay when it arrives
+git send-email --to pkg-mdadm-devel@lists.alioth.debian.org
+
+For multiple logical changes, cherry-pick or squash-merge every commit
+belonging to a change to the integration branch and then commit it.
+
+Also, read the git-send-email manpage if you're submitting multiple
+logical changes and want to thread them.
+
+The manpage also explains how to add a prologue message describing your
+patch, and how to insert the patch into an existing thread (in-reply-to).
+
+ -- martin f. krafft <madduck@debian.org> Tue, 16 Oct 2007 18:12:13 +0100
diff --git a/debian/README.checkarray b/debian/README.checkarray
new file mode 100644
index 00000000..8071a4d6
--- /dev/null
+++ b/debian/README.checkarray
@@ -0,0 +1,33 @@
+checkarray notes
+================
+
+checkarray will run parity checks across all your redundant arrays. By
+default, it is configured to run on the first Sunday of each month, at 01:06
+in the morning. This is realised by asking cron to wake up every Sunday with
+/etc/cron.d/mdadm, but then only running the script when the day of the month
+is less than or equal to 7. See #380425.
+
+Cron will try to run the check at "idle I/O priority" (see ionice(1)), so that
+the check does not overload the system too much. Note that this will only
+work if all the component devices of the array employ the (default) "cfq" I/O
+scheduler. See the kernel documentation[0] for information on how to verify
+and modify the scheduler. checkarray does not verify this for you.
+
+ 0. http://www.kernel.org/doc/Documentation/block/switching-sched.txt
+
+If you manually invoke checkarray, it runs with default I/O priority. Should
+you need to run a check at a higher (or lower) I/O priority, then have a look
+at the --idle, --slow, --fast, and --realtime options.
+
+'check' is a read-only operation, even though the kernel logs may suggest
+otherwise (e.g. /proc/mdstat and several kernel messages will mention
+"resync"). Please also see question 21 of the FAQ.
+
+If, however, while reading, a read error occurs, the check will trigger the
+normal response to read errors which is to generate the 'correct' data and try
+to write that out - so it is possible that a 'check' will trigger a write.
+However in the absence of read errors it is read-only.
+
+You can cancel a running array check with the -x option to checkarray.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 02 Sep 2010 10:27:29 +0200
diff --git a/debian/README.recipes b/debian/README.recipes
new file mode 100644
index 00000000..2b1891e0
--- /dev/null
+++ b/debian/README.recipes
@@ -0,0 +1,149 @@
+mdadm recipes
+=============
+
+The following examples/recipes may help you with your mdadm experience. I'll
+leave it as an exercise to use the correct device names and parameters in each
+case. You can find pointers to additional documentation in the README.Debian
+file.
+
+Enjoy. Submissions welcome.
+
+The latest version of this document is available here:
+ http://git.debian.org/?p=pkg-mdadm/mdadm.git;a=blob;f=debian/README.recipes;hb=HEAD
+
+0. create a new array
+~~~~~~~~~~~~~~~~~~~~~
+ mdadm --create -l1 -n2 -x1 /dev/md0 /dev/sd[abc]1 # RAID 1, 1 spare
+ mdadm --create -l5 -n3 -x1 /dev/md0 /dev/sd[abcd]1 # RAID 5, 1 spare
+ mdadm --create -l6 -n4 -x1 /dev/md0 /dev/sd[abcde]1 # RAID 6, 1 spare
+
+1. create a degraded array
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdadm --create -l5 -n3 /dev/md0 /dev/sda1 missing /dev/sdb1
+ mdadm --create -l6 -n4 /dev/md0 /dev/sda1 missing /dev/sdb1 missing
+
+2. assemble an existing array
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdadm --assemble --auto=yes /dev/md0 /dev/sd[abc]1
+
+ # if the array is degraded, it won't be started. use --run:
+ mdadm --assemble --auto=yes --run /dev/md0 /dev/sd[ab]1
+
+ # or start it by hand:
+ mdadm --run /dev/md0
+
+3. assemble all arrays in /etc/mdadm/mdadm.conf
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdadm --assemble --auto=yes --scan
+
+4. assemble a dirty degraded array
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdadm --assemble --auto=yes --force /dev/md0 /dev/sd[ab]1
+ mdadm --run /dev/md0
+
+4b. assemble a dirty degraded array at boot-time
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ If the array is started at boot time by the kernel (partition type 0xfd),
+ you can force-assemble it by passing the kernel boot parameter
+
+ md-mod.start_dirty_degraded=1
+
+5. stop arrays
+~~~~~~~~~~~~~~
+ mdadm --stop /dev/md0
+
+ # to stop all arrays in /etc/mdadm/mdadm.conf
+ mdadm --stop --scan
+
+6. hot-add components
+~~~~~~~~~~~~~~~~~~~~~
+ # on the running array:
+ mdadm --add /dev/md0 /dev/sdc1
+ # if you add more components than the array was setup with, additional
+ # components will be spares
+
+7. hot-remove components
+~~~~~~~~~~~~~~~~~~~~~~~~
+ # on the running array:
+ mdadm --fail /dev/md0 /dev/sdb1
+ # if you have configured spares, watch /proc/mdstat how it fills in
+ mdadm --remove /dev/md0 /dev/sdb1
+
+8. hot-grow a RAID1 by adding new components
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # on the running array, in either order:
+ mdadm --grow -n3 /dev/md0
+ mdadm --add /dev/md0 /dev/sdc1
+ # note: without growing first, additional devices become spares and are
+ # *not* synchronised after the add.
+
+9. hot-shrink a RAID1 by removing components
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdadm --fail /dev/md0 /dev/sdc1
+ mdadm --remove /dev/md0 /dev/sdc1
+ mdadm --grow -n2 /dev/md0
+
+10. convert existing filesystem to RAID 1
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # The idea is to create a degraded RAID 1 on the second partition, move
+ # data, then hot add the first. This seems safer to me than simply to
+ # force-add a superblock to the existing filesystem.
+ #
+ # Assume /dev/sda1 holds the data (and let's assume it's mounted on
+ # /home) and /dev/sdb1 is empty and of the same size...
+ #
+ mdadm --create /dev/md0 -l1 -n2 /dev/sdb1 missing
+ mkfs -t <type> /dev/md0
+ mount /dev/md0 /mnt
+ tar -cf- -C /home . | tar -xf- -C /mnt -p
+ # consider verifying the data
+ umount /home
+ umount /mnt
+ mount /dev/md0 /home # also change /etc/fstab
+ mdadm --add /dev/md0 /dev/sda1
+
+ Warren Togami has a document explaining how to convert a filesystem on
+ a remote system via SSH: http://togami.com/~warren/guides/remoteraidcrazies/
+
+10b. convert existing filesystem to RAID 1 in-place
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ In-place conversion of /dev/sda1 to /dev/md0 is effectively
+ mdadm --create /dev/md0 -l1 -n2 /dev/sda1 missing
+ however, do NOT do this, as you risk filesystem corruption.
+
+ If you need to do this, first unmount and shrink the filesystem by
+ a megabyte (if supported). Then run the above command, then (optionally)
+ again grow the filesystem as much as possible.
+
+ Do make sure you have backups. If you do not yet, consider method (10)
+ instead (and make backups anyway!).
+
+11. convert existing filesystem to RAID 5/6
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # See (10) for the basics.
+ mdadm --create /dev/md0 -l5 -n3 /dev/sdb1 /dev/sdc1 missing
+ #mdadm --create /dev/md0 -l6 -n4 /dev/sdb1 /dev/sdc1 /dev/sdd1 missing
+ mkfs -t <type> /dev/md0
+ mount /dev/md0 /mnt
+ tar -cf- -C /home . | tar -xf- -C /mnt -p
+ # consider verifying the data
+ umount /home
+ umount /mnt
+ mount /dev/md0 /home # also change /etc/fstab
+ mdadm --add /dev/md0 /dev/sda1
+
+12. change the preferred minor of an MD array (RAID)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # you need to manually assemble the array to change the preferred minor
+ # if you manually assemble, the superblock will be updated to reflect
+ # the preferred minor as you indicate with the assembly.
+ # for example, to set the preferred minor to 4:
+ mdadm --assemble /dev/md4 /dev/sd[abc]1
+
+ # this only works on 2.6 kernels, and only for RAID levels of 1 and above.
+ # for other MD arrays, you need to specify --update explicitly:
+ mdadm --assemble --update=super-minor /dev/md4 /dev/sd[abc]1
+
+ # see also item 12 in the FAQ contained with the Debian package.
+
+ -- martin f. krafft <madduck@debian.org> Fri, 06 Oct 2006 15:39:58 +0200
diff --git a/debian/TODO b/debian/TODO
new file mode 100644
index 00000000..eb3299e0
--- /dev/null
+++ b/debian/TODO
@@ -0,0 +1,29 @@
+debian mdadm TODO list
+======================
+
+- version-1 is a nightmare. E.g. on partitionable arrays, with / on
+ /dev/md_d0p3, mdadm -Es ignores /dev/md_d0 and just uses the name, so
+ /dev/md/<arrayname>.
+- figure out something about device names.
+- (better) udev integration
+
+- check whether mdadm.conf and system are consistent during initramfs creation
+ and fail otherwise (#381303).
+- add code to compare existing and expected configuration, after standardising
+ the files. In most cases, we'll have to answer DUNNO as to whether the
+ existing configuration file is okay, but I guess in some cases we can
+ determine that the configuration is okay. A conservative approach would be
+ beneficial to the user. Not sure if it's worth the effort though.
+- one nice^W important thing would be to check device names and UUIDs at least.
+
+- verify operation without udev
+ - udev removed before mdadm installed
+ - udev removed after mdadm installed
+
+- more granular handling of init.d starts/stops, don't force all arrays to be
+ started.
+- let user specify when to start/stop which array (#398310).
+- also only stop those arrays we started; this can be easily done with
+ sentinels in $STATEDIR
+
+- manage DAEMON_OPTIONS with debconf
diff --git a/debian/bugscript b/debian/bugscript
new file mode 100755
index 00000000..dcb88ebd
--- /dev/null
+++ b/debian/bugscript
@@ -0,0 +1,219 @@
+#!/bin/bash
+#
+# mdadm bug submission control script
+#
+# allows Debian's bug tools to include relevant information in bug reports.
+#
+# Copyright © martin f. krafft <madduck@debian.org>
+# distributed under the terms of the Artistic Licence 2.0
+#
+# we need /bin/bash for readline and -n capabilities in the prompt(s)
+#
+
+# maximise information output even in the case of errors
+set +eu
+
+if ! command -v yesno >/dev/null; then
+ if [ -r /usr/share/reportbug/handle_bugscript ]; then
+ exec /usr/share/reportbug/handle_bugscript ". $0" /dev/stdout
+ fi
+ yesno() {
+ read -n1 -p"$1" REPLY
+ case "$REPLY" in
+ [yY]) REPLY=yep;;
+ [nN]) REPLY=nop;;
+ ('') REPLY="$2";;
+ esac
+ }
+ exec 3>&1
+fi
+
+# do not let people ctrl-c out of the bugscript
+trap : INT
+
+if [ $(id -u) != 0 ]; then
+ if [ -x "$(command -v sudo)" ]; then
+ yesno "Gather system information as root using sudo? (Y/n) " yep
+ if [ "$REPLY" = yep ]; then
+ echo running sudo "$0" "$@"...
+ sudo "$0" "$@" >&3 && exit 0
+ echo "sudo invocation failed, trying /bin/su..."
+ fi
+ fi
+
+ yesno "Gather system information as root using su? (Y/n) " yep
+ if [ "$REPLY" = yep ]; then
+ ARGS=
+ for i in "$@"; do ARGS="${ARGS:+$ARGS }'$1'"; shift; done
+ echo "running su root -s '/bin/sh -c $0${ARGS:+ $ARGS}'..."
+ su root -s /bin/sh -c "$0 $ARGS" >&3 && exit 0
+ unset ARGS
+ echo "su invocation failed."
+ fi
+
+ # arrive here only if neither sudo nor su worked:
+ yesno "Will you provide system information in the bug report yourself? (N/y) " nop
+ if [ "$REPLY" = yep ]; then
+ cat <<_eof >&3
+
+IMPORTANT:
+ please do not forget to include all relevant system information with this
+ bug report. You could run
+ /usr/share/bug/mdadm/script 3>&1
+ as root and attach or include the output.
+
+_eof
+ exit 0
+ fi
+
+ # try our best
+ cat <<_eof >&3
+
+WARNING:
+ the following output was not generated by the root user. If you can, please
+ replace the following up until "-- System Information:" with the output of
+ /usr/share/bug/mdadm/script 3>&1
+ run as root. Thanks!
+
+_eof
+fi
+
+if [ ! -r /proc/mdstat ]; then
+ echo "The local system does not have MD (RAID) support: no drivers loaded."
+ echo "Without MD support, I cannot collect as much information as I'd like."
+
+ #yesno "Are you sure you want to report a bug at this time? " yep
+ yesno "Hit any key to continue..." yep
+ #[ "$REPLY" = yep ] || exit 1
+fi
+
+echo "--- mdadm.conf" >&3
+if [ -r /etc/mdadm/mdadm.conf ]; then
+ grep '^[^#]' /etc/mdadm/mdadm.conf >&3
+elif [ -r /etc/mdadm.conf ]; then
+ grep '^[^#]' /etc/mdadm.conf >&3
+else
+ echo no mdadm.conf file. >&3
+fi
+echo >&3
+
+echo "--- /etc/default/mdadm" >&3
+if [ -r /etc/default/mdadm ]; then
+ grep '^[^#]' /etc/default/mdadm >&3
+else
+ echo no /etc/default/mdadm file. >&3
+fi
+echo >&3
+
+echo "--- /proc/mdstat:" >&3
+cat /proc/mdstat >&3 2>&3 || :
+echo >&3
+
+echo "--- /proc/partitions:" >&3
+cat /proc/partitions >&3 2>&3 || :
+echo >&3
+
+echo "--- LVM physical volumes:" >&3
+if [ -x "$(command -v pvs)" ]; then
+ pvs >&3
+else
+ echo "LVM does not seem to be used." >&3
+fi
+
+echo "--- mount output" >&3
+mount >&3
+echo >&3
+
+echo "--- initrd.img-$(uname -r):" >&3
+if [ -r /boot/initrd.img-$(uname -r) ]; then
+ TEMPDIR=$(mktemp -d)
+ OLDPWD="$PWD"
+ cd "$TEMPDIR"
+ zcat /boot/initrd.img-$(uname -r) 2>&3 | cpio -i 2>&3
+ find -regex '.*/md[a/].+' -type f -exec md5sum {} \; >&3
+
+ echo >&3
+ echo "--- initrd's /conf/conf.d/md:" >&3
+ if [ -r conf/conf.d/md ]; then
+ cat conf/conf.d/md >&3
+ else
+ echo "no conf/md file." >&3
+ fi
+
+ cd "$OLDPWD"
+ rm -rf "$TEMPDIR"
+ unset TEMPDIR
+else
+ echo "no initrd.img-$(uname -r) found." >&3
+fi
+echo >&3
+
+if [ -r /proc/modules ]; then
+ echo "--- /proc/modules:" >&3
+ egrep '(dm_|raid|linear|multipath|faulty)' < /proc/modules >&3 || :
+ echo >&3
+fi
+
+if [ -f /var/log/syslog ]; then
+ if [ -r /var/log/syslog ]; then
+ echo "--- /var/log/syslog:" >&3
+ egrep "^\w{3} [ :[:digit:]]{11} ($(hostname)|localhost) (kernel: md|mdadm): " /var/log/syslog >&3 || :
+ echo >&3
+ else
+ echo "syslog not readable by user." >&3
+ fi
+fi
+
+echo "--- volume detail:" >&3
+for dev in /dev/[hsv]d[a-z]*; do
+ [ ! -r $dev ] && echo "$dev not readable by user." && continue
+ mdadm -E $dev 2>/dev/null && echo -- || echo "$dev is not recognised by mdadm."
+done >&3
+echo >&3
+
+if [ -r /proc/cmdline ]; then
+ echo "--- /proc/cmdline" >&3
+ cat /proc/cmdline >&3
+ echo >&3
+fi
+
+if [ -f /boot/grub/grub.cfg ]; then
+ echo "--- grub2:" >&3
+ if [ -r /boot/grub/grub.cfg ]; then
+ egrep '^[^#].*\<(root=|raid)' /boot/grub/grub.cfg >&3 || :
+ else
+ echo grub.cfg file not readable. >&3
+ fi
+ echo >&3
+fi
+
+if [ -f /boot/grub/menu.lst ]; then
+ echo "--- grub legacy:" >&3
+ if [ -r /boot/grub/menu.lst ]; then
+ grep '^[^#].*\<root=' /boot/grub/menu.lst >&3 || :
+ else
+ echo menu.lst file not readable. >&3
+ fi
+ echo >&3
+fi
+
+if [ -f /etc/lilo.conf ]; then
+ echo "--- lilo:" >&3
+ if [ -r /etc/lilo.conf ]; then
+ egrep '^([^#].*)?root=' /etc/lilo.conf >&3 || :
+ else
+ echo lilo.conf file not readable. >&3
+ fi
+ echo >&3
+fi
+
+echo "--- udev:" >&3
+COLUMNS=70 dpkg -l udev | grep '\<udev\>' >&3
+md5sum /etc/udev/rules.d/*md* /lib/udev/rules.d/*md* >&3 2>/dev/null
+echo >&3
+
+echo "--- /dev:" >&3
+ls -l /dev/md* /dev/disk/by-* >&3
+echo >&3
+
+echo "Auto-generated on $(date -R) by mdadm bugscript" >&3
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 00000000..0a6a95a6
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,1819 @@
+mdadm (3.3.2-5) unstable; urgency=medium
+
+ * use-tempnode-not-devnode.patch: change udev rules file to use
+ $tempnode which works both on wheezy and jessie udev, instead
+ of $devnode which only works in jessie. At this stage it is
+ better to make rules file compatible with old version instead
+ of adding versioned dependency. Should be removed for jessie+1.
+ (Closes: #770883)
+ * fix Closes: list in previous entry (Closes: #771852)
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Sat, 20 Dec 2014 11:48:44 +0300
+
+mdadm (3.3.2-4) unstable; urgency=medium
+
+ * really remove /var/lib/mdadm in postinst, fixing a brown-paper bag
+ bug in previous upload (I fixed it earlier but forgot to commit it
+ before 3.3.2-3 release). (Closes: #764036, #771852)
+ * mention closing of #588965 #599352 #694513 by 3.3-1
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Fri, 05 Dec 2014 17:29:22 +0300
+
+mdadm (3.3.2-3) unstable; urgency=medium
+
+ * remove /var/lib/mdadm dir in postinst to clean up from old pkg,
+ remove config files on purge (restore extraneous cleanup from
+ last change) (Closes: #764036)
+ * remove set -u (error on unset variables) from maintscripts
+ (Closes: #766308)
+ * rebuildmap-strip-local-host-name-from-device-name.patch - a patch
+ from upstream fixing a bug when mdadm has to re-create device
+ nodes after assembling arrays (eg, when switching from initramfs
+ without preserving /dev and /run), to choose the same device names
+ as when doing initial assembly
+ * readlink-path.patch: readlink is in /bin not /usr/bin on debian
+ (Closes: #766416)
+ * mdmonitor-service-simplify.diff: simplify mdmonitor.service
+ systemd file, do not try to read non-existing files (Closes: #764647)
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Fri, 28 Nov 2014 09:55:14 +0300
+
+mdadm (3.3.2-2) unstable; urgency=medium
+
+ * remove more leftovers from old versions
+ * do not embed $VERSION to mkconf and bugscript
+ * removed unneeded lintian-overrides file
+ * removed examples/mdadd.sh
+ * removed references to MAIL_TO from /etc/default/mdadm (pre-2.x mdadm)
+ * removed AUTOSTART variable from /etc/default/mdadm
+ (system will start arrays listed in mdadm.conf)
+ * simplify d/rules, build udeb in a subdir (for now, to be removed)
+ * install systemd services and disable some initscripts (mask them)
+ when systemd is running (Closes: #763959)
+ * build-sys-no-check_rundir.patch: stop (re)linking executables
+ at install time
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Sat, 04 Oct 2014 20:38:36 +0400
+
+mdadm (3.3.2-1) unstable; urgency=low
+
+ * new minor/bugfix upstream release (Closes: #731884, #763080)
+ * removed remove-bashism-from-makefile.patch (applied upstream)
+ * copy 64-md-raid-assembly.rules to initramfs too, this should
+ bring us array auto-assemble during initramfs run (Closes: #678691)
+ * demote inability to initialize md subsystem in local-top script
+ from fatal error to warning (Closes: #733574)
+ * create /etc/modprobe.d/mdadm.conf to set start_ro=1 there
+ instead of doing it in the initramfs
+ * use install -D in d/rules consistently and remove dirs from d/mdadm.dirs
+ * remove references to old (2.x and 1.x) mdadm versions (Closes: #728804)
+ * remove references to very old (pre-wheezy) breaks/replaces/depends
+ * remove support of makedev
+ * reformat mdadm-raid initscript a bit
+ * remove rebuilding-raid.html (had invisible refs) and
+ RAID5_versus_RAID10.txt docs. Maybe it's a good idea to remove
+ other docs shipped with the package, since these are becoming stale
+ and these days, better alternatives exist online
+ * removed debian-specific mdadm-startall script which does nothing more than
+ "mdadm --assemble --scan". Remove docbook-to-man from build-deps.
+ * pass dpkg-buildflags to upstream build system and
+ stop using our own -O0, -Os etc
+ * remove separate CFLAGS for udeb and simplify *FLAGS handling
+ * bump Standards-Version to 3.9.5 (no changes needed)
+ * fix VCS links to point to anonscm.d.o
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Wed, 01 Oct 2014 15:09:07 +0400
+
+mdadm (3.3-2) unstable; urgency=low
+
+ * use 63-md-raid-arrays.rules instead of old 64-md-raid.rules
+ (Closes: #726237)
+ * do not use builtin blkid in udev rules, as our udev (at least
+ on wheezy) does not have it (use-external-blkid.diff)
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Mon, 14 Oct 2013 15:49:54 +0400
+
+mdadm (3.3-1) unstable; urgency=low
+
+ [ Michael Tokarev ]
+ * new upstream 3.3 release (Closes: #718896, #588965, #599352, #694513)
+ See ANNOUNCE-3.3 for details.
+ Patches:
+ - refreshed debian-conffile-location.diff
+ (added .conf.d)
+ - removed debian-disable-udev-incr-assembly.diff
+ (do not ship udev-md-raid-assembly.rules for now)
+ - refreshed debian-no-Werror.diff
+ - refreshed sha1-includes.diff
+ - removed patches (included upstream):
+ spelling-and-manpages.patch
+ fix-enough-function-for-RAID10.patch
+ fix-segfaults-in-detail.patch
+ super0-do-not-override-uuid-with-homehost.patch
+ mdmon-allow-takeover-when-original-was-started-with-.patch
+ mdmon-fix-arg-parsing.patch
+ mdmon-fix-arg-processing-for-a.patch
+ Install udev-md-raid-arrays.rules instead of udev-md-raid.rules,
+ don't install new udev-md-raid-assembly.rules for now.
+ * remove Martin F. Krafft from uploaders per his request.
+ Thank you for your contributions!
+ * added remove-bashism-from-makefile.patch patch to work around
+ newly introduced bashism
+ * remove debian/source/options, there's no need to set compression
+ options for debian.tar.gz.
+ * remove outdated debian/docs/md_superblock_formats.txt and
+ debian/docs/md.txt (Closes: #714977, #714978)
+ * ship ANNOUNCE-*, external-reshape-design.txt, mdmon-design.txt
+ files as documentation (Closes: #715324)
+
+ [ Dmitrijs Ledkovs ]
+ * Properly remove 65-mdadm.vol_id.rules, instead of trying to remove a
+ never-existed 65_mdadm.vol_id.rules (note the 65- vs 65_).
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Fri, 11 Oct 2013 10:12:47 +0400
+
+mdadm (3.2.5-6) unstable; urgency=low
+
+ * replace home-grown and not-working-since-etch udevsettle call
+ in initramfs script with proper wait_for_udev function (from
+ common initramfs functions). This unbreaks situations when
+ the underlying device needs some udev magic to happen before
+ being available, which includes stacked devices (md on lvm)
+ and other cases. Thanks to Thomas Parmelan and Dave Whitla
+ for finding the root cause of breakage and for providing
+ the fix. (Closes: #644876)
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Tue, 05 Mar 2013 13:32:21 +0400
+
+mdadm (3.2.5-5) unstable; urgency=low
+
+ * add (empty) restart and force-reload actions to mdadm-waitidle
+ script -- this script is used only when the system needs to be
+ shut down or rebooted, there's nothing to start or restart.
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Thu, 24 Jan 2013 17:04:49 +0400
+
+mdadm (3.2.5-4+mdmon) experimental; urgency=low
+
+ * fix `/etc/init.d/mdadm-raid status' inverse logic (Closes: #686100)
+ * /etc/init.d/mdadm: change RUNDIR to /run instead of /var/run.
+ Mdadm itself uses /run internally, we properly depend on initscripts
+ version which creates /run, and the initscript itself is started
+ after local_fs is processed, so this is merely a no-op, but let's
+ do it for consistency.
+ * Fix 'enough' function for RAID10, to prevent starting of a RAID10
+ array which does not have required minimum of component devices.
+ (Closes: #691668).
+ * fix segfaults in Detail() - mdadm --detail may segfault if a drive
+ has been removed from the array (Closes: #691670)
+ * super0: do not override uuid with homehost. The bug prevented
+ re-creating an array with v0.90 superblock with the specified uuid
+ when homehost is also specified. (Closes: #686703)
+ * several fixes for mdmon argument processing (Closes: #691671):
+ - allow --takeover when original was started with --offroot
+ - fix arg parsing.
+ - fix arg processing for -a
+ * Changes based on a patch by Miquel van Smoorenburg (Closes: #684708):
+ - install mdmon in udeb and initramfs, so imsm arrays can work.
+ mdadm runs mdmon automatically when needed (currently for imsm
+ arrays), and mdmon is required to make such arrays read-write
+ (they're read-only by default) so the mere presence of mdmon is
+ enough to be able to use imsm arrays.
+ - /etc/init.d/mdadm start: if a mdmon pidfile is found in /run/mdadm,
+ restart mdmon (--takeover --all)
+ - /etc/init.d/mdadm stop: link pidfiles of mdmon processes into
+ /run/sendsigs.omit.d, and make sure that happens before sendsigs runs.
+ - stop mdadm before sendsigs, so that the above code works
+ - add script mdadm-waitidle that runs just before reboot/halt.
+ For each array that is still running, it sets sync_action to idle,
+ and uses mdadm --wait-clean to wait for all arrays to go idle
+ (it has a short timeout).
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Sat, 20 Oct 2012 19:20:12 +0400
+
+mdadm (3.2.5-3) unstable; urgency=low
+
+ * revert "Drop unused debconf templates" change -- the templates
+ are actually being (indirectly) used in debian/mdadm.config
+ * fix initramfs-script config name (slipped in the initramfs fixes)
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Sat, 25 Aug 2012 23:12:50 +0400
+
+mdadm (3.2.5-2) unstable; urgency=low
+
+ [ Sergey B Kirpichev ]
+ * Fix mdadm.lintian-overrides
+ * Fix spelling in binaries, fix lintian warnings
+ manpage-has-errors-from-man and hyphen-used-as-minus-sign
+ * Drop unused debconf templates
+ * Implement status option for mdadm-raid init.d script
+ * Fix lintian info's conflicts-with-version: Conflicts -> Breaks
+
+ [ Michael Tokarev ]
+ * fix spelling mistakes in previous changelog entry
+ * some cleanups for checkarray:
+ - change --help printing and shorten/simplify the text
+ - make --quiet cumulative and stop documenting --real-quiet
+ - do not produce help in case of incorrect usage, and exit with 1
+ * fixes for initramfs integration (Closes: #644389, #678262, #685161):
+ - check INITRDSTART=none early
+ - do not explicitly load raid level modules (modprobe/kmod does this)
+ - do not collect needed raid levels (we include all modules anyway)
+ - load md_mod explicitly since we need to change global parameter
+
+ [ Dmitrijs Ledkovs ]
+ * Use dh_installinit with --no-restart-on-upgrade, which will start
+ arrays, but will not stop them during upgrade. (Closes: #678971)
+ * Copy local administrator's modified udev rules as well as the system
+ one. (Closes: #678973)
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Sat, 25 Aug 2012 16:25:37 +0400
+
+mdadm (3.2.5-1) unstable; urgency=low
+
+ [ Michael Tokarev ]
+ * new upstream (bugfix) release, fixing regression when --add'ing
+ device to an array, introduced in 3.2.4, plus other minor fixes
+ (Closes: #673104, #673344)
+ * new patch: sha1-includes.diff to fix #include mess in new sha1.h
+ * added a check into debian/checkarray to skip checking arrays created
+ less than 2 weeks ago (Closes: #624273)
+
+ [ Dmitrijs Ledkovs ]
+ * Remove obsolete documentation dating back to ~etch release
+ * Remove reference to obsolete documentation from debconf templates
+ * Update debconf templates translations
+ * Remove compatibility with ancient initramfs-tools
+ * Remove debian-specific mdadm-startall.8 in clean target
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Fri, 25 May 2012 20:23:52 +0400
+
+mdadm (3.2.4-1) unstable; urgency=low
+
+ * new upstream (bugfix) release (Closes: #664088, #661552)
+ * removed debian-run-udev.diff (applied upstream), and
+ all RUNDIR handling from debian/rules (it is the default now)
+ * add build-arch and build-indep targets to debian/rules, and
+ bump Standards-Version to 3.9.3
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Thu, 10 May 2012 17:51:41 +0400
+
+mdadm (3.2.3-3) unstable; urgency=low
+
+ * switch from topgit to plain 3.0 (quilt) layout, creating
+ debian/patches. Don't build-depend on quilt as patching
+ is done automatically by dpkg-source.
+ * debian/patches/debian-run-udev.diff by Roger Leigh (Closes: #644319, #627774)
+ * update debian/mdadm.logcheck.ignore.server to recognize "k" in
+ addition to "blocks" in kernel messages. Thanks to Frédéric Brière
+ for the patch (Closes: #656038)
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Mon, 30 Apr 2012 14:12:38 +0400
+
+mdadm (3.2.3-2) unstable; urgency=low
+
+ [ Michael Tokarev ]
+ * new upstream bugfix/stable version, with lots of fixes all over.
+ Closes: #641886, #628667, #645563, #651880, #607375, #633880
+ * update Neil's email (Closes: #650630)
+ * update mdadd.sh to version 1.52 (Closes: #655212)
+ * fixed a typo (RAID6 vs RAID10) in FAQ (Closes: #637068)
+ * declare ordering dependency for multipath-tools-boot in
+ mdadm-raid init script (Closes: #641584)
+ While at it, remove mention of devfsd
+ * added Slovak (sk.po) po-debconf translation from Slavko <linux@slavino.sk>
+ (Closes: #641972)
+ * set nice value of the check/resync thread too, together with I/O
+ scheduling class, based on patch by Sergey B Kirpichev (Closes: #652547)
+ * small changes for debian/checkarray
+ * (internal) move files from contrib/* topgit branches into debian directory
+ * remove dh_testroot from clean target
+ * add myself to uploaders
+
+ [ Peter Eisentraut ]
+ * Added support for "status" action to mdadm init script (Closes: #651737)
+
+ -- Michael Tokarev <mjt@tls.msk.ru> Wed, 18 Jan 2012 22:33:01 +0400
+
+mdadm (3.2.2-1) unstable; urgency=low
+
+ [ martin f. krafft ]
+ * New upstream version (closes: #615494), which addresses:
+ - --grow option aware of size change of underlying device (closes: #618463)
+ - builds with -Werror on gcc-4.6 (closes: #625392).
+ - new features/functionality:
+ + Policy framework.
+ + Improved management of reshaping arrays.
+ + Support for Intel Matrix Storage Manager (IMSM)
+ Thanks to Scott Schaefer for his help in preparing this release!
+ * Do not print io rescheduling info message when run by cron
+ (closes: #598957).
+ * Fix checkarray script so that it does not die after scheduling the first
+ device when there is no scheduling class specified; thanks to Mario
+ 'BitKoenig' Holbe (closes: #611627).
+ * Do not include DEVICE line in generated configs but use the built-in value
+ by default (closes: #604702).
+ * Make mdadm-raid init script depend on hostname; thanks to Mario
+ 'BitKoenig' Holbe (closes: #610421).
+ * Schedule start/stop of mdadm-raid before/after filesystems are
+ checked&mounted/unmounted; thanks to Mario 'BitKoenig' Holbe
+ (closes: #611632).
+ * Work around a shell coding bug for cases when there are zero active
+ devices (closes: #618561).
+ * Add NEWS entry about metadata change and requirement on new Grub version
+ (closes: #595516).
+ * Update mdadd.sh script from Arno's webpage.
+ * Update md.txt from latest kernel source.
+ * Updated debconf translations:
+ - Catalan by Innocent De Marchi (closes: #628371)
+ - Danish by Joe Dalton (closes: #621346)
+ * Small typo fix in RAID5_versus_RAID10.txt
+ * Bump Standards-Version to 3.9.2; no changes required.
+
+ [ Michael Tokarev ]
+ * don't print W: auto-read-only in checkarray in quiet mode,
+ thanks to Bernd Hanisch for the patch (Closes: #605722)
+ * move initscript metadata from /lib/init/rw/.mdadm to /run/mdadm,
+ and depend on initscripts (>= 2.88dsf-13.3) for /run (Closes: #633054)
+ * move runtime stuff from /dev/.mdadm to /run/mdadm
+ * document defaults change in debian/NEWS (it was forgotten to be
+ done for squeeze) - metadata & chunk size (Closes: #595516)
+
+ -- martin f. krafft <madduck@debian.org> Mon, 01 Aug 2011 10:41:41 +0200
+
+mdadm (3.1.4-1+8efb9d1) unstable; urgency=low
+
+ * Added patch with Makefile fix from upstream (commit 8efb9d1) to fix
+ compiler/linker problem on non-x86 architectures (closes: #595290).
+
+ -- martin f. krafft <madduck@debian.org> Fri, 03 Sep 2010 10:45:01 +0200
+
+mdadm (3.1.4-1) unstable; urgency=low
+
+ * New upstream release, which closes:#595039 and addresses the following
+ issues too:
+ - reverts move of incremental map (closes: #585015).
+ - fixes mdadm monitor in the case of an inactive (or start-failed) raid0
+ or linear array (closes: #539154).
+ - prevent --remove faulty from skipping renumbered devices
+ (closes: #587550).
+ - fixed overflow when growing a RAID6 (closes: #589493).
+ * However, disable the incremental assembly upstream turned on in 3.1.3 for
+ now, this will have to wait until after the squeeze release.
+ * initramfs/hook: make sure configuration file exists before accessing it;
+ thanks to Michael Prokop for the fix and NMU (closes: #589836).
+ * initramfs/hook: Match UUID case-insensitive while checking for running
+ arrays not listed in mdadm.conf; thanks to Mario 'BitKoenig' Holbe for the
+ patch (closes: #583545).
+ * Fix URL in the bug reporting preamble (presubj) (closes: #589833).
+ * Add I/O rescheduling functionality to the checkarray script and make the
+ cronjob use the idle priority; this should now minimise the impact of the
+ monthly re-check on the running system; Florian Heigl had the idea
+ (closes: #592149, #508123).
+
+ -- martin f. krafft <madduck@debian.org> Sun, 29 Aug 2010 13:44:59 +0200
+
+mdadm (3.1.2-2) unstable; urgency=low
+
+ * Fix logcheck regexp to cure "egrep: Unmatched [ or [^" message
+ (closes: #583376).
+ * Cherry-pick 94fcb80 from upstream to fix compiler error due to argument
+ type error (at least on ia64, sparc, powerpc) (closes: #583495).
+
+ -- martin f. krafft <madduck@debian.org> Fri, 28 May 2010 09:35:42 +0200
+
+mdadm (3.1.2-1) unstable; urgency=low
+
+ * New upstream release (closes: #567167).
+ * Ignore lintian error about not stopping in runlevel 1.
+ * Include more information about the configuration, initrd, and LVM in
+ bugscript output.
+ * Check active devices against configuration file based on UUID, not device
+ name (closes: #553896).
+ * When preparing the list of devices, treat /dev/mdX and /dev/md/X equally.
+ * Bump Standards-Version to 3.8.4 without having to make changes.
+ * Fix logcheck rule with patch from Frédéric Brière, since Rebuild events
+ are now arbitrary, no longer multiples of 20 (closes: #570315).
+ * checkarray: do not exit non-zero when there are no arrays found (see
+ #582360).
+ * Do not exit checkarray cronjob non-zero when the tests whether to run
+ fail (closes: #580825). This is related to a recent change in behaviour in
+ cron 3.0pl1-110 (see e.g. #581612). Thanks to everyone for their
+ suggestions!
+ * Call MAKEDEV to create device nodes only if MAKEDEV is installed
+ (closes: #569360).
+
+ -- martin f. krafft <madduck@debian.org> Thu, 27 May 2010 09:34:24 +0200
+
+mdadm (3.1.1-1) unstable; urgency=low
+
+ * New upstream release.
+ * Retire fixed/blkid-dev branch.
+ * Reword warning about unbootable system when mdadm is purged
+ (closes: #544558).
+ * Updated FAQ to include s2ram as one of the reasons that can prevent an
+ array from being stopped; thanks to Pascal Hambourg for writing in.
+ * Install udev rules into udeb package (closes: #558823).
+ * Update mdadd.sh (formerly /usr/share/doc/mdadm/examples/newdisk.gz) from
+ Arno's website, refactoring the script into its own branch/patch
+ (closes: #539103).
+ * Do not single-quote homehost in initramfs script (closes: #549083).
+
+ -- martin f. krafft <madduck@debian.org> Wed, 27 Jan 2010 10:14:25 +1300
+
+mdadm (3.0.3-2) unstable; urgency=low
+
+ * Bumped Standards-Version to 3.8.3 without having to make changes.
+ * Fixed init dependencies of mdadm daemon init.d script; thanks Petter
+ Reinholdtsen (closes: #541396).
+ * Switched source package to v3-quilt format.
+
+ -- martin f. krafft <madduck@debian.org> Fri, 06 Nov 2009 10:06:03 +0100
+
+mdadm (3.0.3-1) unstable; urgency=low
+
+ * New upstream release.
+ * Acknowledge 3.0-3.1 NMU by Christian Kujau (patch by Marco d'Itri)
+ (closes: #541884), and add util-linux dependency.
+ * Copy udev rules into initramfs, which udev stopped doing
+ (closes: #549535, #549083, #538843, #538143).
+ * Fix the bug script to write debug information to the correct file
+ descriptor (closes: #537734).
+ * Switch to TopGit and split Debian diff into topic branches; add
+ README.source.
+ * Drop the outdated rootraiddoc.97.html document.
+
+ -- martin f. krafft <madduck@debian.org> Tue, 27 Oct 2009 18:06:13 +0100
+
+mdadm (3.0-3.1) unstable; urgency=medium
+
+ * Non-maintainer upload.
+ * use blkid instead of vol_id in udev-md-raid.rules (closes: #541884)
+
+ -- christian kujau <lists@nerdbynature.de> Mon, 14 Sep 2009 10:15:21 +0200
+
+mdadm (3.0-2) unstable; urgency=low
+
+ * Fixed initramfs script with patch from Steffen Hau: it was still using
+ --auto-update-homehost, which has been removed and replaced by a better
+ heuristic: arrays created for a different "homehost" will now be
+ assembled read-only, rather than shoehorned into the system with
+ --auto-update-homehost (closes: #537820).
+ * Add version stamps to bugscript and mkconf scripts to facilitate
+ debugging.
+
+ -- martin f. krafft <madduck@debian.org> Tue, 21 Jul 2009 10:33:30 +0200
+
+mdadm (3.0-1) unstable; urgency=low
+
+ * New stable upstream release.
+ * Add information about udev and device links in /dev to bugscript output.
+ * Add pointer to FAQ and in particular rootdelay to the bug script
+ pre-subject file, which is displayed by tools like reportbug and thus
+ hopefully reduces the numbers of duplicated bugs.
+ * Patch from Frédéric Brière to make logcheck rules printk_time aware
+ (closes: #537460).
+ * Updated German translation due to typos and old spelling rules; thanks to
+ Helge Kreutzmann for the patch (closes: #534663).
+ * Bumped Standards-Version to 3.8.2; no changes necessary.
+
+ -- martin f. krafft <madduck@debian.org> Mon, 20 Jul 2009 16:12:41 +0200
+
+mdadm (3.0~devel3-43-g2800528-1) experimental; urgency=low
+
+ * Merge tip of upstream's devel-3.0 branch at commit 2800528.
+ * Drop our own udev rules in favour of upstream's. If
+ /etc/udev/rules.d/65_mdadm.vol_id.rules has not been modified (md5sum
+ check), it is automatically removed; else, a warning is emitted.
+ * Add information about udev and device links in /dev to bugscript output.
+
+ -- martin f. krafft <madduck@debian.org> Tue, 05 May 2009 15:10:46 +0200
+
+mdadm (2.6.9-3) unstable; urgency=low
+
+ * Fix the multipath prereq patch (#516605) and make it exit after printing
+ the prerequisites (closes: #526793).
+ * Change my previous recommendation for postfix over to the new virtual
+ package default-mta (see #522300 and #508644).
+ * Enhance bugscript, which now asks to run as root (sudo/su) if invoked by
+ a normal user.
+ * Include MD5 sums of md-related files in initrd in bug reports.
+ * Add grub2 information retrieval to bugscript.
+ * Trap SIGINT and thus prevent ctrl-c from terminating the bugscript
+ prematurely.
+
+ -- martin f. krafft <madduck@debian.org> Tue, 05 May 2009 11:46:22 +0200
+
+mdadm (3.0~devel3-1) experimental; urgency=low
+
+ * Initial release of DEVELOPMENT BRANCH 3.0 to experimental.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 30 Apr 2009 11:51:39 +0200
+
+mdadm (2.6.9-2) unstable; urgency=low
+
+ * Fix the check of whether mdadm.conf defines all devices known to the
+ system; thanks Cristian Ionescu-Idbohrn (closes: #525655).
+ * No longer pass -k to modprobe, which has been deprecated for a long time;
+ thanks to Jan Hudec (closes: #519999).
+ * Remove Mario Joußen from the uploaders list, since his email started
+ bouncing.
+ * Prepare mdadm source to use quilt, with the long-term goal to switch to
+ TopGit, once I find the time.
+ * Cherry-pick caa0f6c & 667e66d from Neil into a quilt patch to fix gcc-4.4
+ compiler issues (closes: #505375).
+
+ -- martin f. krafft <madduck@debian.org> Sun, 26 Apr 2009 16:08:28 +0200
+
+mdadm (2.6.9-1) unstable; urgency=low
+
+ * New upstream release.
+ * Do not set -eu in the bugscript to maximise information output in the case
+ of errors.
+ * Make initramfs script depend on multipath to ensure its script is run
+ before ours (closes: #516605).
+ * Provide an alternative (postfix) for mail-transport-agent (closes:
+ #522300). I chose postfix because that's the only one I could recommend,
+ and since the alternative does not affect people who already have an MTA
+ installed, or have a preference, it won't affect them.
+ * Honour debconf pre-selection of mdadm/initrdstart (closes: #516802).
+ * Incorporate patch from Adrian Bridgett: the initramfs hook now checks to
+ see if all known arrays are listed in mdadm.conf and issues a warning if
+ this is not the case (closes: #519328).
+ * Make checkarray skip over arrays still marked auto-read-only
+ (closes: #510641).
+ * Add cron.daily snippet from Paul Slootman to run one-shot scans every day
+ to ensure that failed arrays don't go unnoticed (closes: #497949).
+ * Bumped Standards-Version to 3.8.1; no changes necessary.
+
+ -- martin f. krafft <madduck@debian.org> Sat, 25 Apr 2009 19:04:47 +0200
+
+mdadm (2.6.8-12-gb47dff6-2) unstable; urgency=low
+
+ * Brown paper bag release: I built from the wrong branch which caused some
+ Debian-specific changes not to get into the package. This build fixes it.
+
+ -- martin f. krafft <madduck@debian.org> Mon, 16 Feb 2009 12:15:37 +0100
+
+mdadm (2.6.8-12-gb47dff6-1) unstable; urgency=low
+
+ * New upstream release.
+ - better checks asprintf() return codes, thanks to patch from Dustin
+ Kirkland (closes: #509167).
+ * Fix start/stop runlevels in header of mdadm monitor init.d script
+ (closes: #514923).
+ * Use modprobe -q instead of --syslog from initramfs (closes: #502988).
+
+ -- martin f. krafft <madduck@debian.org> Mon, 16 Feb 2009 11:07:18 +0100
+
+mdadm (2.6.7.2-1) unstable; urgency=low
+
+ * New upstream release, created for Debian lenny:
+ - fixes assembly of arrays that are being reshaped (closes: #512475)
+ - this bug was also responsible for other assembly problems
+ (closes: #498505, #499643, #496334)
+ Again, many thanks to Neil Brown for being such an awesome upstream.
+
+ * Documentation updates:
+ - Actually install David Pashley's blog post added in 2.6.7.1-1, and
+ register it with doc-base.
+ - Update md.txt to version 2.6.26 (the lenny kernel).
+ - Add a dump of a website detailing md superblock formats.
+ - Register FAQ, md.txt, RAID5-vs-RAID10, README.recipes with doc-base
+ - Cherry-picked UID/UUID typo in mdadm.conf(5) manpage fix (commit
+ 0e69da7) (closes: #506245).
+
+ * Added Italian debconf translation; thanks Luca Monducci (closes: #506572).
+
+ -- martin f. krafft <madduck@debian.org> Tue, 03 Feb 2009 21:28:34 +0100
+
+mdadm (2.6.7.1-1) unstable; urgency=low
+
+ * New upstream release, specifically created for Debian lenny to fix the RC
+ bugs, which
+ - fixes typo in forced assembly code (closes: #496334, #499643, #498505).
+ - fixes array component size detection (closes: #500309).
+ Thanks Neil Brown, mdadm upstream: you are spoiling me. :)
+
+ * Minor fixes to documentation:
+ - Add David Pashley's Rebuilding RAID blog post.
+ - Add new (bugfix) version 1.40 of the newdisk script
+ (/usr/share/doc/mdadm/examples/newdisk.gz) (closes: #490955).
+ - Add link to Warren Togami's writeup about remote RAID-1 conversion to
+ README.recipes.
+ - Fix probability of survival in FAQ 4b, since I erroneously labeled the
+ chance of failure as the chance of survival; thanks to Per Olofssen for
+ clarification (closes: #493577).
+ - Cherry-picked 6d6de2e from Neil, which adds HOMEHOST to the manpage and
+ closes: #489257.
+
+ -- martin f. krafft <madduck@debian.org> Wed, 15 Oct 2008 10:27:23 +0200
+
+mdadm (2.6.7-3.1) unstable; urgency=low
+
+ * Non-maintainer upload with permission.
+ * Ask mdadm/initrdstart at medium priority in chrooted environment.
+ (Closes: #493099)
+ * Update Swedish debconf translations. Thanks Martin Ågren.
+ (Closes: #492074)
+
+ -- Jérémy Bobbio <lunar@debian.org> Mon, 25 Aug 2008 22:28:53 +0200
+
+mdadm (2.6.7-3) unstable; urgency=low
+
+ * Correct credits in the NEWS file: Suse authored the udev rules, not Ubuntu
+ (who use the same file without credit).
+
+ -- martin f. krafft <madduck@debian.org> Sat, 05 Jul 2008 12:22:58 +0200
+
+mdadm (2.6.7-2) unstable; urgency=low
+
+ * Remove mdrun completely.
+ * Import udev vol_id handling from Suse, and thus finally export
+ /dev/disk/by-id/* and /dev/disk/by-uuid symlinks (closes: #435983).
+ * Update upstream URLs in copyright and watch file (closes: #488364).
+ * Bump Standards-Version to 3.8.0; no changes required.
+ * Add mdadm homepage link to debian/control.
+
+ -- martin f. krafft <madduck@debian.org> Wed, 02 Jul 2008 11:13:18 +0200
+
+mdadm (2.6.7-1) unstable; urgency=low
+
+ * New upstream version, which
+ - fixes a segfault when reading /proc/mdstat (closes: #462154).
+ - fixes a possible bug with v1 bitmap space allocation (closes: #474548).
+ - supports large files for loop assembly (closes: #463769).
+ * Moved mdadm-raid init.d script to position S60 (from S50) for runlevels
+ 0 and 6, so that arrays get stopped after cryptdisks-early; thanks to
+ J.M.Roth (closes: #486012).
+ * Fixed a typo in checkarray; thanks to Helmut Grohne (closes: #445540).
+ * Updated debconf translations:
+ - Spanish; thanks to Javier Fernández-Sanguino (closes: #477920).
+ - Basque; thanks to Piarres Beobide (closes: #478676).
+ * Updated logcheck rule so that it matches mdadm log entries with and without
+ PID (2.6.5 introduces PIDs in the messages).
+
+ -- martin f. krafft <madduck@debian.org> Wed, 25 Jun 2008 17:31:15 +0200
+
+mdadm (2.6.4-2) unstable; urgency=low
+
+ * Adjusted logcheck rules to follow kernel changes; thanks to Frédéric
+ Brière (closes: #462478).
+ * Debconf templates and debian/control reviewed by the debian-l10n-
+ english team as part of the Smith review project. Closes: #463673
+ * Debconf translation updates (thanks to Christian Perrier for compiling
+ them):
+ * Japanese. Closes: #464438
+ * Galician. Closes: #464454
+ * French. Closes: #465984
+ * Czech. Closes: #466306
+ * Dutch. Closes: #466543
+ * Russian. Closes: #466577
+ * Portuguese. Closes: #466794
+ * German. Closes: #466989
+ * Vietnamese. Closes: #467118
+ * New debconf translations:
+ * Finnish. Closes: #468048
+ * Fixed bashism in mdadm-raid init.d script; thanks to Raphael Geisser
+ (closes: #471874).
+ * Do not output warning when run from cron and no arrays are found
+ (closes: #474542).
+ * Add doc-base registration file; thanks to Roberto C. Sanchez
+ (closes: #451684).
+ * Reschedule "mdadm Sunday" to 00:57 instead of 01:06 to prevent double
+ invocation on DST change (closes: #449244).
+ * Bump DH compatibility level to 6; no changes required.
+
+ -- martin f. krafft <madduck@debian.org> Fri, 11 Apr 2008 10:48:45 +0200
+
+mdadm (2.6.4-1) unstable; urgency=low
+
+ * New upstream release.
+ * Apply patch by Petter Reinholdtsen to fix dependency loop in
+ init.d script (closes: #460256).
+ * Bump Standards-Version to 3.7.3; no changes needed.
+
+ -- martin f. krafft <madduck@debian.org> Mon, 14 Jan 2008 12:47:14 +0100
+
+mdadm (2.6.3+200709292116+4450e59-3) unstable; urgency=low
+
+ * Patch by Jérémy Bobbio which completes the fix for #444682.
+
+ -- martin f. krafft <madduck@debian.org> Mon, 01 Oct 2007 16:16:19 +0100
+
+mdadm (2.6.3+200709292116+4450e59-2) unstable; urgency=low
+
+ * Patch the routine loading v1 superblocks to fix a segfault on amd64
+ (closes: #444682).
+
+ -- martin f. krafft <madduck@debian.org> Sun, 30 Sep 2007 14:10:41 +0100
+
+mdadm (2.6.3+200709292116+4450e59-1) unstable; urgency=low
+
+ * New merge from upstream @4450e59ffaf75623fa4261e244b0717a7463aa84
+ - makes "--write-mostly" effective when re-adding a device to an array.
+ (closes: #442874).
+ * Do not call update-initramfs -k all, it can be set via
+ /etc/initramfs-tools/update-initramfs.conf (closes: #439334).
+ * Depend on udev|makedev instead of just makedev and invoke /dev/MAKEDEV,
+ not /sbin/MAKEDEV (closes: #436998).
+ * De-escalate the initramfs hook warning about an "emergency procedure" and
+ simply note that this involves initramfs assembling arrays it finds
+ automatically at boot. Also added a FAQ entry on how to turn off the init
+ script warning when no arrays are found (closes: #434934).
+ * Add --scan to the single-device-assembly-codepath in the initramfs
+ local-top script; thanks to Mario 'BitKoenig' Holbe for the patch
+ (closes: #440703).
+ * Use short option in initramfs script mkdir call, to make it klibc-utils
+ compatible; thanks maximilian attems for the patch (closes: #443436).
+ * Handle module name synonyms in initramfs hook script (closes: #432585).
+
+ -- martin f. krafft <madduck@debian.org> Sat, 29 Sep 2007 21:21:25 +0100
+
+mdadm (2.6.2-2) unstable; urgency=low
+
+ * Fix typos in md(4) manpage; thanks Jeroen (closes: #425576).
+ * Make init script not report failure when there are no arrays defined in
+ config file.
+ * Add /usr/share/doc/mdadm/examples/newdisk, a script to integrate
+ a replacement disk into an existing array with minimal effort; will remain
+ in examples/ until I had a chance to really test and understand it. Thanks
+ to Arno van Amersfoort (closes: #427880).
+ * Does some sanity checking for proper format of level= arguments in
+ mdadm.conf and bails if an error is found. Since the RAID levels are used
+ verbatim as module names, a discrepancy might lead to an unusable system.
+ Instead, we thus use the emergency fallback. Thanks to Andrew
+ Sackville-West for spotting this and helping me with the fix.
+
+ -- martin f. krafft <madduck@debian.org> Tue, 10 Jul 2007 09:59:45 +0200
+
+mdadm (2.6.2-1) unstable; urgency=low
+
+ * New upstream release
+ - new options --fail detach and --remove faulty can be used to fail and
+ remove devices that are no longer physically present (closes: #416512).
+ - --help output now goes to stdout (closes: #416653).
+ - plenty of manpage fixes, thanks Peter Samuelson (closes: #414688).
+ * Incorporated patch by Mikko Rasa to fix detecting of raid6 (and raid10)
+ devices with --scan (closes: #421915).
+
+ -- martin f. krafft <madduck@debian.org> Mon, 21 May 2007 14:25:43 +0200
+
+mdadm (2.6.1-1) unstable; urgency=low
+
+ * Release to unstable.
+ * Start arrays read-only in initramfs to prevent syncing and hence enable
+ resuming/freezing. The arrays will automatically sync as soon as something
+ writes to it; thanks to Tim Dijkstra, Neil Brown, and Luis Rodrigo
+ Gallardo Cruz (closes: #415441).
+ * mkconf now tries to preserve existing values for DEVICE, CREATE and
+ HOMEHOST (in addition to MAILADDR, which it preserved previously already).
+ PROGRAM is preserved but only added to mdadm.conf if it occurred in the
+ previously existing configuration file.
+ * startall is now mdadm-startall and lives in /sbin, thanks to Eduard Bloch.
+ It now can handle existing mdadm.conf files much more gracefully, mostly
+ thanks to the above mkconf enhancements (closes: #415336).
+
+ -- martin f. krafft <madduck@debian.org> Sat, 05 May 2007 16:12:29 +0200
+
+mdadm (2.6.1-1~exp.5) experimental; urgency=low
+
+ * Fix mdadm.conf typo; thanks Tim Phipps (closes: #416626).
+ * Execute udevtrigger after assembly of arrays during initramfs processing;
+ thanks to Michael Prokop (closes: #416658).
+
+ -- martin f. krafft <madduck@debian.org> Sat, 31 Mar 2007 12:12:27 +0200
+
+mdadm (2.6.1-1~exp.4) experimental; urgency=low
+
+ * Incorporate patches by Peter Samuelson fixing several typography as well
+ as typesetting issues in the manpages. Thanks a lot! (closes: #414688)
+
+ -- martin f. krafft <madduck@debian.org> Tue, 13 Mar 2007 19:26:37 +0100
+
+mdadm (2.6.1-1~exp.3) experimental; urgency=low
+
+ * Patch by Jørn V. Christensen to properly handle multiple email addresses
+ for the MAILADDR setting (closes: #413330).
+
+ -- martin f. krafft <madduck@debian.org> Mon, 5 Mar 2007 11:19:18 +0000
+
+mdadm (2.6.1-1~exp.2) experimental; urgency=low
+
+ * Updated debconf translations:
+ - Galician by Jacobo Tarrio (closes: #412203).
+
+ -- martin f. krafft <madduck@debian.org> Sat, 24 Feb 2007 16:41:16 +0100
+
+mdadm (2.6.1-1~exp.1) experimental; urgency=low
+
+ * New upstream release, targeted at experimental until etch is out:
+ - adds --syslog option (closes: #402457).
+ - now can --wait for sync activity to finish (closes: #328197).
+ - for other changes, please see /usr/share/doc/mdadm/changelog.gz
+ * Updated debconf translations:
+ - Portuguese by Rui Branco (closes: #411745).
+
+ -- martin f. krafft <madduck@debian.org> Thu, 22 Feb 2007 16:35:16 +0100
+
+mdadm (2.5.6-9) UNRELEASED; urgency=low
+
+ * More logcheck filters to prevent redundant information logged by mdadm
+ --syslog.
+ * Improved mdadm-raid init script to correctly output status information for
+ drives that are initialising or adding spares.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 11 Jan 2007 16:05:12 +0100
+
+mdadm (2.5.6-8) unstable; urgency=low
+
+ * Hard-code path to /sbin/mdadm binary rather than searching the $PATH. This
+ closes: #403307 and should be a little more robust in the presence of
+ installations of mdadm in /usr/local.
+ * Made the bugscript a little more failure-resilient.
+ * Added more documentation.
+
+ -- martin f. krafft <madduck@debian.org> Mon, 8 Jan 2007 02:04:25 +0100
+
+mdadm (2.5.6-7) unstable; urgency=low
+
+ * Only parse ARRAY lines from configuration file when collecting the array
+ pairs. Thanks to Daniel Dehennin for the bug report and suggested fix
+ (closes: #402106).
+ * Prevent modules from being loaded during initramfs time if no arrays are
+ to be assembled at this stage.
+ * Export DAEMON_OPTIONS to /etc/default/mdadm, which gets passed to the
+ mdadm daemon on startup from the init.d script. The value is set to
+ -y/--syslog by default, and will get incorporated into debconf in a future
+ version; thanks for the idea by Alex Owen (closes: #401696).
+ * Incorporate patch by upstream to fix handling of --syslog long option
+ (closes: #402457).
+ * Added logcheck filters for new syslog entries by mdadm monitoring daemon.
+ * Added Spanish debconf translation by Javier Fernández-Sanguino
+ (closes: #402681).
+
+ -- martin f. krafft <madduck@debian.org> Tue, 12 Dec 2006 11:49:52 +0100
+
+mdadm (2.5.6-6) unstable; urgency=medium
+
+ * Fixed a typo in the debconf control script which would cause failures with
+ a dash shell; thanks to Santiago Garcia Mantinan for reporting this, and
+ Andreas Metzeler for providing a solution (closes: #399315).
+
+ -- martin f. krafft <madduck@debian.org> Mon, 20 Nov 2006 15:02:34 +0800
+
+mdadm (2.5.6-5) unstable; urgency=low
+
+ * Prevent initramfs hook from exiting prematurely when VERBOSE=false.
+ * Moved debconf question about arrays to start by initramfs to high
+ priority.
+
+ -- martin f. krafft <madduck@debian.org> Mon, 13 Nov 2006 11:24:21 +0100
+
+mdadm (2.5.6-4) unstable; urgency=low
+
+ * Actually remove mdadm.conf on purge; previously, the generation of
+ a temporary file for initramfs would screw up the purging; thanks to
+ Fabrice Lorrain for the report (closes: #398088).
+
+ -- martin f. krafft <madduck@debian.org> Sat, 11 Nov 2006 20:07:55 +0100
+
+mdadm (2.5.6-3) unstable; urgency=low
+
+ * Fix a syntax error in mdadm-raid script.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 9 Nov 2006 15:47:51 +0100
+
+mdadm (2.5.6-2) unstable; urgency=low
+
+ * Small fixes to mkconf, now returns 255 instead of -1.
+ * Added /usr/share/mdadm/startall, a helper script to facilitate starting
+ all arrays when booting from rescue/live media. It overrides AUTOSTART in
+ /etc/default/mdadm and starts arrays even if the variable is set to
+ a false value.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 9 Nov 2006 14:44:35 +0100
+
+mdadm (2.5.6-1) unstable; urgency=low
+
+ * New upstream release:
+ - added note to mdadm(8)/--metadata about overriding the default in
+ mdadm.conf (closes: #396914).
+ - fixed problems that could cause infinite loop with auto assemble. Thanks
+ to Dan Pascu for pointing this out (closes: #396582).
+ - fixed problems with bitmap file names lost after reading from
+ configuration file.
+ * Merged patch by Dan Pascu so that the mdadm-raid script nicely handles
+ situations where a degraded array only has one drive left.
+ * Updated Japanese debconf translation; thanks to Hideki Yamane
+ (closes: #396400).
+
+ -- martin f. krafft <madduck@debian.org> Thu, 9 Nov 2006 00:47:45 +0100
+
+mdadm (2.5.5-1) unstable; urgency=low
+
+ * New upstream release:
+ - fixes the build problems on several architectures (closes: #393314) by
+ including the contents of linux/blkpg.h literally, not via #include.
+ - optimises bitmap file use on 64bit systems.
+ - does not error out anymore when trying to assemble an already assembled
+ array without a corresponding /dev device node.
+ - does not report an error if --assemble --scan only finds already running
+ arrays.
+ - fixes several bugs related to RAID10 and the new offset layout.
+ - improves error message when a wrong '--update' option is given.
+ * Added FAQ entries about partitionable arrays.
+ * chroot detection now also works for 2.6.18 and beyond (c.f. kernel commit
+ 778c1144771f0064b6f51bee865cceb0d996f2f9).
+ * Now recommends module-init-tools.
+ * Hides ugly errors during configuration in the absence of module-init-tools
+ or initramfs-tools.
+ * Send udev events for arrays assembled by the mdadm-raid init.d script.
+ This does not close #394193 but it's a good addition anyway. I am not
+ sending these events from the initramfs as well because it would be
+ non-trivial to ensure that an event doesn't get sent twice for a given
+ array.
+ Anyway, this is all a hack until the kernel sends online/offline events to
+ udev. See #394193.
+ * Added more RAID10 information to the FAQ.
+ * Added filters to logcheck for regular events, even by the md driver; also
+ promoted messages about non-fresh components to security events.
+ * Hide informational messages unless VERBOSE is set to a true value in
+ /etc/default/mdadm.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 26 Oct 2006 22:35:24 +0200
+
+mdadm (2.5.4-1) unstable; urgency=low
+
+ * New upstream release:
+ - --examine now reports chunk size also for RAID6 and RAID10
+ - fix endianness issues with v1 superblocks (closes: #385726) and bitmap
+ metadata.
+ - improved message when mdadm detects similar superblocks
+ (closes: #385951).
+ - documents that the automatic update of the super-minor field in the
+ superblock when using a 2.6 kernel only applies to RAID levels 1 and
+ higher. RAID0 array superblocks must be manually updated
+ (closes: #386315, #388172).
+ - removes partition table from any whole device added to an array.
+ - allow --auto=yes to specify a number; if mdadm determines from the
+ device name that you want a partitionable array, this number determines
+ the number of sub-device nodes to create.
+ * Removed patch previously used to fix #385951 because it's not adequate.
+ See the bug log for reasons.
+
+ -- martin f. krafft <madduck@debian.org> Fri, 13 Oct 2006 08:32:20 +0200
+
+mdadm (2.5.3.git200608202239-8) unstable; urgency=low
+
+ * This revision is dedicated to Peter Samuelson for his RAID10 expertise^W
+ educated guess^W^W pure luck. (:
+ * Now writes minimal mdadm.conf file even if the MD subsystem has not been
+ loaded and the scan for arrays thus failed.
+ * Now tries to ensure that the configuration file used for the initramfs
+ actually defines arrays.
+ * Now preserves MAILADDR from an existing mdadm.conf when generating a new
+ one.
+ * Documentation updates.
+ * Updated debconf translations:
+ - German by Mario Joußen.
+ - Vietnamese by Clytie Siddall (closes: #390311).
+ - Dutch by Frans Pop (closes: #390955).
+ - French by Jean-Luc Coulon (closes: #391215).
+
+ -- martin f. krafft <madduck@debian.org> Fri, 6 Oct 2006 15:03:46 +0200
+
+mdadm (2.5.3.git200608202239-7) unstable; urgency=medium
+
+ * Fixed a serious bug in the debconf script which would cause the
+ configuration to exit prematurely in cases when the root could not be
+ determined. Since this is RC, the urgency is set to medium.
+ * Updated the documentation a bit.
+ * Updated debconf translations:
+ - Swedish by Daniel Nylander (closes: #389040).
+ - Czech by Miroslav Kure (closes: #389083).
+ - Russian by Yuri Kozlov (closes: #389086).
+ - Brazilian Portuguese by Felipe Augusto.
+
+ -- martin f. krafft <madduck@debian.org> Fri, 29 Sep 2006 16:31:44 +0200
+
+mdadm (2.5.3.git200608202239-6) unstable; urgency=high
+
+ * Bumping urgency to high because previous version has been in unstable for
+ three days and this one really only fixes a stupid segfault:
+ * Reworked the parsing of /proc/partitions and spotted a mean segfault
+ (closes: #388355).
+
+ -- martin f. krafft <madduck@debian.org> Thu, 21 Sep 2006 15:25:21 +0200
+
+mdadm (2.5.3.git200608202239-5) unstable; urgency=medium
+
+ * Keeping medium urgency due to RC bug.
+ * Modified the patch responsible for pruning parent devices so that
+ superblocks at the end of a disk do not get interpreted twice. It now
+ makes fewer assumptions about the exact output of /proc/partitions and
+ should thus be more robust (now closes: #385951).
+ * Added code that defers mdadm preconfiguration when the debconf backend is
+ too old (and does not provide debconf-escape). Now configuration is
+ postponed until the postinst is run in this case.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 14 Sep 2006 11:16:39 +0200
+
+mdadm (2.5.3.git200608202239-4) unstable; urgency=medium
+
+ * Correct error related to an unbound variable in postinst.
+ * Keeping medium urgency.
+
+ -- martin f. krafft <madduck@debian.org> Wed, 13 Sep 2006 20:49:33 +0200
+
+mdadm (2.5.3.git200608202239-3) unstable; urgency=medium
+
+ * Urgency medium because of RC bugs.
+ * Add versioned dependency to debconf (closes: #385994); temporary fix until
+ we find a proper fix for #386439.
+ * Add patch by Steinar H. Gunderson to ensure mdadm does not interpret
+ a superblock as belonging to a device when it's actually part of
+ a partition on that device (closes: #385951).
+ * Do not override the superblock default version in mdadm.conf to prevent
+ creation of superblocks that the kernel can't handle (closes: #384614).
+ * Added a note to alert people that the warning about arrays not listed in
+ the configuration file is only relevant if the arrays are needed to be
+ brought up by mdadm from initramfs during boot (closes: #385017).
+ * Added bootloader/cmdline info to bugscript so that future bug reports via
+ bug/reportbug include information on how the system is booted with respect
+ to RAID (the root partition).
+ * If mdadm is being configured in a chroot, it now defaults to starting all
+ arrays from the initial ramdisk, rather than trying to figure out the root
+ MD array (we're using /proc information, so it would be the one of the
+ host, not the one of the chroot) (closes: #386468).
+ * Added LSB headers to init scripts.
+ * Reworked the documentation with respect to the use of "MD" and "RAID", and
+ added a FAQ entry on the meaning of "MD". Thanks to Frans Pop for his
+ help!
+ * Updated debconf translations:
+ - Czech by Miroslav Kure (closes: #384754).
+ - French by Florentin Duneau (closes: #385690).
+ - Russian by Yuri Kozlov (closes: #387017).
+
+ -- martin f. krafft <madduck@debian.org> Thu, 7 Sep 2006 14:32:04 +0200
+
+mdadm (2.5.3.git200608202239-2) unstable; urgency=low
+
+ * Allow ARRAY lines in configuration file to break across lines
+ (closes: #384222).
+ * Improved initramfs hook; now does not rely on initramfs to provide RAID
+ assembly:
+ - if a checked mdadm.conf file is present, use that.
+ - if an unchecked mdadm.conf is present, create a temporary one
+ - if that fails, use the unchecked one iff it contains at least one
+ ARRAY statement. Otherwise rely on auto-generation from the initramfs
+ during the boot (and hope for the best).
+ - if no mdadm.conf is found, create one on the fly
+ - if that fails, hope that the auto-generation will work during boot
+ * Improved the messages printed by the initramfs script.
+ * Do not store the debconf answer for whether arrays not listed in the
+ configuration file should be used.
+ * Now asks again for the devices to start (preseeded with 'all') if the user
+ does not want to proceed with devices not listed in the configuration
+ file.
+
+ -- martin f. krafft <madduck@debian.org> Wed, 30 Aug 2006 16:29:07 +0200
+
+mdadm (2.5.3.git200608202239-1) unstable; urgency=low
+
+ * Tracking upstream git releases.
+ - now the --run switch behaves as stated in the manpage. This properly
+ fixes #287415
+ - new version-1 partitionable arrays are now named X instead of _dX (e.g.
+ 0 instead of _d0) for device names like md_dX. I actually think this is
+ a bug and hope upstream will use dX (e.g. d0) instead in a future
+ version.
+ - we specify --symlink=no and thus disable the new feature to create
+ /dev/mdX symlinks to /dev/md/X devices until the entire device node mess
+ is cleared up. No need to introduce yet another complicating factor at
+ this stage.
+ * Instead of trying to do a whole lot of magic with respect to detecting
+ RAID devices to start, mdadm from now on requires a valid mdadm.conf file
+ to be installed. It still tries to do what it can, but there are no
+ heuristics anymore. See /usr/share/doc/mdadm/README.upgrading-2.5.3 .
+ * Removed the zero-superblock warning because we require the user to sign
+ off the configuration file anyway.
+ * This also enables us to use mdadm.conf from the initial ramdisk and thus
+ closes: #381303.
+ * In case the user chooses to assemble all arrays from the initial ramdisk,
+ use the new homehost feature of mdadm (closes: #381057). This will start
+ only those arrays belonging to the local system, unless it is the first
+ run on a system, in which case it will start all arrays and mark them as
+ belonging to the local system.
+ * Improved the debconf control script: integrated error messages into the
+ frontends, and made it a bit smarter. Error messages can now be
+ translated, and the script checks whether the user's choice is listed in
+ the configuration file and only proceeds if it is, or the user chooses to
+ ignore that it is not.
+ * mkconf can now take a generate/force-generate parameter to write directly
+ to /etc/mdadm/mdadm.conf. A second parameter specifies an alternate
+ filename.
+ * mkconf now outputs comments for the settings it suggests.
+ * Removed all udev-related stuff. We must coexist with udev because there
+ are setups that assemble arrays without mdadm, so the device nodes must be
+ created by udev, if that is used.
+ (closes: #382263, #382450, #383688, #383891, #383806, #382480)
+ Staying at low urgency since these (RC) bugs only exist in unstable.
+ * Now installs MD modules and mdadm/mdrun into initial ramdisk regardless of
+ whether the hook script thinks there are devices to start. This was done
+ to enable recovery from the initramfs shell.
+ * Now uses 'MD' instead of 'RAID' consistently in all messages.
+ * Now rebuilds initramfs for all installed kernels.
+ * Now breaks the endless config loop only when the user does not see the
+ question (see #381284, #381007).
+ * Don't fail mdadm-raid when /dev is on a read-only filesystem
+ (closes: #382876).
+ * Updated debconf translations:
+ - French by Florentin Duneau (closes: #382389).
+
+ -- martin f. krafft <madduck@debian.org> Mon, 21 Aug 2006 00:25:22 +0100
+
+mdadm (2.5.2-10) unstable; urgency=low
+
+ * Applied patch by upstream to fix the logic of the --run switch (see
+ #287415). Thus also reverted the mdadm-raid hack used to fix the bug in
+ the 2.5.2-9 upload.
+ * Recognise devfs-style device nodes by fixing a regression bug in the root
+ RAID autodetection code (closes: #381007), which was introduced as part of
+ the fix for #380596 in the 2.5.2-9 upload. The bug is RC, but it only
+ applies to unstable right now, so I am not pumping up the urgency.
+ The autodetection code now doesn't care about the actual name of the array
+ device, but instead only insists that it exists, is a block device, and
+ recognised as an array by mdadm (mdadm --detail).
+ * Added safety net to prevent endless loops in RAID autodetection. Now just
+ falls back to starting all arrays from the initramfs if it fails to
+ determine an acceptable array for the / filesystem in three tries
+ (closes: #381284, also see #381007).
+ * Added udev rules file to prevent udev from ever creating md device nodes,
+ which can get in the way of mdadm (also see next item).
+ * Added a workaround to the initramfs hook to deal with the problem with
+ /dev/md/X device nodes when /dev/mdX is also present and version-0
+ superblocks are in use (closes: #381181).
+ * checkarray: correctly recognise when the kernel is too old for parity
+ checks, or when there are no redundant arrays present (closes: #380746).
+ * checkarray: now supports -s|--status switch to query parity check status
+ for given devices.
+ * checkarray: now supports -x|--cancel switch to cancel running checks.
+ * mkconf: now also outputs 'MAILADDR root' and 'HOMEHOST <system>'.
+ * Added README.checkarray with some information about the check process.
+ * Added /usr/share/doc/mdadm/FAQ to answer some FAQs.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 3 Aug 2006 22:54:04 +0100
+
+mdadm (2.5.2-9) unstable; urgency=low
+
+ * Added logcheck rules for kernel messages generated by checkarray, using
+ logcheck server level.
+ * Added handling of partitionable arrays to root RAID autodetection script
+ (closes: #380596).
+ * Forcing RAID assembly to run the arrays, working around an upstream bug
+ until that's fixed (closes: #287415).
+ * Updated documentation in README.initramfs-transition to include
+ information related to #380089.
+ * Updated debconf translations:
+ - Vietnamese by Clytie Siddall, thanks!
+
+ -- martin f. krafft <madduck@debian.org> Mon, 31 Jul 2006 14:35:38 +0100
+
+mdadm (2.5.2-8) unstable; urgency=low
+
+ * Re-added rootraiddoc.97.html which was mysteriously lost (closes: #378678).
+ * Catching modprobe error in case of absence of the kernel modules, or
+ a non-modular kernel. Thanks to Holger Levsen.
+ * Copy raid456 kernel module into initramfs, if present (closes: #380152).
+ * checkarray: check for presence of active RAID arrays and give an
+ appropriate error if there are none present (closes: #379019).
+ * checkarray: skip sync for non-redundant devices (closes: #379352, #380424).
+ * Fixing cron registration for checkarray. crontab(5) is really stupid and
+ makes me think that they simply documented a bug instead of fixing it, so
+ now I have to hack around it. See the cron.d file (closes: #380425).
+ * Removed the code writing auto-detected devices to /var, which was silly
+ since /var isn't necessarily mounted yet by the time mdadm-raid is called.
+ Thanks to Maurizio Avogadro for pointing this out.
+ * Add reference to BAARF to README.Debian and included the RAID5 vs RAID10
+ article from the BAARF website.
+ * Updated debconf translations:
+ - Japanese by Hideki Yamane, thanks!
+ - French by Florentin Duneau, thanks! (closes: #379511)
+
+ -- martin f. krafft <madduck@debian.org> Thu, 27 Jul 2006 22:49:32 +0100
+
+mdadm (2.5.2-7) unstable; urgency=low
+
+ * Release to unstable.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 20 Jul 2006 17:23:23 +0100
+
+mdadm (2.5.2-6) experimental; urgency=low
+
+ * Adding mdrun to generated udeb. I will only remove mdrun after etch.
+ * Updated debconf translations:
+ - Dutch, thanks to Frans "Franzerl" Pop! (closes: #377412)
+ - French, thanks to Florentin Duneau! (closes: #377968)
+
+ -- martin f. krafft <madduck@debian.org> Thu, 13 Jul 2006 23:11:24 +0200
+
+mdadm (2.5.2-5) UNRELEASED; urgency=low
+
+ * Remove the check for the lvm prereq in the initramfs hook, as #369617 is
+ now fixed. Thus conflicts against initramfs-tools (<< 0.65).
+ * Updated debconf translations:
+ - German, thanks to Mario Joußen!
+ * Added short note about maintenance in SVN to README.Debian.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 13 Jul 2006 23:10:36 +0200
+
+mdadm (2.5.2-4) UNRELEASED; urgency=low
+
+ * The "it takes two to swing" release.
+ * Now does not stop arrays on upgrade or remove. Thanks (and sorry) to
+ Christian Pernegger (and hopefully no others).
+ * Fixed small problem in debconf configuration script related to unbound
+ MAIL_TO variable. Thanks to Christian Pernegger.
+
+ -- martin f. krafft <madduck@debian.org> Fri, 7 Jul 2006 16:59:01 +0200
+
+mdadm (2.5.2-3) UNRELEASED; urgency=low
+
+ * The initramfs now gets all RAID modules installed. It's a lot safer to
+ have them all around, the size difference is negligible, and we still only
+ load the needed ones at boot time.
+ * Added /usr/share/mdadm/checkarray, which can be used to check arrays for
+ parity. Also added a debconf question to let the user choose whether
+ cron should run these checks (closes: #377071).
+ * Only shut down arrays automatically when they've been automatically
+ started (closes: #376009).
+ * Make sure the user has a chance to choose the autostart feature by
+ elevating the debconf priority to high (see #376009). The warning about
+ reuse of RAID components has also been elevated to debconf priority high.
+ * The MAIL_TO setting from /etc/default/mdadm has been removed. Instead, use
+ MAILADDR in /etc/mdadm/mdadm.conf. See mdadm.conf(5). Your setting should
+ be automatically migrated.
+ * Now rewrites /etc/default/mdadm (but preserves settings) instead of trying
+ to patch it with changes.
+ * Added note to README.Debian to ensure users know that only the devices
+ listed in mdadm.conf will be autostarted (see #376009).
+ * Now includes latest md.txt from kernel documentation in
+ /usr/share/doc/mdadm/md.txt.gz.
+ * Added some more recipes to /usr/share/doc/mdadm/README.recipes.gz.
+ * Updated debconf translations:
+ - Swedish, thanks to Daniel Nylander!
+ - Brazilian Portuguese, thanks to Felipe Augusto van de Wiel!
+ - Czech, thanks to Miroslav Kure!
+ - Russian, thanks to Yuri Kozlov (closes: #376181).
+ * Further updates to the debconf templates; I hope the translators aren't
+ going to kill me.
+
+ -- martin f. krafft <madduck@debian.org> Fri, 7 Jul 2006 15:09:40 +0200
+
+mdadm (2.5.2-2) experimental; urgency=low
+
+ * The "if it weren't for Munich's wheat beer, there'd be no" release.
+ * Removed -fno-strict-aliasing from compiler options, after upstream fixed
+ the bug that led to its use (see #369779, #356153). Thanks to Elimar
+ Riesebieter for pointing this out (closes: #375876).
+ * Moved detection of RAID devices from initramfs hook to debconf control
+ file, and added a (low-priority) debconf question as to which devices
+ should be started early in the boot sequence. For the cases where we
+ failed to auto-detect previously (e.g. root on LVM on RAID), it's paranoid
+ and suggests to start them all (closes: #375879). Thanks to Alec Berryman
+ for spotting this.
+ * Fixed a typo in README.experimental, which could lead to an unbootable
+ system with initramfs-tools 0.64 or before. Again, thanks to Alec for
+ spotting this.
+ * Extended bug script to include --examine output for all components (at
+ least if called by root, which hopefully should never happen. Err,
+ wait...)
+ * Disabled deprecation warning in mdrun until the transition is complete.
+ * Reworded the debconf templates due to a new question, and also for
+ readability.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 29 Jun 2006 22:54:47 +0200
+
+mdadm (2.5.2-1) experimental; urgency=low
+
+ * New upstream release.
+ * Implemented checks in the initramfs hooks and scripts for compatibility
+ with initramfs-tools. Now we do not need a conflict anymore because
+ mdadm's hooks and scripts will simply do nothing while the ones provided
+ by initramfs-tools are still present.
+ * Not using /bin/bash for mdrun, which I thought we'd need for read timeout
+ support (for the deprecation warning). Since the -n and -t flags to the
+ read shell builtin are non-POSIX, I dropped them, they were merely
+ cosmetic anyway.
+
+ -- martin f. krafft <madduck@debian.org> Tue, 27 Jun 2006 15:06:55 +0200
+
+mdadm (2.5.1-2) experimental; urgency=low
+
+ * Updating dependency on initramfs-tools, which has not yet adapted to mdadm
+ taking over the hooks.
+
+ -- martin f. krafft <madduck@debian.org> Mon, 26 Jun 2006 22:35:08 +0200
+
+mdadm (2.5.1-1) experimental; urgency=low
+
+ * New upstream release:
+ - Really fixes return status of examine (closes: #367901).
+ - Fixes a memory leak in monitor mode (closes: #372618).
+ - Fixes compiler warnings and errors (closes: #373802, #356153, #369779).
+ - Fix byte swapping issues (closes: #369765).
+ - Now lists devices it stops (closes: #369850). This also leads to
+ beautification of the init.d script's stop action.
+ * Fixed RAID init script to not complain about missing logger command.
+
+ -- martin f. krafft <madduck@debian.org> Mon, 26 Jun 2006 00:58:36 +0200
+
+mdadm (2.5-4) experimental; urgency=low
+
+ * The "would you like fries with your parasite?" release.
+ * Now does not require RAID support from the kernel just for package
+ installation; that was silly of me, sorry (closes: Bug#370115).
+ * Added version to Replaces: initramfs-tools dependency.
+ * Further init.d script improvements.
+ * Recommends mail-transport-agent, or the monitor daemon won't be able to
+ send anything.
+ * Ignores failures from modprobe in postinst when RAID modules are not
+ available (closes: #370582).
+
+ -- martin f. krafft <madduck@debian.org> Tue, 6 Jun 2006 12:45:53 +0200
+
+mdadm (2.5-3) experimental; urgency=low
+
+ * Added /usr/share/doc/mdadm/README.recipes with some common usage examples.
+ * Vastly improved the mdadm-raid init.d script output, and removed bashisms.
+
+ -- martin f. krafft <madduck@debian.org> Fri, 2 Jun 2006 00:45:06 +0200
+
+mdadm (2.5-2) experimental; urgency=low
+
+ * The "on her majesty's secret service" release.
+ * Enabled -DDEBIAN during build, which will take care of default permissions
+ on devices created by mdadm. Together with the CREATE configuration
+ directive in 2.5, this now certainly closes: Bug#310241.
+ * Added a patch (incorporating lib/mm/xlate.h from lvm2) to prevent direct
+ access to kernel headers from userspace (closes: Bug#369765).
+ * Disabled strict aliasing compiler checks until we find a better
+ implementation for linked lists in C (closes: Bug#369779, Bug#356153).
+ * Actually decreased the size of the udeb mdadm binary with -Os
+ -fomit-frame-pointer (as suggested by Joey Hess) (closes: Bug#314370)
+ * Added Replaces: initramfs-tools to communicate that we're not conflicting
+ but replacing instead (see Bug#367567)
+ * Updated conflict with initramfs-tools to (<< 0.63) per suggestion by the
+ maintainers.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 1 Jun 2006 20:15:17 +0200
+
+mdadm (2.5-1) experimental; urgency=low
+
+ * The "show me the way to the next whiskey bar" release.
+ * See /usr/share/doc/mdadm/README.experimental or
+ http://madduck.net/~madduck/scratch/README.experimental
+ * New upstream release:
+ - mails include /proc/mdstat output (closes: Bug#355882)
+ - allows specification of device permissions in config (closes: Bug#310241)
+ * /sbin/mdrun has been deprecated and replaced by calls to /sbin/mdadm;
+ a proper deprecation warning is in place (see NEWS).
+ * Moved initramfs hook and script into the package, and switched it to mdadm
+ (from mdrun). Thanks to Stephen Frost for his help (closes: Bug#354144).
+ This should make sure that the right minor numbers are chosen during boot
+ (mainly because mdadm takes care of it all) (closes: Bug#361408).
+ * Removing mdrun from the udeb (d-i patch submitted to debian-boot mailing
+ list)
+ * Upstream links against openssl for SHA1 support (homehost feature), which
+ is a problem. An internal SHA1 implementation is provided, however, so
+ I just link against that.
+ * Switched init.d scripts to use LSB-compliant output.
+ * Enhanced init.d script output.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 1 Jun 2006 02:20:22 +0200
+
+mdadm (2.4.1-2) unstable; urgency=low
+
+ * The "this took way longer than I thought" release.
+ * Migrating to unstable.
+ * If the init.d script creates the mdadm.conf file, it should remove it on
+ purge. To accomplish this, I create a semaphore in /var/lib/mdadm if it
+ was generated, and only remove the conffile on purge if the semaphore
+ exists.
+ * Added a little helper /usr/share/mdadm/mkconf to aid generation of
+ configuration file.
+ * Added a bug script to collect some important information when the user
+ uses Debian bug reporting tools (such as reportbug).
+ * Added a debian/watch file.
+
+ -- martin f. krafft <madduck@debian.org> Wed, 31 May 2006 23:07:48 +0200
+
+mdadm (2.4.1-1) experimental; urgency=low
+
+ * The "I'll kill that maintainer... uh, wait, it's me" release. Sorry for
+ the delay, here's the long awaited new upstream release (closes:
+ Bug#337903, Bug#363592), which gets rid of a bunch of functionality bugs:
+ - reiserfs size does not overflow anymore (closes: Bug#318230)
+ - fixed typos in manpages (closes: Bug#352798)
+ Oh, and we're moving away from that arch nightmare too. Sorry for the
+ confusion.
+ * Experimental release, because I really don't want to be responsible for
+ data loss. Though I am quite sure that the upgrade is painless, I also
+ don't have access to 18 drive RAID 10 with multipath on s390 or similar
+ arrangements.
+ * We now make the /dev/md* devices in postinst unless /dev/md15 exists (no
+ longer checking for /dev/md0), or unless devfs is in use. If udev is used,
+ /dev/md15 will only exist in complex setups, so the devices will be made
+ in /dev/.static by MAKEDEV, which is not really a concern. I opted against
+ unconditionally calling MAKEDEV until #367407 is fixed so as to preserve
+ custom permissions or owner settings. This also acknowledges the NMU
+ (#299623).
+ closes: Bug#310247, Bug#299623
+ * Patched some of the code to make mdadm honour /etc/mdadm/mdadm.conf over
+ /etc/mdadm.conf (see NEWS).
+ * Fixed a couple of typos in the mdadm(8) manpage; thanks to Reuben Thomas.
+ closes: Bug#345669, Bug#345667
+ * Pushed Standards-Version to 3.7.2; no changes required.
+ * Updated Debconf translations:
+ - Vietnamese by Clytie Siddall (closes: Bug#323950)
+ - Czech by Miroslav Kure (closes: Bug#360290)
+ - Russian by Yuri Kozlov (closes: Bug#361116)
+ - French by Eric Madesclair (closes: Bug#323988)
+ * Added new Debconf translations:
+ - Swedish by Daniel Nylander (closes: Bug#333486)
+ - Dutch by Frans Pop (closes: Bug#344714)
+
+ -- martin f. krafft <madduck@debian.org> Tue, 16 May 2006 18:21:36 -0500
+
+mdadm (1.12.0-1) unstable; urgency=low
+
+ * New upstream release.
+ (obsoletes branches: symlinks)
+ (reduces branches: gcc4signedness, debian, autoscan)
+ * Fixed typo in mdadm.conf(5) manpage (closes: Bug#321152).
+
+ -- martin f. krafft <madduck@debian.org> Sun, 24 Jul 2005 19:20:01 +0200
+
+mdadm (1.9.0-5) unstable; urgency=low
+
+ * martin f. krafft: (the, "look ma', we're maintained in arch now!" release)
+ (no functional differences except for added/updated translations)
+ - Acknowledge NMU by Steve Langasek; thanks! (closes: Bug#299623)
+ - split diff.gz into different arch branches (see debian/arch-branches).
+ - debian/control:
+ - Changed maintainer to pkg-mdadm-devel.
+ - Reworded some of the descriptions (closes: Bug#304170).
+ - Pushed Standards-Version to 3.6.2.1; no changes needed.
+ - fixed po-debconf integration
+ - debian/rules:
+ - fixed po-debconf integration
+ - l10n changes:
+ - Removed ambiguity from debconf template (closes: Bug#312754).
+ - Added Vietnamese debconf translation; thanks to Clytie Siddall!
+ (closes: Bug#312753)
+ - Added Czech debconf translation; thanks to Miroslav Kure! (closes: Bug#319626)
+ - Updated German debconf translation; thanks to Jens Seidel! (closes: Bug#313981)
+ - backported upstream's gcc4 signedness fixes from 1.12.0 (gcc4signedness
+ branch) (closes: Bug#319743).
+
+ -- martin f. krafft <madduck@debian.org> Sun, 24 Jul 2005 17:58:46 +0200
+
+mdadm (1.9.0-4.1) unstable; urgency=high
+
+ * Non-maintainer upload.
+ * High-urgency upload for sarge-targeted RC bugfix
+ * Make sure error output from MAKEDEV is sent to stderr, to avoid
+ interfering with debconf; this avoids installation problems on
+ udev-using systems. Thanks to Jonas Smedegaard for the patch.
+ Closes: #299623.
+
+ -- Steve Langasek <vorlon@debian.org> Wed, 1 Jun 2005 03:36:42 -0700
+
+mdadm (1.9.0-4) unstable; urgency=high
+
+ * High-urgency upload for sarge targeted RC bugfix.
+ * mdrun: replaced invocation of /usr/bin/seq with hard-coded sequence
+ (closes: Bug#310671).
+
+ -- martin f. krafft <madduck@debian.org> Wed, 25 May 2005 09:51:41 +0200
+
+mdadm (1.9.0-3) unstable; urgency=high
+
+ * High-urgency upload for sarge targeted RC bugfix.
+ * Applied patch by Peter Samuelson <peter@p12n.org>, which causes mdadm to
+ follow symlinks of device nodes (closes: #274859, #310412, #310492).
+ * Added myself as co-maintainer as per agreement with Mario Joussen.
+
+ -- martin f. krafft <madduck@debian.org> Tue, 24 May 2005 00:03:49 +0200
+
+mdadm (1.9.0-2.3) unstable; urgency=high
+
+ * Non-maintainer upload.
+ * Do not prevent postinst node creation when udev is being used; MAKEDEV
+ puts files into /dev/.static/dev with udev, which is needed so that device
+ nodes will be there even if udev is removed. Sorry for letting this slip
+ my mind and thanks to Steve Langasek for spotting this error.
+ * Leaving urgency at high to make sarge.
+
+ -- martin f. krafft <madduck@debian.org> Sun, 22 May 2005 19:35:04 +0200
+
+mdadm (1.9.0-2.2) unstable; urgency=high
+
+ * Non-maintainer upload.
+ * High-urgency upload for sarge targeted RC bugfix.
+ * Move mdadm-raid back to S25 as it needs to run after modules have been
+ loaded at S20 (see followups to #294404, #301560).
+ * Verified that Steve Langasek's patch to config.c (see item 4 of the
+ 1.9.0-2.1 changelog) is necessary for `mdadm -A -s` to work.
+ (closes: #301560)
+ * Integrated patch by Erik van Konijnenburg to fix mdadm's --auto
+ option in the presence of --scan.
+ (closes: #294404, #273182, #284028, #310126).
+ * Modified mdrun to call mdadm with --auto in assembly mode. Removed code
+ which would auto-create 24 device nodes during system startup when udev
+ was used.
+ * Fixed next_free_md function in mdrun to iterate all 24 nodes instead of
+ using some fragile shell globbing, which did not work anyway.
+ * Prevent postinst node creation when udev is being used.
+ * Added a README.udev file to /usr/share/doc/mdadm.
+
+ -- martin f. krafft <madduck@debian.org> Sun, 22 May 2005 12:57:56 +0200
+
+mdadm (1.9.0-2.1) unstable; urgency=high
+
+ * Non-maintainer upload.
+ * High-urgency upload for sarge-targeted RC bugfix.
+ * Start mdadm-raid before udev on boot-up, so that mdadm device node
+ creation is honored, and support changing the init script ordering
+ on upgrades (closes: #294404).
+ * Fix mdadm --scan to prefer the values contained in /proc/partitions,
+ instead of picking up device node names at random from /dev.
+ * Teach mdrun to look at /dev/.static/dev instead of /.dev for udev
+ mounts requiring autostart (closes: #301560).
+
+ -- Steve Langasek <vorlon@debian.org> Sun, 27 Mar 2005 21:59:12 -0800
+
+mdadm (1.9.0-2) unstable; urgency=low
+
+ * Patched is_standard() to accept /dev/md/* names as standard.
+ Thanks to Colin Watson <cjwatson@debian.org>.
+ (closes: Bug#296794)
+ * Added another typecast to make it compilable on amd64 with gcc-4.0.
+ Thanks to Andreas Jochens <aj@andaco.de>.
+ (closes: Bug#294217)
+ * Removed unnecessary second assignment to $BASE in mdrun.
+ Thanks to Colin Watson <cjwatson@debian.org>.
+ (closes: Bug#295433)
+
+ -- Mario Joussen <joussen@debian.org> Sun, 6 Mar 2005 14:22:24 +0100
+
+mdadm (1.9.0-1) unstable; urgency=high
+
+ * New upstream release.
+ Solves problems with same UUID for each array.
+ Again a stable upstream version.
+ (closes: Bug#292282, Bug#293406, Bug#292784, Bug#290363, Bug#292715)
+ * Added some typecasts to make it compilable on amd64 with gcc-4.0.
+ Thanks to Andreas Jochens <aj@andaco.de>.
+ (closes: Bug#287638)
+
+ -- Mario Joussen <joussen@debian.org> Sun, 6 Feb 2005 12:25:03 +0100
+
+mdadm (1.8.1-1) unstable; urgency=low
+
+ * New upstream release.
+ Fixed segfault if no config file present and --scan is used.
+ (closes: Bug#283425, Bug#282604, Bug#284024)
+ * Fixed typo in detailed help of grow mode.
+ (closes: Bug#286980)
+ * Added japanese debconf translation. Thanks to Hideki Yamane
+ <henrich@samba.gr.jp>.
+ (closes: Bug#281073)
+ * Fixed missing variable initialization causing segfaults.
+
+ -- Mario Joussen <joussen@debian.org> Sun, 26 Dec 2004 14:44:31 +0100
+
+mdadm (1.7.0-2) unstable; urgency=high
+
+ * Changed debconf script to save the settings from the config file.
+ Thanks to Fabio Massimo Di Nitto <fabbione@fabbione.net> and
+ Frank Lichtenheld <djpig@debian.org> for the patch.
+ (closes: Bug#274208)
+ * Moved the attempt to load the md module inside the AUTOSTART if branch in
+ /etc/init.d/mdadm.
+ * Removed the attempt to load the md module from /etc/init.d/mdadm-raid.
+
+ -- Mario Joussen <joussen@debian.org> Sun, 24 Oct 2004 19:48:06 +0200
+
+mdadm (1.7.0-1) unstable; urgency=low
+
+ * New upstream release.
+ (closes: Bug#267814)
+
+ -- Mario Joussen <joussen@debian.org> Sun, 12 Sep 2004 20:48:33 +0200
+
+mdadm (1.6.0-3) unstable; urgency=high
+
+ * Added 'Conflicts: raidtools2 (<< 1.00.3-12.1)' because these packages
+ contain a mdrun.8 man page also.
+ (closes: Bug#268634, Bug#266527)
+ * Updated the french translation.
+ Thanks to Eric <eric-m@wanadoo.fr>
+ (closes: Bug#266251)
+
+ -- Mario Joussen <joussen@debian.org> Sat, 28 Aug 2004 18:23:17 +0200
+
+mdadm (1.6.0-2) unstable; urgency=low
+
+ * Included version 0.97 of "Debian Software Root Raid Documentation".
+ * Now mdrun is only used if no mdadm.conf is present.
+ Thanks to Thomas Prokosch <7nrmi1s02@sneakemail.com>.
+ (closes: Bug#264059)
+ * Added man page for mdrun.
+ Thanks to Robert Collins <robertc@robertcollins.net>.
+ (closes: Bug#265480)
+ * Moved /etc/mdadm/debian.conf to /etc/default/mdadm.
+ (closes: Bug#254922)
+ * Added a little workaround to mdrun to interact better with udev.
+ Thanks to Fabio Massimo Di Nitto <fabbione@fabbione.net>.
+ (closes: Bug#259491)
+ * Updated Brazilian Portuguese translation.
+ Thanks to Andre Luis Lopes <andrelop@debian.org>.
+ (closes: Bug#264220)
+
+ -- Mario Joussen <joussen@debian.org> Mon, 16 Aug 2004 22:10:59 +0200
+
+mdadm (1.6.0-1) unstable; urgency=low
+
+ * New upstream release.
+ Detect degraded arrays in --monitor mode now.
+ (closes: Bug#257357)
+ * Changed default to autostart RAID array.
+ (closes: Bug#250792)
+ * Fixed mdrun problem with kernel 2.6.
+ Thanks to Andre Tomt <andre@tomt.net> and Fabio Massimo Di Nitto
+ <fabbione@fabbione.net>
+ (closes: Bug#231823)
+ * Changed reuse warning to be less misleading.
+ (closes: Bug#253339)
+
+ -- Mario Joussen <joussen@debian.org> Tue, 20 Jul 2004 21:40:33 +0200
+
+mdadm (1.5.0-2) unstable; urgency=low
+
+ * Added french debconf template.
+ Thanks to Eric Madesclair <eric-m@wanadoo.fr>.
+ (closes: Bug#231968)
+
+ -- Mario Joussen <joussen@debian.org> Tue, 4 May 2004 21:29:19 +0200
+
+mdadm (1.5.0-1) unstable; urgency=low
+
+ * New upstream release.
+ * Rewrote debconf templates to avoid referring to debconf interface
+ widgets.
+ (closes: Bug#231221)
+ * Removed manual scan for RAID devices from init script.
+ (closes: Bug#233122, Bug#236762)
+ * Added creation of an udeb package.
+ (closes: Bug#243609)
+ * Added "Debian Software Root Raid Documentation".
+ Thanks to Lucas Albers <albersl@cs.montana.edu>.
+
+ -- Mario Joussen <joussen@debian.org> Sun, 25 Apr 2004 16:16:06 +0200
+
+mdadm (1.4.0-3) unstable; urgency=low
+
+ * Updated to standards version 3.6.1.0 and debhelper 4.
+ * Corrected definition of BLKGETSIZE64 macro to compile with the
+ 2.6 kernel headers.
+ (closes: Bug#223191)
+ * Switched to po-debconf to provide localized debconf templates.
+ (closes: Bug#225288)
+
+ -- Mario Joussen <joussen@debian.org> Sun, 1 Feb 2004 19:30:53 +0100
+
+mdadm (1.4.0-2) unstable; urgency=low
+
+ * Corrected human readable size calculation.
+ (closes: Bug#225041)
+ * Added a warning about reusing hard disks and using the autostart
+ feature.
+ (closes: Bug#223790)
+
+ -- Mario Joussen <joussen@debian.org> Thu, 25 Dec 2003 19:52:57 +0100
+
+mdadm (1.4.0-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Mario Joussen <joussen@debian.org> Sun, 7 Dec 2003 19:39:27 +0100
+
+mdadm (1.3.0-2) unstable; urgency=low
+
+ * Added upstream changelog to package.
+
+ -- Mario Joussen <joussen@debian.org> Tue, 12 Aug 2003 21:51:59 +0200
+
+mdadm (1.3.0-1) unstable; urgency=low
+
+ * New upstream release.
+ (closes: Bug#191561, Bug#200921)
+
+ -- Mario Joussen <joussen@debian.org> Thu, 31 Jul 2003 20:59:20 +0200
+
+mdadm (1.2.0-1) unstable; urgency=low
+
+ * New upstream release. (closes: Bug#183191)
+ * New version of mdrun that works properly with devfs and temporary
+ device directory.
+ (closes: Bug#182035)
+ * Added 'Conflicts: raidtools' because of a name clash with mdrun.
+ (closes: Bug#182960)
+
+ -- Mario Joussen <joussen@debian.org> Sun, 16 Mar 2003 13:32:45 +0100
+
+mdadm (1.0.1-4) unstable; urgency=low
+
+ * Changed mdrun so that it can deal with partition statistics in
+ /proc/partitions.
+ (closes: Bug#174000, Bug#175130)
+ * Added russian (ru) debconf template translation. Thanks to Sergey
+ Spiridonov <sena@hurd.homeunix.org>.
+
+ -- Mario Joussen <joussen@debian.org> Sun, 5 Jan 2003 13:14:45 +0100
+
+mdadm (1.0.1-3) unstable; urgency=low
+
+ * Fixed a bug in mdrun. (closes: Bug#167607)
+
+ -- Mario Joussen <joussen@debian.org> Mon, 11 Nov 2002 07:53:23 +0100
+
+mdadm (1.0.1-2) unstable; urgency=low
+
+ * Fixed typo in help option. (closes: Bug#151533)
+ * Added a script that discovers and assembles all arrays automatically.
+ Thanks to Eduard Bloch <blade@debian.org>.
+ (closes: Bug#161699)
+
+ -- Mario Joussen <joussen@debian.org> Fri, 1 Nov 2002 13:46:47 +0100
+
+mdadm (1.0.1-1) unstable; urgency=low
+
+ * New upstream release.
+
+ -- Mario Joussen <joussen@debian.org> Thu, 30 May 2002 14:01:22 +0200
+
+mdadm (0.8.2-1) unstable; urgency=low
+
+ * New upstream release.
+ * Split up mdadm.templates into one file for each language.
+ * Added brazilian portuguese (pt_BR) debconf template translation.
+ Thanks to Andre Luis Lopes <andrelop@ig.com.br>.
+ (closes: Bug#141540)
+
+ -- Mario Joussen <joussen@debian.org> Thu, 18 Apr 2002 19:31:00 +0200
+
+mdadm (0.7.2-1) unstable; urgency=low
+
+ * New upstream release.
+ * 'mdctl' was renamed to 'mdadm' upstream.
+ * Removed question about updating mdctl init script links.
+
+ -- Mario Joussen <joussen@debian.org> Sat, 23 Mar 2002 02:50:51 +0100
+
+mdctl (0.5-4) unstable; urgency=low
+
+ * Added debconf template to ask the user if the init script links
+ should be updated.
+
+ -- Mario Joussen <joussen@debian.org> Mon, 4 Mar 2002 22:53:37 +0100
+
+mdctl (0.5-3) unstable; urgency=low
+
+ * Split up init script into two parts. One starts the md array and the
+ other starts the raid monitor daemon.
+ (closes: Bug#136184)
+
+ -- Mario Joussen <joussen@debian.org> Thu, 28 Feb 2002 22:45:57 +0100
+
+mdctl (0.5-2) unstable; urgency=low
+
+ * Included optimization in Makefile
+ (closes: Bug#127687)
+ * Removed Conflicts/Replaces/Provides: mdutils
+ (closes: Bug#127684, Bug#127719)
+ * Added an init script, which can start md arrays and the raid monitor
+ daemon
+ * MD devices are now created under /dev if necessary
+ * Added a sample configuration file
+
+ -- Mario Joussen <mario@joussen.org> Sun, 13 Jan 2002 23:43:40 +0100
+
+mdctl (0.5-1) unstable; urgency=low
+
+ * Initial Release.
+ (closes: Bug#126610)
+
+ -- Mario Joussen <mario@joussen.org> Wed, 26 Dec 2001 17:07:09 +0100
diff --git a/debian/checkarray b/debian/checkarray
new file mode 100644
index 00000000..88266c45
--- /dev/null
+++ b/debian/checkarray
@@ -0,0 +1,219 @@
+#!/bin/sh
+#
+# checkarray -- initiates a check run of an MD array's redundancy information.
+#
+# Copyright © martin f. krafft <madduck@debian.org>
+# distributed under the terms of the Artistic Licence 2.0
+#
+set -eu
+
+PROGNAME=${0##*/}
+
+about() # print the program name, copyright and licence banner on stdout
+{
+ echo "\
+$PROGNAME -- MD array (RAID) redundancy checker tool
+Copyright © martin f. krafft <madduck@debian.org>
+Released under the terms of the Artistic Licence 2.0"
+}
+
+usage() # print the about banner followed by the option summary, examples and accepted device spellings
+{
+ about
+ echo "
+Usage: $PROGNAME [options] [arrays]
+
+Valid options are:
+ -a|--all check all assembled arrays (ignores arrays in command line).
+ -s|--status print redundancy check status of devices.
+ -x|--cancel queue a request to cancel a running redundancy check.
+ -i|--idle perform check in a lowest scheduling class (idle)
+ -l|--slow perform check in a lower-than-standard scheduling class
+ -f|--fast perform check in higher-than-standard scheduling class
+ --realtime perform check in real-time scheduling class (DANGEROUS!)
+ -c|--cron honour AUTOCHECK setting in /etc/default/mdadm.
+ -q|--quiet suppress informational messages
+ (use twice to suppress error messages too).
+ -h|--help show this output.
+ -V|--version show version information.
+
+Examples:
+ $PROGNAME --all --idle
+ $PROGNAME --quiet /dev/md[123]
+ $PROGNAME -sa
+ $PROGNAME -x --all
+
+Devices can be specified in almost any format. The following are equivalent:
+ /dev/md0, md0, /dev/md/0, /sys/block/md0
+
+You can also control the status of a check with /proc/mdstat file."
+}
+
+SHORTOPTS=achVqQsxilf
+LONGOPTS=all,cron,help,version,quiet,real-quiet,status,cancel,idle,slow,fast,realtime
+
+eval set -- "$(getopt -o $SHORTOPTS -l $LONGOPTS -n "$PROGNAME" -- "$@")" # quoted so eval receives getopt's own quoting intact
+
+arrays='' # space-separated mdX names collected from the command line
+cron=0 # 1 when -c/--cron was given
+all=0 # 1 when -a/--all was given
+quiet=0 # verbosity: 0 = everything, 1 = no info messages, 2 = no error messages either
+status=0 # NOTE(review): never read in this script; -s sets action=status instead
+action=check # value written to sync_action; -s selects status, -x selects idle
+ionice= # scheduling class keyword: idle, low, high or realtime; empty means leave untouched
+
+for opt in $@; do # walk the positional parameters as rewritten by the eval/getopt line above
+ case "$opt" in
+ -a|--all) all=1;;
+ -s|--status) action=status;;
+ -x|--cancel) action=idle;;
+ -i|--idle) ionice=idle;;
+ -l|--slow) ionice=low;;
+ -f|--fast) ionice=high;;
+ --realtime) ionice=realtime;;
+ -c|--cron) cron=1;;
+ -q|--quiet) quiet=$(($quiet+1));;
+ -Q|--real-quiet) quiet=$(($quiet+2));; # for compatibility
+ -h|--help) usage; exit 0;;
+ -V|--version) about; exit 0;;
+ /dev/md/*|md/*) arrays="${arrays:+$arrays }md${opt#*md/}";; # /dev/md/N and md/N are stored as mdN
+ /dev/md*|md*) arrays="${arrays:+$arrays }${opt#/dev/}";; # strip a leading /dev/
+ /sys/block/md*) arrays="${arrays:+$arrays }${opt#/sys/block/}";; # strip the sysfs prefix
+ --) :;; # the -- separator itself carries no information
+ *) echo "$PROGNAME: E: invalid option: $opt. Try --help." >&2; exit 1;;
+ esac
+done
+
+is_true() # exit status 0 when $1 is yes/y/1/true/t (first letter in either case), 1 otherwise
+{
+ case "${1:-}" in
+ 1|[Yy]|[Yy]es|[Tt]|[Tt]rue) return 0;;
+ esac
+ return 1
+}
+
+DEBIANCONFIG=/etc/default/mdadm
+[ -r $DEBIANCONFIG ] && . $DEBIANCONFIG
+if [ $cron = 1 ] && ! is_true ${AUTOCHECK:-false}; then # --cron runs only when AUTOCHECK is true
+ [ $quiet -lt 1 ] && echo "$PROGNAME: I: disabled in $DEBIANCONFIG ." >&2
+ exit 0
+fi
+
+if [ ! -f /proc/mdstat ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: E: MD subsystem not loaded, or /proc unavailable." >&2
+ exit 2 # exit status 2: /proc/mdstat missing
+fi
+
+if [ ! -d /sys/block ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: E: /sys filesystem not available." >&2
+ exit 7 # exit status 7: /sys/block missing
+fi
+
+if [ -z "$(ls /sys/block/md* 2>/dev/null)" ]; then
+ if [ $quiet -lt 2 ] && [ $cron != 1 ]; then # stay silent about this when run with --cron
+ echo "$PROGNAME: W: no active MD arrays found." >&2
+ echo "$PROGNAME: W: (maybe uninstall the mdadm package?)" >&2
+ fi
+ exit 0
+fi
+
+if [ -z "$(ls /sys/block/md*/md/level 2>/dev/null)" ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: E: kernel too old, no support for redundancy checks." >&2
+ exit 6 # exit status 6: no md/level attribute under any /sys/block/md*
+fi
+
+if ! egrep -q '^raid([1456]|10)$' /sys/block/md*/md/level 2>/dev/null; then # raid1, raid4, raid5, raid6 or raid10
+ [ $quiet -lt 1 ] && echo "$PROGNAME: I: no redundant arrays present; skipping checks..." >&2
+ exit 0
+fi
+
+if [ -z "$(ls /sys/block/md*/md/sync_action 2>/dev/null)" ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: E: no kernel support for redundancy checks." >&2
+ exit 3 # exit status 3: no md/sync_action attribute under any /sys/block/md*
+fi
+
+[ $all = 1 ] && arrays="$(ls -d1 /sys/block/md* | cut -d/ -f4)" # --all replaces any arrays named on the command line
+
+for array in $arrays; do # apply $action (status, idle or check) to every selected array
+ MDBASE=/sys/block/$array/md
+
+ if [ ! -e $MDBASE/sync_action ]; then
+ [ $quiet -lt 1 ] && echo "$PROGNAME: I: skipping non-redundant array $array." >&2
+ continue
+ fi
+
+ read cur_status < $MDBASE/sync_action
+
+ if [ $action = status ]; then
+ echo "$array: $cur_status"
+ continue
+ fi
+
+ if [ ! -w $MDBASE/sync_action ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: E: $MDBASE/sync_action not writeable." >&2
+ exit 4
+ fi
+
+ if [ "$(cat $MDBASE/array_state)" = 'read-auto' ]; then
+ [ $quiet -lt 1 ] && echo "$PROGNAME: W: array $array in auto-read-only state, skipping..." >&2
+ continue
+ fi
+
+ case "$action" in
+ idle)
+ echo $action > $MDBASE/sync_action
+ [ $quiet -lt 1 ] && echo "$PROGNAME: I: cancel request queued for array $array." >&2
+ ;;
+
+ check)
+ if [ "$cur_status" != idle ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: W: array $array not idle, skipping..." >&2
+ continue
+ fi
+
+ # check whether the array was created within the last 14 days and skip the check if so
+ created=$(mdadm --detail /dev/$array 2>/dev/null |
+ sed -n 's/.*Creation Time *://p' )
+ if [ -n "$created" ]; then
+ created=$(date +%s -d "$created" 2>/dev/null)
+ fi
+ if [ -n "$created" ]; then
+ now=$(date +%s)
+ if [ "$created" -lt "$now" -a \
+ "$created" -gt "$(($now - 14 * 24 * 60 * 60))" ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: I: array $array created recently, skipping..." >&2
+ continue
+ fi
+ fi
+
+ # queue the request for the array. The kernel will make sure that these requests
+ # are properly queued so as to not kill one of the arrays.
+ echo $action > $MDBASE/sync_action
+ [ $quiet -lt 1 ] && echo "$PROGNAME: I: check queued for array $array." >&2
+
+ case "$ionice" in # map the requested class to ionice arguments and a nice value
+ idle) ioarg='-c3'; renice=15;;
+ low) ioarg='-c2 -n7'; renice=5;;
+ high) ioarg='-c2 -n0'; renice=0;;
+ realtime) ioarg='-c1 -n4'; renice=-5;;
+ *) continue;; # no class requested: go on to the next array (break would end the whole for loop)
+ esac
+
+ resync_pid= wait=5
+ while [ $wait -gt 0 ]; do # look up to five times, one second apart, for the [<array>_resync] process
+ wait=$((wait - 1))
+ resync_pid=$(ps -ef | awk -v dev=$array 'BEGIN { pattern = "^\\[" dev "_resync]$" } $8 ~ pattern { print $2 }')
+ if [ -n "$resync_pid" ]; then
+ [ $quiet -lt 1 ] && echo "$PROGNAME: I: selecting $ionice I/O scheduling class and $renice niceness for resync of $array." >&2
+ ionice -p "$resync_pid" $ioarg || : # best effort: a failure here must not abort under set -e
+ renice -n $renice -p "$resync_pid" 1>/dev/null || :
+ break
+ fi
+ sleep 1
+ done
+ ;;
+ esac
+
+done
+
+exit 0
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 00000000..1e8b3149
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+6
diff --git a/debian/control b/debian/control
new file mode 100644
index 00000000..4a053617
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,32 @@
+Source: mdadm
+Section: admin
+Priority: optional
+Maintainer: Debian mdadm maintainers <pkg-mdadm-devel@lists.alioth.debian.org>
+Uploaders: Michael Tokarev <mjt@tls.msk.ru>
+Build-Depends: debhelper (>= 6.0.7~), po-debconf, groff-base
+Standards-Version: 3.9.5
+Vcs-Git: git://anonscm.debian.org/pkg-mdadm/mdadm.git
+Vcs-Browser: https://anonscm.debian.org/cgit/pkg-mdadm/mdadm.git
+Homepage: http://neil.brown.name/blog/mdadm
+
+Package: mdadm
+Architecture: any
+Depends: ${shlibs:Depends}, udev, ${misc:Depends}, lsb-base, debconf, initscripts
+Recommends: default-mta | mail-transport-agent, kmod | module-init-tools
+Description: tool to administer Linux MD arrays (software RAID)
+ The mdadm utility can be used to create, manage, and monitor MD
+ (multi-disk) arrays for software RAID or multipath I/O.
+ .
+ This package automatically configures mdadm to assemble arrays during the
+ system startup process. If not needed, this functionality can be disabled.
+
+Package: mdadm-udeb
+Section: debian-installer
+XC-Package-Type: udeb
+Architecture: any
+Depends: ${shlibs:Depends}
+Description: tool to administer Linux MD arrays (software RAID)
+ The mdadm utility can be used to create, manage, and monitor MD
+ (multi-disk) arrays for software RAID or multipath I/O.
+ .
+ This is a minimal package used by the debian-installer.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 00000000..f6eaec91
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,21 @@
+This package was debianized by Mario Jou/3en <joussen@debian.org> on
+Wed, 26 Dec 2001 17:07:09 +0100.
+Martin F. Krafft <madduck@debian.org> took over on
+Tue, 16 May 2006 13:21:06 -0500
+
+The mdadm source was downloaded from
+ http://www.kernel.org/pub/linux/utils/raid/mdadm/
+
+Upstream Author: Neil Brown <neilb@suse.de>
+
+Copyright © 2001-2006 Neil Brown <neilb@suse.de>
+Packaging copyright © 2001-2005 Mario Jou/3en <joussen@debian.org>
+Packaging copyright © 2005-2008 Martin F. Krafft <madduck@debian.org>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+On Debian GNU/Linux systems, the complete text of the GNU General
+Public License can be found in '/usr/share/common-licenses/GPL'.
diff --git a/debian/initramfs/hook b/debian/initramfs/hook
new file mode 100644
index 00000000..85bad65e
--- /dev/null
+++ b/debian/initramfs/hook
@@ -0,0 +1,266 @@
+#!/bin/sh
+#
+# Copyright © 2006-2008 Martin F. Krafft <madduck@debian.org>,
+# 2012 Michael Tokarev <mjt@tls.msk.ru>
+# based on the scripts in the initramfs-tools package.
+# released under the terms of the Artistic Licence.
+#
+set -eu
+
+PREREQ= # this hook declares no prerequisites
+prereqs() # print the prerequisite list on stdout
+{
+ echo "$PREREQ"
+}
+
+case "${1:-}" in
+ prereqs) # called with the single argument prereqs: print the list and stop
+ prereqs
+ exit 0
+ ;;
+esac
+
+is_true() # exit status 0 when $1 is yes/y/1/true/t (first letter in either case), 1 otherwise
+{
+ case "${1:-}" in
+ 1|[Yy]|[Yy]es|[Tt]|[Tt]rue) return 0;;
+ esac
+ return 1
+}
+
+write() # $1: severity letter, remaining arguments: message; prints "<letter>: mdadm: <message>" on stderr
+{
+ local tag; tag=$1; shift
+ echo "$tag: mdadm: $*" >&2
+}
+
+info() # print an I-level message, but only when $VERBOSE is true; always returns 0
+{
+ if is_true ${VERBOSE:-false}; then write I "$@" || :; fi
+}
+
+warn() # print a W-level message via write
+{
+ write W "$@"
+}
+
+err() # print an E-level message via write; does not exit by itself
+{
+ write E "$@"
+}
+
+MDADM=/sbin/mdadm
+MDMON=/sbin/mdmon
+[ -x "$MDADM" ] || exit 0 # nothing to do when mdadm is not installed
+
+[ -r /usr/share/initramfs-tools/hook-functions ] || exit 0 # likewise without the initramfs-tools helpers
+. /usr/share/initramfs-tools/hook-functions
+
+# copy the binaries (mdadm and mdmon) as early as possible
+copy_exec $MDADM /sbin
+copy_exec $MDMON /sbin
+
+# request all RAID personality modules for the initramfs, just for safety.
+# both raid456 and raid5+raid6 are listed because, per the original note,
+# nothing happens for a module that cannot be found.
+modules="linear multipath raid0 raid1 raid456 raid5 raid6 raid10"
+for mod in $modules; do manual_add_modules $mod; done
+
+# locate the configuration: prefer $CONFIG, fall back to $ALTCONFIG if only that exists
+CONFIG=/etc/mdadm/mdadm.conf
+ALTCONFIG=/etc/mdadm.conf
+[ ! -f $CONFIG ] && [ -f $ALTCONFIG ] && CONFIG=$ALTCONFIG || :
+
+DEBIANCONFIG=/etc/default/mdadm
+INITRDSTART=all # default when $DEBIANCONFIG does not set it
+[ -s $DEBIANCONFIG ] && . $DEBIANCONFIG
+[ -z "$INITRDSTART" ] && INITRDSTART=none # an explicitly empty setting means none
+
+DESTMDADMCONF=$DESTDIR/etc/mdadm/mdadm.conf # filtered mdadm.conf inside the initramfs tree
+DESTCONFIG=$DESTDIR/conf/mdadm # shell fragment receiving MD_HOMEHOST and MD_DEVS
+
+if [ -f $CONFIG ]; then # pick up the value of any HOMEHOST line in the configuration
+ homehost="$(sed -ne 's,^[[:space:]]*HOMEHOST[[:space:]]*,,p' $CONFIG)"
+fi
+if [ -z "${homehost:-}" ] || [ "${homehost:-}" = '<system>' ]; then # unset or <system>: record the build host name
+ echo "MD_HOMEHOST='$(hostname)'" > $DESTCONFIG
+fi
+
+install_config() # $1: source configuration file, $2: destination path for the filtered copy
+{
+ # make sure the destination directory exists
+ mkdir -p ${2%/*}
+ # without a source file there is nothing to filter; the directory is still created
+ [ -f "$1" ] || return 0
+ # keep only ARRAY/DEVICE/HOMEHOST lines, with continuation lines folded into one
+ sed -e :a -re '$!N;s/\n[[:space:]]+/ /;ta' -ne '/^(ARRAY|DEVICE|HOMEHOST)/P;D' $1 > $2
+}
+
+if [ ! -f $CONFIG ]; then
+ # there is no configuration file, so ask mkconf to generate $CONFIG
+
+ if /usr/share/mdadm/mkconf generate $CONFIG; then
+ # generation succeeded: install the filtered copy
+ install_config $CONFIG $DESTMDADMCONF
+ info "auto-generated the mdadm.conf configuration file."
+ else
+ # generation failed: nothing is installed here, only a warning is printed
+ warn "failed to auto-generate the mdadm.conf file."
+ fi
+
+else
+
+ if grep -q '^ARRAY' $CONFIG; then
+
+ # this is the ideal case: the file exists and defines at least one ARRAY
+ install_config $CONFIG $DESTMDADMCONF
+ info "using configuration file: $CONFIG"
+
+ else
+
+ # the file defines no ARRAYs, so install mkconf output via a temporary file instead.
+
+ warn "$CONFIG defines no arrays."
+
+ mkdir --parents ${DESTMDADMCONF%/*}
+ tmpfile="${DESTMDADMCONF}.tmp"
+ if /usr/share/mdadm/mkconf > $tmpfile; then
+ # all is well, we now have a temporary configuration file
+ info "auto-generated temporary mdadm.conf configuration file."
+ install_config $tmpfile $DESTMDADMCONF
+ else
+ # mkconf failed, so no usable temporary file was generated.
+ # NOTE(review): the grep below repeats the test that already failed in the
+ # enclosing if, so this fallback looks unreachable -- confirm.
+ warn "failed to auto-generate temporary mdadm.conf file."
+ if grep -q '^ARRAY' $CONFIG; then
+ warn "using the unchecked file and hoping for the best..."
+ install_config $CONFIG $DESTMDADMCONF
+ fi
+ fi
+ rm -f $tmpfile
+
+ fi
+
+fi
+
+# if at this point, $DESTMDADMCONF does not exist or it does not contain any
+# ARRAY statements, stop here and leave array assembly to the initramfs.
+if [ ! -f $DESTMDADMCONF ]; then
+ warn "no configuration file available."
+ info "letting initramfs assemble auto-detected arrays."
+ exit 0
+elif ! grep -q '^ARRAY' $DESTMDADMCONF; then
+ warn "no arrays defined in configuration file."
+ info "letting initramfs assemble auto-detected arrays."
+ exit 0
+else
+ # build the list of array device names, one per line, from the ARRAY lines
+ devices="$(
+ dev=
+ while read line; do
+ case "$line" in
+ (ARRAY*) :;;
+ (*) continue;;
+ esac
+ for atom in $line; do
+ case "$atom" in
+ (/dev*) dev=$atom;;
+ esac
+ done
+
+ # /dev/mdX and /dev/md/X are the same, really; compute the other spelling as alt
+ case "$dev" in
+ "") continue ;;
+ (/dev/md/*) alt=/dev/md${dev##*/};;
+ (/dev/md*) alt=/dev/md/${dev#/dev/md};;
+ (*)
+ err "unknown device encountered: $dev"
+ warn_emergency # NOTE-review: no function of this name is defined in this script; confirm hook-functions provides it
+ exit 0 # leaves only this command substitution; lines after this one are not listed
+ ;;
+ esac
+ if [ ! -b "$dev" ] && [ -b "$alt" ]; then
+ dev="$alt"
+ fi
+
+ echo "$dev"
+ done < $DESTMDADMCONF)" || exit $?
+fi
+
+if [ "$INITRDSTART" != none ] && [ -n "$devices" ]; then
+
+ devs= # configured devices that INITRDSTART selects, space-separated
+ for dev in $devices; do
+ case "$INITRDSTART " in
+ "all "|*${dev}[[:space:]]*) # the subject carries a trailing space, so a bare all could never match
+ case "$devs " in # uniqueness: add each device only once
+ (*${dev}\ *) :;;
+ (*) devs="${devs:+$devs }$dev" ;;
+ esac
+ ;;
+ *) :;;
+ esac
+ done
+
+ # make sure the configuration file knows about all running devices
+ $MDADM --detail --scan | while read array device params; do
+ uuid=${params#*UUID=}; uuid=${uuid%% *}
+ if ! grep -qi "uuid=$uuid" $DESTMDADMCONF; then
+ warn "the array $device with UUID $uuid"
+ warn "is currently active, but it is not listed in mdadm.conf. if"
+ warn "it is needed for boot, then YOUR SYSTEM IS NOW UNBOOTABLE!"
+ warn "please inspect the output of /usr/share/mdadm/mkconf, compare"
+ warn "it to $CONFIG, and make the necessary changes."
+ fi
+ done
+
+ for i in $INITRDSTART; do # every explicitly requested array must be among the selected devices
+ case "$INITRDSTART" in all) break;; *) :;; esac
+ case " $devs " in # whole-word comparison, so /dev/md1 is not found inside /dev/md10
+ *" $i "*) continue;;
+ *) :;;
+ esac
+
+ warn "I am supposed to start $i from the initial ramdisk,"
+ warn "yet I cannot find the array in the configuration file."
+ warn "I am thus reverting to starting all arrays."
+ INITRDSTART=all
+ break
+ done
+
+ if [ "$INITRDSTART" = all ]; then
+ echo "MD_DEVS=all" >> $DESTCONFIG
+ else
+ echo "MD_DEVS='$devs'" >> $DESTCONFIG
+ fi
+
+ if [ "$INITRDSTART" = all ]; then
+ info "will start all available MD arrays from the initial ramdisk."
+ else
+ for i in $devs; do
+ info "will start MD array $i from the initial ramdisk."
+ done
+ fi
+
+ # Copy the md udev rules from whichever rules directory has them
+ for rules_file in 63-md-raid-arrays.rules 64-md-raid-assembly.rules
+ do
+ for rules_folder in /lib/udev/rules.d /etc/udev/rules.d; do
+ if [ -f $rules_folder/$rules_file ]; then
+ mkdir -p $DESTDIR$rules_folder
+ cp $rules_folder/$rules_file $DESTDIR$rules_folder/$rules_file
+ fi
+ done
+ done
+
+else
+ echo "MD_DEVS=none" >> $DESTCONFIG
+ info "no MD arrays will be started from the initial ramdisk."
+fi
+
+# only output this on Debian systems
+[ -s /etc/default/mdadm ] && \
+ info 'use `dpkg-reconfigure --priority=low mdadm` to change this.'
+
+exit 0
diff --git a/debian/initramfs/script.local-top b/debian/initramfs/script.local-top
new file mode 100644
index 00000000..9450bb72
--- /dev/null
+++ b/debian/initramfs/script.local-top
@@ -0,0 +1,101 @@
+#!/bin/sh
+#
+# Copyright © 2006-2008 Martin F. Krafft <madduck@debian.org>,
+# 2012 Michael Tokarev <mjt@tls.msk.ru>
+# based on the scripts in the initramfs-tools package.
+# released under the terms of the Artistic Licence.
+#
+set -eu
+
+case ${1:-} in
+ prereqs) echo "multipath"; exit 0;;
+esac
+
+. /scripts/functions
+
+maybe_break pre-mdadm
+
+if [ -e /scripts/local-top/md ]; then
+ log_warning_msg "old md initialisation script found, getting out of its way..."
+ exit 0
+fi
+
+MDADM=/sbin/mdadm
+[ -x "$MDADM" ] || exit 0
+
+verbose()
+{
+ case "$quiet" in y*|Y*|1|t*|T*)
+ return 1;;
+ *)
+ return 0;;
+ esac
+}
+
+MD_DEVS=all
+[ -s /conf/mdadm ] && . /conf/mdadm
+
+if [ "$MD_DEVS" = none ]; then
+ verbose &&
+ log_warning_msg "INITRDSTART set to \"none\" in /etc/default/mdadm, not assembling raid arrays"
+ exit 0
+fi
+
+if [ ! -f /proc/mdstat ] && ! modprobe -q md_mod; then
+ verbose && log_warning_msg "failed to load module md_mod."
+fi
+if [ ! -f /proc/mdstat ]; then
+ verbose && log_warning_msg "cannot initialise MD subsystem (/proc/mdstat missing)"
+ exit 0
+fi
+
+# handle /dev/md/X nodes
+mkdir -p /dev/md
+
+CONFIG=/etc/mdadm/mdadm.conf
+# in case the hook failed to install a configuration file, this is our last
+# attempt... the "emergency procedure"... <drumroll>
+if [ ! -e $CONFIG ]; then
+ log_warning_msg "missing mdadm.conf file, trying to create one..."
+ mkdir -p ${CONFIG%/*}
+ echo DEVICE partitions > $CONFIG
+ $MDADM --examine --scan >> $CONFIG
+ if [ -s $CONFIG ]; then
+ verbose && log_success_msg "mdadm.conf created."
+ else
+ verbose && log_failure_msg "could not create mdadm.conf, the boot will likely fail."
+ fi
+ MD_DEVS=all
+fi
+
+if [ "$MD_DEVS" = all ]; then
+
+ verbose && log_begin_msg "Assembling all MD arrays"
+ extra_args=''
+ [ -n "${MD_HOMEHOST:-}" ] && extra_args="--homehost=$MD_HOMEHOST"
+ if $MDADM --assemble --scan --run --auto=yes${extra_args:+ $extra_args}; then
+ verbose && log_success_msg "assembled all arrays."
+ else
+ log_failure_msg "failed to assemble all arrays."
+ fi
+ verbose && log_end_msg
+
+else
+ for dev in $MD_DEVS; do
+
+ verbose && log_begin_msg "Assembling MD array $dev"
+ if $MDADM --assemble --scan --run --auto=yes $dev; then
+ verbose && log_success_msg "started $dev"
+ else
+ log_failure_msg "failed to start $dev"
+ fi
+ verbose && log_end_msg
+
+ done
+fi
+
+wait_for_udev 10
+
+maybe_break post-mdadm
+
+exit 0
diff --git a/debian/mdadm-raid b/debian/mdadm-raid
new file mode 100644
index 00000000..6d4d6a99
--- /dev/null
+++ b/debian/mdadm-raid
@@ -0,0 +1,256 @@
+#!/bin/sh
+#
+# Start all arrays specified in the configuration file.
+#
+# Copyright © 2001-2005 Mario Jou/3en <joussen@debian.org>
+# Copyright © 2005-2008 Martin F. Krafft <madduck@debian.org>
+# Distributable under the terms of the GNU GPL version 2.
+#
+### BEGIN INIT INFO
+# Provides: mdadm-raid
+# Required-Start: mountkernfs hostname
+# Should-Start: udev multipath-tools-boot
+# X-Start-Before: checkfs mountall
+# Required-Stop: mountkernfs
+# Should-Stop: udev
+# X-Stop-After: umountfs
+# Default-Start: S
+# Default-Stop: 0 6
+# Short-Description: MD array assembly
+# Description: This script assembles a system's MD arrays, according to
+# the settings in /etc/mdadm/mdadm.conf and the preferences
+# in /etc/default/mdadm.
+### END INIT INFO
+#
+set -eu
+
+MDADM=/sbin/mdadm
+CONFIG=/etc/mdadm/mdadm.conf
+ALTCONFIG=/etc/mdadm.conf
+DEBIANCONFIG=/etc/default/mdadm
+
+test -x "$MDADM" || exit 0
+
+STATEDIR=/run/mdadm
+test -f $DEBIANCONFIG && . $DEBIANCONFIG
+
+. /lib/lsb/init-functions
+
+short_dev()
+{
+ local dev; dev=${1##*/}
+ case "$dev" in
+ md*|md_*|mdp*|mdp_*) echo "$dev";;
+ d*) echo "md_${dev}";;
+ *) echo "md${dev}";;
+ esac
+}
+
+log()
+{
+ case "$1" in
+ [[:digit:]]*) success=$1; shift;;
+ *) :;;
+ esac
+ log_action_begin_msg "$1"; shift
+ log_action_end_msg ${success:-0} "$*"
+}
+
+log_dev()
+{
+ success=${1:-}; shift
+ dev=${1:-}; shift
+ log $success "${PREFIX:-} $(short_dev ${dev:-})" "$*"
+}
+
+log_notice()
+{
+ log 0 "${PREFIX:-}s" "$*"
+}
+
+log_problem()
+{
+ log 1 "${PREFIX:-}s" "$*"
+}
+
+is_true()
+{
+ case "${1:-}" in
+ [Yy]es|[Yy]|1|[Tt]rue|[Tt]) return 0;;
+ *) return 1;;
+ esac
+}
+
+case "${1:-}" in
+
+ start)
+ PREFIX="Assembling MD array"
+
+ if [ ! -f /proc/mdstat ] && [ -x "$(command -v modprobe)" ] ; then
+ modprobe -q md 2>/dev/null || :
+ fi
+ if [ ! -f /proc/mdstat ]; then
+ log_problem "failed to load MD subsystem"
+ exit 0
+ fi
+
+ if [ ! -f $CONFIG -a ! -f $ALTCONFIG ]; then
+ log_problem "no $CONFIG file"
+ exit 0
+ fi
+
+ # handle devfs-style names and version-1 devices
+ # fail gracefully in case we're on a read-only filesystem, in which
+ # case it's safe to assume that the admin knows what s/he's doing.
+ # See (#382876).
+ mkdir --parent /dev/md || :
+
+ # ugly hack because shell sucks
+ IFSOLD=${IFS:-}
+ IFS='
+'
+ for line in $($MDADM --assemble --scan --auto=yes --symlink=no 2>&1); do
+ IFS=$IFSOLD
+ set -- $line
+ shift
+
+ case "$@" in
+
+ 'No arrays found in config file'*)
+ # no point in carrying on.
+ shift
+ log_notice "no $*"
+ exit 0
+ ;;
+
+ 'Unknown keyword'*)
+ # warn only; log via syslog if logger exists, else console, else stderr
+ if command -v logger >/dev/null 2>&1; then
+ logger -t mdadm -p syslog.warning -- "$*"
+ elif [ -w /dev/console ]; then
+ echo "mdadm: $*" > /dev/console
+ else
+ echo "mdadm: $*" >&2
+ fi
+ ;;
+
+ *' is already active.')
+ log_dev 0 $1 "already running"
+ ;;
+
+ *'has been started with '[[:digit:]]*' drive'*' (out of '[[:digit:]]*') and '[[:digit:]]*' spare'*'.')
+ log_dev 0 $1 "initialising [$6/${10%).}]"
+ ;;
+
+ *'has been started with '[[:digit:]]*' drive'*' (out of '[[:digit:]]*').')
+ log_dev 0 $1 "degraded [$6/${10%).}]"
+ ;;
+
+ *'has been started with '[[:digit:]]*' drive'*'.')
+ log_dev 0 $1 "started [$6/$6]"
+ ;;
+
+ *'assembled from '[[:digit:]]*' drive'*' - not enough to start the array.')
+ log_dev 1 $1 "not enough devices"
+ ;;
+
+ 'no devices found for '*)
+ log_dev 1 $5 "no devices found"
+ ;;
+
+ 'failed to RUN_ARRAY '*': Input/output error')
+ log_dev 1 ${4%:} "RUN_ARRAY input/output error"
+ ;;
+
+ *) :;;
+ esac
+ done
+ ret=$?
+
+ log_action_begin_msg "Generating udev events for MD arrays"
+ [ -d $STATEDIR ] || mkdir -p $STATEDIR
+ for uevent in /sys/block/md*/uevent; do
+ test -e $uevent || break
+ sentinel=${uevent#/sys/block/}; sentinel=${sentinel%/uevent}-uevent
+ test -e $STATEDIR/$sentinel && continue
+ test -w $uevent || continue
+ echo add > $uevent
+ test -d $STATEDIR && : > $STATEDIR/$sentinel
+ done
+ log_action_end_msg 0
+
+ exit $ret
+ ;;
+
+ stop)
+ PREFIX="Stopping MD array"
+
+ if [ ! -f /proc/mdstat ]; then
+ log_problem "no MD subsystem loaded"
+ exit 0
+ fi
+
+ # ugly hack because shell sucks
+ IFSOLD=${IFS:-}
+ IFS='
+'
+ set +e
+ for line in $($MDADM --stop --scan 2>&1); do
+ set -e
+ IFS=$IFSOLD
+ set -- $line
+ shift
+ case "$@" in
+
+ 'Unknown keyword'*)
+ # warn only; log via syslog if logger exists, else console, else stderr
+ if command -v logger >/dev/null 2>&1; then
+ logger -t mdadm -p syslog.warning -- "$*"
+ elif [ -w /dev/console ]; then
+ echo "mdadm: $*" > /dev/console
+ else
+ echo "mdadm: $*" >&2
+ fi
+ ;;
+
+ 'stopped '*)
+ log_dev 0 $2 stopped
+ ;;
+
+ 'fail to stop array '*': Device or resource busy')
+ log_dev 1 ${5%:} busy
+ ;;
+
+ *) :;;
+ esac
+ done || exit $?
+
+ rm -f $STATEDIR/md*-uevent
+ ;;
+
+ restart|force-reload)
+ ${0:-} stop
+ ${0:-} start
+ ;;
+
+ reload)
+ PREFIX="Reloading MD array"
+ log_notice "never anything to do"
+ ;;
+
+ status)
+ if [ ! -f /proc/mdstat ]; then
+ log_problem "no MD subsystem loaded"
+ exit 1
+ else
+ cat /proc/mdstat
+ fi
+ ;;
+
+ *)
+ echo "Usage: ${0:-} {start|stop|restart|force-reload|reload|status}" >&2
+ exit 1;;
+
+esac
+
+exit 0
diff --git a/debian/mdadm-waitidle b/debian/mdadm-waitidle
new file mode 100644
index 00000000..920272c7
--- /dev/null
+++ b/debian/mdadm-waitidle
@@ -0,0 +1,56 @@
+#!/bin/sh
+# This script is not used when systemd is running
+### BEGIN INIT INFO
+# Provides: mdadm-waitidle
+# Required-Start:
+# Required-Stop:
+# Should-Stop: halt reboot kexec
+# X-Stop-After: umountroot
+# Default-Start:
+# Default-Stop: 0 6
+# Short-Description: Wait for MD arrays to become idle
+# Description: Waits until all MD arrays are in idle and synced state
+# before halt/reboot.
+### END INIT INFO
+#
+set -eu
+
+MDADM=/sbin/mdadm
+test -x "$MDADM" || exit 0
+test -f /proc/mdstat || exit 0
+
+. /lib/lsb/init-functions
+
+case "${1:-}" in
+
+ start|restart|force-reload)
+ # nothing, the only reason the script is here is to stop arrays
+ ;;
+
+ stop)
+ sync
+ wait=
+ for md in /sys/block/md*/md ; do
+ [ -d "$md" ] || continue
+ [ "$wait" ] || log_action_begin_msg "Waiting for MD arrays to become idle"
+ wait=y
+ [ -w $md/sync_action ] && echo idle > $md/sync_action
+ done
+ if [ "$wait" ]; then
+ # mdadm --wait-clean has a short internal timeout
+ if $MDADM --wait-clean --scan; then
+ log_action_end_msg 0
+ else
+ log_action_end_msg 1
+ sleep 1
+ fi
+ fi
+ ;;
+
+ *)
+ echo "Usage: ${0:-} stop" >&2
+ exit 1;;
+
+esac
+
+exit 0
diff --git a/debian/mdadm.config b/debian/mdadm.config
new file mode 100644
index 00000000..97352662
--- /dev/null
+++ b/debian/mdadm.config
@@ -0,0 +1,174 @@
+#!/bin/sh
+# Copyright © 2001-2004 Mario Jou/3en <joussen@debian.org>
+# Copyright © martin f. krafft <madduck@debian.org>
+# Distributable under the terms of the GNU GPL version 2.
+#
+. /usr/share/debconf/confmodule
+# see #369953 for ordering
+set -eu
+
+CONFIG=/etc/mdadm/mdadm.conf
+ALTCONFIG=/etc/mdadm.conf
+[ ! -f $CONFIG ] && [ -f $ALTCONFIG ] && CONFIG=$ALTCONFIG
+
+DEBIANCONFIG=/etc/default/mdadm
+
+db_get mdadm/initrdstart || :
+INITRDSTART="$RET"
+
+if [ -s $DEBIANCONFIG ] ; then
+ AUTOCHECK=true
+ START_DAEMON=true
+ MAILADDR=root
+
+ [ -f $DEBIANCONFIG ] && . $DEBIANCONFIG
+ if [ -f $CONFIG ]; then
+ MAILADDR=$(sed -rne 's/^MAILADDR[[:space:]]*([^[:space:]]+).*/\1/p' $CONFIG)
+ fi
+
+ [ -n "$AUTOCHECK" ] && db_set mdadm/autocheck "$AUTOCHECK"
+ [ -n "$START_DAEMON" ] && db_set mdadm/start_daemon "$START_DAEMON"
+ [ -n "$MAILADDR" ] && db_set mdadm/mail_to "$MAILADDR"
+fi
+
+chrooted() {
+ test "$(stat -c "%d/%i" /)" != "$(stat -Lc "%d/%i" /proc/1/root 2>/dev/null)"
+}
+
+get_root_raiddev() {
+ local rootraiddev;
+ rootraiddev="$(df --portability / | sed -rne 's,^(/dev/[^[:space:]]+).*,\1,p')"
+ if ! mdadm --detail $rootraiddev >/dev/null 2>&1; then
+ return 1
+ fi
+ echo ${rootraiddev%p[0-9]*}
+ return 0
+}
+
+get_initrdstart() {
+ db_fget mdadm/initrdstart seen
+ if chrooted || [ "$RET $INITRDSTART" = "false all" ]; then
+ echo all
+ return 1
+ fi
+
+ get_root_raiddev || echo all
+}
+
+[ -z "$INITRDSTART" ] && INITRDSTART="$(get_initrdstart)" || :
+[ -n "$INITRDSTART" ] && db_set mdadm/initrdstart "$INITRDSTART"
+
+INITRDSTART_PRIORITY=high
+if chrooted; then
+ INITRDSTART_PRIORITY=medium
+fi
+
+db_capb escape
+
+msg=intro; suffix=''; error=0
+while true; do
+ db_metaget mdadm/initrdstart_msg_${msg} extended_description || :
+ db_subst mdadm/initrdstart msg "$(echo -n "${RET}${suffix}" | debconf-escape -e)"
+ ret=0; db_input $INITRDSTART_PRIORITY mdadm/initrdstart || ret=$?
+ db_go
+
+ if [ $error -ne 0 ] && [ $ret -eq 30 ]; then
+ # there was an error in a previous run of this loop, but the above question
+ # was not asked, so we better exit the endless loop...
+ echo "W: mdadm: unable to determine MD arrays needed for boot." >&2
+ echo "W: mdadm: falling back to starting all of them..." >&2
+ INITRDSTART=all
+ break
+ fi
+
+ db_get mdadm/initrdstart
+ INITRDSTART="$(echo $RET | tr , ' ')"
+
+ case "$INITRDSTART" in
+ ''|none) INITRDSTART=none; break;;
+ all) break;;
+
+ *)
+ arrays=''
+ for i in $INITRDSTART; do
+
+ # standardise by prefixing /dev/
+ i="/dev/${i#/dev/}"
+
+ # remove partition from partitionable array
+ i="${i%p[0-9]*}"
+
+ if [ ! -e "$i" ]; then
+ error=1; msg=errexist; suffix=": $i"; break
+ fi
+
+ if [ ! -b "$i" ]; then
+ t="$(readlink -nf "$i")"
+ if [ ! -b "$t" ]; then
+ suffix=": $t"
+ else
+ suffix=": $i"
+ fi
+ error=1; msg=errblock; break
+ fi
+
+ if ! mdadm --detail "$i" >/dev/null 2>&1; then
+ error=1; msg=errmd; suffix=": $i"; break
+ fi
+
+ if [ -f $CONFIG ]; then
+ if ! egrep -q "^(ARRAY)?[[:space:]]+$i([[:space:]]+|$)" $CONFIG; then
+ t="$(readlink -nf "$i")"
+ if [ -L "$i" ] \
+ && egrep -q "^(ARRAY)?[[:space:]]+$t([[:space:]]|$)+" $CONFIG; then
+ i="$t"
+ else
+ db_set mdadm/initrdstart_notinconf false
+ db_fset mdadm/initrdstart_notinconf seen false
+ db_subst mdadm/initrdstart_notinconf array "$i"
+ db_subst mdadm/initrdstart_notinconf config "$CONFIG"
+ db_input high mdadm/initrdstart_notinconf || :
+ db_go
+ db_get mdadm/initrdstart_notinconf
+ if [ "$RET" = true ]; then
+ error=0
+ else
+ db_fset mdadm/initrdstart seen false
+ db_set mdadm/initrdstart all
+ error=1; msg=errconf; suffix=": $i"; break
+ fi
+ fi
+ fi
+ fi
+
+ # uniquely add device name
+ echo $arrays | egrep -q "\b${i}\b" || arrays="${arrays:+$arrays }$i"
+ done
+
+ if [ $error -eq 0 ]; then
+ INITRDSTART="$arrays"
+ # exit the while true loop
+ break
+ fi
+ ;;
+ esac
+done
+
+db_set mdadm/initrdstart "$INITRDSTART"
+
+if [ "$INITRDSTART" != all ]; then
+ db_input high mdadm/autostart || :
+ db_go
+fi
+
+db_input medium mdadm/autocheck || :
+db_go
+
+db_input medium mdadm/start_daemon || :
+db_go
+
+db_get mdadm/start_daemon || :
+if [ "$RET" = true ]; then
+ db_input medium mdadm/mail_to || :
+ db_go
+fi
diff --git a/debian/mdadm.cron.d b/debian/mdadm.cron.d
new file mode 100644
index 00000000..309d180e
--- /dev/null
+++ b/debian/mdadm.cron.d
@@ -0,0 +1,12 @@
+#
+# cron.d/mdadm -- schedules periodic redundancy checks of MD devices
+#
+# Copyright © martin f. krafft <madduck@madduck.net>
+# distributed under the terms of the Artistic Licence 2.0
+#
+
+# By default, run at 00:57 on every Sunday, but do nothing unless the day of
+# the month is less than or equal to 7. Thus, only run on the first Sunday of
+# each month. crontab(5) sucks, unfortunately, in this regard; therefore this
+# hack (see #380425).
+57 0 * * 0 root if [ -x /usr/share/mdadm/checkarray ] && [ $(date +\%d) -le 7 ]; then /usr/share/mdadm/checkarray --cron --all --idle --quiet; fi
diff --git a/debian/mdadm.cron.daily b/debian/mdadm.cron.daily
new file mode 100644
index 00000000..d5ac1ae0
--- /dev/null
+++ b/debian/mdadm.cron.daily
@@ -0,0 +1,18 @@
+#!/bin/sh
+#
+# cron.daily/mdadm -- daily check that MD devices are functional
+#
+# Copyright © 2008 Paul Slootman <paul@debian.org>
+# distributed under the terms of the Artistic Licence 2.0
+
+# As recommended by the manpage, run
+# mdadm --monitor --scan --oneshot
+# every day to ensure that any degraded MD devices don't go unnoticed.
+# Email will go to the address specified in /etc/mdadm/mdadm.conf .
+#
+set -eu
+
+MDADM=/sbin/mdadm
+[ -x $MDADM ] || exit 0 # package may be removed but not purged
+
+exec $MDADM --monitor --scan --oneshot
diff --git a/debian/mdadm.doc-base.faq b/debian/mdadm.doc-base.faq
new file mode 100644
index 00000000..3fff4504
--- /dev/null
+++ b/debian/mdadm.doc-base.faq
@@ -0,0 +1,9 @@
+Document: mdadm-faq
+Title: mdadm Debian FAQ
+Author: martin f. krafft
+Abstract: The document answers frequently asked questions about Debian's mdadm
+Section: System/Administration
+
+Format: text
+Index: /usr/share/doc/mdadm/FAQ.gz
+Files: /usr/share/doc/mdadm/FAQ.gz
diff --git a/debian/mdadm.doc-base.recipes b/debian/mdadm.doc-base.recipes
new file mode 100644
index 00000000..d1069682
--- /dev/null
+++ b/debian/mdadm.doc-base.recipes
@@ -0,0 +1,9 @@
+Document: mdadm-readme-recipes
+Title: mdadm Debian recipes
+Author: David Pashley
+Abstract: The document contains some common recipes for mdadm usage on Debian
+Section: System/Administration
+
+Format: text
+Index: /usr/share/doc/mdadm/README.recipes.gz
+Files: /usr/share/doc/mdadm/README.recipes.gz
diff --git a/debian/mdadm.docs b/debian/mdadm.docs
new file mode 100644
index 00000000..830665f4
--- /dev/null
+++ b/debian/mdadm.docs
@@ -0,0 +1,7 @@
+TODO
+debian/README.recipes
+debian/README.checkarray
+debian/FAQ
+ANNOUNCE-*
+external-reshape-design.txt
+mdmon-design.txt
diff --git a/debian/mdadm.init b/debian/mdadm.init
new file mode 100644
index 00000000..8b603796
--- /dev/null
+++ b/debian/mdadm.init
@@ -0,0 +1,94 @@
+#!/bin/sh
+#
+# Start the MD monitor daemon for all active MD arrays if desired.
+# This script is not used under systemd.
+#
+# Copyright © 2001-2005 Mario Jou/3en <joussen@debian.org>
+# Copyright © 2005-2009 Martin F. Krafft <madduck@debian.org>
+# Distributable under the terms of the GNU GPL version 2.
+#
+### BEGIN INIT INFO
+# Provides: mdadm
+# Required-Start: $local_fs $syslog mdadm-raid
+# Required-Stop: $local_fs $syslog sendsigs mdadm-raid
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: MD monitoring daemon
+# Description: mdadm provides a monitor mode, in which it will scan for
+# problems with the MD devices. If a problem is found, the
+# administrator is alerted via email, or a custom script is
+# run.
+### END INIT INFO
+#
+set -eu
+
+MDADM=/sbin/mdadm
+MDMON=/sbin/mdmon
+RUNDIR=/run/mdadm
+PIDFILE=$RUNDIR/monitor.pid
+DEBIANCONFIG=/etc/default/mdadm
+
+test -x "$MDADM" || exit 0
+
+test -f /proc/mdstat || exit 0
+
+START_DAEMON=true
+test -f $DEBIANCONFIG && . $DEBIANCONFIG
+
+. /lib/lsb/init-functions
+
+is_true()
+{
+ case "${1:-}" in
+ [Yy]es|[Yy]|1|[Tt]|[Tt]rue) return 0;;
+ *) return 1;
+ esac
+}
+
+case "${1:-}" in
+ start)
+ if is_true $START_DAEMON; then
+ log_daemon_msg "Starting MD monitoring service" "mdadm --monitor"
+ mkdir -p $RUNDIR
+ set +e
+ start-stop-daemon -S -p $PIDFILE -x $MDADM -- \
+ --monitor --pid-file $PIDFILE --daemonise --scan ${DAEMON_OPTIONS:-}
+ log_end_msg $?
+ set -e
+ fi
+ if [ "$(echo $RUNDIR/md[0-9]*.pid)" != "$RUNDIR/md[0-9]*.pid" ]; then
+ log_daemon_msg "Restarting MD external metadata monitor" "mdmon --takeover --all"
+ set +e
+ $MDMON --takeover --all
+ log_end_msg $?
+ set -e
+ fi
+ ;;
+ stop)
+ if [ -f $PIDFILE ] ; then
+ log_daemon_msg "Stopping MD monitoring service" "mdadm --monitor"
+ set +e
+ start-stop-daemon -K -p $PIDFILE -x $MDADM
+ ret=$?; rm -f $PIDFILE  # save the stop status first; rm would clobber $?
+ log_end_msg $ret
+ set -e
+ fi
+ for file in $RUNDIR/md[0-9]*.pid ; do
+ [ ! -f "$file" ] && continue
+ ln -sf $file /run/sendsigs.omit.d/mdmon-${file##*/}
+ done
+ ;;
+ status)
+ status_of_proc -p $PIDFILE "$MDADM" "mdadm" && exit 0 || exit $?
+ ;;
+ restart|reload|force-reload)
+ ${0:-} stop
+ ${0:-} start
+ ;;
+ *)
+ echo "Usage: ${0:-} {start|stop|status|restart|reload|force-reload}" >&2
+ exit 1
+ ;;
+esac
+
+exit 0
diff --git a/debian/mdadm.logcheck.ignore.server b/debian/mdadm.logcheck.ignore.server
new file mode 100644
index 00000000..051c4732
--- /dev/null
+++ b/debian/mdadm.logcheck.ignore.server
@@ -0,0 +1,23 @@
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: md driver [.[:digit:]]+ MAX_MD_DEVS=[[:digit:]]+, MD_SB_DISKS=[[:digit:]]+$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: bitmap version [.[:digit:]]+$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: md[[:digit:]]+ stopped\.$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: md[[:digit:]]+ still in use\.$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: cannot remove active disk [[:alnum:]]+ from md[[:digit:]]+ \.\.\. ?$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: raid([01456]|456|10) personality registered for level ([01456]|10)$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: (data-check|requested-resync|resync|reshape|recovery) of RAID array md[[:digit:]]+$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: resuming (data-check|requested-resync|resync|reshape|recovery) of md[[:digit:]]+ from checkpoint\.$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: md[[:digit:]]+: (data-check|requested-resync|resync|reshape|recovery) done\.$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: minimum _guaranteed_ ?speed: [[:digit:]]+ KB/sec/disk\.$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: using maximum available idle IO bandwidth \(but not more than [[:digit:]]+ KB/sec\) for (data-check|requested-resync|resync|reshape|recovery)\.$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: delaying (data-check|requested-resync|resync|reshape|recovery) of md[[:digit:]]+ until md[[:digit:]]+ has finished \(they share one or more physical units\)$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: using [[:digit:]]+k window, over a total of [[:digit:]]+( blocks|k)\.$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: (un)?bind<[^>]+>$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: export_rdev\([^)]+\)$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? raid[[:digit:]]+: raid set [[:alnum:]]+ active with [[:digit:]]+ out of [[:digit:]]+ mirrors$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? RAID([01456]|10) conf printout:$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])?[[:space:]]+---( [wrf]d:[[:digit:]]+){2,3}$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])?[[:space:]]+disk [[:digit:]]+,( wo:[[:digit:]]+,)? o:[[:digit:]]+, dev:[[:alnum:]]+$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ mdadm(\[[[:digit:]]+\])?: Rebuild((Start|Finish)ed|[[:digit:]]+) event detected on md device /dev/[-_./[:alnum:]]+$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ mdadm(\[[[:digit:]]+\])?: SpareActive event detected on md device /dev/[-_./[:alnum:]]+, component device /dev/[-_./[:alnum:]]+$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ mdadm(\[[[:digit:]]+\])?: (New|Degraded)Array event detected on md device /dev/[-_./[:alnum:]]+$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ mdadm(\[[[:digit:]]+\])?: DeviceDisappeared event detected on md device /dev/[-_./[:alnum:]]+$
diff --git a/debian/mdadm.logcheck.violations b/debian/mdadm.logcheck.violations
new file mode 100644
index 00000000..ea8cce72
--- /dev/null
+++ b/debian/mdadm.logcheck.violations
@@ -0,0 +1,3 @@
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? md: kicking non-fresh [[:alnum:]]+ from array!$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])? raid[[:digit:]]+: Disk failure on [[:alnum:]]+, disabling device\.$
+^\w{3} [ :0-9]{11} [._[:alnum:]-]+ kernel:( \[ *[[:digit:]]+\.[[:digit:]]+\])?[[:space:]]+Operation continuing on [[:digit:]]+ devices?$
diff --git a/debian/mdadm.modules b/debian/mdadm.modules
new file mode 100644
index 00000000..5ad12499
--- /dev/null
+++ b/debian/mdadm.modules
@@ -0,0 +1,8 @@
+# mdadm module configuration file
+# set start_ro=1 to make newly assembled arrays read-only initially,
+# to prevent metadata writes. This is needed in order to allow
+# resume-from-disk to work - new boot should not perform writes
+# because it will be done behind the back of the system being
+# resumed. See http://bugs.debian.org/415441 for details.
+
+options md_mod start_ro=1
diff --git a/debian/mdadm.postinst b/debian/mdadm.postinst
new file mode 100644
index 00000000..a603376d
--- /dev/null
+++ b/debian/mdadm.postinst
@@ -0,0 +1,112 @@
+#!/bin/sh
+# Copyright © 2001-2005 Mario Jou/3en <joussen@debian.org>
+# Copyright © 2005-2008 Martin F. Krafft <madduck@debian.org>
+# Distributable under the terms of the GNU GPL version 2.
+#
+set -e
+
+. /usr/share/debconf/confmodule
+
+case "${1:-}" in
+ configure|reconfigure)
+
+ if [ ! -f /proc/mdstat ] && [ -x "$(command -v modprobe 2>/dev/null)" ]; then
+ modprobe md >/dev/null 2>&1 || :
+ fi
+ if [ ! -f /proc/mdstat ]; then
+ echo 'W: mdadm: failed to load MD subsystem.' >&2
+ fi
+
+ DEBIANCONFIG=/etc/default/mdadm
+ CONFIG=/etc/mdadm/mdadm.conf
+ ALTCONFIG=/etc/mdadm.conf
+ MDADM=/sbin/mdadm
+
+ # load current settings, most of which will be overwritten.
+ [ -f $DEBIANCONFIG ] && . $DEBIANCONFIG
+
+ db_get mdadm/mail_to
+ MAILADDR="${RET:-root}"
+
+ [ ! -f $CONFIG ] && [ -f $ALTCONFIG ] && CONFIG=$ALTCONFIG
+ if [ ! -f $CONFIG ]; then
+ echo -n 'Generating mdadm.conf... ' >&2
+ # pass the MAILADDR variable into the script
+ MDADM_MAILADDR__="$MAILADDR"; export MDADM_MAILADDR__
+ if /usr/share/mdadm/mkconf generate $CONFIG 2>/dev/null; then
+ echo done. >&2
+ else
+ echo "done (failed to scan arrays; /proc probably not mounted)." >&2
+ fi
+ fi
+
+ if [ -w $CONFIG ] && [ -z "${MDADM_MAILADDR__:-}" ]; then
+ # if the configuration is writeable but has not been written just
+ # before, then edit it to reflect the MAILADDR preference
+ if grep -q '^MAILADDR' $CONFIG; then
+ sed -i -e "s/^MAILADDR.*/MAILADDR $MAILADDR/" $CONFIG
+ else
+ echo "MAILADDR $MAILADDR" >> $CONFIG
+ fi
+ fi
+ unset MDADM_MAILADDR__
+
+ db_get mdadm/initrdstart
+ INITRDSTART="${RET:-all}"
+ db_get mdadm/autocheck
+ AUTOCHECK="${RET:-true}"
+ db_get mdadm/start_daemon
+ START_DAEMON="${RET:-true}"
+ #db_get mdadm/daemon_options
+ [ -n "${DAEMON_OPTIONS:-}" ] || DAEMON_OPTIONS='--syslog'
+
+ cat <<_eof > $DEBIANCONFIG
+# mdadm Debian configuration
+#
+# You can run 'dpkg-reconfigure mdadm' to modify the values in this file, if
+# you want. You can also change the values here and changes will be preserved.
+# Do note that only the values are preserved; the rest of the file is
+# rewritten.
+#
+
+# INITRDSTART:
+# list of arrays (or 'all') to start automatically when the initial ramdisk
+# loads. This list *must* include the array holding your root filesystem. Use
+# 'none' to prevent any array from being started from the initial ramdisk.
+INITRDSTART='$INITRDSTART'
+
+# AUTOCHECK:
+# should mdadm run periodic redundancy checks over your arrays? See
+# /etc/cron.d/mdadm.
+AUTOCHECK=$AUTOCHECK
+
+# START_DAEMON:
+# should mdadm start the MD monitoring daemon during boot?
+START_DAEMON=$START_DAEMON
+
+# DAEMON_OPTIONS:
+# additional options to pass to the daemon.
+DAEMON_OPTIONS="$DAEMON_OPTIONS"
+
+# VERBOSE:
+# if this variable is set to true, mdadm will be a little more verbose e.g.
+# when creating the initramfs.
+VERBOSE=${VERBOSE:-false}
+_eof
+
+ db_stop
+
+ command -v update-initramfs >/dev/null 2>&1 && update-initramfs -u
+
+ if dpkg --compare-versions "$2" le 3.3.2-3; then
+ rm -f /var/lib/mdadm/CONF-UNCHECKED /var/lib/mdadm/mdadm.conf-generated
+ if [ -d /var/lib/mdadm ]; then
+ rmdir --ignore-fail-on-non-empty /var/lib/mdadm
+ fi
+ fi
+ ;;
+esac
+
+[ -d /run/systemd/system ] && systemctl --system daemon-reload >/dev/null || :
+
+#DEBHELPER#
diff --git a/debian/mdadm.postrm b/debian/mdadm.postrm
new file mode 100644
index 00000000..30309626
--- /dev/null
+++ b/debian/mdadm.postrm
@@ -0,0 +1,25 @@
+#! /bin/sh
+# Copyright © 2001,2002 Mario Jou/3en <joussen@debian.org>
+# Copyright © 2006-2008 Martin F. Krafft <madduck@debian.org>
+# Distributable under the terms of the GNU GPL version 2.
+#
+set -e
+
+case "${1:-}" in
+ remove)
+ if command -v update-initramfs >/dev/null 2>&1; then
+ echo "W: mdadm: I'll update the initramfs, but if you need MD to boot" >&2
+ echo "W: mdadm: with initramfs, your system may be left unbootable!" >&2
+ update-initramfs -u
+ fi
+ ;;
+
+ purge)
+ rm -f /etc/default/mdadm /etc/mdadm.conf /etc/mdadm/mdadm.conf
+ ;;
+
+esac
+
+[ -d /run/systemd/system ] && systemctl --system daemon-reload >/dev/null || :
+
+#DEBHELPER#
diff --git a/debian/mdadm.preinst b/debian/mdadm.preinst
new file mode 100644
index 00000000..b90ffb87
--- /dev/null
+++ b/debian/mdadm.preinst
@@ -0,0 +1,45 @@
+#!/bin/sh
+# Copyright © martin f. krafft <madduck@debian.org>
+# Distributed under the terms of the Artistic Licence 2.0
+#
+set -e
+
+# based on idea from http://www.dpkg.org/dpkg/ConffileHandling
+rm_conffile() {
+ local conffile md5sum package old_md5sum
+
+ conffile="$1"
+ if [ -e "$conffile" ]; then
+ md5sum=$(md5sum "$conffile" | cut -d' ' -f1)
+ package=$(dpkg -S "$conffile" | cut -d: -f1)
+ old_md5sum=$(dpkg -s $package | sed -rne "s,[[:space:]]+${conffile}[[:space:]]+,,p")
+ if [ "$md5sum" != "$old_md5sum" ]; then
+ echo "Obsolete conffile $conffile has been modified by you."
+ echo "Saving as ${conffile}.dpkg-bak ..."
+ mv -f "$conffile" "$conffile".dpkg-bak
+ else
+ echo "Removing obsolete conffile $conffile ..."
+ rm -f "$conffile"
+ fi
+ fi
+}
+
+case "$1" in
+
+ upgrade|install)
+
+ # migrate old configuration from *way back then*
+ DEBIANCONFIG=/etc/default/mdadm
+ OLDCONFIG=/etc/mdadm/debian.conf
+ if [ -s $OLDCONFIG ] && [ ! -f $DEBIANCONFIG ]; then
+ mv $OLDCONFIG $DEBIANCONFIG
+ fi
+
+ # Used incorrect name s/_/-/, keep all throughout until after jessie is released.
+ rm_conffile /etc/udev/rules.d/65-mdadm.vol_id.rules
+ ;;
+
+ *) :;;
+esac
+
+#DEBHELPER#
diff --git a/debian/mdadm.templates b/debian/mdadm.templates
new file mode 100644
index 00000000..ddf63c76
--- /dev/null
+++ b/debian/mdadm.templates
@@ -0,0 +1,99 @@
+# These templates have been reviewed by the debian-l10n-english
+# team
+#
+# If modifications/additions/rewording are needed, please ask
+# debian-l10n-english@lists.debian.org for advice.
+#
+# Even minor modifications require translation updates and such
+# changes should be coordinated with translators and reviewers.
+
+Template: mdadm/initrdstart
+Type: string
+Default: all
+#flag:translate!:2
+_Description: MD arrays needed for the root file system:
+ ${msg}
+ .
+ Please enter 'all', 'none', or a space-separated list of devices such as
+ 'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted).
+
+Template: mdadm/initrdstart_msg_intro
+Type: text
+_Description: for internal use - only the long description is needed.
+ If the system's root file system is located on an MD array (RAID), it needs to be
+ started early during the boot sequence. If it is located on
+ a logical volume (LVM), which is on MD, all constituent arrays need to be
+ started.
+ .
+ If you know exactly which arrays are needed to bring up the root file system,
+ and you want to postpone starting all other arrays to a later point in the
+ boot sequence, enter the arrays to start here. Alternatively, enter 'all' to
+ simply start all available arrays.
+ .
+ If you do not need or want to start any arrays for the root file system, leave
+ the answer blank (or enter 'none'). This may be the case if you are using
+ kernel autostart or do not need any arrays to boot.
+
+Template: mdadm/initrdstart_msg_errexist
+Type: text
+_Description:
+ An error occurred: device node does not exist
+
+Template: mdadm/initrdstart_msg_errblock
+Type: text
+_Description:
+ An error occurred: not a block device
+
+Template: mdadm/initrdstart_msg_errmd
+Type: text
+_Description:
+ An error occurred: not an MD array
+
+Template: mdadm/initrdstart_msg_errconf
+Type: text
+_Description:
+ An error occurred: array not listed in mdadm.conf file
+
+Template: mdadm/initrdstart_notinconf
+Type: boolean
+Default: false
+_Description: Start arrays not listed in mdadm.conf?
+ The specified array (${array}) is not listed in the configuration
+ file (${config}). Therefore, it cannot be started during boot, unless you
+ correct the configuration file and recreate the initial ramdisk.
+ .
+ This warning is only relevant if you need arrays to be started from the
+ initial ramdisk to be able to boot. If you use kernel autostarting, or do not
+ need any arrays to be started as early as the initial ramdisk is loaded, you
+ can simply continue. Alternatively, choose not to continue and enter 'none'
+ when prompted which arrays to start from the initial ramdisk.
+
+Template: mdadm/autocheck
+Type: boolean
+Default: true
+_Description: Should mdadm run monthly redundancy checks of the MD arrays?
+ If the kernel supports it (versions greater than 2.6.14), mdadm can periodically check the
+ redundancy of MD arrays (RAIDs). This may be a resource-intensive process,
+ depending on the local setup, but it could help prevent rare cases of data loss.
+ Note that this is a read-only check unless errors are found; if errors are
+ found, mdadm will try to correct them, which may result in write access to
+ the media.
+ .
+ The default, if turned on, is to check on the first Sunday of every
+ month at 00:57.
+
+Template: mdadm/start_daemon
+Type: boolean
+Default: true
+_Description: Do you want to start the MD monitoring daemon?
+ The MD (RAID) monitor daemon sends email notifications in response to
+ important MD events (such as a disk failure).
+ .
+ Enabling this option is recommended.
+
+Template: mdadm/mail_to
+Type: string
+Default: root
+_Description: Recipient for email notifications:
+ Please enter the email address of the user who should get the email
+ notifications for important MD events.
diff --git a/debian/mkconf b/debian/mkconf
new file mode 100644
index 00000000..a580f5b3
--- /dev/null
+++ b/debian/mkconf
@@ -0,0 +1,101 @@
+#!/bin/sh
+#
+# mkconf -- outputs valid mdadm.conf contents for the local system
+#
+# Copyright © martin f. krafft <madduck@madduck.net>
+# distributed under the terms of the Artistic Licence 2.0
+#
+set -eu
+
+ME="${0##*/}"
+MDADM=/sbin/mdadm
+DEBIANCONFIG=/etc/default/mdadm
+CONFIG=/etc/mdadm/mdadm.conf
+
+# initialise config variables in case the environment leaks
+MAILADDR= DEVICE= CREATE= HOMEHOST= PROGRAM=
+
+test -r $DEBIANCONFIG && . $DEBIANCONFIG
+
+if [ -n "${MDADM_MAILADDR__:-}" ]; then
+ # honour MAILADDR from the environment (from postinst)
+ MAILADDR="$MDADM_MAILADDR__"
+else
+ # preserve existing MAILADDR
+ MAILADDR="$(sed -ne 's/^MAILADDR //p' $CONFIG 2>/dev/null)" || :
+fi
+
+# save existing values as defaults
+if [ -r "$CONFIG" ]; then
+ DEVICE="$(sed -ne 's/^DEVICE //p' $CONFIG)"
+ CREATE="$(sed -ne 's/^CREATE //p' $CONFIG)"
+ HOMEHOST="$(sed -ne 's/^HOMEHOST //p' $CONFIG)"
+ PROGRAM="$(sed -ne 's/^PROGRAM //p' $CONFIG)"
+fi
+
+[ "${1:-}" = force-generate ] && rm -f $CONFIG
+case "${1:-}" in
+ generate|force-generate)
+ [ -n "${2:-}" ] && CONFIG=$2
+ # only barf if the config file specifies anything else than MAILADDR
+ if egrep -qv '^(MAILADDR.*|#.*|)$' $CONFIG 2>/dev/null; then
+ echo "E: $ME: $CONFIG already exists." >&2
+ exit 255
+ fi
+
+ mkdir --parent ${CONFIG%/*}
+ exec >$CONFIG
+ ;;
+esac
+
+cat <<_eof
+# mdadm.conf
+#
+# Please refer to mdadm.conf(5) for information about this file.
+#
+
+# by default (built-in), scan all partitions (/proc/partitions) and all
+# containers for MD superblocks. alternatively, specify devices to scan, using
+# wildcards if desired.
+#DEVICE ${DEVICE:-partitions containers}
+
+# auto-create devices with Debian standard permissions
+CREATE ${CREATE:-owner=root group=disk mode=0660 auto=yes}
+
+# automatically tag new arrays as belonging to the local system
+HOMEHOST ${HOMEHOST:-<system>}
+
+# instruct the monitoring daemon where to send mail alerts
+MAILADDR ${MAILADDR:-root}
+
+_eof
+
+if [ -n "${PROGRAM:-}" ]; then
+ cat <<-_eof
+ # program to run when mdadm monitor detects potentially interesting events
+ PROGRAM ${PROGRAM}
+
+ _eof
+fi
+
+error=0
+if [ ! -r /proc/mdstat ]; then
+ echo W: $ME: MD subsystem is not loaded, thus I cannot scan for arrays. >&2
+ error=1
+elif [ ! -r /proc/partitions ]; then
+ echo W: $ME: /proc/partitions cannot be read, thus I cannot scan for arrays. >&2
+ error=2
+else
+ echo "# definitions of existing MD arrays"
+ if ! $MDADM --examine --scan --config=partitions; then
+ error=$(($? + 128))
+ echo W: $ME: failed to scan for partitions. >&2
+ echo "### WARNING: scan failed."
+ else
+ echo
+ fi
+fi
+
+echo "# This configuration was auto-generated on $(date -R) by mkconf"
+
+exit $error
diff --git a/debian/patches/build-sys-no-check_rundir.patch b/debian/patches/build-sys-no-check_rundir.patch
new file mode 100644
index 00000000..61f3a6f0
--- /dev/null
+++ b/debian/patches/build-sys-no-check_rundir.patch
@@ -0,0 +1,33 @@
+Subject: build-sys: do not depend on check_rundir for executables
+From: Michael Tokarev <mjt@tls.msk.ru>
+To: linux-raid@vger.kernel.org
+
+The problem is that the link lines will always be executed
+even if the executables are up to date.
+
+If anything, this check_rundir should be a dependency of
+install target, or some other "phony" target like this,
+since check_rundir is phony by itself.
+
+Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
+
+--- a/Makefile
++++ b/Makefile
+@@ -172,7 +172,7 @@ everything-test: all mdadm.static swap_super test_stripe \
+ # mdadm.uclibc and mdassemble.uclibc don't work on x86-64
+ # mdadm.tcc doesn't work..
+
+-mdadm : check_rundir $(OBJS)
++mdadm : $(OBJS)
+ $(CC) $(CFLAGS) $(LDFLAGS) -o mdadm $(OBJS) $(LDLIBS)
+
+ mdadm.static : $(OBJS) $(STATICOBJS)
+@@ -195,7 +195,7 @@ mdmon.O2 : $(MON_SRCS) $(INCL) mdmon.h
+ $(CC) -o mdmon.O2 $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -DHAVE_STDINT_H -O2 -D_FORTIFY_SOURCE=2 $(MON_SRCS)
+
+ # use '-z now' to guarantee no dynamic linker interactions with the monitor thread
+-mdmon : check_rundir $(MON_OBJS)
++mdmon : $(MON_OBJS)
+ $(CC) $(CFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -Wl,-z,now -o mdmon $(MON_OBJS) $(LDLIBS)
+ msg.o: msg.c msg.h
+
diff --git a/debian/patches/debian-conffile-location.diff b/debian/patches/debian-conffile-location.diff
new file mode 100644
index 00000000..8acc6077
--- /dev/null
+++ b/debian/patches/debian-conffile-location.diff
@@ -0,0 +1,115 @@
+From: martin f. krafft <madduck@debian.org>
+Subject: Set /etc/mdadm/mdadm.conf as primary config file location
+
+On Debian, the configuration file resides primarily in /etc/mdadm/mdadm.conf;
+/etc/mdadm.conf is only used as a fallback.
+
+This is a Debian-specific patch.
+
+Forwarded: not-needed
+Reviewed-by: martin f. krafft <madduck@debian.org>
+
+---
+ Makefile | 4 ++--
+ ReadMe.c | 2 +-
+ mdadm.8.in | 14 ++++++--------
+ mdadm.conf.5 | 2 +-
+ mdassemble.8 | 2 +-
+ 5 files changed, 11 insertions(+), 13 deletions(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -61,8 +61,8 @@
+ PKG_CONFIG ?= pkg-config
+
+ SYSCONFDIR = /etc
+-CONFFILE = $(SYSCONFDIR)/mdadm.conf
+-CONFFILE2 = $(SYSCONFDIR)/mdadm/mdadm.conf
++CONFFILE = $(SYSCONFDIR)/mdadm/mdadm.conf
++CONFFILE2 = $(SYSCONFDIR)/mdadm.conf
+ MAILCMD =/usr/sbin/sendmail -t
+ CONFFILEFLAGS = -DCONFFILE=\"$(CONFFILE)\" -DCONFFILE2=\"$(CONFFILE2)\"
+ # Both MAP_DIR and MDMON_DIR should be somewhere that persists across the
+--- a/ReadMe.c
++++ b/ReadMe.c
+@@ -590,7 +590,7 @@
+ ;
+
+ char Help_config[] =
+-"The /etc/mdadm.conf config file:\n\n"
++"The /etc/mdadm/mdadm.conf config file:\n\n"
+ " The config file contains, apart from blank lines and comment lines that\n"
+ " start with a hash(#), array lines, device lines, and various\n"
+ " configuration lines.\n"
+--- a/mdadm.8.in
++++ b/mdadm.8.in
+@@ -264,13 +264,13 @@
+ .TP
+ .BR \-c ", " \-\-config=
+ Specify the config file or directory. Default is to use
+-.B /etc/mdadm.conf
++.B /etc/mdadm/mdadm.conf
+ and
+-.BR /etc/mdadm.conf.d ,
++.BR /etc/mdadm/mdadm.conf.d ,
+ or if those are missing then
+-.B /etc/mdadm/mdadm.conf
++.B /etc/mdadm.conf
+ and
+-.BR /etc/mdadm/mdadm.conf.d .
++.BR /etc/mdadm.conf.d .
+ If the config file given is
+ .B "partitions"
+ then nothing will be read, but
+@@ -1742,9 +1742,9 @@
+ or requested with (a possibly implicit)
+ .BR \-\-scan .
+ In the later case,
+-.B /etc/mdadm.conf
+-or
+ .B /etc/mdadm/mdadm.conf
++or
++.B /etc/mdadm.conf
+ is used.
+
+ If
+@@ -3003,7 +3003,7 @@
+ is given in Misc mode, and to monitor array reconstruction
+ on Monitor mode.
+
+-.SS /etc/mdadm.conf
++.SS /etc/mdadm/mdadm.conf (or /etc/mdadm.conf)
+
+ The config file lists which devices may be scanned to see if
+ they contain MD super block, and gives identifying information
+@@ -3011,7 +3011,7 @@
+ .BR mdadm.conf (5)
+ for more details.
+
+-.SS /etc/mdadm.conf.d
++.SS /etc/mdadm/mdadm.conf.d (or /etc/mdadm.conf.d)
+
+ A directory containing configuration files which are read in lexical
+ order.
+--- a/mdadm.conf.5
++++ b/mdadm.conf.5
+@@ -8,7 +8,7 @@
+ .SH NAME
+ mdadm.conf \- configuration for management of Software RAID with mdadm
+ .SH SYNOPSIS
+-/etc/mdadm.conf
++/etc/mdadm/mdadm.conf
+ .SH DESCRIPTION
+ .PP
+ .I mdadm
+--- a/mdassemble.8
++++ b/mdassemble.8
+@@ -40,7 +40,7 @@
+
+ .SH FILES
+
+-.SS /etc/mdadm.conf
++.SS /etc/mdadm/mdadm.conf
+
+ The config file lists which devices may be scanned to see if
+ they contain MD super block, and gives identifying information
diff --git a/debian/patches/debian-no-Werror.diff b/debian/patches/debian-no-Werror.diff
new file mode 100644
index 00000000..0a427f16
--- /dev/null
+++ b/debian/patches/debian-no-Werror.diff
@@ -0,0 +1,24 @@
+From: martin f. krafft <madduck@debian.org>
+Subject: Remove -Werror from compiler flags
+
+-Werror seems like a bad idea on released/packaged code because a toolchain
+update (introducing new warnings) could break the build. We'll let upstream
+use it to beautify the code, but remove it for our builds.
+
+Signed-off-by: martin f. krafft <madduck@debian.org>
+
+---
+ Makefile | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -43,7 +43,7 @@
+
+ CC = $(CROSS_COMPILE)gcc
+ CXFLAGS ?= -ggdb
+-CWFLAGS = -Wall -Werror -Wstrict-prototypes -Wextra -Wno-unused-parameter
++CWFLAGS = -Wall -Wstrict-prototypes -Wextra -Wno-unused-parameter
+ ifdef WARN_UNUSED
+ CWFLAGS += -Wp,-D_FORTIFY_SOURCE=2 -O3
+ endif
diff --git a/debian/patches/mdmonitor-service-simplify.diff b/debian/patches/mdmonitor-service-simplify.diff
new file mode 100644
index 00000000..ea706a68
--- /dev/null
+++ b/debian/patches/mdmonitor-service-simplify.diff
@@ -0,0 +1,20 @@
+Subject: simplify mdmonitor.service
+From: Michael Tokarev <mjt@tls.msk.ru>
+Date: Fri, 14 Nov 2014 19:18:05 +0300
+Bug-Debian: http://bugs.debian.org/764647
+Forwarded: no
+
+There isn't much to customize for mdadm --monitor.
+It'll just do what it's supposed to do, so just run it.
+
+--- a/systemd/mdmonitor.service
++++ b/systemd/mdmonitor.service
+@@ -10,7 +10,4 @@ Description=MD array monitor
+ DefaultDependencies=no
+
+ [Service]
+-Environment= MDADM_MONITOR_ARGS=--scan
+-EnvironmentFile=-/run/sysconfig/mdadm
+-ExecStartPre=-/usr/lib/systemd/scripts/mdadm_env.sh
+-ExecStart=BINDIR/mdadm --monitor $MDADM_MONITOR_ARGS
++ExecStart=BINDIR/mdadm --monitor --scan
diff --git a/debian/patches/readlink-path.patch b/debian/patches/readlink-path.patch
new file mode 100644
index 00000000..86544592
--- /dev/null
+++ b/debian/patches/readlink-path.patch
@@ -0,0 +1,15 @@
+From: Michael Tokarev <mjt@tls.msk.ru>
+Subject: readlink is in /bin not /usr/bin on debian
+Date: Fri, 14 Nov 2014 19:11:51 +0300
+Bug-Debian: http://bugs.debian.org/766416
+Forwarded: no
+
+This is a debian-specific change, upstream ships
+the rule to use /usr/bin/readlink while on debian
+it is /bin/readlink
+
+--- a/udev-md-raid-arrays.rules
++++ b/udev-md-raid-arrays.rules
+@@ -38 +38 @@ ENV{MD_LEVEL}=="raid[1-9]*", ENV{SYSTEMD_WANTS}+="mdmonitor.service"
+-ENV{MD_LEVEL}=="raid[1-9]*", ENV{MD_CONTAINER}=="?*", PROGRAM="/usr/bin/readlink $env{MD_CONTAINER}", ENV{MD_MON_THIS}="%c"
++ENV{MD_LEVEL}=="raid[1-9]*", ENV{MD_CONTAINER}=="?*", PROGRAM="/bin/readlink $env{MD_CONTAINER}", ENV{MD_MON_THIS}="%c"
diff --git a/debian/patches/rebuildmap-strip-local-host-name-from-device-name.patch b/debian/patches/rebuildmap-strip-local-host-name-from-device-name.patch
new file mode 100644
index 00000000..f7c9b1b7
--- /dev/null
+++ b/debian/patches/rebuildmap-strip-local-host-name-from-device-name.patch
@@ -0,0 +1,47 @@
+From 628cdf19ea35daad22e409e51c0abc7ffb19d6aa Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Mon, 3 Nov 2014 12:49:05 +1100
+Subject: Rebuildmap: strip local host name from device name.
+
+When /run/mdadm/map is being rebuilt, e.g. by "mdadm -Ir",
+if the device doesn't exist in /dev, we have to choose
+a name.
+Currently we don't strip the hostname which is wrong if
+it is the local host.
+
+Reported-by: Stephen Kent <smkent@smkent.net>
+Signed-off-by: NeilBrown <neilb@suse.de>
+---
+ mapfile.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/mapfile.c b/mapfile.c
+index 4e7f242..41599df 100644
+--- a/mapfile.c
++++ b/mapfile.c
+@@ -455,12 +455,19 @@ void RebuildMap(void)
+ sep = "";
+ }
+ }
+- if (strchr(name, ':'))
+- /* probably a uniquifying
++ if (strchr(name, ':')) {
++ /* Probably a uniquifying
+ * hostname prefix. Allow
+- * without a suffix
++ * without a suffix, and strip
++ * hostname if it is us.
+ */
++ if (homehost && unum == -1 &&
++ strncmp(name, homehost,
++ strlen(homehost)) == 0 &&
++ name[strlen(homehost)] == ':')
++ name += strlen(homehost)+1;
+ unum = -1;
++ }
+
+ while (conflict) {
+ if (unum >= 0)
+--
+1.7.10.4
+
diff --git a/debian/patches/series b/debian/patches/series
new file mode 100644
index 00000000..c13a5c88
--- /dev/null
+++ b/debian/patches/series
@@ -0,0 +1,9 @@
+debian-conffile-location.diff
+debian-no-Werror.diff
+sha1-includes.diff
+use-external-blkid.diff
+use-tempnode-not-devnode.patch
+build-sys-no-check_rundir.patch
+rebuildmap-strip-local-host-name-from-device-name.patch
+readlink-path.patch
+mdmonitor-service-simplify.diff
diff --git a/debian/patches/sha1-includes.diff b/debian/patches/sha1-includes.diff
new file mode 100644
index 00000000..0dfd7daf
--- /dev/null
+++ b/debian/patches/sha1-includes.diff
@@ -0,0 +1,40 @@
+From: Michael Tokarev <mjt@tls.msk.ru>
+Subject: do not #include ansidecl.h from sha1.h, use system headers
+
+In 3.2.5 version of mdadm, new sha1 implementation has been included
+which tries to include ansidecl.h header which is internal to some
+other project. But this #include isn't really necessary, since this
+implementation does not actually use any defines from ansidecl.h. So
+just remove the #include, instead of adding a new external dependency.
+
+References: http://www.spinics.net/lists/raid/msg38859.html
+
+While at it, unconditionally include system headers like limits.h and
+stdint.h, since on a Linux system these headers are available, and
+these contain more definitive information about real system types than
+any guesses.
+
+--- a/sha1.h
++++ b/sha1.h
+@@ -22,7 +22,7 @@
+
+ #include <stdio.h>
+
+-#if defined HAVE_LIMITS_H || _LIBC
++#if 1 /* defined HAVE_LIMITS_H || _LIBC */
+ # include <limits.h>
+ #endif
+
+@@ -33,9 +33,9 @@
+ the resulting executable. Locally running cross-compiled executables
+ is usually not possible. */
+
+-#ifdef _LIBC
+-# include <sys/types.h>
+-typedef u_int32_t sha1_uint32;
++#if 1 /* def _LIBC */
++# include <stdint.h>
++typedef uint32_t sha1_uint32;
+ typedef uintptr_t sha1_uintptr;
+ #else
+ # define INT_MAX_32_BITS 2147483647
diff --git a/debian/patches/use-external-blkid.diff b/debian/patches/use-external-blkid.diff
new file mode 100644
index 00000000..637f7c3a
--- /dev/null
+++ b/debian/patches/use-external-blkid.diff
@@ -0,0 +1,16 @@
+From: Michael Tokarev <mjt@tls.msk.ru>
+Subject: blkid is not udev builtin, use /sbin/blkid
+
+--- a/udev-md-raid-arrays.rules
++++ b/udev-md-raid-arrays.rules
+@@ -26,9 +26,7 @@ ENV{DEVTYPE}=="partition", ENV{MD_UUID}=
+ ENV{DEVTYPE}=="partition", ENV{MD_DEVNAME}=="*[^0-9]", SYMLINK+="md/$env{MD_DEVNAME}%n"
+ ENV{DEVTYPE}=="partition", ENV{MD_DEVNAME}=="*[0-9]", SYMLINK+="md/$env{MD_DEVNAME}p%n"
+
+-IMPORT{builtin}="blkid"
+-OPTIONS+="link_priority=100"
+-OPTIONS+="watch"
++IMPORT{program}="/sbin/blkid -o udev -p -u noraid $tempnode"
+ ENV{ID_FS_USAGE}=="filesystem|other|crypto", ENV{ID_FS_UUID_ENC}=="?*", SYMLINK+="disk/by-uuid/$env{ID_FS_UUID_ENC}"
+ ENV{ID_FS_USAGE}=="filesystem|other", ENV{ID_FS_LABEL_ENC}=="?*", SYMLINK+="disk/by-label/$env{ID_FS_LABEL_ENC}"
+
diff --git a/debian/patches/use-tempnode-not-devnode.patch b/debian/patches/use-tempnode-not-devnode.patch
new file mode 100644
index 00000000..38a55044
--- /dev/null
+++ b/debian/patches/use-tempnode-not-devnode.patch
@@ -0,0 +1,31 @@
+From: Michael Tokarev <mjt@tls.msk.ru>
+Subject: use tempnode not devnode in udev rules
+Bug-Debian: http://bugs.debian.org/770883
+Forwarded: no
+
+udev in wheezy does not understand $devnode construct
+in rules file, while upstream uses it in mdadm rules
+files. udev in jessie has $devnode and it also supports
+old $tempnode which is the way it worked in wheezy and
+before, even if $tempnode in jessie's udev is not documented.
+So on jessie, both $tempnode and $devnode work fine, while
+in wheezy, only $tempnode works.
+
+Use $tempnode instead of $devnode. Since mdadm is important
+enough for system functionality and easily can break system
+by making it unbootable, and this is the only incompatibility
+between wheezy's and jessie's udev wrt mdadm, it is better than
+having a versioned dependency on udev.
+
+This patch is debian-specific and should be dropped for jessie+1.
+
+--- a/udev-md-raid-arrays.rules
++++ b/udev-md-raid-arrays.rules
+@@ -20 +20 @@
+-IMPORT{program}="BINDIR/mdadm --detail --export $devnode"
++IMPORT{program}="BINDIR/mdadm --detail --export $tempnode"
+--- a/udev-md-raid-assembly.rules
++++ b/udev-md-raid-assembly.rules
+@@ -30 +30 @@
+-ACTION=="add|change", IMPORT{program}="BINDIR/mdadm --incremental --export $devnode --offroot ${DEVLINKS}"
++ACTION=="add|change", IMPORT{program}="BINDIR/mdadm --incremental --export $tempnode --offroot ${DEVLINKS}"
diff --git a/debian/po/POTFILES.in b/debian/po/POTFILES.in
new file mode 100644
index 00000000..04922385
--- /dev/null
+++ b/debian/po/POTFILES.in
@@ -0,0 +1 @@
+[type: gettext/rfc822deb] mdadm.templates
diff --git a/debian/po/ca.po b/debian/po/ca.po
new file mode 100644
index 00000000..0710fd90
--- /dev/null
+++ b/debian/po/ca.po
@@ -0,0 +1,220 @@
+# mdadm Catalan translation.
+# Copyright (C) 2004-2006 Software in the Public Interest
+# This file is distributed under the same license as the mdadm package.
+# Innocent De Marchi <tangram.peces@gmail.com>, 2011.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: 3.1.4-1+8efb9d1\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2011-05-17 16:54+0100\n"
+"Last-Translator: Innocent De Marchi <tangram.peces@gmail.com>\n"
+"Language-Team: Catalan <debian-l10n-catalan@lists.debian.org>\n"
+"Language: ca\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Poedit-Language: Catalan\n"
+"X-Poedit-Country: SPAIN\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "Les matrius MD necessaris per al sistema de fitxers arrel:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Escriviu «all» (tots), «none» (cap) o una llista separada per espais dels "
+"dispositius com «md0 md1» o «md/1 md/d0» (podeu ometre el «/dev/» inicial)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "per a ús intern, només és necessària la descripció llarga. "
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Si el sistema de fitxers arrel del sistema està en un conjunt MD (RAID), cal "
+"que s'iniciï al principi de la seqüència d'arrencada. Si està en un volum "
+"lògic (LVM), que està definit sobre un MD, cal iniciar totes les matrius que "
+"el constitueixen."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Si sabeu exactament quines matrius són necessàries per arrencar el sistema "
+"de fitxers arrel, i vol ajornar l'arrencada de la resta de conjunts a un "
+"punt posterior de la seqüència d'arrencada, Introduïu aquí els conjunts que "
+"voleu arrencar. També podeu seleccionar «all» per, simplement, arrencar tots "
+"els disponibles."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Si no necessita o vol arrencar qualsevol matriu per al sistema de fitxers "
+"arrel, deixau en blanc la resposta (o escriviu «none»). Pot ésser el seu cas "
+"si fa servir l'auto-arrencada del nucli o no necessiteu cap matriu en "
+"l'arrencada."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "S'ha produït un error: el node de dispositiu no existeix."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "S'ha produït un error: no és un dispositiu de blocs."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "S'ha produït un error: no és un conjunt («array») MD."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr ""
+"S'ha produït un error: la matriu («array») no apareix llistada en el fitxer "
+"de configuració «mdadm.conf»."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Voleu arrencar les matrius no llistats en el fitxer «mdadm.conf»?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"La matriu («array») especificada (${array}) no apareix llistada en el "
+"fitxer de configuració (${config}). Per tant, no es pot iniciar la matriu "
+"durant l'arrencada del sistema, llevat que corregeixi el fitxer de "
+"configuració i regenereu el disc RAM inicial."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Aquest avís només és important si necessiteu que les matrius s'arrenquin en "
+"el disc RAM inicial per poder arrencar el sistema. Si feu servir l'arrencada "
+"automàtica del nucli o no necessiteu que les matrius estiguin arrencats "
+"quan es carregui el disc RAM, podeu continuar. També podeu decidir no "
+"continuar i introduir «none» quan se li demani quines matrius cal iniciar "
+"del disc RAM inicial."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr ""
+"Voleu que «mdadm» executi comprovacions de redundància mensuals de les "
+"matrius MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Si el nucli ho accepta (versions superiors a la 2.6.14), «mdadm» pot fer "
+"comprovacions periòdiques de la redundància de les matrius MD (RAIDs). Pot "
+"ésser que aquest procés consumeixi molts recursos del sistema, depenent de "
+"la configuració, però pot ajudar a prevenir casos poc freqüents de pèrdua de "
+"dades. Teniu present que aquestes comprovacions es fan en mode lectura "
+"llevat que es detectin errors: si hi ha errors, «mdadm» els corregirà i per "
+"això, caldrà que tengui accés d'escriptura als mitjans físics. "
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"El valor predeterminat, si s'activa, es fer la comprovació el primer "
+"diumenge de cada mes a les 01:06 am."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Desitjau arrencar el dimoni monitor MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"El dimoni monitor de MD (RAID) envia notificacions per correu electrònic "
+"quan es produeixen esdeveniments importants en els dispositius MD (com un "
+"error de disc)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Es recomana l'activació d'aquesta opció."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Destinatari de les notificacions de correu electrònic:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Introduïu l'adreça de correu electrònic de l'usuari que ha de rebre les "
+"notificacions de correu electrònic per a esdeveniments MD rellevants."
diff --git a/debian/po/cs.po b/debian/po/cs.po
new file mode 100644
index 00000000..40e8668c
--- /dev/null
+++ b/debian/po/cs.po
@@ -0,0 +1,265 @@
+#
+# Translators, if you are not familiar with the PO format, gettext
+# documentation is worth reading, especially sections dedicated to
+# this format, e.g. by running:
+# info -n '(gettext)PO Files'
+# info -n '(gettext)Header Entry'
+#
+# Some information specific to po-debconf are available at
+# /usr/share/doc/po-debconf/README-trans
+# or http://www.debian.org/intl/l10n/po-debconf/README-trans
+#
+# Developers do not need to manually edit POT or PO files.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-02-17 21:10+0100\n"
+"Last-Translator: Miroslav Kure <kurem@debian.cz>\n"
+"Language-Team: Czech <debian-l10n-czech@lists.debian.org>\n"
+"Language: cs\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "MD pole vyžadovaná pro kořenový souborový systém:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Zadejte prosím mezerami oddělený seznam zařízení, případně „all“ nebo "
+"„none“. Počáteční „/dev/“ můžete vynechat a zadat jen např. „md0 md1“ nebo "
+"„md/1 md/d0“."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "pro vnitřní použití - pouze kvůli zobrazení dlouhého popisu."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Pokud je kořenový souborový systém umístěn na MD (RAID) svazku, musí být "
+"tento spuštěn během zavádění systému co nejdříve. Pokud se kořenový "
+"souborový systém nachází na logickém svazku LVM, který je vytvořen nad MD "
+"polem, musí se spustit všechna související pole."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Jestliže přesně víte, která pole jsou potřeba pro připojení kořenového "
+"souborového systému a zároveň chcete pozdržet spuštění ostatních polí na "
+"pozdější dobu, zadejte zde prosím pole, která se mají spustit. Chcete-li "
+"spustit všechna dostupná pole, můžete zadat „all“."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Nepotřebujete-li nebo nechcete-li spouštět pole pro kořenový souborový "
+"systém, ponechte odpověď prázdnou, případně zadejte „none“. To může nastat "
+"třeba v případě, že používáte automatický start přímo v jádře, nebo pokud k "
+"zavedení systému žádná pole nepotřebujete."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Vyskytla se chyba: uzel zařízení neexistuje"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Vyskytla se chyba: není blokovým zařízením"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Vyskytla se chyba: není MD polem"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Vyskytla se chyba: pole není uvedeno v souboru mdadm.conf"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Spustit pole neuvedená v mdadm.conf?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"Zadané pole (${array}) není uvedeno v konfiguračním souboru ${config} a tím "
+"pádem nemůže být spuštěno při zavádění systému. Napravit to můžete opravou "
+"konfiguračního souboru a znovuvytvořením počátečního ramdisku."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Toto varování je relevantní pouze pokud k zavedení systému potřebujete, aby "
+"se pole spustila z počátečního ramdisku. Používáte-li automatické spouštění "
+"přímo v jádře, nebo pokud nepotřebujete pouštět žádná pole ještě z "
+"počátečního ramdisku, můžete jednoduše pokračovat. Jinou možností je "
+"nepokračovat dále a při dotazu na seznam polí, která se mají spouštět z "
+"počátečního ramdisku, zadat 'none'."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr "Má mdadm spouštět měsíční kontroly redundance MD polí?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Pokud to vaše jádro podporuje (verze větší než 2.6.14), může mdadm "
+"pravidelně kontrolovat redundanci MD polí (RAIDů). Podle konfigurace "
+"počítače to může být proces velmi náročný na prostředky, ovšem může předejít "
+"vzácným případům ztráty dat. Pokud nejsou nalezeny chyby, používá tato "
+"kontrola v zásadě jen čtecí operace. Při nalezení chyb se je mdadm pokusí "
+"opravit, což může znamenat zápis na médium."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"Pokud kontrolu povolíte, bude se dle výchozího nastavení spouštět každou "
+"první neděli v měsíci v 01:06 ráno."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Chcete spustit daemon pro monitorování MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"Daemon pro monitorování MD (RAIDu) zasílá emailová upozornění na významné MD "
+"události, jako je selhání disku."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Povolení této možnosti je doporučeno."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Příjemce emailových upozornění:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Zadejte prosím emailovou adresu uživatele, který má dostávat emailová "
+"upozornění při výskytu významných MD událostí."
+
+#~ msgid "Initialise the superblock if you reuse hard disks"
+#~ msgstr "Při znovupoužití starších disků inicializujte superblok"
+
+#~ msgid ""
+#~ "WARNING! If you are using hard disks which have RAID superblocks from "
+#~ "earlier installations in different RAID arrays, you MUST zero each "
+#~ "superblock *before* activating the autostart feature."
+#~ msgstr ""
+#~ "VAROVÁNÍ! Používáte-li pevné disky, které obsahují RAID superbloky z "
+#~ "dřívější instalace v jiném RAID poli, MUSÍTE všechny superbloky před "
+#~ "použitím automatického spouštění vynulovat."
+
+#~ msgid ""
+#~ "To do this, do not start the RAID devices automatically. First, zero the "
+#~ "superblock (mdadm --zero-superblock /dev/mdX). Next, use `dpkg-"
+#~ "reconfigure mdadm` to reactivate the autostart feature."
+#~ msgstr ""
+#~ "Chcete-li to provést, nespouštějte RAID zařízení automaticky. Nejprve "
+#~ "vynulujte superblok příkazem 'mdadm --zero-superblock /dev/mdX' a teprve "
+#~ "poté můžete povolit automatické spouštění RAIDu příkazem 'dpkg-"
+#~ "reconfigure mdadm'."
+
+#~ msgid ""
+#~ "You have the option to start all other arrays (those not needed for the "
+#~ "root filesystem) later in the boot sequence. Doing so will give you "
+#~ "greater control over the arrays with the mdadm configuration file. "
+#~ "Starting all arrays at boot-time may be safer though."
+#~ msgstr ""
+#~ "Všechna ostatní pole (ta, která nejsou potřeba pro kořenový souborový "
+#~ "systém) můžete spustit později. Pokud tak učiníte, budete mít v "
+#~ "konfiguračním souboru mdadm nad poli větší kontrolu. Na druhou stranu je "
+#~ "spouštění všech polí hned na začátku zavádění o něco jistější volbou."
+
+#~ msgid ""
+#~ "If RAID devices are started automatically, all RAID devices are "
+#~ "discovered and assembled automatically at system startup. This option "
+#~ "should only be used if the md driver is compiled as a module. If it is "
+#~ "compiled into your kernel, the automatic startup will be performed at "
+#~ "boot time by the kernel and therefore you should not choose this option."
+#~ msgstr ""
+#~ "Jestliže jsou RAID zařízení spouštěna automaticky, jsou všechna RAID "
+#~ "zařízení rozpoznána a poskládána automaticky při zavádění systému. Tuto "
+#~ "volbu byste měli použít pouze v případě, že jste ovladač md zakompilovali "
+#~ "jako modul. Pokud jste jej zakompilovali přímo do jádra, o automatické "
+#~ "spuštění se postará samotné jádro a tedy tuto možnost nepotřebujete."
diff --git a/debian/po/da.po b/debian/po/da.po
new file mode 100644
index 00000000..47294504
--- /dev/null
+++ b/debian/po/da.po
@@ -0,0 +1,210 @@
+# Danish translation mdadm.
+# Copyright (C) 2011 mdadm & nedenstående oversættere.
+# This file is distributed under the same license as the mdadm package.
+# Joe Hansen <joedalton2@yahoo.dk>, 2011.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2011-04-03 17:30+01:00\n"
+"Last-Translator: Joe Hansen <joedalton2@yahoo.dk>\n"
+"Language-Team: Danish <debian-l10n-danish@lists.debian.org> \n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "MD arrays krævet for rodfilsystemet:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Indtast venligst »all«, »none« eller en mellemrumsadskilt liste af enheder "
+"såsom »md0 md1« eller »md/1 md/d0« (det foranstillede »/dev/« kan udelades)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "for intern brug - kun den lange beskrivelse er krævet."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Hvis systemets rodfilsystem er placeret på en MD-array (RAID), skal det "
+"startes tidligt under opstartssekvensen. Hvis den er placeret på en logisk "
+"diskenhed (LVM), som er på MD, skal alle indgående arrays startes."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Hvis du ved præcis hvilke arrays som er krævet, for at få rodfilsystemet op, "
+"og du ønsker at udsætte start af alle andre arrays til et senere tidspunkt i "
+"opstartssekvensen, så indtast her de arrays som skal startes. Alternativt "
+"kan du indtaste »all« for at starte alle tilgængelige arrays."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Hvis du ikke har brug for eller ønsker at starte nogen arrays for "
+"rodfilsystemet, så efterlad svaret tomt (eller indtast »none«). Dette kan "
+"være tilfældet, hvis du bruger automatisk start af kernen eller ikke skal "
+"bruge arrays til at starte op med."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Der opstod en fejl: Enhedsknude findes ikke"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Der opstod en fejl: Ikke en blokenhed"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Der opstod en fejl: Ikke en MD array"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Der opstod en fejl: Array er ikke anført i mdadm.conf-filen"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Start arrays som ikke er anført i mdadm.conf?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"Den angivne array (${array}) er ikke anført i konfigurationsfilen "
+"(${config}). Den kan derfor ikke startes under opstarten, med mindre du "
+"retter i konfigurationsfilen og gendanner den oprindelige ramdisk."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Denne advarsel er kun relevant hvis du skal have arrays til at blive startet "
+"fra den oprindelige ramdisk for at kunne starte op. Hvis du bruger den "
+"automatiske opstart i kernen, eller ikke skal bruge at arrays startes så "
+"tidligt som den oprindelige ramdisk indlæses, så kan du bare fortsætte. "
+"Alternativt så vælg ikke at fortsætte og indtast »none« når du bliver spurgt "
+"om hvilke arrays, der skal startes fra den oprindelige ramdisk."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr "Skal mdadm køre månedlig redundanskontrol af MD arrays?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Hvis kernen understøtter det (versioner større end 2.6.14), kan mdadm "
+"periodisk kontrollere redundansen på MD arrays (RAID'er). Det kan være en "
+"ressourcekrævende proces, afhængig af den lokale opsætning, men det kan "
+"hjælpe med at forhindre at du i sjældne tilfælde får datatab. Bemærk at "
+"dette er en skrivebeskyttet kontrol med mindre at der findes fejl; hvis der "
+"registreres fejl vil mdadm forsøge at rette dem, hvilket kan medføre "
+"skriveadgang til mediet."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"Standarden - hvis aktiveret - er at kontrollere på den første søndag i hver "
+"måned klokken 01:06."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Ønsker du at starte MD-overvågningsdæmonen?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"MD-overvågningsdæmonen (RAID) sender e-post-påmindelser udløst af vigtige MD-"
+"hændelser (såsom en diskfejl)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Aktivering af denne indstilling anbefales."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Modtager af e-post-påmindelser:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Indtast venligst e-post-adressen på brugeren, som skal modtage e-post-"
+"påmindelser for vigtige MD-hændelser."
diff --git a/debian/po/de.po b/debian/po/de.po
new file mode 100644
index 00000000..e1191e21
--- /dev/null
+++ b/debian/po/de.po
@@ -0,0 +1,284 @@
+# Translators, if you are not familiar with the PO format, gettext
+# documentation is worth reading, especially sections dedicated to
+# this format, e.g. by running:
+# info -n '(gettext)PO Files'
+# info -n '(gettext)Header Entry'
+# Some information specific to po-debconf are available at
+# /usr/share/doc/po-debconf/README-trans
+# or http://www.debian.org/intl/l10n/po-debconf/README-trans
+# Developers do not need to manually edit POT or PO files.
+#
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm 2.6.9-3\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2009-06-24 17:35+0200\n"
+"Last-Translator: Mario Joussen <joussen@debian.org>\n"
+"Language-Team: German <debian-l10n-german@lists.debian.org>\n"
+"Language: de\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "Für das Wurzeldateisystem benötigte MD-Verbünde:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Bitte geben Sie »all«, »none« oder eine leerzeichenseparierte Geräteliste "
+"wie zum Beispiel »md0 md1« oder »md/1 md/d0« ein (das führende »/dev« kann "
+"weggelassen werden)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr ""
+"für internen Gebrauch - es wird nur die ausführliche Beschreibung benötigt."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Wenn das Wurzeldateisystem Ihres Systems auf einem MD-Verbund (RAID) liegt, "
+"muss es frühzeitig während des Bootvorgangs gestartet werden. Wenn sich Ihr "
+"Wurzeldateisystem auf einem logischen Laufwerk (LVM) befindet, das sich "
+"wiederum auf einem MD-Verbund befindet, müssen alle zugehörigen Verbünde "
+"gestartet werden."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Wenn Sie genau wissen, welche Verbünde benötigt werden, um das "
+"Wurzeldateisystem zu starten, und Sie den Start der anderen Verbünde auf "
+"einen späteren Zeitpunkt in der Bootreihenfolge verschieben wollen, geben "
+"Sie die zu startenden Verbünde hier ein. Alternativ geben Sie »all« ein, um "
+"alle verfügbaren Verbünde zu starten."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Falls Sie keine RAID-Verbünde für das Wurzeldateisystem benötigen oder "
+"starten wollen, lassen Sie die Antwort leer (oder geben »none« ein). Dies "
+"könnte der Fall sein, wenn Sie entweder die Autostartfunktion des Kernels "
+"verwenden oder keine Verbünde zum Booten benötigen."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Ein Fehler ist aufgetreten: Geräteknoten existiert nicht"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Ein Fehler ist aufgetreten: kein Blockgerät"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Ein Fehler ist aufgetreten: kein RAID-Verbund"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr ""
+"Ein Fehler ist aufgetreten: Verbund nicht in der Datei mdadm.conf aufgeführt"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Nicht in mdadm.conf aufgeführte Verbünde starten?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"Der angegebene Verbund (${array}) ist in der Konfigurationsdatei ${config} "
+"nicht aufgeführt. Deshalb kann er während des Bootvorgangs nicht gestartet "
+"werden, es sei denn, Sie korrigieren die Konfigurationsdatei und erzeugen "
+"die initiale Ramdisk neu."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Diese Warnung ist nur von Bedeutung, wenn Sie RAID-Verbünde, die von der "
+"initialen Ramdisk gestartet werden, benötigen, um booten zu können. Falls "
+"Sie die Autostartfunktion des Kernels verwenden oder kein RAID-Verbund zum "
+"frühen Zeitpunkt des Ladens der initialen Ramdisk gestartet werden muss, "
+"können Sie einfach fortfahren. Alternativ wählen Sie, nicht fortzufahren und "
+"geben »none« ein, wenn Sie gefragt werden, welche RAID-Verbünde von der "
+"initialen Ramdisk gestartet werden sollen."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr ""
+"Soll mdadm monatlich die Redundanzüberprüfung auf den RAID-Verbünden "
+"ausführen?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Falls Ihr Kernel es unterstützt (Versionen größer als 2.6.14) kann mdadm "
+"regelmäßig die Redundanz Ihrer MD-Verbünde (RAID) überprüfen. Dies kann "
+"abhängig von Ihrer Installation ein ressourcenintensiver Vorgang sein, der "
+"aber helfen kann, seltene Fälle von Datenverlust zu vermeiden. Bitte "
+"beachten Sie, dass diese Überprüfung nur lesend erfolgt, solange keine "
+"Fehler gefunden werden. Falls Fehler gefunden werden, wird mdadm versuchen, "
+"diese zu beheben, was zu schreibendem Zugriff auf das Medium führen kann."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"Die Voreinstellung ist, falls eingeschaltet, die Überprüfung am ersten "
+"Sonntag jedes Monats um 01:06 Uhr durchzuführen."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Möchten Sie den RAID-Überwachungsdämon starten?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"Der MD- (RAID-)Überwachungsdämon verschickt Benachrichtigungen als Reaktion "
+"auf wichtige RAID-Ereignisse (wie zum Beispiel Festplattenfehler)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Das Aktivieren dieser Option ist empfohlen."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Empfänger der E-Mail-Benachrichtigungen:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Geben Sie bitte die E-Mail-Adresse des Benutzers an, der die E-Mail-"
+"Benachrichtigung für wichtige MD-Ereignisse erhalten soll."
+
+#~ msgid "Initialise the superblock if you reuse hard disks"
+#~ msgstr ""
+#~ "Initialisieren Sie den Superblock, wenn Sie Festplatten wieder verwenden."
+
+#~ msgid ""
+#~ "WARNING! If you are using hard disks which have RAID superblocks from "
+#~ "earlier installations in different RAID arrays, you MUST zero each "
+#~ "superblock *before* activating the autostart feature."
+#~ msgstr ""
+#~ "WARNUNG! Wenn Sie Festplatten verwenden, die bereits einen md-Superblock "
+#~ "von einer vorherigen Installation in einem anderen RAID-Verbund besitzen, "
+#~ "so MÜSSEN Sie diesen löschen, *bevor* Sie die Autostart-Funktion "
+#~ "aktivieren."
+
+#~ msgid ""
+#~ "To do this, do not start the RAID devices automatically. First, zero the "
+#~ "superblock (mdadm --zero-superblock /dev/mdX). Next, use `dpkg-"
+#~ "reconfigure mdadm` to reactivate the autostart feature."
+#~ msgstr ""
+#~ "Dazu starten Sie die RAID-Laufwerke nicht automatisch und löschen dann "
+#~ "erst den Superblock (mdadm --zero-superblock /dev/mdX). Danach können Sie "
+#~ "mit »dpkg-reconfigure mdadm« die Autostart-Funktion aktivieren."
+
+#~ msgid ""
+#~ "You have the option to start all other arrays (those not needed for the "
+#~ "root filesystem) later in the boot sequence. Doing so will give you "
+#~ "greater control over the arrays with the mdadm configuration file. "
+#~ "Starting all arrays at boot-time may be safer though."
+#~ msgstr ""
+#~ "Sie haben die Option, alle anderen Verbünde (diese die nicht für das "
+#~ "Wurzeldateisystem benötigt werden) später während des Bootvorgangs zu "
+#~ "starten. Damit haben Sie größere Kontrolle über die Verbünde mit Hilfe "
+#~ "der mdadm-Konfigurationsdatei. Es ist jedoch sicherer, alle Verbünde beim "
+#~ "Booten zu starten."
+
+#~ msgid ""
+#~ "If RAID devices are started automatically, all RAID devices are "
+#~ "discovered and assembled automatically at system startup. This option "
+#~ "should only be used if the md driver is compiled as a module. If it is "
+#~ "compiled into your kernel, the automatic startup will be performed at "
+#~ "boot time by the kernel and therefore you should not choose this option."
+#~ msgstr ""
+#~ "Wenn die RAID-Laufwerke automatisch gestartet werden, werden alle RAID-"
+#~ "Laufwerke beim Systemstart automatisch gefunden und gestartet. Diese "
+#~ "Option sollte nur benutzt werden, falls der md-Treiber als Modul "
+#~ "kompiliert wurde. Falls er in den Kernel einkompiliert wurde, führt der "
+#~ "Kernel den automatischen Start beim Booten durch und Sie sollten diese "
+#~ "Option deshalb nicht auswählen."
+
+#~ msgid ""
+#~ "When the RAID monitor daemon runs, email notifications are sent when a "
+#~ "disk belonging to a RAID array fails or changes its status for some "
+#~ "reason."
+#~ msgstr ""
+#~ "Wird der RAID-Überwachungsdaemon gestartet, so werden E-Mail-"
+#~ "Benachrichtigungen verschickt, falls ein zum RAID gehörendes Laufwerk "
+#~ "ausfällt oder den Status ändert."
diff --git a/debian/po/es.po b/debian/po/es.po
new file mode 100644
index 00000000..d8809892
--- /dev/null
+++ b/debian/po/es.po
@@ -0,0 +1,254 @@
+# mdadm po-debconf translation to spanish
+# Copyright (C) 2006 Software in the Public Interest, SPI Inc.
+# This file is distributed under the same license as the mdadm package.
+#
+# Changes:
+# - Initial translation
+# Javier Fernández-Sanguino , 2006
+# - Revision
+# Fernando Cerezal
+#
+#
+# Traductores, si no conoce el formato PO, merece la pena leer la
+# documentación de gettext, especialmente las secciones dedicadas a este
+# formato, por ejemplo ejecutando:
+# info -n '(gettext)PO Files'
+# info -n '(gettext)Header Entry'
+#
+# Equipo de traducción al español, por favor lean antes de traducir
+# los siguientes documentos:
+#
+# - El proyecto de traducción de Debian al español
+# http://www.debian.org/intl/spanish/
+# especialmente las notas y normas de traducción en
+# http://www.debian.org/intl/spanish/notas
+#
+# - La guía de traducción de po's de debconf:
+# /usr/share/doc/po-debconf/README-trans
+# o http://www.debian.org/intl/l10n/po-debconf/README-trans
+#
+# Si tiene dudas o consultas sobre esta traducción consulte con el último
+# traductor (campo Last-Translator) y ponga en copia a la lista de
+# traducción de Debian al español (<debian-l10n-spanish@lists.debian.org>)
+#
+# Notas:
+# - 'array' no está traducido aún. La traducción como 'arreglo' suena
+# fatal (y es poco conocida) [ cambiar cuando se cambie en d-i ]
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm 2.5.6-6\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-04-25 17:47+0200\n"
+"Last-Translator: Javier Fernández-Sanguino <jfs@debian.org>\n"
+"Language-Team: Debian Spanish <debian-l10n-spanish@lists.debian.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-15\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "Arrays MD necesarios para el sistema de ficheros raíz:"
+
+# No se traduce «all» y «none» porque no aparecen en la plantilla para traducir los elementos individuales
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Introduzca «all» (todos), «none» (ninguno) o una lista de dispositivos "
+"separados por espacios como por ejemplo puede sólo introducir «md0 md1» o "
+"«md/1 md/d0» (no tiene que preceder los nombres de dispositivos con «/dev»)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "para uso interno. Sólo se utiliza la descripción larga."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Si el sistema de ficheros raíz de su sistema está en un array MD (RAID) "
+"tiene que inicializarse pronto durante la secuencia de arranque. Si está "
+"en un volumen lógico (LVM), que está definido sobre un MD, todos los arrays "
+"que lo forman tienen que haberse inicializado."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Introduzca los arrays a iniciar aquí, si sabe con exactitud cuáles son "
+"necesarios para arrancar el sistema de ficheros raíz y quiere posponer el "
+"arranque de todos los demás arrays a un punto posterior de la secuencia de "
+"arranque. También puede introducir «all» (todos) para, sencillamente, "
+"iniciar todos los arrays disponibles."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Puede dejar la respuesta en blanco (o introducir «none») si no necesita o "
+"desea arrancar los arrays para el sistema de ficheros raíz. Este puede ser "
+"su caso si está utilizando el autoarranque del núcleo o no necesita ningún "
+"array para el arranque."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Se produjo un error: el nodo de dispositivo no existe"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Se produjo un error: no es un dispositivo de bloques"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Se produjo un error: no es un array MD"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr ""
+"Se produjo un error: el array no está en la lista definida en el archivo "
+"mdadm.conf"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "¿Desea arrancar los arrays no listados en mdadm.conf?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"El array que ha especificado (${array}) no está listado en el fichero de "
+"configuración ${config}. Este array no podrá iniciarse durante el arranque "
+"del sistema a no ser que corrija el fichero de configuración y regenere el "
+"disco de ram inicial."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Este aviso sólo es relevante si necesita que los arrays se inicien en el "
+"disco de RAM inicial para poder arrancar el sistema. Si utiliza el "
+"autoarranque del núcleo o no necesita que los arrays estén arrancados tan "
+"pronto como se cargue el disco de RAM, puede continuar simplemente. También "
+"puede decidir no continuar e introducir «none» cuando se le pregunte qué "
+"arrays deberían arrancarse del disco de RAM inicial."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr ""
+"¿Debería mdadm ejecutar comprobaciones de redundancia mensuales de los "
+"arrays MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Mdadm puede comprobar de forma periódica la redundancia de sus arrays MD "
+"(RAIDs) si el núcleo lo soporta (si su versión es superior a la 2.6.14). "
+"Esto puede ser un proceso que consuma muchos recursos, dependiendo de su "
+"configuración, pero podría ayudar a prevenir casos raros de pérdida de "
+"datos. Tenga en cuenta que estas comprobaciones se hacen en modo lectura "
+"salvo que se detecten errores, en cuyo caso mdadm necesitará corregirlos, lo "
+"que significa que será necesario tener acceso de escritura a los medios "
+"físicos."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"El valor por omisión, si se activa, es comprobar el primer domingo de cada "
+"mes a las 01:06 am."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "¿Desea arrancar el demonio de monitorización MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"El demonio de monitorización MD (RAID) envía notificaciones por correo "
+"electrónico cuando se producen eventos importantes en los dispositivos MD "
+"(como pueda ser el caso de un fallo de un disco)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Se recomienda habilitar esta opción."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Destinatario de las notificaciones por correo:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Introduzca la dirección de correo electrónico del usuario que debería "
+"recibir las notificaciones por correo de eventos relevantes en los "
+"dispositivos MD."
diff --git a/debian/po/eu.po b/debian/po/eu.po
new file mode 100644
index 00000000..06d8e537
--- /dev/null
+++ b/debian/po/eu.po
@@ -0,0 +1,211 @@
+# mdadm debconf templates basque translation
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# Piarres Beobide <pi@beobide.net>, 2008.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm-debconf\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-04-30 11:00+0100\n"
+"Last-Translator: Piarres Beobide <pi@beobide.net>\n"
+"Language-Team: Euskara <debian-l10n-basque@lists.debian.org>\n"
+"Language: eu\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "Erro fitxategi-sistemarentzat beharrezko MD array-ak:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Mesedez idatzi 'all', 'none', edo zuriunez bereziriko gailuen zerrenda, "
+"adibidez 'md0 md1' edo 'md/1 md/d0' (hasierako '/dev/' baztertu daiteke)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "barne erabilerako - deskribapen luzea bakarrik behar da."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Sistemaren erro fitxategi-sistema MD array (RAID) batetan kokaturik badago, "
+"berau abio sekuentziaren hasieran abiarazi behar da. MD batetan kokaturiko "
+"bolumen logiko (LVM) batetan badago osatzen duten array guztiak abiarazi "
+"behar dira."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Erro fitxategi-sistema erabiltzeko beharrezkoak diren arrayak zehazki jakin "
+"eta beste array-en abiaraztea abioaren beranduagoko puntu batetara atzeratu "
+"nahi baduzu, idatzi abiarazi beharreko array-ak hemen. Bestela idatzi "
+"'all' array erabilgarri guztiak abiarazteko."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Ez baduzu erro fitxategi sistemarako array-rik abiarazi behar, hutsik utzi "
+"ezazu (edo 'none' idatzi). Hau abioan array-rik behar ez duzulako edo "
+"kernel auto-abioa erabiltzen duzulako izan daiteke."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Errore bat gertatu da: gailu nodoa ez dago"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Errore bat gertatu da: ez da bloke gailu bat"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Errore bat gertatu da: ez da MD array bat"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr ""
+"Errore bat gertatu da: array-a ez dago mdadm.conf fitxategian zerrendaturik"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Abiarazi mdadm.conf fitxategian ez dauden array-ak?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"Zehazturiko (${array}) array-a ez dago (${config}) konfigurazio fitxategian "
+"zerrendaturik. Horregatik ezin da abioan abiarazi zuk konfigurazio "
+"fitxategia konpondu eta abio ramdiskoa bersortu arte."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Abisu hau abiarazi ahal izateko ramdisk-etik array-ak abiaraztea behar "
+"baduzu bakarrik da garrantzitsua. Kernel auto-abioa erabiltzen baduzu edo ez "
+"baduzu ramdisk-etik hasieran array-rik kargatzea behar aurrera jarraitu "
+"dezakezu. Bestela ez jarraitzea hautatu eta 'none' idatzi hasierako ramdisk-"
+"etik kargatu beharreko array-ez galdetzean."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr ""
+"Mdadm-ek hilabetero egin behar al du MD array-en erredundantzia egiaztapena?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Kernelak onartzen badu (2.6.14 baino bertsio berriagoak), mdadm-ek aldiro MD "
+"array-en (RAID-en) erredundantzia egiazta dezake. Hau errekurtso-"
+"behar handiko prozesu bat izan daiteke, konfigurazio lokalaren arabera, "
+"baina datu galera kasuak saihesten lagundu dezake. Kontutan izan errorerik "
+"aurkitzen ez bada irakurketa-soileko egiaztapen bat dela; errorerik "
+"aurkituko balitz mdadm konpontzen saiatuko da, honetarako euskarrian idaztea "
+"beharrezko izan daitekeelarik."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"Lehenespen bezala gaiturik dago, egiaztapena hilabete bakoitzeko lehenengo "
+"igandeko 01:06-etan egingo da."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "MD monitorizazio deabrua abiarazi nahi al duzu?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"MD (RAID) monitore deabruak eposta bidezko berri-emateak bidaltzen ditu "
+"gertaera garrantzitsuetan (disko erroreen antzerakoetan)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Aukera hau gaitzea gomendagarria da."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Eposta berri-emateen hartzailea:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Mesedez idatzi MD gertaera garrantzitsuen berri emate mezuak jaso behar "
+"dituen erabiltzailearen eposta helbidea."
diff --git a/debian/po/fi.po b/debian/po/fi.po
new file mode 100644
index 00000000..688baef0
--- /dev/null
+++ b/debian/po/fi.po
@@ -0,0 +1,208 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-02-14 11:24+0200\n"
+"Last-Translator: Esko Arajärvi <edu@iki.fi>\n"
+"Language-Team: Finnish <debian-l10n-finnish@lists.debian.org>\n"
+"Language: fi\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Poedit-Language: Finnish\n"
+"X-Poedit-Country: FINLAND\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "Juuritiedostojärjestelmän tarvitsemat MD-pakat:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Syötä ”all” (kaikki), ”none” (ei mitään) tai välilyönnein eroteltu lista "
+"laitteista, esimerkiksi ”md0 md1” tai ”md/1 md/d0” (edeltävä /dev/ voidaan "
+"jättää pois)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "vain sisäiseen käyttöön - vain pitkä kuvaus on tarpeellinen."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Jos järjestelmän juuritiedostojärjestelmä sijaitsee MD-levypakassa (RAID), "
+"pakka tulee käynnistää aikaisessa vaiheessa käynnistettäessä järjestelmää. "
+"Jos se sijaitsee loogisella taltiolla (LVM), joka on MD-pakassa, kaikki "
+"taltioon liittyvät pakat tulee käynnistää."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Jos tiedät tarkalleen mitä pakkoja tarvitaan juuritiedostojärjestelmän "
+"käynnistämiseen ja haluat viivästyttää muiden pakkojen käynnistystä, syötä "
+"käynnistettävät pakat tähän. Vaihtoehtoisesti voit käynnistää kaikki pakat "
+"syöttämällä ”all”."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Jos mitään pakkoja ei tarvitse käynnistää juuritiedostojärjestelmän "
+"käyttämiseksi, jätä kenttä tyhjäksi (tai syötä ”none”). Tämä voi olla "
+"tilanne, jos käytät ytimen autokäynnistystä tai et tarvitse mitään pakkoja "
+"käynnistykseen."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Tapahtui virhe: laitetiedostoa ei ole olemassa"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Tapahtui virhe: ei lohkolaite"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Tapahtui virhe: ei MD-pakka"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Tapahtui virhe: pakkaa ei ole listattu tiedostossa mdadm.conf"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Käynnistetäänkö pakat, joita ei ole listattu tiedostossa mdadm.conf?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"Annettua pakkaa (${array}) ei ole listattu asetustiedostossa (${config}). "
+"Niinpä sitä ei voida käynnistää käynnistettäessä järjestelmä, ellei "
+"asetustiedostoa korjata ja käynnistysmuistilevyä (initrd) luoda uudelleen."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Tämä varoitus on aiheellinen vain, jos järjestelmän käynnistäminen vaatii "
+"pakkojen käynnistämistä käynnistysmuistilevyltä. Jos ytimen autokäynnistys "
+"on käytössä tai pakkoja ei tarvita siinä vaiheessa, kun käynnistysmuistilevy "
+"ladataan, voit jatkaa. Vaihtoehtoisesti voit olla jatkamatta ja syöttää "
+"”none” kysyttäessä käynnistysmuistilevyltä käynnistettäviä pakkoja."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr "Tulisiko mdadm:n tarkistaa kuukausittain MD-pakkojen eheys?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Ohjelma mdadm voi säännöllisesti tarkistaa MD-pakkojen (RAIDien) tietojen "
+"monistuksen, jos ydin tukee tätä (versiosta 2.6.14 eteenpäin). Tämä prosessi "
+"voi paikallisesta kokoonpanosta riippuen kuluttaa paljon resursseja, mutta "
+"saattaa ehkäistä tietojen menetyksiä tietyissä harvinaisissa tapauksissa. "
+"Tarkistus vaatii vain tietojen lukemista, jos virheitä ei löyty. Jos "
+"virheitä löytyy, mdadm yrittää korjata ne, jolloin levylle saatetaan myös "
+"kirjoittaa."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"Oletuksena, jos tarkistus on käytössä, se tehdään kuukauden ensimmäisenä "
+"sunnuntaina kello 01.06."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Haluatko käynnistää MD-seurannan?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"MD-pakkoja (RAIDeja) seuraava taustaohjelma lähettää tietoja sähköpostiin "
+"tärkeiden MD-tapahtumien (kuten levyrikon) sattuessa."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Tämän valitseminen on suositeltavaa."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Sähköpostiviestien vastaanottaja:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Anna sähköpostiosoite, johon sähköpostitiedotteet tärkeistä MD-tapahtumista "
+"lähetetään."
diff --git a/debian/po/fr.po b/debian/po/fr.po
new file mode 100644
index 00000000..ce2459c4
--- /dev/null
+++ b/debian/po/fr.po
@@ -0,0 +1,222 @@
+# Translation of mdadm debconf templates to French
+# Copyright (C) 2008 Florentin Duneau <fduneau@gmail.com>
+# This file is distributed under the same license as the mdadm package.
+#
+#
+# Éric Madesclair <eric-m@wanadoo.fr>, 2005, 2006.
+# Jean-Luc Coulon (f5ibh) <jean-luc.coulon@wanadoo.fr>, 2006.
+# Florentin Duneau <fduneau@gmail.com>, 2006, 2007, 2008.
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-02-15 20:42+0100\n"
+"Last-Translator: Florentin Duneau <fduneau@gmail.com>\n"
+"Language-Team: French <debian-l10n-french@lists.debian.org>\n"
+"Language: fr\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n > 1);\n"
+"X-Generator: KBabel 1.11.4\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "Ensembles MD requis par le système de fichiers racine :"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Veuillez indiquer « all », « none » ou une liste de périphériques, séparés "
+"par des espaces, par exemple, « md0 md1 » ou « md/1 md/d0 » (vous pouvez "
+"omettre « /dev/ »)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr ""
+"Pour une utilisation interne - seule la description longue est nécessaire."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Si le système de fichiers racine se trouve sur un ensemble MD (RAID), il "
+"doit être lancé au début de la procédure de démarrage. Si le système de "
+"fichiers racine se trouve sur un volume logique (« LVM »), qui se trouve "
+"aussi sur un volume MD, tous les composants de l'ensemble doivent être "
+"démarrés."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Si vous savez exactement quels sont les ensembles RAID nécessaires au "
+"démarrage du système de fichiers racine et si vous souhaitez différer le "
+"démarrage de tous les autres ensembles, veuillez les indiquer ici. Vous "
+"pouvez aussi indiquer « all » pour démarrer tous les ensembles existants."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Si vous n'avez pas besoin ou ne souhaitez pas démarrer d'ensemble RAID pour "
+"le système de fichiers racine, veuillez laisser l'entrée vide (ou entrez "
+"« none »). Ceci peut être le cas si vous utilisez l'option de démarrage "
+"automatique (« autostart ») du noyau ou si vous n'avez besoin d'aucun "
+"ensemble pour démarrer."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Erreur : périphérique inconnu"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Erreur : ce n'est pas un périphérique en mode bloc"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Erreur : ce n'est pas un ensemble RAID"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Erreur : ensemble non mentionné dans le fichier mdadm.conf"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Faut-il démarrer les ensembles RAID non mentionnés dans mdadm.conf ?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"L'ensemble (${array}) que vous avez spécifié n'est pas mentionné dans le "
+"fichier de configuration ${config}. Il ne sera donc pas démarré à moins que "
+"vous corrigiez le fichier de configuration et que vous génériez de nouveau "
+"le disque mémoire initial (« ramdisk »)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Cet avertissement n'a de signification que si des ensembles RAID doivent "
+"être lancés à partir du disque mémoire initial afin de pouvoir démarrer le "
+"système. Si vous utilisez le démarrage automatique par le noyau, ou si vous "
+"n'avez pas besoin de lancer d'ensemble RAID depuis le disque mémoire "
+"initial, vous pouvez simplement poursuivre. Vous pouvez aussi choisir de ne "
+"pas poursuivre et entrer « none » lorsqu'il vous sera demandé le nom des "
+"ensembles RAID à démarrer à partir du disque mémoire initial."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr "Faut-il vérifier chaque mois la redondance des ensembles RAID ?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Si le noyau le gère (à partir de la version 2.6.14), mdadm peut vérifier "
+"périodiquement la redondance des ensembles RAID. Cette action peut demander "
+"beaucoup de ressources selon la configuration, mais cela aide à prévenir les "
+"rares cas de pertes de données. Notez que ce test est réalisé en lecture "
+"seule à moins que des erreurs ne soient rencontrées. Si des erreurs sont "
+"détectées, mdadm essayera de les corriger, ce qui entraînera des écritures "
+"sur le média."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"Par défaut, la vérification s'effectuera tous les premiers dimanches du mois "
+"à 01 h 06."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Faut-il démarrer le démon de surveillance MD ?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"Le démon de surveillance MD envoie des notifications par courriel lors "
+"d'importants événements MD (comme une panne de disque dur)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Il est recommandé d'activer cette option."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Destinataire des notifications par courriel :"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Veuillez indiquer l'adresse électronique de l'utilisateur qui doit recevoir "
+"les notifications lors d'importants événements MD."
diff --git a/debian/po/gl.po b/debian/po/gl.po
new file mode 100644
index 00000000..b454e818
--- /dev/null
+++ b/debian/po/gl.po
@@ -0,0 +1,210 @@
+# Galician translation of mdadm's debconf templates
+# This file is distributed under the same license as the mdadm package.
+# Jacobo Tarrio <jtarrio@debian.org>, 2007, 2008.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-02-06 23:45+0000\n"
+"Last-Translator: Jacobo Tarrio <jtarrio@debian.org>\n"
+"Language-Team: Galician <proxecto@trasno.net>\n"
+"Language: gl\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "Arrays MD necesarios para o sistema de ficheiros raíz:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Introduza \"all\" (todos), \"none\" (ningún) ou unha lista de dispositivos "
+"separados por espazos, tales coma \"md0 md1\" ou \"md/1 md/d0\" (pódese "
+"omitir o \"/dev/\" do principio)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "para uso interno - só se precisa da descrición longa."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Se o sistema de ficheiros raíz do sistema está ubicado nun array MD (RAID), "
+"hai que o iniciar no principio da secuencia de inicio. Se está ubicado nun "
+"volume lóxico (LVM) que está nun MD, hai que iniciar os arrays constituíntes."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Se sabe exactamente que arrays son necesarios para erguer o sistema de "
+"ficheiros raíz, e se quere pospor o inicio dos demáis arrays ata un punto "
+"posterior da secuencia de inicio, introduza aquí os arrays a iniciar. "
+"Alternativamente, introduza \"all\" para iniciar tódolos arrays dispoñibles."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Se non quere ou precisa de iniciar ningún array para o sistema de ficheiros "
+"raíz, deixe a resposta en branco (ou introduza \"none\"). Este pode ser o "
+"caso se está a empregar o autoinicio do núcleo ou non precisa de ningún "
+"array para o inicio."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Houbo un erro: o nodo do dispositivo non existe"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Houbo un erro: non é un dispositivo de bloques"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Houbo un erro: non é un array MD"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Houbo un erro: o array non figura no ficheiro mdadm.conf"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "¿Iniciar os arrays que non figuran en mdadm.conf?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"O array indicado (${array}) non figura no ficheiro de configuración "
+"(${config}). Polo tanto, non se pode arrincar no inicio do sistema, a menos "
+"que corrixa o ficheiro de configuración e volva crear o disco RAM inicial."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Este aviso só é relevante se precisa de iniciar arrays desde o disco RAM "
+"inicial para poder iniciar o sistema. Se emprega autoinicio do núcleo ou non "
+"precisa de iniciar arrays tan pronto como se cargue o disco RAM inicial, "
+"pode continuar. De xeito alternativo, escolla non continuar e introduza "
+"\"none\" cando se lle pregunte que arrays quere iniciar do disco RAM inicial."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr ""
+"¿Debería mdadm facer comprobacións mensuais de redundancia dos arrays MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Se o núcleo ten soporte para iso (en versións superiores á 2.6.14), mdadm "
+"pode facer comprobacións periódicas de redundancia dos arrays MD (RAIDs). "
+"Este pode ser un proceso intensivo en recursos, dependendo da configuración "
+"local, pero pode axudar a evitar algúns casos raros de perdas de datos. Teña "
+"en conta que esta é unha comprobación de só lectura a menos que se atopen "
+"erros; se se atopan erros, mdadm ha tratar de os arranxar, o que pode "
+"producir accesos de escritura aos soportes."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"A opción por defecto, se se activa, é facer as comprobacións o primeiro "
+"domingo de cada mes ás 01:06."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "¿Quere iniciar o servizo de monitorización de MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"O servizo de monitorización de MD (RAID) envía avisos por email en resposta "
+"a eventos importantes de MD (coma fallos nos discos)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Recoméndase activar esta opción."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Destinatario para os avisos por email:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Introduza o enderezo de email do usuario que debe recibir os avisos por "
+"email de eventos importantes de MD."
diff --git a/debian/po/it.po b/debian/po/it.po
new file mode 100644
index 00000000..c2d5285e
--- /dev/null
+++ b/debian/po/it.po
@@ -0,0 +1,213 @@
+# Italian (it) translation of debconf templates for mdadm
+# Copyright (C) 2008 Software in the Public Interest
+# This file is distributed under the same license as the mdadm package.
+# Luca Monducci <luca.mo@tiscali.it>, 2008.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm italian debconf\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-11-19 11:02+0100\n"
+"Last-Translator: Luca Monducci <luca.mo@tiscali.it>\n"
+"Language-Team: Italian <debian-l10n-italian@lists.debian.org>\n"
+"Language: it\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "Array MD necessari per il file system di root:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Inserire \"all\", \"none\" oppure un elenco dei device separati da uno "
+"spazio, per esempio \"md0 md1\" o \"md/1 md/d0\" (il \"/dev/\" iniziale può "
+"essere omesso)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "uso interno - è necessaria solo la descrizione lunga."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Se il file system di root è su un array MD (RAID), è necessario attivare "
+"tale array all'inizio della sequenza d'avvio. Se è su un volume logico "
+"(LVM), il quale è su un MD, è necessario attivare tutti gli array che "
+"costituiscono il volume."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Se si conoscono esattamente quali sono gli array da attivare per il file "
+"system di root e si vuole rimandare l'attivazione di tutti gli altri array a "
+"una fase successiva della sequenza d'avvio, inserire adesso gli array da "
+"attivare. In alternativa, inserire \"all\" per attivare tutti gli array "
+"disponibili."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Se non si ha bisogno o non si vuole attivare nessun array per il file system "
+"di root, lasciare la risposta in bianco (oppure inserire \"none\"). Questo "
+"potrebbe essere il caso se si utilizza l'attivazione automatica da kernel "
+"oppure se non si ha bisogno di alcun array per l'avvio."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Errore: il nodo del device non esiste"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Errore: non è un device a blocchi"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Errore: non è un array MD"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Errore: array non elencato nel file mdadm.conf"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Avviare gli array non elencati in mdadm.conf?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"L'array specificato (${array}) non è presente nel file di configurazione "
+"(${config}): quindi non può essere attivato durante l'avvio senza correggere "
+"il file di configurazione e ricreare il ramdisk iniziale."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Questo avviso è pertinente solo se è necessario attivare gli array dal "
+"ramdisk iniziale per permettere l'avvio. Con l'avvio automatico da kernel o "
+"se non è necessario attivare gli array così presto come al caricamento del "
+"ramdisk iniziale, si può proseguire. In alternativa, scegliere di non "
+"continuare e inserire \"none\" quando viene chiesto quali array attivare dal "
+"ramdisk iniziale."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr "Far eseguire a mdadm i controlli mensili di ridondanza sugli array MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Se il kernel lo supporta (tutte le versioni successive alla 2.6.14), mdadm può "
+"effettuare delle verifiche periodiche sulla ridondanza degli array MD "
+"(RAID). Questo è un processo che potrebbe richiedere molte risorse, in base "
+"alle impostazioni locali, ma può prevenire i rari casi di perdita di dati. "
+"Notare che questa verifica è di sola-lettura tranne quando riscontra degli "
+"errori; quando ci sono errori, mdadm prova a correggerli e potrebbe accedere "
+"in scrittura al supporto."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"Se attivo, la configurazione predefinita prevede che il controllo sia "
+"eseguito la prima domenica di ogni mese alle 01.06."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Avviare il demone di monitoraggio MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"Il demone di monitoraggio MD (RAID) invia delle notifiche via email quando "
+"si verificano eventi importanti (come la rottura di un disco)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Si raccomanda l'attivazione di questa funzione."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Destinatario delle email di notifica:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Inserire l'indirizzo email dell'utente che deve ricevere le notifiche di "
+"eventi importanti legati al MD."
diff --git a/debian/po/ja.po b/debian/po/ja.po
new file mode 100644
index 00000000..eeefe8ed
--- /dev/null
+++ b/debian/po/ja.po
@@ -0,0 +1,268 @@
+#
+# Translators, if you are not familiar with the PO format, gettext
+# documentation is worth reading, especially sections dedicated to
+# this format, e.g. by running:
+# info -n '(gettext)PO Files'
+# info -n '(gettext)Header Entry'
+#
+# Some information specific to po-debconf are available at
+# /usr/share/doc/po-debconf/README-trans
+# or http://www.debian.org/intl/l10n/po-debconf/README-trans
+#
+# Developers do not need to manually edit POT or PO files.
+#
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm 2.6.3+200709292116+4450e59-4\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-02-07 05:52+0900\n"
+"Last-Translator: Hideki Yamane (Debian-JP) <henrich@debian.or.jp>\n"
+"Language-Team: Japanese <debian-japanese@lists.debian.org>\n"
+"Language: ja\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "ルートファイルシステムに必要な MD アレイ:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"'all' または 'none'、あるいはデバイスのリストを 'md0 md1' や 'md/1 md/d0' の"
+"ようにスペースで区切って入力してください (前に付く '/dev/' は省略可能です)。"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "内部での利用について - でも、長い説明が必要です。"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"あなたのシステムのルートファイルシステムが MD アレイ (RAID) 上に配置されてい"
+"るならば、ブートシーケンスの初期段階で MD アレイを開始する必要があります。"
+"ルートファイルシステムが MD のような論理ボリューム (LVM) 上にある場合は、構成"
+"しているアレイ全ての開始が必要です。"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"どのアレイがルートファイルシステムの立ち上げに必要かを正確に知っており、ブー"
+"トシーケンスの後の時点まで意図しているもの以外全てのアレイ起動を遅らせたい場"
+"合、ここで最初に起動するアレイを入力してください。そうでない場合、'all' と入"
+"力して単に全ての利用可能なアレイを最初に立ち上げてください。"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"ルートファイルシステムのために、どのアレイも必要ない、あるいはどのアレイも起"
+"動したくは無いという場合は、空白のままに (あるいは 'none' と入力) してくださ"
+"い。これは、カーネルで自動的に起動される場合や起動時にはアレイは不要であると"
+"いう場合です。"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "エラーが発生しました: デバイスノードが存在しません"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "エラーが発生しました: ブロックデバイスではありません"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "エラーが発生しました: MD アレイではありません"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "エラーが発生しました: mdadm.conf ファイルに記述されていないアレイです"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "mdadm.conf に記述されていないアレイを起動しますか?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"指定したアレイ (${array}) は設定ファイル (${config}) に記述されていません。そ"
+"のため、設定ファイルを修正して initrd を再生成しなければブート時に起動できま"
+"せん。"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"この警告は、ブートできるようにアレイを initrd から起動する必要がある場合だけ"
+"関係します。カーネルで自動的にアレイを起動するようにしている場合、あるいは "
+"initrd がロードされる程早い段階でどのアレイも起動したくはない場合はそのまま続"
+"行できます。他の選択肢としては、起動の続行を中止し、どのアレイを initrd から"
+"起動するかを尋ねられた際に 'none' と入力します。"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr "mdadm は、毎月 MD アレイの冗長性チェックを行いますか?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"カーネルがサポートしている場合 (バージョン 2.6.14 以降)、mdadm は定期的に MD "
+"アレイ (RAID) の冗長性チェックをすることが可能です。これは、設定に依存します"
+"がリソースを集中的に使用する動作です。しかし、稀なデータ消失をあらかじめ避け"
+"るのに役立つでしょう。これは、エラーが見つからない限りは読み込みチェックのみ"
+"であるのに注意してください。エラーが発見された場合、mdadm は修正しようとし"
+"て、結果的にメディアへ書き込みを行います。"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"有効にした場合、デフォルトでは毎月第一日曜 01:06 にチェックが実行されます。"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "MD 監視デーモンを起動しますか?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"MD (RAID) 監視デーモンは、重大な MD 関連のイベント (ディスク障害など) に対し"
+"てメールで通知を送ります。"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "この機能を有効にするのをお勧めします。"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "メール通知の宛先:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"MD 関連の重大なイベントが発生した際、メールでの通知を受け取る必要があるユーザ"
+"のメールアドレスを入力してください。"
+
+#~ msgid ""
+#~ "WARNING! If you are using hard disks which have RAID superblocks from "
+#~ "earlier installations in different RAID arrays, you MUST zero each "
+#~ "superblock *before* activating the autostart feature."
+#~ msgstr ""
+#~ "警告! 以前のインストールによって他の RAID アレイに対する RAID superblock "
+#~ "を保持しているハードディスクを使っている場合、自動起動機能を有効にする"
+#~ "「前」に、その superblock をゼロで上書きすることが「必要」です。"
+
+#~ msgid ""
+#~ "To do this, do not start the RAID devices automatically. First, zero the "
+#~ "superblock (mdadm --zero-superblock /dev/mdX). Next, use `dpkg-"
+#~ "reconfigure mdadm` to reactivate the autostart feature."
+#~ msgstr ""
+#~ "これを行うには、RAID デバイスを自動的に起動してはいけません。まず、 "
+#~ "superblock をゼロで上書きします (mdadm --zero-superblock /dev/xxx)。 そし"
+#~ "て、自動起動機能を有効にするため、'dpkg-reconfigure mdadm' コマンドを実行"
+#~ "します。"
+
+#~ msgid ""
+#~ "You have the option to start all other arrays (those not needed for the "
+#~ "root filesystem) later in the boot sequence. Doing so will give you "
+#~ "greater control over the arrays with the mdadm configuration file. "
+#~ "Starting all arrays at boot-time may be safer though."
+#~ msgstr ""
+#~ "ブートシーケンスの後ろの方で (root ファイルシステムには必要ない) 他のアレ"
+#~ "イ全てを起動するという選択肢もあります。これを選べば、mdadm の設定ファイル"
+#~ "を使って、アレイについて様々な設定が出来るようになるでしょう。もっとも、起"
+#~ "動時に全てのアレイを起動するほうが安全ではあります。"
+
+#~ msgid ""
+#~ "If RAID devices are started automatically, all RAID devices are "
+#~ "discovered and assembled automatically at system startup. This option "
+#~ "should only be used if the md driver is compiled as a module. If it is "
+#~ "compiled into your kernel, the automatic startup will be performed at "
+#~ "boot time by the kernel and therefore you should not choose this option."
+#~ msgstr ""
+#~ "RAID デバイスが自動的に起動するようにすると、システム起動時に全ての RAID "
+#~ "デバイスが検出され、自動的に構成されます。このオプションは md ドライバがモ"
+#~ "ジュールとしてコンパイルされている場合のみに利用します。カーネルに組み込ん"
+#~ "でコンパイルしていた場合、システム起動時にカーネルによって自動起動が実行さ"
+#~ "れるので、このオプションでの選択はできません。"
+
+#~ msgid ""
+#~ "When the RAID monitor daemon runs, email notifications are sent when a "
+#~ "disk belonging to a RAID array fails or changes its status for some "
+#~ "reason."
+#~ msgstr ""
+#~ "RAID 監視デーモンが動作している場合、RAID アレイに属しているディスクが故障"
+#~ "するか何らかの理由で変化した際にメールで通知が送られます。"
diff --git a/debian/po/nl.po b/debian/po/nl.po
new file mode 100644
index 00000000..d3e5f3da
--- /dev/null
+++ b/debian/po/nl.po
@@ -0,0 +1,223 @@
+# translation of mdadm_2.6.3+200709292116+4450e59-4.po to Dutch
+#
+# Translators, if you are not familiar with the PO format, gettext
+# documentation is worth reading, especially sections dedicated to
+# this format, e.g. by running:
+# info -n '(gettext)PO Files'
+# info -n '(gettext)Header Entry'
+# Some information specific to po-debconf are available at
+# /usr/share/doc/po-debconf/README-trans
+# or http://www.debian.org/intl/l10n/po-debconf/README-trans
+# Developers do not need to manually edit POT or PO files.
+#
+# Frans Pop <aragorn@tiscali.nl>, 2005, 2006.
+# Frans Pop <elendil@planet.nl>, 2008.
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm_2.6.3+200709292116+4450e59-4\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-02-19 14:04+0100\n"
+"Last-Translator: Frans Pop <elendil@planet.nl>\n"
+"Language-Team: Dutch <debian-l10n-dutch@lists.debian.org>\n"
+"Language: nl\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Generator: KBabel 1.11.4\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "Voor het basisbestandssysteem benodigde RAID-reeksen:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Geef in 'all' (alle), 'none' (geen) of één of meerdere apparaatbestanden "
+"(gescheiden door spaties), bijvoorbeeld \"md0 md1\" of \"md/1 md/d0\" (de "
+"prefix '/dev/' kan dus worden weggelaten)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "Voor intern gebruik - alleen de lange omschrijving wordt gebruikt."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Als het basisbestandssysteem van uw systeem zich op een RAID-volume bevindt, "
+"dient dit vroeg in de opstartcyclus geactiveerd te worden. Als het zich op "
+"een logisch volume (LVM) op RAID bevindt, dienen alle betrokken reeksen "
+"geactiveerd te worden."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Als u precies weet welke reeksen benodigd zijn voor het basisbestandssysteem "
+"en u het activeren van alle overige reeksen wilt uitstellen tot later in de "
+"opstartprocedure, geef dan hier de te activeren reeksen in. Anders kunt u "
+"'all' ingeven om alle beschikbare reeksen te activeren."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Als u geen reeksen hoeft of wenst te activeren voor het "
+"basisbestandssysteem, laat dan het antwoord leeg (of geef 'none' in). Dit "
+"kan het geval zijn als u \"kernel autostart\" gebruikt of geen reeksen nodig "
+"heeft om uw systeem op te starten."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Er is een fout opgetreden: apparaatbestand bestaat niet"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Er is een fout opgetreden: geen blokapparaat"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Er is een fout opgetreden: geen RAID reeks"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Er is een fout opgetreden: reeks komt niet voor in bestand mdadm.conf"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Reeksen activeren die niet in mdadm.conf voorkomen?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"De reeks die u heeft opgegeven (${array}) komt niet voor in het "
+"configuratiebestand (${config}). Tenzij u het configuratiebestand corrigeert "
+"en de initiële ramdisk opnieuw aanmaakt, kan deze reeks tijdens het "
+"opstarten van het systeem niet worden geactiveerd."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Deze waarschuwing is alleen relevant als de reeksen geactiveerd moeten "
+"worden vanuit een initiële ramdisk om het systeem te kunnen opstarten. Als u "
+"\"kernel autostart\" gebruikt of er geen reeksen zijn die vanuit de initiële "
+"ramdisk gestart moeten worden, kunt u gewoon doorgaan. Kies anders nu om "
+"niet door te gaan en geef 'none' in bij de vraag welke reeksen vanuit de "
+"initiële ramdisk gestart moeten worden."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr "Maandelijkse redundantiecontrole van RAID-reeksen uitvoeren?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Als uw kernel dit ondersteunt (versies groter dan 2.6.14), kan mdadm "
+"periodiek de redundantie van uw RAID reeksen controleren. Afhankelijk van uw "
+"configuratie kan dit een intensief proces zijn, maar het kan wel helpen om "
+"uitzonderlijke gevallen van gegevensverlies te voorkomen. Zolang geen fouten "
+"worden gevonden, zal het proces alleen gegevens lezen; als echter wel fouten "
+"worden gevonden zal mdadm deze proberen te corrigeren, wat ertoe kan leiden dat er naar de media wordt geschreven."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"Standaard wordt de controle, indien geactiveerd, uitgevoerd om 01:06 op elke "
+"eerste zondag van de maand."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Wilt u de achtergronddienst voor de RAID-monitor starten?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"De achtergronddienst voor de RAID-monitor stuurt per e-mail berichten bij "
+"belangrijke gebeurtenissen die betrekking hebben op RAID (zoals een falende "
+"harde schijf)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Gebruik van deze optie wordt aanbevolen."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Adres voor e-mailberichten:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Wat is het e-mailadres van de gebruiker die de e-mailberichten voor "
+"belangrijke gebeurtenissen met betrekking tot RAID dient te ontvangen."
diff --git a/debian/po/pt.po b/debian/po/pt.po
new file mode 100644
index 00000000..e450997a
--- /dev/null
+++ b/debian/po/pt.po
@@ -0,0 +1,214 @@
+# Portuguese translation for mdadm debconf messages.
+# Copyright (C) 2008 Pedro Ribeiro <p.m42.ribeiro@gmail.com>
+# This file is distributed under the same license as the mdadm package.
+# Pedro Ribeiro <p.m42.ribeiro@gmail.com>, 2008
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm 2.6.3+200709292116+4450e59-4\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-02-21 00:15+0000\n"
+"Last-Translator: Pedro Ribeiro <p.m42.ribeiro@gmail.com>\n"
+"Language-Team: Portuguese <traduz@debianpt.org>\n"
+"Language: pt\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "Grupos MD necessários para o sistema de ficheiros raiz:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Por favor, introduza 'all', 'none', ou uma lista de dispositivos separados "
+"por espaços, tais como 'md0 md1' ou 'md/1 md/d0' (o '/dev/' inicial pode ser "
+"omitido)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "para uso interno - apenas a descrição longa é necessária."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Se o sistema de ficheiros de raiz do sistema estiver num grupo MD (RAID), "
+"necessita de ser iniciado mais cedo na sequência de arranque. Se o seu "
+"sistema de ficheiros de raiz estiver num volume lógico (LVM) que está no MD, "
+"todos os grupos constituintes necessitam de ser iniciados."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Se souber exactamente que grupos são necessários para iniciar o sistema de "
+"ficheiros raiz, e quiser adiar o início de todos os outros grupos para mais "
+"tarde no processo de arranque, introduza os grupos aqui. Alternativamente, "
+"introduza 'all' para iniciar todos os grupos disponíveis."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Se não necessita ou deseja iniciar grupos para o sistema de ficheiros raiz, "
+"deixe a resposta em branco (ou introduza 'none'). Isto vale no caso de usar "
+"o auto-arranque do kernel ou não necessitar de grupos para o arranque do "
+"sistema."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Ocorreu um erro: o nó do dispositivo não existe"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Ocorreu um erro: não é um dispositivo de bloco"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Ocorreu um erro: não é um grupo MD"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Ocorreu um erro: o grupo não está listado no ficheiro mdadm.conf"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Iniciar grupos não listados no mdadm.conf?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"O grupo especificado (${array}) não está listado no ficheiro de configuração "
+"(${config}). Portanto, não pode ser iniciado durante o processo de arranque, "
+"a não ser que corrija o ficheiro de configuração e recrie o ramdisk inicial."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Este aviso só é relevante se houver necessidade de iniciar grupos a partir "
+"do ramdisk durante o arranque do sistema. Se usar o auto-arranque do kernel, "
+"ou não necessitar de iniciar os grupos tão cedo no processo de arranque do "
+"sistema, pode simplesmente continuar. Em alternativa, escolha não continuar "
+"e introduza 'none' quando perguntado sobre quais grupos iniciar a partir do "
+"ramdisk inicial."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr ""
+"O mdadm deve correr verificações de redundância nos grupos MD mensalmente?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Se o kernel suportar (versões mais recentes que 2.6.14) o mdadm pode "
+"verificar periodicamente a redundância dos grupos MD (RAIDs). Isto pode ser "
+"um processo que requer muitos recursos, dependendo da sua configuração, mas "
+"pode prevenir casos raros de perda de dados. Notar que esta verificação é "
+"feita em modo de leitura a não ser que sejam encontrados erros; se forem "
+"encontrados erros, o mdadm tenta corrigi-los, o que pode resultar em "
+"acessos de escrita aos discos."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"O pré-definido, se ligado, é os testes serem executados no primeiro Domingo "
+"de cada mês às 01:06."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Quer iniciar o daemon de monitorização do MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"O daemon monitor MD (RAID) envia notificações por email no caso de eventos "
+"importantes (tais como falha de um disco). Provavelmente quer activar esta "
+"opção."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "É recomendado activar esta opção."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Destinatário de email para notificações:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Por favor, introduza o endereço de email do utilizador que deve receber as "
+"notificações de eventos MD importantes."
diff --git a/debian/po/pt_BR.po b/debian/po/pt_BR.po
new file mode 100644
index 00000000..118538b8
--- /dev/null
+++ b/debian/po/pt_BR.po
@@ -0,0 +1,340 @@
+#
+# Translators, if you are not familiar with the PO format, gettext
+# documentation is worth reading, especially sections dedicated to
+# this format, e.g. by running:
+# info -n '(gettext)PO Files'
+# info -n '(gettext)Header Entry'
+#
+# Some information specific to po-debconf are available at
+# /usr/share/doc/po-debconf/README-trans
+# or http://www.debian.org/intl/l10n/po-debconf/README-trans
+#
+# Developers do not need to manually edit POT or PO files.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2006-09-24 19:22-0300\n"
+"Last-Translator: Felipe Augusto van de Wiel (faw) <faw@cathedrallabs.org>\n"
+"Language-Team: l10n portuguese <debian-l10n-portuguese@lists.debian.org>\n"
+"Language: pt_BR\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"pt_BR utf-8\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+#, fuzzy
+#| msgid "MD arrays needed for the root filesystem:"
+msgid "MD arrays needed for the root file system:"
+msgstr "Dispositivos MD necessários para o sistema de arquivos raiz:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+#, fuzzy
+#| msgid ""
+#| "Please enter a space-separated list of devices, 'all', or 'none'. You may "
+#| "omit the leading '/dev/' and just enter e.g. \"md0 md1\", or \"md/1 md/"
+#| "d0\"."
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Por favor, informe uma lista separada por espaços dos dispositivos, 'all' ou "
+"'none'. Você pode omitir a parte inicial '/dev/' e apenas informar, por "
+"exemplo, \"md0 md1\", ou \"md/1 md/d0\"."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "para uso interno - apenas a descrição longa é necessária."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+#, fuzzy
+#| msgid ""
+#| "If your system has its root filesystem on an MD array (RAID), it needs to "
+#| "be started early during the boot sequence. If your root filesystem is on "
+#| "a logical volume (LVM), which is on MD, all constituent arrays need to be "
+#| "started."
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Se o seu sistema tem o sistema de arquivos raiz em um dispositivo MD (RAID), "
+"este precisa ser iniciado mais cedo durante a seqüência de inicialização. Se "
+"o sistema de arquivos raiz está em um volume lógico (LVM), que está em um "
+"MD, todos os dispositivos que o constituem precisam ser iniciados."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+#, fuzzy
+#| msgid ""
+#| "If you know exactly which arrays are needed to bring up the root "
+#| "filesystem, and you want to postpone starting all other arrays to a later "
+#| "point in the boot sequence, enter the arrays to start here. "
+#| "Alternatively, enter 'all' to simply start all available arrays."
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Se você sabe exatamente quais dispositivos são necessários para ativar o "
+"sistema de arquivos raiz, e você deseja adiar o início de todos os outros "
+"dispositivos para um ponto posterior na seqüência de inicialização, informe "
+"os dispositivos a serem iniciados aqui. Como alternativa, informe 'all' para "
+"simplesmente iniciar todos os dispositivos disponíveis."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+#, fuzzy
+#| msgid ""
+#| "If you do not need or want to start any arrays for the root filesystem, "
+#| "leave the answer blank (or enter 'none'). This may be the case if you are "
+#| "using kernel autostart or do not need any arrays to boot."
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Se você não precisa ou não quer iniciar quaisquer dispositivos para o "
+"sistema de arquivos raiz, deixe a resposta em branco (ou informe 'none'). "
+"Este pode ser o caso se você está usando \"kernel autostart\" ou não precisa "
+"de quaisquer dispositivos para a inicialização."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Um erro ocorreu: o dispositivo (\"device node\") não existe"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Um erro ocorreu: não é um dispositivo de blocos"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Um erro ocorreu: não é um dispositivo MD"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Um erro ocorreu: dispositivo não listado no arquivo mdadm.conf"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+#, fuzzy
+#| msgid "Proceed with starting arrays not listed in mdadm.conf?"
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Continuar com o início de dispositivos não listados no mdadm.conf?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+#, fuzzy
+#| msgid ""
+#| "The array you have specified (${array}) is not listed in the "
+#| "configuration file ${config}. Therefore it cannot be started during boot, "
+#| "unless you correct the configuration file and recreate the initial "
+#| "ramdisk."
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"O dispositivo que você especificou (${array}) não está listado no arquivo de "
+"configuração ${config}. Portanto não pode ser iniciado durante a "
+"inicialização, a menos que você corrija o arquivo de configuração e recrie o "
+"\"ramdisk\" inicial."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Este aviso só é relevante se você precisa de dispositivos que sejam "
+"iniciados a partir do \"ramdisk\" inicial para que seja possível inicializar "
+"o computador. Se você usa \"kernel autostarting\", ou não precisa de "
+"quaisquer dispositivos sendo iniciados tão logo o \"ramdisk\" inicial seja "
+"carregado, você pode simplesmente continuar. Alternativamente, escolha não "
+"continuar e informe 'none' quando perguntado quais dispositivos iniciar a "
+"partir do \"ramdisk\" inicial."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr ""
+"O mdadm deve, mensalmente, executar checagens de redundância dos "
+"dispositivos MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+#, fuzzy
+#| msgid ""
+#| "If your kernel supports it (>> 2.6.14), mdadm can periodically check the "
+#| "redundancy of your MD arrays (RAIDs). This may be a resource-intensive "
+#| "process, depending on your setup, but it could help prevent rare cases of "
+#| "data loss. Note that this is a read-only check unless errors are found; "
+#| "if errors are found, mdadm will try to correct them, which may result in "
+#| "write access to the media."
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Se o seu kernel suporta isto (>> 2.6.14), mdadm pode periodicamente checar a "
+"redundância dos seus dispositivos MD (RAIDs). Isto pode ser um processo com "
+"uso intensivo dos recursos, dependendo da sua configuração, mas pode ajudar "
+"a prevenir casos raros de perdas de dados. Note que esta é uma checagem "
+"somente-leitura a menos que erros sejam encontrados; se erros são "
+"encontrados, mdadm tentará corrigi-los, o que poderá resultar em acesso de "
+"escrita na mídia."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+#, fuzzy
+#| msgid ""
+#| "The default, if turned on, is to run the checks on the first Sunday of "
+#| "every month at 01:06 o'clock."
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"O padrão, se ativado, é executar checagens no primeiro Domingo de cada mês "
+"às 01:06 em ponto."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Você deseja iniciar o \"daemon\" de monitoramento MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+#, fuzzy
+#| msgid ""
+#| "The MD (RAID) monitor daemon sends email notifications in response to "
+#| "important MD events (such as a disk failure). You probably want to enable "
+#| "it."
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"O \"daemon\" de monitoramento MD (RAID) envia e-mails de notificações em "
+"resposta a eventos MD importantes (como uma falha de disco). Você "
+"provavelmente quer habilitar esta opção."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr ""
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Destinatário para os e-mails de notificações:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+#, fuzzy
+#| msgid ""
+#| "Please enter the email address of the user who should get the email "
+#| "notification for important MD events."
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Por favor, informe o endereço de e-mail do usuário que deverá receber os e-"
+"mails de notificações para estes eventos MD importantes."
+
+#~ msgid "Initialise the superblock if you reuse hard disks"
+#~ msgstr "Inicialize o superbloco caso você reutilize discos rígidos"
+
+#~ msgid ""
+#~ "WARNING! If you are using hard disks which have RAID superblocks from "
+#~ "earlier installations in different RAID arrays, you MUST zero each "
+#~ "superblock *before* activating the autostart feature."
+#~ msgstr ""
+#~ "AVISO! Se você estiver usando discos rígidos que já contêm superblocos "
+#~ "RAID de instalações anteriores em \"arrays\" RAID diferentes, você DEVE "
+#~ "zerar o superbloco *antes* de ativar o recurso de \"autostart\"."
+
+#~ msgid ""
+#~ "To do this, do not start the RAID devices automatically. First, zero the "
+#~ "superblock (mdadm --zero-superblock /dev/mdX). Next, use `dpkg-"
+#~ "reconfigure mdadm` to reactivate the autostart feature."
+#~ msgstr ""
+#~ "Para fazê-lo, não inicie os dispositivos RAID automaticamente. Primeiro, "
+#~ "zere os superblocos (mdadm --zero-superblock /dev/mdX). Em seguida, use "
+#~ "`dpkg-reconfigure mdadm` para reativar o recurso de \"autostart\"."
+
+#~ msgid ""
+#~ "You have the option to start all other arrays (those not needed for the "
+#~ "root filesystem) later in the boot sequence. Doing so will give you "
+#~ "greater control over the arrays with the mdadm configuration file. "
+#~ "Starting all arrays at boot-time may be safer though."
+#~ msgstr ""
+#~ "Você tem a opção de iniciar todos os \"arrays\" (aqueles que não são "
+#~ "necessários pelo sistema de arquivos raiz) posteriormente na seqüência de "
+#~ "inicialização. Fazendo isto, você terá um controle maior sobre os \"arrays"
+#~ "\" com o arquivo de configuração mdadm. No entanto, iniciar todos os "
+#~ "\"arrays\" durante a inicialização pode ser mais seguro."
+
+#~ msgid ""
+#~ "If RAID devices are started automatically, all RAID devices are "
+#~ "discovered and assembled automatically at system startup. This option "
+#~ "should only be used if the md driver is compiled as a module. If it is "
+#~ "compiled into your kernel, the automatic startup will be performed at "
+#~ "boot time by the kernel and therefore you should not choose this option."
+#~ msgstr ""
+#~ "Caso os dispositivos RAID sejam iniciados automaticamente, todos os "
+#~ "dispositivos RAID serão detectados e montados automaticamente na "
+#~ "inicialização do sistema operacional. Esta opção deverá ser usada somente "
+#~ "caso o driver md esteja compilado como módulo. Caso o mesmo esteja "
+#~ "compilado embutido em seu kernel, a inicialização automática será "
+#~ "executada em tempo de inicialização pelo próprio kernel e, portanto, você "
+#~ "não deverá e nem precisará escolher esta opção."
+
+#~ msgid ""
+#~ "When the RAID monitor daemon runs, email notifications are sent when a "
+#~ "disk belonging to a RAID array fails or changes its status for some "
+#~ "reason."
+#~ msgstr ""
+#~ "Quando o daemon monitorador RAID é executado, notificações via e-mail são "
+#~ "enviadas quando um disco pertencente a uma array RAID falha ou muda seu "
+#~ "status por qualquer razão."
+
+#~ msgid "Which user should get the email notification?"
+#~ msgstr "Qual usuário deve receber o e-mail de notificação ?"
diff --git a/debian/po/ru.po b/debian/po/ru.po
new file mode 100644
index 00000000..a4aff77d
--- /dev/null
+++ b/debian/po/ru.po
@@ -0,0 +1,224 @@
+# translation of ru.po to Russian
+#
+# Translators, if you are not familiar with the PO format, gettext
+# documentation is worth reading, especially sections dedicated to
+# this format, e.g. by running:
+# info -n '(gettext)PO Files'
+# info -n '(gettext)Header Entry'
+# Some information specific to po-debconf are available at
+# /usr/share/doc/po-debconf/README-trans
+# or http://www.debian.org/intl/l10n/po-debconf/README-trans
+# Developers do not need to manually edit POT or PO files.
+#
+# Yuri Kozlov <kozlov.y@gmail.com>, 2006, 2008.
+msgid ""
+msgstr ""
+"Project-Id-Version: 2.6.3+200709292116+4450e59-4\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-02-07 21:02+0300\n"
+"Last-Translator: Yuri Kozlov <kozlov.y@gmail.com>\n"
+"Language-Team: Russian <debian-l10n-russian@lists.debian.org>\n"
+"Language: ru\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Generator: KBabel 1.11.4\n"
+"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n"
+"%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "MD-массивы, необходимые для корневой файловой системы:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Введите список устройств через пробел, слово 'all' или 'none'. Вы можете не "
+"указывать начальную часть пути типа '/dev/', а просто вводить имена "
+"устройств, например 'md0 md1' или 'md/1 md/d0'."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr ""
+"для внутреннего пользования - нужно использовать только длинное описание."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Если в системе корневая файловая система расположена на MD-массиве (RAID), "
+"он должен быть запущен в самом начале процесса загрузки. Если корневая "
+"файловая система расположена на логическом томе (LVM), который расположен на "
+"MD, то должны быть запущены все составляющие массивы."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Если вы точно знаете, какие массивы требуются для получения работоспособной "
+"корневой файловой системы и хотите отложить запуск остальных массивов на "
+"более поздний момент в процессе загрузки, то введите их здесь. Иначе, "
+"введите слово 'all', чтобы просто запустить все доступные массивы."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Если вам это не нужно, или вы хотите запускать все массивы для корневой "
+"файловой системы, оставьте это поле пустым (или введите слово 'none'). Этот "
+"вариант подходит, если вы используете автоматический запуск из ядра или если "
+"для загрузки массивы ненужны."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Произошла ошибка: нода устройства не существует"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Произошла ошибка: устройство не является блочным"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Произошла ошибка: это не MD-массив"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Произошла ошибка: массив не описан в файле mdadm.conf"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Запустить массивы, неописанные в mdadm.conf?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"Указанный вами массив (${array}) не описан в конфигурационном файле "
+"(${config}). Поэтому он не может быть запущен при старте машины, пока вы не "
+"исправите конфигурационный файл и не пересоздадите первоначальный ramdisk."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Данное предупреждение уместно только, если вам требуется запускать массивы "
+"из первоначального ramdisk для загрузки машины. Если вы используете "
+"автоматический запуск из ядра или вам не нужны массивы для загрузки на этапе "
+"загрузки первоначального ramdisk, вы можете просто продолжить. Иначе, "
+"выберите не продолжать и введите 'none', когда вам предложат выбрать массивы "
+"для запуска из первоначального ramdisk."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr ""
+"Должен ли mdadm запускать ежемесячную проверку избыточности на MD-массивах?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Если это поддерживается ядром (>> 2.6.14), mdadm может периодически "
+"проверять избыточность MD массивов (RAID-ов). Это может стать ресурсоёмким "
+"процессом в зависимости от настройки, но он может помочь предотвратить "
+"редкие случаи потери данных. Заметим, что пока не обнаружено ошибок, работа "
+"ведётся в режиме только чтение; если обнаруживается ошибка, mdadm попытается "
+"исправить её, что может потребовать прав записи на носитель."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"Если ответить утвердительно, то по умолчанию проверка выполняется в первое "
+"воскресенье каждого месяца в 01:06."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Запускать демон-монитор MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"Демон-монитор MD (RAID) посылает почтовые уведомления в случае возникновения "
+"важных событий с MD (таких как отказ диска)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Рекомендуется ответить утвердительно."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Получатель уведомительных писем:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Введите адрес электронной почты пользователя, который будет получать "
+"почтовые уведомления о важных изменениях в состоянии MD."
diff --git a/debian/po/sk.po b/debian/po/sk.po
new file mode 100644
index 00000000..5d6d5a50
--- /dev/null
+++ b/debian/po/sk.po
@@ -0,0 +1,212 @@
+# Slovak translations for mdadm package
+# Slovenské preklady pre balík mdadm.
+# Copyright (C) 2011 THE mdadm'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the mdadm package.
+# Automatically generated, 2011.
+# Slavko <linux@slavino.sk>, 2011.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm 3.2.2-1\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2011-09-18 11:22+0200\n"
+"Last-Translator: Slavko <linux@slavino.sk>\n"
+"Language-Team: Slovak <debian-l10n-slovak@lists.debian.org>\n"
+"Language: sk\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "Polia MD, potrebné pre koreň súborového systému:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Prosím, zadajte „all”, „none” alebo medzerou oddelený zoznam zariadení, "
+"napr. „md0 md1” alebo „md/1 md/d0” (počiatočné „/dev/” môže byť vynechané)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "na interné použitie – potrebný je len dlhý popis."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Ak je koreň súborového systému umiestnený na poli MD (RAID), musí byť "
+"spustený počas zavádzania systému. Ak je koreň umiestnený na logickom zväzku "
+"(LVM), ktorý je na MD, musia byť spustené všetky súvisiace polia."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Ak viete presne, ktoré polia sú potrebné na pripojenie koreňa súborového "
+"systému a chcete odložiť štart všetkých ostatných polí na neskorší okamih "
+"zavádzania, zadajte tu polia, ktoré majú byť spustené. Alebo zadajte „all”, "
+"čím budú jednoducho spustené všetky dostupné polia."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Ak pre koreň súborového systému nepotrebujete alebo nechcete spúšťať žiadne "
+"polia, nechajte odpoveď prázdnu (alebo zadajte „none”). Tento prípad môže "
+"nastať, ak používate automatický štart polí priamo v jadre alebo "
+"nepotrebujete pri zavádzaní žiadne polia."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Nastala chyba: uzol zariadenia neexistuje"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Nastala chyba: nie je blokové zariadenie"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Nastala chyba: nie je pole MD"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Nastala chyba: pole nie je uvedené v súbore mdadm.conf"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Spustiť polia, ktoré nie sú uvedené v mdadm.conf?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"Zadané pole (${array}) nie je uvedené v konfiguračnom súbore (${config}), a "
+"preto nemôže byť spustené počas zavádzania, až kým neopravíte konfiguračný "
+"súbor a nevytvoríte nový počiatočný ramdisk (initrd)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Toto varovanie je dôležité, len ak potrebujete aby boli polia spúšťané z "
+"počiatočného ramdisku, aby boli dostupné počas zavádzania. Ak používate "
+"automatické spúšťanie polí priamo z jadra, alebo ak nepotrebujte aby boli "
+"polia spúšťané tak skoro (z počiatočného ramdisku), môžete prosto "
+"pokračovať. Alebo môžete zvoliť nepokračovať a odpovedať „none” na otázku, "
+"ktoré polia majú byť spúšťané z počiatočného ramdisku."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr "Má mdadm spúšťať mesačnú kontrolu redundancie polí MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Ak to jadro podporuje (verzie novšie ako 2.6.14), mdadm môže periodicky "
+"kontrolovať redundanciu polí MD (RAIDov). Tento proces môže byť (v "
+"závislosti od lokálneho nastavenia) náročný na zdroje systému, ale môže "
+"pomôcť pri predchádzaní vzácnym prípadom straty dát. Pamätajte, že, pokiaľ "
+"nie sú nájdené chyby, je to kontrola read-only, až keď sú nájdené chyby, "
+"pokúsi sa ich mdadm opraviť, čo môže mať za následok zápis na médium."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"Predvolene je vypnuté, ak túto možnosť zapnete, bude kontrola vykonávaná "
+"každú prvú nedeľu mesiaca o 01:06."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Chcete spustiť démona monitorovania MD?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"Monitorovací démon MD (RAID) posiela upozornenia emailom, ako reakcie na "
+"dôležité udalosti MD (napr. zlyhanie disku)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Povolenie tejto možnosti je odporúčané."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Príjemca emailových upozornení:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Prosím, zadajte emailovú adresu používateľa, ktorý má dostávať emailové "
+"upozornenia na dôležité udalosti MD."
diff --git a/debian/po/sv.po b/debian/po/sv.po
new file mode 100644
index 00000000..8257c8dd
--- /dev/null
+++ b/debian/po/sv.po
@@ -0,0 +1,219 @@
+# translation of mdadm_2.6.7-3_sv.po to Swedish
+# Translators, if you are not familiar with the PO format, gettext
+# documentation is worth reading, especially sections dedicated to
+# this format, e.g. by running:
+# info -n '(gettext)PO Files'
+# info -n '(gettext)Header Entry'
+# Some information specific to po-debconf are available at
+# /usr/share/doc/po-debconf/README-trans
+# or http://www.debian.org/intl/l10n/po-debconf/README-trans
+# Developers do not need to manually edit POT or PO files.
+#
+# Martin Ågren <martin.agren@gmail.com>, 2008.
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm_2.6.7-3_sv\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-07-23 18:34+0200\n"
+"Last-Translator: Martin Ågren <martin.agren@gmail.com>\n"
+"Language-Team: Swedish <debian-l10n-swedish@lists.debian.org>\n"
+"Language: sv\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Generator: KBabel 1.11.4\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "MD-kedjor som behövs för rotfilsystemet:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Ange \"all\", \"none\" eller en blankstegsseparerad lista på enheter, såsom "
+"\"md0 md1\" eller \"md/1 md/d0\" (det inledande \"/dev/\" kan uteslutas)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "för intern användning - endast den långa beskrivningen behövs."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Om ditt system har sitt rotfilsystem på en MD-kedja (RAID) behöver den "
+"startas upp tidigt under uppstartssekvensen. Om ditt rotfilsystem finns på "
+"en logisk volym (LVM), vilket är på MD, behöver alla bestående kedjor "
+"startas."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Om du vet exakt vilka kedjor som behövs för att ta upp rotfilsystemet, och "
+"du vill skjuta upp uppstarten för alla andra kedjor till en senare tidspunkt "
+"i uppstartssekvensen, ange vilka kedjor som ska starta här. Alternativt, "
+"ange \"all\" för att helt enkelt starta alla tillgängliga kedjor."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Om du inte behöver eller vill starta några kedjor för rotfilsystemet, lämna "
+"svaret blankt (eller ange \"none\"). Detta kan vara fallet om du använder "
+"kärnans automatstart eller inte behöver några kedjor för att starta upp."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Ett fel inträffade: enhetsnoden finns inte"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Ett fel inträffade: inte en blockenhet"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Ett fel inträffade: inte en MD-kedja"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Ett fel inträffade: kedjan är inte listad i filen mdadm.conf"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr "Starta kedjor som inte är listade i mdadm.conf?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"Kedjan du har angivit (${array}) är inte listad i konfigurationsfilen "
+"(${config}). Därför kan den inte startas under systemets uppstart, såvida du "
+"inte rättar till konfigurationsfilen och återskapar den initiala ramdisken."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Den här varningen är endast relevant om du behöver kedjor som ska startas "
+"från den initiala ramdisken för att kunna starta upp systemet. Om du "
+"använder kärnans automatstart, eller inte behöver starta några kedjor så "
+"tidigt som när den initiala ramdisken läses in, kan du helt enkelt fortsätta. "
+"Alternativt, välj att inte fortsätta och ange \"none\" när frågan om vilka "
+"kedjor som ska startas från den initiala ramdisken ställs."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr "Ska mdadm köra månatliga redundanskontroller av MD-kedjorna?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Om din kärna har stöd för det (versioner senare än 2.6.14), kan mdadm "
+"periodvis kontrollera redundansen för dina MD-kedjor (RAID). Det här kan "
+"vara en resurskrävande process, beroende på din konfiguration, men den kan "
+"hjälpa till att förhindra ovanliga fall av dataförluster. Observera att det "
+"är en skrivskyddad kontroll såvida inte fel påträffas; om fel hittas kommer "
+"mdadm försöka att rätta till dem, vilket kan leda till skrivåtkomst till "
+"mediet."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"Standardvärdet, om påslagen, är att kontrollera på den första söndagen i "
+"varje månad klockan 01.06."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Vill du starta MD-övervakningsdemonen?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"MD-övervakningsdemonen (RAID) skickar e-postnotifieringar för viktiga MD-"
+"händelser (såsom ett diskfel)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Att aktivera denna funktion rekommenderas."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Mottagare av e-postnotifieringar:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Ange e-postadressen till den användare som ska ta emot e-postnotifieringar "
+"för dessa viktiga MD-händelser."
diff --git a/debian/po/templates.pot b/debian/po/templates.pot
new file mode 100644
index 00000000..beb000aa
--- /dev/null
+++ b/debian/po/templates.pot
@@ -0,0 +1,176 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr ""
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr ""
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr ""
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr ""
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr ""
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr ""
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr ""
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr ""
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr ""
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr ""
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr ""
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
diff --git a/debian/po/vi.po b/debian/po/vi.po
new file mode 100644
index 00000000..0e4933ca
--- /dev/null
+++ b/debian/po/vi.po
@@ -0,0 +1,215 @@
+# Vietnamese Translation for mdadm.
+# Copyright © 2008 Free Software Foundation, Inc.
+# Clytie Siddall <clytie@riverland.net.au>, 2005-2008.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: mdadm 2.6.3+200709292116+4450e59-4\n"
+"Report-Msgid-Bugs-To: mdadm@packages.debian.org\n"
+"POT-Creation-Date: 2012-05-22 00:50+0100\n"
+"PO-Revision-Date: 2008-02-23 17:40+1030\n"
+"Last-Translator: Clytie Siddall <clytie@riverland.net.au>\n"
+"Language-Team: Vietnamese <vi-VN@googlegroups.com>\n"
+"Language: vi\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=1; plural=0;\n"
+"X-Generator: LocFactoryEditor 1.7b3\n"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid "MD arrays needed for the root file system:"
+msgstr "Các mảng MD cần thiết cho hệ thống tập tin gốc:"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:2001
+msgid ""
+"Please enter 'all', 'none', or a space-separated list of devices such as "
+"'md0 md1' or 'md/1 md/d0' (the leading '/dev/' can be omitted)."
+msgstr ""
+"Hãy nhập « all » (tất cả), « none » (không có), hoặc một danh sách các thiết "
+"bị định giới bằng dấu cách như « md0 md1 » hoặc « md/1 md/d0 » (có thể bỏ "
+"sót phần « /dev/ » đi trước)."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid "for internal use - only the long description is needed."
+msgstr "để sử dụng nội bộ — chỉ cần thiết mô tả dài."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If the system's root file system is located on an MD array (RAID), it needs "
+"to be started early during the boot sequence. If it is located on a logical "
+"volume (LVM), which is on MD, all constituent arrays need to be started."
+msgstr ""
+"Nếu hệ thống tập tin gốc của hệ thống nằm trên một mảng MD (RAID) thì cần "
+"phải khởi chạy nó sớm trong tiến trình khởi động. Nếu nó nằm trên một khối "
+"tin hợp lý (LVM) mà lần lượt nằm trên một MD thì cần phải khởi chạy tất cả "
+"các mảng thành phần."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you know exactly which arrays are needed to bring up the root file "
+"system, and you want to postpone starting all other arrays to a later point "
+"in the boot sequence, enter the arrays to start here. Alternatively, enter "
+"'all' to simply start all available arrays."
+msgstr ""
+"Nếu bạn biết chính xác những mảng nào cần thiết để kích hoạt hệ thống tập "
+"tin gốc, và bạn muốn hoãn việc khởi chạy các mảng khác tới một điểm sau "
+"trong dãy khởi động, hãy nhập vào đây các mảng cần khởi chạy. Hoặc nhập « "
+"all » (tất cả) để khởi chạy đơn giản tất cả các mảng sẵn sàng. "
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:3001
+msgid ""
+"If you do not need or want to start any arrays for the root file system, "
+"leave the answer blank (or enter 'none'). This may be the case if you are "
+"using kernel autostart or do not need any arrays to boot."
+msgstr ""
+"Nếu bạn không cần hoặc muốn khởi chạy mảng nào cho hệ thống tập tin gốc, hãy "
+"bỏ trống câu trả lời này (hoặc nhập « none » [không có]). Trường hợp này có "
+"thể xảy ra nếu bạn sử dụng khả năng tự động khởi động hạt nhân (kernel "
+"autostart), hoặc không cần mảng nào để khởi động máy tính."
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:4001
+msgid "An error occurred: device node does not exist"
+msgstr "Gặp lỗi: nút thiết bị không tồn tại"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:5001
+msgid "An error occurred: not a block device"
+msgstr "Gặp lỗi: không phải là một thiết bị khối"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:6001
+msgid "An error occurred: not an MD array"
+msgstr "Gặp lỗi: không phải là một mảng MD"
+
+#. Type: text
+#. Description
+#: ../mdadm.templates:7001
+msgid "An error occurred: array not listed in mdadm.conf file"
+msgstr "Gặp lỗi: mảng không được liệt kê trong tập tin cấu hình « mdadm.conf »"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid "Start arrays not listed in mdadm.conf?"
+msgstr ""
+"Khởi chạy các mảng không được liệt kê trong tập tin cấu hình « mdadm.conf » "
+"không?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"The specified array (${array}) is not listed in the configuration file "
+"(${config}). Therefore, it cannot be started during boot, unless you correct "
+"the configuration file and recreate the initial ramdisk."
+msgstr ""
+"Mảng bạn đã xác định (${array}) không được liệt kê trong tập tin cấu hình "
+"${config}. Vì vậy nó không thể được khởi chạy trong khi khởi động, nếu bạn "
+"không sửa tập tin cấu hình và tạo lại đĩa RAM đầu tiên."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:8001
+msgid ""
+"This warning is only relevant if you need arrays to be started from the "
+"initial ramdisk to be able to boot. If you use kernel autostarting, or do "
+"not need any arrays to be started as early as the initial ramdisk is loaded, "
+"you can simply continue. Alternatively, choose not to continue and enter "
+"'none' when prompted which arrays to start from the initial ramdisk."
+msgstr ""
+"Cảnh báo này chỉ là thích hợp nếu bạn cần thiết mảng được khởi chạy từ đĩa "
+"RAM đầu tiên, để có thể khởi động được. Nếu bạn sử dụng khả năng tự động "
+"khởi chạy hạt nhân (kernel autostart), hoặc không cần mảng nào được khởi "
+"chạy một khi nạp đĩa RAM đầu tiên, bạn đơn giản có thể tiếp tục lại. Hoặc "
+"chọn không tiếp tục, và nhập « none » (không có) khi được nhắc nhập những "
+"mảng nào cần khởi chạy từ đĩa RAM đầu tiên."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid "Should mdadm run monthly redundancy checks of the MD arrays?"
+msgstr ""
+"mdadm có nên chạy việc kiểm tra thừa hàng tháng trên những mảng MD không?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"If the kernel supports it (versions greater than 2.6.14), mdadm can "
+"periodically check the redundancy of MD arrays (RAIDs). This may be a "
+"resource-intensive process, depending on the local setup, but it could help "
+"prevent rare cases of data loss. Note that this is a read-only check unless "
+"errors are found; if errors are found, mdadm will try to correct them, which "
+"may result in write access to the media."
+msgstr ""
+"Nếu hạt nhân có phải hỗ trợ (các phiên bản sau 2.6.14) thì mdadm có thể kiểm "
+"tra theo chu kỳ tình thừa của các mảng MD (RAID). Tiến trình này có thể "
+"chiếm nhiều tài nguyên hệ thống, phụ thuộc vào thiết lập cục bộ, nhưng nó có "
+"thể giúp ngăn cản trường hợp mất dữ liệu (ít có). Ghi chú rằng việc kiểm tra "
+"này là chỉ đọc: gặp lỗi thì mdadm sẽ thử sửa chữa, mà có thể gây ra truy cập "
+"ghi vào vật chứa."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:10001
+msgid ""
+"The default, if turned on, is to check on the first Sunday of every month at "
+"01:06."
+msgstr ""
+"Giá trị mặc định, nếu được bật, là chạy những việc kiểm tra vào ngày hôm Chủ "
+"Nhật thứ nhất của mỗi tháng, vào lúc 01:06 giờ (giờ ti)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Do you want to start the MD monitoring daemon?"
+msgstr "Bạn có muốn khởi chạy trình nền theo dõi MD không?"
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid ""
+"The MD (RAID) monitor daemon sends email notifications in response to "
+"important MD events (such as a disk failure)."
+msgstr ""
+"Trình nền theo dõi MD (RAID) gửi thư thông báo hưởng ứng dữ kiện MD quan "
+"trọng (v.d. đĩa bị hỏng)."
+
+#. Type: boolean
+#. Description
+#: ../mdadm.templates:11001
+msgid "Enabling this option is recommended."
+msgstr "Khuyên bạn hiệu lực tùy chọn này."
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid "Recipient for email notifications:"
+msgstr "Người nhận thư thông báo :"
+
+#. Type: string
+#. Description
+#: ../mdadm.templates:12001
+msgid ""
+"Please enter the email address of the user who should get the email "
+"notifications for important MD events."
+msgstr ""
+"Hãy nhập địa chỉ thư của người dùng nên nhận thư thông báo về dữ kiện MD "
+"quan trọng."
diff --git a/debian/presubj b/debian/presubj
new file mode 100644
index 00000000..103208df
--- /dev/null
+++ b/debian/presubj
@@ -0,0 +1,32 @@
+Reporting bugs against mdadm
+============================
+Before reporting bugs against mdadm, please read the README documents, as well
+as the FAQ in /usr/share/doc/mdadm . Most issues that are reported against the
+mdadm package are adequately answered therein.
+
+In particular, please do not file bugs about mdadm assembling arrays too
+early, e.g. when the driver is not yet ready and device nodes do not exist.
+Check FAQ item 27 about use of the rootdelay parameter instead.
+
+Also, please check out http://bugs.debian.org/mdadm and make sure that the
+issue you are facing has not already been reported.
+
+If you are not sure that the answer you are seeking is in those files, or you
+are not sure that you are facing a genuine bug, please approach
+debian-user@lists.debian.org or linux-raid@vger.kernel.org with your
+question(s).
+
+Gathering information relevant to mdadm as root
+===============================================
+If you are not reporting bugs as root (which you should not), you will be
+prompted to give permission to run a script to collect relevant information
+from your system as the root user. Only the root user has access to some
+information that might be relevant to the bug report you are about to file.
+
+** Please give permission to run the script as root when asked momentarily.
+
+If you would rather obtain the same information manually, you can run
+ /usr/share/bug/mdadm/script 3>&1
+as root and include or attach the output.
+
+ -- martin f. krafft <madduck@debian.org> Mon, 20 Jul 2009 15:02:48 +0200
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 00000000..7b21d228
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,101 @@
+#!/usr/bin/make -f
+# Copyright © 2001-2005 Mario Jou/3en <joussen@debian.org>
+# Copyright © 2005-2008 Martin F. Krafft <madduck@debian.org>
+# Distributable under the terms of the GNU GPL version 2.
+#
+
+#export DH_VERBOSE=1
+
+# Compiler/linker flags are queried from dpkg-buildflags.  CXFLAGS carries
+# both CFLAGS and CPPFLAGS; BUILDFLAGS bundles the settings that are passed
+# on the $(MAKE) command lines below.
+LDFLAGS = $(shell dpkg-buildflags --get LDFLAGS)
+CXFLAGS = $(shell dpkg-buildflags --get CFLAGS) \
+ $(shell dpkg-buildflags --get CPPFLAGS)
+BUILDFLAGS = CXFLAGS="$(CXFLAGS)" LDFLAGS="$(LDFLAGS)" DEBIAN=yes
+# Staging trees populated by the install-arch recipe: one under debian/mdadm
+# and a sibling with the -udeb suffix.
+DESTDIR = $(CURDIR)/debian/mdadm
+DESTDIR_UDEB = $(DESTDIR)-udeb
+
+# build-arch depends on two stamp files: build-stamp (regular build, below)
+# and udeb/build-stamp (separate udeb build, further down).
+build-arch: build-stamp
+
+# Regular build: runs the upstream "all" target with the Debian config file
+# locations, then touches the stamp so the build is not repeated.
+build-stamp:
+ dh_testdir
+ $(MAKE) all $(BUILDFLAGS) CONFFILE=/etc/mdadm/mdadm.conf CONFFILE2=/etc/mdadm.conf
+ touch $@
+
+# udeb rules should go, the only diff is the conffile location,
+# and d-i specifies path explicitly when needed
+
+# Create a scratch udeb/ directory containing hard links to the C sources,
+# headers and the Makefile so the udeb variant can be built separately.
+udeb/dir-stamp:
+ rm -rf udeb
+ mkdir udeb
+ ln *.[ch] Makefile udeb/
+ touch $@
+
+build-arch: udeb/build-stamp
+
+# udeb build: only mdadm and mdmon are built, with CONFFILE=/tmp/mdadm.conf.
+udeb/build-stamp: udeb/dir-stamp
+ dh_testdir
+ $(MAKE) -C udeb mdadm mdmon $(BUILDFLAGS) CONFFILE=/tmp/mdadm.conf
+ touch $@
+
+# Remove everything the build produced: the build stamp, the upstream build
+# output (via "make clean"), the udeb scratch directory and debhelper state.
+clean:
+ dh_testdir
+ rm -f build-stamp
+ $(MAKE) clean
+ rm -rf udeb
+ dh_clean
+
+# Stage the built files: run the upstream install and install-systemd targets
+# into $(DESTDIR), add the Debian-specific files (initramfs hook and script,
+# modprobe configuration, mkconf, checkarray, bug-reporting script and
+# presubj), and copy the udeb binaries plus one udev rules file into
+# $(DESTDIR_UDEB).
+install-arch: build-arch
+ dh_testdir
+ dh_clean -k
+ dh_installdirs
+
+ $(MAKE) install install-systemd DESTDIR=$(DESTDIR)
+
+ mkdir -p $(DESTDIR)/etc/mdadm
+ install -Dm0755 debian/initramfs/hook \
+ $(DESTDIR)/usr/share/initramfs-tools/hooks/mdadm
+ install -Dm0755 debian/initramfs/script.local-top \
+ $(DESTDIR)/usr/share/initramfs-tools/scripts/local-top/mdadm
+ install -Dm0644 debian/mdadm.modules \
+ $(DESTDIR)/etc/modprobe.d/mdadm.conf
+
+ install -Dm0755 debian/mkconf $(DESTDIR)/usr/share/mdadm/mkconf
+ install -Dm0755 debian/checkarray $(DESTDIR)/usr/share/mdadm/checkarray
+ install -Dm0755 debian/bugscript $(DESTDIR)/usr/share/bug/mdadm/script
+ install -Dm0644 debian/presubj $(DESTDIR)/usr/share/bug/mdadm/presubj
+
+ install -Dm0755 udeb/mdadm $(DESTDIR_UDEB)/sbin/mdadm
+ install -Dm0755 udeb/mdmon $(DESTDIR_UDEB)/sbin/mdmon
+ install -Dm0644 udev-md-raid-arrays.rules $(DESTDIR_UDEB)/lib/udev/rules.d/63-md-raid-arrays.rules
+
+# Turn the staged trees into binary packages by running the debhelper tools
+# in sequence.  Three init scripts are registered (mdadm-raid, mdadm-waitidle
+# and the default mdadm script); for mdadm-waitidle and mdadm a systemd unit
+# name is linked to /dev/null.
+# NOTE(review): the /dev/null links presumably mask those units under systemd
+# so the init scripts are not run there - confirm against the systemd units
+# installed by "install-systemd".
+binary-arch: install-arch
+ dh_testdir
+ dh_testroot
+ dh_installdebconf
+ dh_installdocs
+ dh_installexamples -pmdadm mdadm.conf-example misc/syslog-events
+ dh_installinit --init-script=mdadm-raid --no-restart-on-upgrade -- start 25 S . start 60 0 6 .
+ dh_installinit --init-script=mdadm-waitidle --no-start -- stop 98 0 6 .
+ dh_link -pmdadm /dev/null /lib/systemd/system/mdadm-waitidle.service
+ dh_installinit -- defaults 25
+ dh_link -pmdadm /dev/null /lib/systemd/system/mdadm.service
+ dh_installman
+ dh_installcron
+ dh_installchangelogs ChangeLog
+ dh_installlogcheck
+ dh_link
+ dh_strip
+ dh_compress
+ dh_fixperms
+ dh_installdeb
+ dh_shlibdeps
+ dh_gencontrol
+ dh_md5sums
+ dh_builddeb
+
+# Generic entry points simply forward to the -arch targets; the -indep
+# targets are intentionally empty (they have no prerequisites and no recipe).
+build: build-arch
+install: install-arch
+binary: binary-arch
+build-indep:
+install-indep:
+binary-indep:
+# None of these targets names a real file.
+.PHONY: clean build build-indep build-arch binary binary-indep binary-arch install install-indep install-arch
diff --git a/debian/source/format b/debian/source/format
new file mode 100644
index 00000000..163aaf8d
--- /dev/null
+++ b/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/debian/watch b/debian/watch
new file mode 100644
index 00000000..1b1172a0
--- /dev/null
+++ b/debian/watch
@@ -0,0 +1,2 @@
+version=3
+http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-([.[:digit:]]+).tar.gz
diff --git a/dlink.c b/dlink.c
new file mode 100644
index 00000000..3efa94b7
--- /dev/null
+++ b/dlink.c
@@ -0,0 +1,74 @@
+
+/* doubly linked lists */
+/* This is free software. No strings attached. No copyright claimed */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef __dietlibc__
+char *strncpy(char *dest, const char *src, size_t n) __THROW;
+#endif
+void *xcalloc(size_t num, size_t size);
+#include "dlink.h"
+
+/*
+ * Create a new, empty list.
+ *
+ * Allocates a header-only element (zero bytes of payload) and links it to
+ * itself in both directions, which is how an empty list is represented.
+ * Returns the handle of the new list head as produced by dl_alloc().
+ *
+ * The parameter list is spelled (void) so that this definition is a real
+ * prototype and agrees with the declaration in dlink.h.
+ */
+void *dl_head(void)
+{
+	void *h = dl_alloc(0);
+
+	dl_next(h) = h;
+	dl_prev(h) = h;
+	return h;
+}
+
+/*
+ * Release a list element.
+ *
+ * The pointer callers hold sits directly after a struct __dl_head, so step
+ * back by one header to recover the address that is handed to free().
+ */
+void dl_free(void *v)
+{
+	struct __dl_head *hdr = (struct __dl_head *)v - 1;
+
+	free(hdr);
+}
+
+/*
+ * Make element v a self-contained ring: both of its links are set to
+ * point back at v itself.
+ */
+void dl_init(void *v)
+{
+	dl_prev(v) = v;
+	dl_next(v) = v;
+}
+
+/*
+ * Link val into the list immediately after head.
+ *
+ * val's own links are filled in first; the two neighbours are then
+ * redirected to val.
+ */
+void dl_insert(void *head, void *val)
+{
+	dl_prev(val) = head;
+	dl_next(val) = dl_next(head);
+
+	/* dl_prev(val) is head at this point, so this rewires head's next. */
+	dl_next(head) = val;
+	dl_prev(dl_next(val)) = val;
+}
+
+/*
+ * Link val into the list immediately before head.
+ *
+ * val's own links are filled in first; the two neighbours are then
+ * redirected to val.
+ */
+void dl_add(void *head, void *val)
+{
+	dl_next(val) = head;
+	dl_prev(val) = dl_prev(head);
+
+	dl_next(dl_prev(val)) = val;
+	/* dl_next(val) is head at this point, so this rewires head's prev. */
+	dl_prev(head) = val;
+}
+
+/*
+ * Unlink val from whatever list it is on.
+ *
+ * An element whose prev or next link is null is treated as not being on
+ * a list and is left untouched.  After unlinking, both of val's links are
+ * cleared, so calling dl_del() on it again does nothing.
+ */
+void dl_del(void *val)
+{
+	void *before = dl_prev(val);
+	void *after = dl_next(val);
+
+	if (before == NULL || after == NULL)
+		return;
+
+	/* Join the two neighbours to each other, bypassing val. */
+	dl_prev(after) = before;
+	dl_next(before) = after;
+
+	dl_next(val) = NULL;
+	dl_prev(val) = NULL;
+}
+
+/*
+ * Copy at most l characters of s into a freshly allocated list element.
+ *
+ * The element is l+1 characters long and the result is always
+ * NUL-terminated.  Returns NULL when s is NULL; otherwise returns the new
+ * element, which the caller releases with dl_free().
+ */
+char *dl_strndup(char *s, int l)
+{
+	char *copy;
+
+	if (!s)
+		return NULL;
+
+	copy = dl_newv(char, l+1);
+	strncpy(copy, s, l);
+	/* strncpy() does not terminate the copy when s has l or more chars. */
+	copy[l] = '\0';
+	return copy;
+}
+
+/*
+ * Copy the whole string s into a freshly allocated list element.
+ *
+ * Returns NULL when s is NULL, matching dl_strndup(); the check has to be
+ * made here because strlen() must not be called on a null pointer.
+ * Otherwise returns the new element, which the caller releases with
+ * dl_free().
+ */
+char *dl_strdup(char *s)
+{
+	if (s == NULL)
+		return NULL;
+	return dl_strndup(s, (int)strlen(s));
+}
diff --git a/dlink.h b/dlink.h
new file mode 100644
index 00000000..ab2a9459
--- /dev/null
+++ b/dlink.h
@@ -0,0 +1,25 @@
+
+/* doubly linked lists */
+/* This is free software. No strings attached. No copyright claimed */
+
+/*
+ * Link header of a list element.  dl_alloc() places one of these directly
+ * in front of the payload it returns, and dl_next()/dl_prev() reach it by
+ * indexing [-1] from the payload pointer.
+ */
+struct __dl_head
+{
+ void * dh_prev;
+ void * dh_next;
+};
+
+/*
+ * dl_alloc(size): allocate a list element with `size` bytes of payload.
+ * A struct __dl_head is allocated in front of the payload and the returned
+ * pointer addresses the payload, not the header.  The memory comes from
+ * xcalloc(), which is declared by the including file.
+ */
+#define dl_alloc(size) ((void*)(((char*)xcalloc(1,(size)+sizeof(struct __dl_head)))+sizeof(struct __dl_head)))
+/* dl_new(t): allocate one list element whose payload is a single t. */
+#define dl_new(t) ((t*)dl_alloc(sizeof(t)))
+/*
+ * dl_newv(t,n): allocate one list element whose payload is an array of n t's.
+ * n is parenthesized so that an argument such as l+1 is multiplied as a
+ * whole instead of expanding to sizeof(t)*l+1.
+ */
+#define dl_newv(t,n) ((t*)dl_alloc(sizeof(t)*(n)))
+
+/*
+ * dl_next(p) / dl_prev(p): the next/prev link of the element whose payload
+ * is at p.  Both expand to a fully parenthesized lvalue, so they can be
+ * assigned to as well as read.
+ */
+#define dl_next(p) (((struct __dl_head*)(p))[-1].dh_next)
+#define dl_prev(p) (((struct __dl_head*)(p))[-1].dh_prev)
+
+/* Allocate a new empty list head (an element linked to itself). */
+void *dl_head(void);
+/* Copy a whole string, or at most the given number of characters of it,
+ * into a newly allocated list element. */
+char *dl_strdup(char *);
+char *dl_strndup(char *, int);
+/* Link the second argument directly after (dl_insert) or directly before
+ * (dl_add) the first argument. */
+void dl_insert(void*, void*);
+void dl_add(void*, void*);
+/* Unlink an element from its list and clear its links. */
+void dl_del(void*);
+/* Free an element, including the hidden header in front of it. */
+void dl_free(void*);
+/* Point both links of an element at the element itself. */
+void dl_init(void*);
diff --git a/external-reshape-design.txt b/external-reshape-design.txt
new file mode 100644
index 00000000..4eb04a2f
--- /dev/null
+++ b/external-reshape-design.txt
@@ -0,0 +1,280 @@
+External Reshape
+
+1 Problem statement
+
+External (third-party metadata) reshape differs from native-metadata
+reshape in three key ways:
+
+1.1 Format specific constraints
+
+In the native case reshape is limited by what is implemented in the
+generic reshape routine (Grow_reshape()) and what is supported by the
+kernel. There are exceptional cases where Grow_reshape() may block
+operations when it knows that the kernel implementation is broken, but
+otherwise the kernel is relied upon to be the final arbiter of what
+reshape operations are supported.
+
+In the external case the kernel, and the generic checks in
+Grow_reshape(), become the super-set of what reshapes are possible. The
+metadata format may not support, or have yet to implement a given
+reshape type. The implication for Grow_reshape() is that it must query
+the metadata handler and effect changes in the metadata before the new
+geometry is posted to the kernel. The ->reshape_super method allows
+Grow_reshape() to validate the requested operation and post the metadata
+update.
+
+1.2 Scope of reshape
+
+Native metadata reshape is always performed at the array scope (no
+metadata relationship with sibling arrays on the same disks). External
+reshape, depending on the format, may not allow the number of member
+disks to be changed in a subarray unless the change is simultaneously
+applied to all subarrays in the container. For example the imsm format
+requires all member disks to be a member of all subarrays, so a 4-disk
+raid5 in a container that also houses a 4-disk raid10 array could not be
+reshaped to 5 disks as the imsm format does not support a 5-disk raid10
+representation. This requires the ->reshape_super method to check the
+contents of the array and ask the user to run the reshape at container
+scope (if all subarrays are agreeable to the change), or report an
+error in the case where one subarray cannot support the change.
+
+1.3 Monitoring / checkpointing
+
+Reshape, unlike rebuild/resync, requires strict checkpointing to survive
+interrupted reshape operations. For example when expanding a raid5
+array the first few stripes of the array will be overwritten in a
+destructive manner. When restarting the reshape process we need to know
+the exact location of the last successfully written stripe, and we need
+to restore the data in any partially overwritten stripe. Native
+metadata stores this backup data in the unused portion of spares that
+are being promoted to array members, or in an external backup file
+(located on a non-involved block device).
+
+The kernel is in charge of recording checkpoints of reshape progress,
+but mdadm is delegated the task of managing the backup space which
+involves:
+1/ Identifying what data will be overwritten in the next unit of reshape
+ operation
+2/ Suspending access to that region so that a snapshot of the data can
+ be transferred to the backup space.
+3/ Allowing the kernel to reshape the saved region and setting the
+ boundary for the next backup.
+
+In the external reshape case we want to preserve this mdadm
+'reshape-manager' arrangement, but have a third actor, mdmon, to
+consider. It is tempting to give the role of managing reshape to mdmon,
+but that is counter to its role as a monitor, and conflicts with the
+existing capabilities and role of mdadm to manage the progress of
+reshape. For clarity the external reshape implementation maintains the
+role of mdmon as a (mostly) passive recorder of raid events, and mdadm
+treats it as it would the kernel in the native reshape case (modulo
+needing to send explicit metadata update messages and checking that
+mdmon took the expected action).
+
+External reshape can use the generic md backup file as a fallback, but in the
+optimal/firmware-compatible case the reshape-manager will use the metadata
+specific areas for managing reshape. The implementation also needs to spawn a
+reshape-manager per subarray when the reshape is being carried out at the
+container level. For these two reasons the ->manage_reshape() method is
+introduced. This method in addition to base tasks mentioned above:
+1/ Processes each subarray one at a time in series - where appropriate.
+2/ Uses either generic routines in Grow.c for md-style backup file
+ support, or uses the metadata-format specific location for storing
+ recovery data.
+This aims to avoid a "midlayer mistake"[1] and lets the metadata handler
+optionally take advantage of generic infrastructure in Grow.c
+
+2 Details for specific reshape requests
+
+There are quite a few moving pieces spread out across md, mdadm, and mdmon for
+the support of external reshape, and there are several different types of
+reshape that need to be comprehended by the implementation. A rundown of
+these details follows.
+
+2.0 General provisions:
+
+Obtain an exclusive open on the container to make sure we are not
+running concurrently with a Create() event.
+
+2.1 Freezing sync_action
+
+ Before making any attempt at a reshape we 'freeze' every array in
+ the container to ensure no spare assignment or recovery happens.
+ This involves writing 'frozen' to sync_action and changing the '/'
+ after 'external:' in metadata_version to a '-'. mdmon knows that
+ this means not to perform any management.
+
+ Before doing this we check that all sync_actions are 'idle', which
+ is racy but still useful.
+ Afterwards we check that all member arrays have no spares
+ or partial spares (recovery_start != 'none') which would indicate a
+ race. If they do, we unfreeze again.
+
+ Once this completes we know all the arrays are stable. They may
+ still have failed devices as devices can fail at any time. However
+ we treat those like failures that happen during the reshape.
+
+2.2 Reshape size
+
+ 1/ mdadm::Grow_reshape(): checks if mdmon is running and optionally
+ initializes st->update_tail
+ 2/ mdadm::Grow_reshape() calls ->reshape_super() to check that the size change
+ is allowed (being performed at subarray scope / enough room) and prepares a
+ metadata update
+ 3/ mdadm::Grow_reshape(): flushes the metadata update (via
+ flush_metadata_update(), or ->sync_metadata())
+ 4/ mdadm::Grow_reshape(): post the new size to the kernel
+
+
+2.3 Reshape level (simple-takeover)
+
+"simple-takeover" implies the level change can be satisfied without touching
+sync_action
+
+ 1/ mdadm::Grow_reshape(): checks if mdmon is running and optionally
+ initializes st->update_tail
+ 2/ mdadm::Grow_reshape() calls ->reshape_super() to check that the level change
+ is allowed (being performed at subarray scope) and prepares a
+ metadata update
+ 2a/ raid10 --> raid0: degrade all mirror legs prior to calling
+ ->reshape_super
+ 3/ mdadm::Grow_reshape(): flushes the metadata update (via
+ flush_metadata_update(), or ->sync_metadata())
+ 4/ mdadm::Grow_reshape(): post the new level to the kernel
+
+2.4 Reshape chunk, layout
+
+2.5 Reshape raid disks (grow)
+
+ 1/ mdadm::Grow_reshape(): unconditionally initializes st->update_tail
+ because only redundant raid levels can modify the number of raid disks
+ 2/ mdadm::Grow_reshape(): calls ->reshape_super() to check that the level
+ change is allowed (being performed at proper scope / permissible
+ geometry / proper spares available in the container), chooses
+ the spares to use, and prepares a metadata update.
+ 3/ mdadm::Grow_reshape(): Converts each subarray in the container to the
+ raid level that can perform the reshape and starts mdmon.
+ 4/ mdadm::Grow_reshape(): Pushes the update to mdmon.
+ 5/ mdadm::Grow_reshape(): uses container_content to find details of
+ the spares and passes them to the kernel.
+ 6/ mdadm::Grow_reshape(): gives raid_disks update to the kernel,
+ sets sync_max, sync_min, suspend_lo, suspend_hi all to zero,
+ and starts the reshape by writing 'reshape' to sync_action.
+ 7/ mdmon::monitor notices the sync_action change and tells
+ managemon to check for new devices. managemon notices the new
+ devices, opens relevant sysfs file, and passes them all to
+ monitor.
+ 8/ mdadm::Grow_reshape() calls ->manage_reshape to oversee the
+ rest of the reshape.
+
+ 9/ mdadm::<format>->manage_reshape(): saves data that will be overwritten by
+ the kernel to either the backup file or the metadata specific location,
+ advances sync_max, waits for reshape, ping mdmon, repeat.
+ Meanwhile mdmon::read_and_act(): records checkpoints.
+ Specifically.
+
+ 9a/ if the 'next' stripe to be reshaped will over-write
+ itself during reshape then:
+ 9a.1/ increase suspend_hi to cover a suitable number of
+ stripes.
+ 9a.2/ backup those stripes safely.
+ 9a.3/ advance sync_max to allow those stripes to be backed up
+ 9a.4/ when sync_completed indicates that those stripes have
+ been reshaped, manage_reshape must ping_manager
+ 9a.5/ when mdmon notices that sync_completed has been updated,
+ it records the new checkpoint in the metadata
+ 9a.6/ after the ping_manager, manage_reshape will increase
+ suspend_lo to allow access to those stripes again
+
+ 9b/ if the 'next' stripe to be reshaped will over-write unused
+ space during reshape then we apply same process as above,
+ except that there is no need to back anything up.
+ Note that we *do* need to keep suspend_hi progressing as
+ it is not safe to write to the area-under-reshape. For
+ kernel-managed-metadata this protection is provided by
+ ->reshape_safe, but that does not protect us in the case
+ of user-space-managed-metadata.
+
+ 10/ mdadm::<format>->manage_reshape(): Once reshape completes changes the raid
+ level back to the nominal raid level (if necessary)
+
+ FIXME: native metadata does not have the capability to record the original
+ raid level in reshape-restart case because the kernel always records current
+ raid level to the metadata, whereas external metadata can masquerade at an
+ alternate level based on the reshape state.
+
+2.6 Reshape raid disks (shrink)
+
+3 Interaction with metadata handler.
+
+ The following calls are made into the metadata handler to assist
+ with initiating and monitoring a 'reshape'.
+
+ 1/ ->reshape_super is called quite early (after only minimal
+ checks) to make sure that the metadata can record the new shape
+ and any necessary transitions. It may be passed a 'container'
+ or an individual array within a container, and it should notice
+ the difference and act accordingly.
+ When a reshape is requested against a container it is expected
+ that it should be applied to every array in the container,
+ however it is up to the metadata handler to determine final
+ policy.
+
+ If the reshape is supportable, the internal copy of the metadata
+ should be updated, and a metadata update suitable for sending
+ to mdmon should be queued.
+
+ If the reshape will involve converting spares into array members,
+ this must be recorded in the metadata too.
+
+ 2/ ->container_content will be called to find out the new state
+ of the array, or of all arrays in the container. Any newly
+ added devices (with state==0 and raid_disk >= 0) will be added
+ to the array as spares with the relevant slot number.
+
+ It is likely that the info returned by ->container_content will
+ have ->reshape_active set, ->reshape_progress set to e.g. 0, and
+ new_* set appropriately. mdadm will use this information to
+ cause the correct reshape to start at an appropriate time.
+
+ 3/ ->set_array_state will be called by mdmon when reshape has
+ started and again periodically as it progresses. This should
+ record the ->last_checkpoint as the point where reshape has
+ progressed to. When the reshape finished this will be called
+ again and it should notice that ->curr_action is no longer
+ 'reshape' and so should record that the reshape has finished
+ providing 'last_checkpoint' has progressed suitably.
+
+ 4/ ->manage_reshape will be called once the reshape has been set
+ up in the kernel but before sync_max has been moved from 0, so
+ no actual reshape will have happened.
+
+ ->manage_reshape should call progress_reshape() to allow the
+ reshape to progress, and should back-up any data as indicated
+ by the return value. See the documentation of that function
+ for more details.
+ ->manage_reshape will be called multiple times when a
+ container is being reshaped, once for each member array in
+ the container.
+
+
+ The progress of the metadata is as follows:
+ 1/ mdadm sends a metadata update to mdmon which marks the array
+ as undergoing a reshape. This is set up by
+ ->reshape_super and applied by ->process_update
+ For container-wide reshape, this happens once for the whole
+ container.
+ 2/ mdmon notices progress via the sysfs files and calls
+ ->set_array_state to update the state periodically
+ For container-wide reshape, this happens repeatedly for
+ one array, then repeatedly for the next, etc.
+ 3/ mdmon notices when reshape has finished and calls
+ ->set_array_state to record that the reshape is complete.
+ For container-wide reshape, this happens once for each
+ member array.
+
+
+
+...
+
+[1]: Linux kernel design patterns - part 3, Neil Brown http://lwn.net/Articles/336262/
diff --git a/inventory b/inventory
new file mode 100755
index 00000000..40598cee
--- /dev/null
+++ b/inventory
@@ -0,0 +1,249 @@
+
+.gitignore
+ANNOUNCE-3.0
+ANNOUNCE-3.0.1
+ANNOUNCE-3.0.2
+ANNOUNCE-3.0.3
+ANNOUNCE-3.1
+ANNOUNCE-3.1.1
+ANNOUNCE-3.1.2
+ANNOUNCE-3.1.3
+ANNOUNCE-3.1.4
+ANNOUNCE-3.1.5
+ANNOUNCE-3.2
+ANNOUNCE-3.2.1
+ANNOUNCE-3.2.2
+ANNOUNCE-3.2.3
+ANNOUNCE-3.2.4
+ANNOUNCE-3.2.5
+ANNOUNCE-3.2.6
+ANNOUNCE-3.3
+ANNOUNCE-3.3.1
+ANNOUNCE-3.3.2
+Assemble.c
+Build.c
+COPYING
+ChangeLog
+Create.c
+Detail.c
+Dump.c
+Examine.c
+Grow.c
+INSTALL
+Incremental.c
+Kill.c
+Makefile
+Manage.c
+Monitor.c
+Query.c
+README.initramfs
+ReadMe.c
+TODO
+bitmap.c
+bitmap.h
+config.c
+crc32.c
+crc32.h
+dlink.c
+dlink.h
+external-reshape-design.txt
+inventory
+kernel-patch-2.6.18
+kernel-patch-2.6.18.6
+kernel-patch-2.6.19
+kernel-patch-2.6.25
+kernel-patch-2.6.27
+lib.c
+makedist
+managemon.c
+mapfile.c
+maps.c
+md.4
+md5.h
+md_p.h
+md_u.h
+mdadm.8.in
+mdadm.c
+mdadm.conf-example
+mdadm.conf.5
+mdadm.h
+mdadm.spec
+mdassemble.8
+mdassemble.c
+mdmon-design.txt
+mdmon.8
+mdmon.c
+mdmon.h
+mdopen.c
+mdstat.c
+misc/
+misc/mdcheck
+misc/syslog-events
+mkinitramfs
+monitor.c
+msg.c
+msg.h
+part.h
+platform-intel.c
+platform-intel.h
+policy.c
+probe_roms.c
+probe_roms.h
+pwgr.c
+raid5extend.c
+raid6check.8
+raid6check.c
+restripe.c
+sg_io.c
+sha1.c
+sha1.h
+super-ddf.c
+super-gpt.c
+super-intel.c
+super-mbr.c
+super0.c
+super1.c
+swap_super.c
+sysfs.c
+systemd/
+systemd/SUSE-mdadm_env.sh
+systemd/mdadm-grow-continue@.service
+systemd/mdadm-last-resort@.service
+systemd/mdadm-last-resort@.timer
+systemd/mdadm.shutdown
+systemd/mdmon@.service
+systemd/mdmonitor.service
+test
+tests/
+tests/00linear
+tests/00multipath
+tests/00names
+tests/00raid0
+tests/00raid1
+tests/00raid10
+tests/00raid4
+tests/00raid5
+tests/00raid6
+tests/01r1fail
+tests/01r5fail
+tests/01r5integ
+tests/01raid6integ
+tests/01replace
+tests/02lineargrow
+tests/02r1add
+tests/02r1grow
+tests/02r5grow
+tests/02r6grow
+tests/03assem-incr
+tests/03r0assem
+tests/03r5assem
+tests/03r5assem-failed
+tests/03r5assemV1
+tests/04r0update
+tests/04r1update
+tests/04r5swap
+tests/04update-metadata
+tests/04update-uuid
+tests/05r1-add-internalbitmap
+tests/05r1-add-internalbitmap-v1a
+tests/05r1-add-internalbitmap-v1b
+tests/05r1-add-internalbitmap-v1c
+tests/05r1-bitmapfile
+tests/05r1-grow-external
+tests/05r1-grow-internal
+tests/05r1-grow-internal-1
+tests/05r1-internalbitmap
+tests/05r1-internalbitmap-v1a
+tests/05r1-internalbitmap-v1b
+tests/05r1-internalbitmap-v1c
+tests/05r1-n3-bitmapfile
+tests/05r1-re-add
+tests/05r1-re-add-nosuper
+tests/05r1-remove-internalbitmap
+tests/05r1-remove-internalbitmap-v1a
+tests/05r1-remove-internalbitmap-v1b
+tests/05r1-remove-internalbitmap-v1c
+tests/05r5-bitmapfile
+tests/05r5-internalbitmap
+tests/05r6-bitmapfile
+tests/05r6tor0
+tests/06name
+tests/06sysfs
+tests/06wrmostly
+tests/07autoassemble
+tests/07autodetect
+tests/07changelevelintr
+tests/07changelevels
+tests/07layouts
+tests/07reshape5intr
+tests/07revert-grow
+tests/07revert-inplace
+tests/07revert-shrink
+tests/07testreshape5
+tests/09imsm-assemble
+tests/09imsm-create-fail-rebuild
+tests/09imsm-overlap
+tests/10ddf-assemble-missing
+tests/10ddf-create
+tests/10ddf-create-fail-rebuild
+tests/10ddf-fail-create-race
+tests/10ddf-fail-readd
+tests/10ddf-fail-readd-readonly
+tests/10ddf-fail-spare
+tests/10ddf-fail-stop-readd
+tests/10ddf-fail-twice
+tests/10ddf-fail-two-spares
+tests/10ddf-geometry
+tests/10ddf-incremental-wrong-order
+tests/10ddf-sudden-degraded
+tests/11spare-migration
+tests/12imsm-r0_2d-grow-r0_3d
+tests/12imsm-r0_2d-grow-r0_4d
+tests/12imsm-r0_2d-grow-r0_5d
+tests/12imsm-r0_3d-grow-r0_4d
+tests/12imsm-r5_3d-grow-r5_4d
+tests/12imsm-r5_3d-grow-r5_5d
+tests/13imsm-r0_r0_2d-grow-r0_r0_4d
+tests/13imsm-r0_r0_2d-grow-r0_r0_5d
+tests/13imsm-r0_r0_3d-grow-r0_r0_4d
+tests/13imsm-r0_r5_3d-grow-r0_r5_4d
+tests/13imsm-r0_r5_3d-grow-r0_r5_5d
+tests/13imsm-r5_r0_3d-grow-r5_r0_4d
+tests/13imsm-r5_r0_3d-grow-r5_r0_5d
+tests/14imsm-r0_3d-r5_3d-migrate-r5_4d-r5_4d
+tests/14imsm-r0_3d_no_spares-migrate-r5_3d
+tests/14imsm-r0_r0_2d-takeover-r10_4d
+tests/14imsm-r10_4d-grow-r10_5d
+tests/14imsm-r10_r5_4d-takeover-r0_2d
+tests/14imsm-r1_2d-grow-r1_3d
+tests/14imsm-r1_2d-takeover-r0_2d
+tests/14imsm-r5_3d-grow-r5_5d-no-spares
+tests/14imsm-r5_3d-migrate-r4_3d
+tests/15imsm-r0_3d_64k-migrate-r0_3d_256k
+tests/15imsm-r5_3d_4k-migrate-r5_3d_256k
+tests/15imsm-r5_3d_64k-migrate-r5_3d_256k
+tests/15imsm-r5_6d_4k-migrate-r5_6d_256k
+tests/15imsm-r5_r0_3d_64k-migrate-r5_r0_3d_256k
+tests/16imsm-r0_3d-migrate-r5_4d
+tests/16imsm-r0_5d-migrate-r5_6d
+tests/16imsm-r5_3d-migrate-r0_3d
+tests/16imsm-r5_5d-migrate-r0_5d
+tests/18imsm-1d-takeover-r0_1d
+tests/18imsm-1d-takeover-r1_2d
+tests/18imsm-r0_2d-takeover-r10_4d
+tests/18imsm-r10_4d-takeover-r0_2d
+tests/18imsm-r1_2d-takeover-r0_1d
+tests/19raid6auto-repair
+tests/19raid6repair
+tests/19repair-does-not-destroy
+tests/ToTest
+tests/check
+tests/env-ddf-template
+tests/env-imsm-template
+tests/imsm-grow-template
+tests/testdev
+tests/utils
+udev-md-raid-arrays.rules
+udev-md-raid-assembly.rules
+util.c
+xmalloc.c
diff --git a/kernel-patch-2.6.18 b/kernel-patch-2.6.18
new file mode 100644
index 00000000..87496ea2
--- /dev/null
+++ b/kernel-patch-2.6.18
@@ -0,0 +1,35 @@
+
+### Diffstat output
+ ./drivers/md/md.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff .prev/drivers/md/md.c ./drivers/md/md.c
+--- .prev/drivers/md/md.c 2006-10-23 10:26:37.000000000 +1000
++++ ./drivers/md/md.c 2006-12-21 16:28:29.000000000 +1100
+@@ -1783,7 +1783,8 @@ state_store(mdk_rdev_t *rdev, const char
+ else {
+ mddev_t *mddev = rdev->mddev;
+ kick_rdev_from_array(rdev);
+- md_update_sb(mddev);
++ if (mddev->pers)
++ md_update_sb(mddev);
+ md_new_event(mddev);
+ err = 0;
+ }
+@@ -1994,6 +1995,8 @@ static mdk_rdev_t *md_import_device(dev_
+ kobject_init(&rdev->kobj);
+
+ rdev->desc_nr = -1;
++ rdev->saved_raid_disk = -1;
++ rdev->raid_disk = -1;
+ rdev->flags = 0;
+ rdev->data_offset = 0;
+ rdev->sb_events = 0;
+@@ -3991,6 +3994,7 @@ static int set_array_info(mddev_t * mdde
+ mddev->major_version = info->major_version;
+ mddev->minor_version = info->minor_version;
+ mddev->patch_version = info->patch_version;
++ mddev->persistent = ! info->not_persistent;
+ return 0;
+ }
+ mddev->major_version = MD_MAJOR_VERSION;
diff --git a/kernel-patch-2.6.18.6 b/kernel-patch-2.6.18.6
new file mode 100644
index 00000000..e702e14a
--- /dev/null
+++ b/kernel-patch-2.6.18.6
@@ -0,0 +1,35 @@
+Signed-off-by: Neil Brown <neilb@suse.de>
+
+### Diffstat output
+ ./drivers/md/md.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff .prev/drivers/md/md.c ./drivers/md/md.c
+--- .prev/drivers/md/md.c 2006-12-21 17:08:23.000000000 +1100
++++ ./drivers/md/md.c 2006-12-21 17:08:26.000000000 +1100
+@@ -1783,7 +1783,8 @@ state_store(mdk_rdev_t *rdev, const char
+ else {
+ mddev_t *mddev = rdev->mddev;
+ kick_rdev_from_array(rdev);
+- md_update_sb(mddev);
++ if (mddev->pers)
++ md_update_sb(mddev);
+ md_new_event(mddev);
+ err = 0;
+ }
+@@ -1995,6 +1996,7 @@ static mdk_rdev_t *md_import_device(dev_
+
+ rdev->desc_nr = -1;
+ rdev->saved_raid_disk = -1;
++ rdev->raid_disk = -1;
+ rdev->flags = 0;
+ rdev->data_offset = 0;
+ rdev->sb_events = 0;
+@@ -3993,6 +3995,7 @@ static int set_array_info(mddev_t * mdde
+ mddev->major_version = info->major_version;
+ mddev->minor_version = info->minor_version;
+ mddev->patch_version = info->patch_version;
++ mddev->persistent = ! info->not_persistent;
+ return 0;
+ }
+ mddev->major_version = MD_MAJOR_VERSION;
diff --git a/kernel-patch-2.6.19 b/kernel-patch-2.6.19
new file mode 100644
index 00000000..22a67a39
--- /dev/null
+++ b/kernel-patch-2.6.19
@@ -0,0 +1,34 @@
+
+### Diffstat output
+ ./drivers/md/md.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff .prev/drivers/md/md.c ./drivers/md/md.c
+--- .prev/drivers/md/md.c 2006-12-21 15:55:01.000000000 +1100
++++ ./drivers/md/md.c 2006-12-21 16:28:09.000000000 +1100
+@@ -1792,7 +1792,8 @@ state_store(mdk_rdev_t *rdev, const char
+ else {
+ mddev_t *mddev = rdev->mddev;
+ kick_rdev_from_array(rdev);
+- md_update_sb(mddev, 1);
++ if (mddev->pers)
++ md_update_sb(mddev, 1);
+ md_new_event(mddev);
+ err = 0;
+ }
+@@ -2004,6 +2005,7 @@ static mdk_rdev_t *md_import_device(dev_
+
+ rdev->desc_nr = -1;
+ rdev->saved_raid_disk = -1;
++ rdev->raid_disk = -1;
+ rdev->flags = 0;
+ rdev->data_offset = 0;
+ rdev->sb_events = 0;
+@@ -3977,6 +3979,7 @@ static int set_array_info(mddev_t * mdde
+ mddev->major_version = info->major_version;
+ mddev->minor_version = info->minor_version;
+ mddev->patch_version = info->patch_version;
++ mddev->persistent = ! info->not_persistent;
+ return 0;
+ }
+ mddev->major_version = MD_MAJOR_VERSION;
diff --git a/kernel-patch-2.6.25 b/kernel-patch-2.6.25
new file mode 100644
index 00000000..23290078
--- /dev/null
+++ b/kernel-patch-2.6.25
@@ -0,0 +1,199 @@
+Status: ok
+
+Support adding a spare to a live md array with external metadata.
+
+i.e. extend the 'md/dev-XXX/slot' attribute so that you can
+tell a device to fill a vacant slot in an md array.
+
+
+Signed-off-by: Neil Brown <neilb@suse.de>
+
+### Diffstat output
+ ./drivers/md/md.c | 44 ++++++++++++++++++++++++++++++++++++++++----
+ ./drivers/md/multipath.c | 7 ++++++-
+ ./drivers/md/raid1.c | 7 ++++++-
+ ./drivers/md/raid10.c | 10 ++++++++--
+ ./drivers/md/raid5.c | 10 ++++++++--
+ 5 files changed, 68 insertions(+), 10 deletions(-)
+
+diff .prev/drivers/md/md.c ./drivers/md/md.c
+--- .prev/drivers/md/md.c 2008-06-05 09:19:56.000000000 +1000
++++ ./drivers/md/md.c 2008-06-10 10:41:21.000000000 +1000
+@@ -1932,7 +1932,7 @@ slot_store(mdk_rdev_t *rdev, const char
+ slot = -1;
+ else if (e==buf || (*e && *e!= '\n'))
+ return -EINVAL;
+- if (rdev->mddev->pers) {
++ if (rdev->mddev->pers && slot == -1) {
+ /* Setting 'slot' on an active array requires also
+ * updating the 'rd%d' link, and communicating
+ * with the personality with ->hot_*_disk.
+@@ -1940,8 +1940,6 @@ slot_store(mdk_rdev_t *rdev, const char
+ * failed/spare devices. This normally happens automatically,
+ * but not when the metadata is externally managed.
+ */
+- if (slot != -1)
+- return -EBUSY;
+ if (rdev->raid_disk == -1)
+ return -EEXIST;
+ /* personality does all needed checks */
+@@ -1955,6 +1953,44 @@ slot_store(mdk_rdev_t *rdev, const char
+ sysfs_remove_link(&rdev->mddev->kobj, nm);
+ set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+ md_wakeup_thread(rdev->mddev->thread);
++ } else if (rdev->mddev->pers) {
++ mdk_rdev_t *rdev2;
++ struct list_head *tmp;
++ /* Activating a spare .. or possibly reactivating
++ * if we every get bitmaps working here.
++ */
++
++ if (rdev->raid_disk != -1)
++ return -EBUSY;
++
++ if (rdev->mddev->pers->hot_add_disk == NULL)
++ return -EINVAL;
++
++ rdev_for_each(rdev2, tmp, rdev->mddev)
++ if (rdev2->raid_disk == slot)
++ return -EEXIST;
++
++ rdev->raid_disk = slot;
++ if (test_bit(In_sync, &rdev->flags))
++ rdev->saved_raid_disk = slot;
++ else
++ rdev->saved_raid_disk = -1;
++ err = rdev->mddev->pers->
++ hot_add_disk(rdev->mddev, rdev);
++ if (err != 1) {
++ rdev->raid_disk = -1;
++ if (err == 0)
++ return -EEXIST;
++ return err;
++ }
++ sprintf(nm, "rd%d", rdev->raid_disk);
++ if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
++ printk(KERN_WARNING
++ "md: cannot register "
++ "%s for %s\n",
++ nm, mdname(rdev->mddev));
++
++ /* don't wakeup anyone, leave that to userspace. */
+ } else {
+ if (slot >= rdev->mddev->raid_disks)
+ return -ENOSPC;
+@@ -4205,7 +4241,7 @@ static int add_new_disk(mddev_t * mddev,
+ super_types[mddev->major_version].
+ validate_super(mddev, rdev);
+ err = mddev->pers->hot_add_disk(mddev, rdev);
+- if (err)
++ if (err < 0)
+ unbind_rdev_from_array(rdev);
+ }
+ if (err)
+
+diff .prev/drivers/md/multipath.c ./drivers/md/multipath.c
+--- .prev/drivers/md/multipath.c 2008-05-30 14:49:31.000000000 +1000
++++ ./drivers/md/multipath.c 2008-06-10 10:35:03.000000000 +1000
+@@ -284,10 +284,15 @@ static int multipath_add_disk(mddev_t *m
+ int found = 0;
+ int path;
+ struct multipath_info *p;
++ int first = 0;
++ int last = mddev->raid_disks - 1;
++
++ if (rdev->raid_disk >= 0)
++ first = last = rdev->raid_disk;
+
+ print_multipath_conf(conf);
+
+- for (path=0; path<mddev->raid_disks; path++)
++ for (path = first; path <= last; path++)
+ if ((p=conf->multipaths+path)->rdev == NULL) {
+ q = rdev->bdev->bd_disk->queue;
+ blk_queue_stack_limits(mddev->queue, q);
+
+diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
+--- .prev/drivers/md/raid10.c 2008-05-30 14:49:31.000000000 +1000
++++ ./drivers/md/raid10.c 2008-06-10 10:28:53.000000000 +1000
+@@ -1116,6 +1116,8 @@ static int raid10_add_disk(mddev_t *mdde
+ int found = 0;
+ int mirror;
+ mirror_info_t *p;
++ int first = 0;
++ int last = mddev->raid_disks - 1;
+
+ if (mddev->recovery_cp < MaxSector)
+ /* only hot-add to in-sync arrays, as recovery is
+@@ -1125,12 +1127,16 @@ static int raid10_add_disk(mddev_t *mdde
+ if (!enough(conf))
+ return 0;
+
++ if (rdev->raid_disk)
++ first = last = rdev->raid_disk;
++
+ if (rdev->saved_raid_disk >= 0 &&
++ rdev->saved_raid_disk >= first &&
+ conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
+ mirror = rdev->saved_raid_disk;
+ else
+- mirror = 0;
+- for ( ; mirror < mddev->raid_disks; mirror++)
++ mirror = first;
++ for ( ; mirror <= last ; mirror++)
+ if ( !(p=conf->mirrors+mirror)->rdev) {
+
+ blk_queue_stack_limits(mddev->queue,
+
+diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
+--- .prev/drivers/md/raid1.c 2008-05-30 14:49:31.000000000 +1000
++++ ./drivers/md/raid1.c 2008-06-10 10:41:00.000000000 +1000
+@@ -1103,8 +1103,13 @@ static int raid1_add_disk(mddev_t *mddev
+ int found = 0;
+ int mirror = 0;
+ mirror_info_t *p;
++ int first = 0;
++ int last = mddev->raid_disks - 1;
+
+- for (mirror=0; mirror < mddev->raid_disks; mirror++)
++ if (rdev->raid_disk >= 0)
++ first = last = rdev->raid_disk;
++
++ for (mirror = first; mirror <= last; mirror++)
+ if ( !(p=conf->mirrors+mirror)->rdev) {
+
+ blk_queue_stack_limits(mddev->queue,
+
+diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
+--- .prev/drivers/md/raid5.c 2008-05-30 14:49:35.000000000 +1000
++++ ./drivers/md/raid5.c 2008-06-10 10:27:51.000000000 +1000
+@@ -4399,21 +4399,27 @@ static int raid5_add_disk(mddev_t *mddev
+ int found = 0;
+ int disk;
+ struct disk_info *p;
++ int first = 0;
++ int last = conf->raid_disks - 1;
+
+ if (mddev->degraded > conf->max_degraded)
+ /* no point adding a device */
+ return 0;
+
++ if (rdev->raid_disk >= 0)
++ first = last = rdev->raid_disk;
++
+ /*
+ * find the disk ... but prefer rdev->saved_raid_disk
+ * if possible.
+ */
+ if (rdev->saved_raid_disk >= 0 &&
++ rdev->saved_raid_disk >= first &&
+ conf->disks[rdev->saved_raid_disk].rdev == NULL)
+ disk = rdev->saved_raid_disk;
+ else
+- disk = 0;
+- for ( ; disk < conf->raid_disks; disk++)
++ disk = first;
++ for ( ; disk <= last ; disk++)
+ if ((p=conf->disks + disk)->rdev == NULL) {
+ clear_bit(In_sync, &rdev->flags);
+ rdev->raid_disk = disk;
diff --git a/kernel-patch-2.6.27 b/kernel-patch-2.6.27
new file mode 100644
index 00000000..8d0785d8
--- /dev/null
+++ b/kernel-patch-2.6.27
@@ -0,0 +1,36 @@
+touch_mnt_namespace when the mount flags change
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+Daemons that need to be launched while the rootfs is read-only can now
+poll /proc/mounts to be notified when their O_RDWR requests may no
+longer end in EROFS.
+
+Cc: Kay Sievers <kay.sievers@vrfy.org>
+Cc: Neil Brown <neilb@suse.de>
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+---
+
+ fs/namespace.c | 7 ++++++-
+ 1 files changed, 6 insertions(+), 1 deletions(-)
+
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 6e283c9..1bd5ba2 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1553,8 +1553,13 @@ static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags,
+ if (!err)
+ nd->path.mnt->mnt_flags = mnt_flags;
+ up_write(&sb->s_umount);
+- if (!err)
++ if (!err) {
+ security_sb_post_remount(nd->path.mnt, flags, data);
++
++ spin_lock(&vfsmount_lock);
++ touch_mnt_namespace(nd->path.mnt->mnt_ns);
++ spin_unlock(&vfsmount_lock);
++ }
+ return err;
+ }
+
diff --git a/lib.c b/lib.c
new file mode 100644
index 00000000..6808f62d
--- /dev/null
+++ b/lib.c
@@ -0,0 +1,475 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2011 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include "dlink.h"
+#include <ctype.h>
+
+/* This file contains various 'library' style functions. They
+ * have no dependency on anything outside this file.
+ */
+
+/*
+ * Return the major number listed for "mdp" in the block section of
+ * /proc/devices, or -1 if it cannot be determined.
+ *
+ * The file is scanned word by word: once the words "Block" "devices:"
+ * have been seen, the most recent numeric word preceding "mdp" is taken
+ * as the major.  A successful lookup is cached in a static, so the file
+ * is only re-read while the answer is still unknown.
+ */
+int get_mdp_major(void)
+{
+	static int mdp_major = -1;
+	FILE *fl;
+	char *w;
+	int have_block = 0;
+	int have_devices = 0;
+	int last_num = -1;
+
+	if (mdp_major != -1)
+		return mdp_major;
+	fl = fopen("/proc/devices", "r");
+	if (!fl)
+		return -1;
+	while ((w = conf_word(fl, 1))) {
+		if (have_block && strcmp(w, "devices:") == 0)
+			have_devices = 1;
+		have_block = (strcmp(w, "Block") == 0);
+		/* <ctype.h> functions require an unsigned char value;
+		 * passing a plain (possibly negative) char is undefined.
+		 */
+		if (isdigit((unsigned char)w[0]))
+			last_num = atoi(w);
+		if (have_devices && strcmp(w, "mdp") == 0)
+			mdp_major = last_num;
+		free(w);
+	}
+	fclose(fl);
+	return mdp_major;
+}
+
+/*
+ * Map a device number to a kernel device name.
+ *
+ * Reads the /sys/dev/block/<major>:<minor> symlink and returns its last
+ * path component.  The result lives in a static buffer that is
+ * overwritten by the next call.  NULL is returned if the link cannot be
+ * read or contains no '/'.
+ */
+char *devid2kname(int devid)
+{
+	char path[30];
+	char link[200];
+	static char devnm[32];
+	char *cp;
+	int n;
+
+	/* Bounded formatting: never write past path[] whatever devid holds */
+	snprintf(path, sizeof(path), "/sys/dev/block/%d:%d", major(devid),
+		 minor(devid));
+	n = readlink(path, link, sizeof(link)-1);
+	if (n > 0) {
+		/* readlink() does not NUL-terminate its result */
+		link[n] = 0;
+		cp = strrchr(link, '/');
+		if (cp) {
+			/* Truncate rather than overflow devnm[] if the
+			 * component is unexpectedly long.
+			 */
+			snprintf(devnm, sizeof(devnm), "%s", cp+1);
+			return devnm;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Map a device number to the name of the md device it belongs to.
+ *
+ * The device might be an extended-minor partition or a named md device,
+ * so first look at the /sys/dev/block/<major>:<minor> link, which is
+ * expected to look like
+ *	../../block/mdXXX/mdXXXpYY
+ * or
+ *	...../block/md_FOO
+ * and return the path component that follows "/block/".
+ * If the link cannot be read or has no "/block/" part, fall back to
+ * "md<minor>" for MD_MAJOR and "md_d<minor >> MdpMinorShift>" for the
+ * major reported by get_mdp_major().
+ *
+ * Returns a static buffer that is overwritten by the next call, or NULL
+ * if no name can be derived.
+ */
+char *devid2devnm(int devid)
+{
+	char path[30];
+	char link[200];
+	static char devnm[32];
+	char *cp, *ep;
+	int n;
+
+	/* Bounded formatting: never write past path[] whatever devid holds */
+	snprintf(path, sizeof(path), "/sys/dev/block/%d:%d", major(devid),
+		 minor(devid));
+	n = readlink(path, link, sizeof(link)-1);
+	if (n > 0) {
+		/* readlink() does not NUL-terminate its result */
+		link[n] = 0;
+		cp = strstr(link, "/block/");
+		if (cp) {
+			cp += 7;	/* step over "/block/" */
+			ep = strchr(cp, '/');
+			if (ep)
+				*ep = 0;
+			/* Truncate rather than overflow devnm[] */
+			snprintf(devnm, sizeof(devnm), "%s", cp);
+			return devnm;
+		}
+	}
+	if (major(devid) == MD_MAJOR)
+		snprintf(devnm, sizeof(devnm), "md%d", minor(devid));
+	else if (major(devid) == (unsigned)get_mdp_major())
+		snprintf(devnm, sizeof(devnm), "md_d%d",
+			 (minor(devid)>>MdpMinorShift));
+	else
+		return NULL;
+	return devnm;
+}
+
+/*
+ * Name the md device described by *st.
+ * Only block devices can be named; for anything else NULL is returned.
+ * The lookup itself is delegated to devid2devnm() on st->st_rdev.
+ */
+char *stat2devnm(struct stat *st)
+{
+	if (S_ISBLK(st->st_mode))
+		return devid2devnm(st->st_rdev);
+	return NULL;
+}
+
+/*
+ * Name the md device open on file descriptor 'fd'.
+ * Returns whatever stat2devnm() reports for the descriptor, or NULL
+ * when fstat() fails.
+ */
+char *fd2devnm(int fd)
+{
+	struct stat st;
+
+	if (fstat(fd, &st) != 0)
+		return NULL;
+	return stat2devnm(&st);
+}
+
+/*
+ * convert a major/minor pair for a block device into a name in /dev, if possible.
+ * On the first call, walk /dev collecting names.
+ * Put them in a simple linked list for now.
+ */
+/* One cached /dev entry: the major/minor numbers of a block device and
+ * the path it was found under.  Entries are chained through 'next';
+ * 'devlist' is the head of the chain.
+ */
+struct devmap {
+ int major, minor;
+ char *name;
+ struct devmap *next;
+} *devlist = NULL;
+/* Set to 1 by map_dev_preferred() after it has walked /dev to fill
+ * devlist; reset to 0 there to force a rescan.
+ */
+int devlist_ready = 0;
+
+/*
+ * Tree-walk callback: remember 'name' if it refers to a block device.
+ *
+ * A symlink is followed with stat() first; a link whose target cannot
+ * be stat'ed is ignored.  For a block device a new entry holding a
+ * private copy of the name and the major/minor numbers is pushed on the
+ * front of devlist.  A leading "/dev/./" in the name is collapsed to
+ * "/dev/".
+ *
+ * 'flag' and 's' are not used.  Always returns 0.
+ */
+int add_dev(const char *name, const struct stat *stb, int flag, struct FTW *s)
+{
+	struct stat target;
+	struct devmap *entry;
+	char *copy;
+
+	if (S_ISLNK(stb->st_mode)) {
+		if (stat(name, &target) != 0)
+			return 0;
+		stb = &target;
+	}
+	if (!S_ISBLK(stb->st_mode))
+		return 0;
+
+	copy = xstrdup(name);
+	entry = xmalloc(sizeof(*entry));
+	/* "/dev/./foo" -> "/dev/foo": overwrite from the "/." onwards */
+	if (strncmp(copy, "/dev/./", 7) == 0)
+		strcpy(copy+4, name+6);
+	if (!entry)
+		return 0;
+
+	entry->major = major(stb->st_rdev);
+	entry->minor = minor(stb->st_rdev);
+	entry->name = copy;
+	entry->next = devlist;
+	devlist = entry;
+	return 0;
+}
+
+/* Replacement definitions of nftw() used when HAVE_NFTW is not defined. */
+#ifndef HAVE_NFTW
+#ifdef HAVE_FTW
+/* Adapter giving add_dev() the three-argument callback signature used
+ * with ftw(); the FTW pointer is passed as NULL.
+ */
+int add_dev_1(const char *name, const struct stat *stb, int flag)
+{
+ return add_dev(name, stb, flag, NULL);
+}
+/* nftw() emulated with ftw().  Note that 'han' and 'flags' are ignored:
+ * the callback is always add_dev (through add_dev_1).
+ */
+int nftw(const char *path, int (*han)(const char *name, const struct stat *stb, int flag, struct FTW *s), int nopenfd, int flags)
+{
+ return ftw(path, add_dev_1, nopenfd);
+}
+#else
+/* HAVE_FTW is not defined either: walk nothing and return 0. */
+int nftw(const char *path, int (*han)(const char *name, const struct stat *stb, int flag, struct FTW *s), int nopenfd, int flags)
+{
+ return 0;
+}
+#endif /* HAVE_FTW */
+#endif /* HAVE_NFTW */
+
+/*
+ * Find a block device with the right major/minor number.
+ * If we find multiple names, choose the shortest.
+ * If we find a name in /dev/md/, we prefer that.
+ * This applies only to names for MD devices.
+ * If 'prefer' is set (normally to e.g. /by-path/)
+ * then we prefer a name which contains that string.
+ *
+ * Returns NULL for 0:0, or when no name is known and 'create' is zero.
+ * With 'create' set and no name known, "<major>:<minor>" is returned
+ * in a static buffer.  Otherwise the returned pointer is the name
+ * stored in the cached devlist entry, not a copy; a later rescan
+ * frees those names.
+ */
+char *map_dev_preferred(int major, int minor, int create,
+ char *prefer)
+{
+ struct devmap *p;
+ char *regular = NULL, *preferred=NULL;
+ int did_check = 0;
+
+ if (major == 0 && minor == 0)
+ return NULL;
+
+ retry:
+ if (!devlist_ready) {
+ /* (Re)build the cache: drop any old entries, then walk /dev */
+ char *dev = "/dev";
+ struct stat stb;
+ while(devlist) {
+ struct devmap *d = devlist;
+ devlist = d->next;
+ free(d->name);
+ free(d);
+ }
+ /* If /dev itself is a symlink, walk "/dev/." instead;
+ * add_dev() turns the resulting "/dev/./" prefix back into "/dev/".
+ */
+ if (lstat(dev, &stb)==0 &&
+ S_ISLNK(stb.st_mode))
+ dev = "/dev/.";
+ nftw(dev, add_dev, 10, FTW_PHYS);
+ devlist_ready=1;
+ did_check = 1;
+ }
+
+ /* Track the shortest matching name in each of the two classes */
+ for (p=devlist; p; p=p->next)
+ if (p->major == major &&
+ p->minor == minor) {
+ if (strncmp(p->name, "/dev/md/",8) == 0
+ || (prefer && strstr(p->name, prefer))) {
+ if (preferred == NULL ||
+ strlen(p->name) < strlen(preferred))
+ preferred = p->name;
+ } else {
+ if (regular == NULL ||
+ strlen(p->name) < strlen(regular))
+ regular = p->name;
+ }
+ }
+ /* Nothing found in a cache built by an earlier call: rescan /dev
+ * once (did_check stops a second retry) in case it is out of date.
+ */
+ if (!regular && !preferred && !did_check) {
+ devlist_ready = 0;
+ goto retry;
+ }
+ if (create && !regular && !preferred) {
+ static char buf[30];
+ snprintf(buf, sizeof(buf), "%d:%d", major, minor);
+ regular = buf;
+ }
+
+ return preferred ? preferred : regular;
+}
+
+/* conf_word gets one word from the conf file.
+ * if "allow_key", then accept words at the start of a line,
+ * otherwise stop when such a word is found.
+ * We assume that the file pointer is at the end of a word, so the
+ * next character is a space, or a newline. If not, it is the start of a line.
+ *
+ * Single and double quotes group characters (including blanks) into one
+ * word and are themselves removed.  A '#' where a word could start
+ * begins a comment that runs to the end of the line.
+ *
+ * Returns a buffer obtained from xmalloc()/xrealloc() holding the word
+ * (the caller frees it), or NULL if no word was found.
+ */
+
+char *conf_word(FILE *file, int allow_key)
+{
+ int wsize = 100;
+ int len = 0;
+ int c;
+ int quote;
+ int wordfound = 0;
+ char *word = xmalloc(wsize);
+
+ while (wordfound==0) {
+ /* at the end of a word.. */
+ c = getc(file);
+ if (c == '#')
+ while (c != EOF && c != '\n')
+ c = getc(file);
+ if (c == EOF) break;
+ if (c == '\n') continue;
+
+ /* A non-blank here is the first character of a line, i.e. a keyword */
+ if (c != ' ' && c != '\t' && ! allow_key) {
+ ungetc(c, file);
+ break;
+ }
+ /* looks like it is safe to get a word here, if there is one */
+ quote = 0;
+ /* first, skip any spaces */
+ while (c == ' ' || c == '\t')
+ c = getc(file);
+ if (c != EOF && c != '\n' && c != '#') {
+ /* we really have a character of a word, so start saving it */
+ while (c != EOF && c != '\n' && (quote || (c!=' ' && c != '\t'))) {
+ wordfound = 1;
+ /* 'quote' holds the open quote character, or 0 outside quotes */
+ if (quote && c == quote) quote = 0;
+ else if (quote == 0 && (c == '\'' || c == '"'))
+ quote = c;
+ else {
+ /* grow in steps of 100, always leaving room for the final NUL */
+ if (len == wsize-1) {
+ wsize += 100;
+ word = xrealloc(word, wsize);
+ }
+ word[len++] = c;
+ }
+ c = getc(file);
+ /* Hack for broken kernels (2.6.14-.24) that put
+ * "active(auto-read-only)"
+ * in /proc/mdstat instead of
+ * "active (auto-read-only)"
+ */
+ if (c == '(' && len >= 6
+ && strncmp(word+len-6, "active", 6) == 0)
+ c = ' ';
+ }
+ }
+ /* push the terminating character back for the next call */
+ if (c != EOF) ungetc(c, file);
+ }
+ word[len] = 0;
+
+ /* Further HACK for broken kernels.. 2.6.14-2.6.24 */
+ /* The hack above replaced the '(' with a blank, so restore it here */
+ if (strcmp(word, "auto-read-only)") == 0)
+ strcpy(word, "(auto-read-only)");
+
+/* printf("word is <%s>\n", word); */
+ if (!wordfound) {
+ free(word);
+ word = NULL;
+ }
+ return word;
+}
+
+/*
+ * Print 'str' on stdout, adding surrounding quotes only if needed.
+ *
+ * A string without space, tab or quote characters is printed as is.
+ * Otherwise it is wrapped in " or ' (whichever differs from the first
+ * quote character found in the string; " if it only has blanks).
+ * Whenever the quote character currently in use occurs in the text, the
+ * quoted section is closed and a new one is opened with the other quote
+ * character.
+ */
+void print_quoted(char *str)
+{
+	char trigger = 0;
+	char q;
+	char *p;
+
+	/* Find what makes quoting necessary: the scan stops at the first
+	 * quote character, while a blank is only noted.
+	 */
+	for (p = str; *p; p++) {
+		if (*p == '\'' || *p == '"') {
+			trigger = *p;
+			break;
+		}
+		if (*p == ' ' || *p == '\t')
+			trigger = *p;
+	}
+	if (trigger == 0) {
+		printf("%s", str);
+		return;
+	}
+
+	q = (trigger == '"') ? '\'' : '"';
+	putchar(q);
+	for (p = str; *p; p++) {
+		if (*p == q) {
+			/* close with the old quote, reopen with the other */
+			putchar(q);
+			q = (q == '"') ? '\'' : '"';
+			putchar(q);
+		}
+		putchar(*p);
+	}
+	putchar(q);
+}
+
+/*
+ * Print 'str' on stdout in a form suitable for device names:
+ * space and tab are written as '_', '/' is written as '-', and every
+ * other character is written unchanged.
+ */
+void print_escape(char *str)
+{
+	char *p;
+
+	for (p = str; *p; p++) {
+		if (*p == ' ' || *p == '\t')
+			putchar('_');
+		else if (*p == '/')
+			putchar('-');
+		else
+			putchar(*p);
+	}
+}
+
+/*
+ * Return 1 if environment variable 'name' is set and atoi() of its
+ * value is exactly 1; return 0 otherwise (unset, or any other value).
+ */
+int check_env(char *name)
+{
+	const char *value = getenv(name);
+
+	return (value != NULL && atoi(value) == 1) ? 1 : 0;
+}
+
+/*
+ * Decide whether udev should be used.
+ *
+ * The answer is 1 when /dev/.udev or /run/udev exists and the
+ * MDADM_NO_UDEV environment variable is not set to 1, else 0.
+ * It is computed on the first call and cached for later calls.
+ */
+int use_udev(void)
+{
+	static int use = -1;
+	struct stat stb;
+
+	if (use >= 0)
+		return use;
+
+	if (stat("/dev/.udev", &stb) != 0 && stat("/run/udev", &stb) != 0)
+		use = 0;
+	else
+		use = (check_env("MDADM_NO_UDEV") == 0);
+	return use;
+}
+
+/*
+ * Greatest common divisor of 'a' and 'b'.
+ *
+ * Uses the remainder form of Euclid's algorithm, so it terminates for
+ * every input, including zero: GCD(0, n) == GCD(n, 0) == n and
+ * GCD(0, 0) == 0.  (The subtraction form looped forever when exactly
+ * one argument was 0.)
+ */
+unsigned long GCD(unsigned long a, unsigned long b)
+{
+	while (b != 0) {
+		unsigned long r = a % b;
+
+		a = b;
+		b = r;
+	}
+	return a;
+}
+
+/*
+ * conf_line reads one logical line from the conffile or mdstat.
+ *
+ * The first word is fetched with conf_word(file, 1) and becomes the
+ * head of a dl list; every further word that conf_word(file, 0) yields
+ * is added to that list with dl_add().  conf_word() has already
+ * removed quotes and skipped comments.
+ *
+ * Returns the list head, or NULL when there is no first word.
+ */
+
+char *conf_line(FILE *file)
+{
+	char *word;
+	char *head;
+
+	word = conf_word(file, 1);
+	if (!word)
+		return NULL;
+
+	/* The words are copied into dl nodes; the originals are freed */
+	head = dl_strdup(word);
+	free(word);
+	dl_init(head);
+
+	for (word = conf_word(file, 0); word; word = conf_word(file, 0)) {
+		char *node = dl_strdup(word);
+
+		free(word);
+		dl_add(head, node);
+	}
+	return head;
+}
+
+/*
+ * Release a word list: unlink and free every element that follows
+ * 'line' until the list points back at 'line' itself, then free
+ * 'line' too.
+ */
+void free_line(char *line)
+{
+	char *word;
+
+	while ((word = dl_next(line)) != line) {
+		dl_del(word);
+		dl_free(word);
+	}
+	dl_free(line);
+}
diff --git a/makedist b/makedist
new file mode 100755
index 00000000..e4f20acf
--- /dev/null
+++ b/makedist
@@ -0,0 +1,96 @@
+#!/bin/sh
+# Build an mdadm release tarball from the current git HEAD.
+#
+#   makedist        build, check, rpmbuild, then (after confirmation)
+#                   sign and upload the release
+#   makedist test   same, but into a fresh /tmp/mdadm-test and without
+#                   signing or uploading
+#   makedist diff   compare an already built tarball with the working tree
+#
+# avoid silly sorting
+export LANG=C
+arg=$1
+target=~/public_html/source/mdadm
+if [ " $arg" = " test" ]
+then
+ target=/tmp/mdadm-test
+ rm -rf $target
+ mkdir -p $target
+fi
+if [ -d $target ]
+then :
+else echo $target is not a directory
+ exit 2
+fi
+# The version is the third word of the '#define VERSION' line in
+# ReadMe.c, with its double quotes removed.
+set `grep '^#define VERSION' ReadMe.c `
+version=`echo $3 | sed -e 's/"//g'`
+# Refuse to continue unless both man pages and the spec file already
+# carry this version.
+grep "^.TH MDADM 8 .. v$version" mdadm.8.in > /dev/null 2>&1 ||
+ {
+ echo mdadm.8.in does not mention version $version.
+ exit 1
+ }
+grep "^.TH MDMON 8 .. v$version" mdmon.8 > /dev/null 2>&1 ||
+ {
+ echo mdmon.8 does not mention version $version.
+ exit 1
+ }
+# The spec file is checked against the version with '-' turned into '_'
+rpmv=`echo $version | tr - _`
+grep "^Version: *$rpmv$" mdadm.spec > /dev/null 2>&1 ||
+ {
+ echo mdadm.spec does not mention version $version.
+ exit 1
+ }
+if [ -f ANNOUNCE-$version ]
+then :
+else
+ echo ANNOUNCE-$version does not exist
+ exit 1
+fi
+# Add the ANNOUNCE file to the sorted 'inventory' list if it is missing
+if grep "^ANNOUNCE-$version\$" inventory
+then :
+else { cat inventory ; echo ANNOUNCE-$version ; } | sort -o inventory
+fi
+
+echo version = $version
+base=mdadm-$version.tar.gz
+if [ " $arg" != " diff" ]
+then
+ # Build mode: never overwrite an existing tarball
+ if [ -f $target/$base ]
+ then
+ echo $target/$base exists.
+ exit 1
+ fi
+ # Remove the partial tarball on signals 1, 2 and 3
+ trap "rm $target/$base; exit" 1 2 3
+ git archive --prefix=mdadm-$version/ HEAD | gzip --best > $target/$base
+ chmod a+r $target/$base
+ ls -l $target/$base
+ # The sorted file list of the tarball (top directory stripped) must
+ # match 'inventory' exactly.
+ if tar tzf $target/$base | sed 's,[^/]*/,,' | sort | diff -u inventory -
+ then : correct files found
+ else echo "Extra files, or inventory is out-of-date"
+ rm $target/$base
+ exit 1
+ fi
+ rpmbuild -ta $target/$base || exit 1
+ find /home/neilb/src/RPM -name "*mdadm-$version-*" \
+ -exec cp {} $target/RPM \;
+ cp ANNOUNCE-$version $target/ANNOUNCE
+ cp ChangeLog $target/ChangeLog
+ if [ " $arg" != " test" ]
+ then
+ # Real release: sign the uncompressed tar stream and the
+ # ANNOUNCE file with gpg, then upload with kup.
+ echo -n "Confirm signing this release? "
+ read a
+ if [ " $a" != " y" ]; then echo OK - bye. ; exit 1; fi
+ if zcat $target/$base | gpg -ba > $target/$base.sign && gpg -ba $target/ANNOUNCE
+ then
+ kup put $target/$base $target/$base.sign \
+ /pub/linux/utils/raid/mdadm/mdadm-$version.tar.gz
+ kup put $target/ANNOUNCE $target/ANNOUNCE.asc /pub/linux/utils/raid/mdadm/ANNOUNCE
+ else
+ echo signing failed
+ exit 1
+ fi
+ fi
+else
+ # Diff mode: unpack the existing tarball and a tarball made from the
+ # working tree under /var/tmp, show the differences, then clean up.
+ if [ ! -f $target/$base ]
+ then
+ echo $target/$base does not exist.
+ exit 1
+ fi
+ ( cd .. ; ln -s mdadm.v2 mdadm-$version ; tar chf - --exclude=.git --exclude="TAGS" --exclude='*,v' --exclude='*~' --exclude='*.o' --exclude mdadm --exclude=mdadm'.[^ch0-9]' --exclude=RCS mdadm-$version ; rm mdadm-$version ) | gzip --best > /var/tmp/mdadm-new.tgz
+ mkdir /var/tmp/mdadm-old ; zcat $target/$base | ( cd /var/tmp/mdadm-old ; tar xf - )
+ mkdir /var/tmp/mdadm-new ; zcat /var/tmp/mdadm-new.tgz | ( cd /var/tmp/mdadm-new ; tar xf - )
+ diff -ru /var/tmp/mdadm-old /var/tmp/mdadm-new
+ rm -rf /var/tmp/mdadm-old /var/tmp/mdadm-new /var/tmp/mdadm-new.tgz
+fi
diff --git a/managemon.c b/managemon.c
new file mode 100644
index 00000000..1c9eccc4
--- /dev/null
+++ b/managemon.c
@@ -0,0 +1,910 @@
+/*
+ * mdmon - monitor external metadata arrays
+ *
+ * Copyright (C) 2007-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2007-2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/*
+ * The management thread for monitoring active md arrays.
+ * This thread does things which might block such as memory
+ * allocation.
+ * In particular:
+ *
+ * - Find out about new arrays in this container.
+ * Allocate the data structures and open the files.
+ *
+ * For this we watch /proc/mdstat and find new arrays with
+ * metadata type that confirms sharing. e.g. "md4"
+ * When we find a new array we slip it into the list of
+ * arrays and signal 'monitor' by writing to a pipe.
+ *
+ * - Respond to reshape requests by allocating new data structures
+ * and opening new files.
+ *
+ * These come as a change to raid_disks. We allocate a new
+ * version of the data structures and slip it into the list.
+ * 'monitor' will notice and release the old version.
+ * Changes to level, chunksize, layout.. do not need re-allocation.
+ * Reductions in raid_disks don't really either, but we handle
+ * them the same way for consistency.
+ *
+ * - When a device is added to the container, we add it to the metadata
+ * as a spare.
+ *
+ * - Deal with degraded array
+ * We only do this when first noticing the array is degraded.
+ * This can be when we first see the array, when sync completes or
+ * when recovery completes.
+ *
+ * Check if number of failed devices suggests recovery is needed, and
+ * skip if not.
+ * Ask metadata to allocate a spare device
+ * Add device as not in_sync and give a role
+ * Update metadata.
+ * Open sysfs files and pass to monitor.
+ * Make sure that monitor Starts recovery....
+ *
+ * - Pass on metadata updates from external programs such as
+ * mdadm creating a new array.
+ *
+ * This is most-messy.
+ * It might involve adding a new array or changing the status of
+ * a spare, or any reconfig that the kernel doesn't get involved in.
+ *
+ * The required updates are received via a named pipe. There will
+ * be one named pipe for each container. Each message contains a
+ * sync marker: 0x5a5aa5a5, A byte count, and the message. This is
+ * passed to the metadata handler which will interpret and process it.
+ * For 'DDF' messages are internal data blocks with the leading
+ * 'magic number' signifying what sort of data it is.
+ *
+ */
+
+/*
+ * We select on /proc/mdstat and the named pipe.
+ * We create new arrays or updated version of arrays and slip
+ * them into the head of the list, then signal 'monitor' via a pipe write.
+ * 'monitor' will notice and place the old array on a return list.
+ * Metadata updates are placed on a queue just like they arrive
+ * from the named pipe.
+ *
+ * When new arrays are found based on correct metadata string, we
+ * need to identify them with an entry in the metadata. Maybe we require
+ * the metadata to be mdX/NN when NN is the index into an appropriate table.
+ *
+ */
+
+/*
+ * List of tasks:
+ * - Watch for spares to be added to the container, and write updated
+ * metadata to them.
+ * - Watch for new arrays using this container, confirm they match metadata
+ * and if so, start monitoring them
+ * - Watch for spares being added to monitored arrays. This shouldn't
+ * happen, as we should do all the adding. Just remove them.
+ * - Watch for change in raid-disks, chunk-size, etc. Update metadata and
+ * start a reshape.
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include "mdadm.h"
+#include "mdmon.h"
+#include <sys/syscall.h>
+#include <sys/socket.h>
+#include <signal.h>
+
+/*
+ * Close every file descriptor held by 'aa'.
+ *
+ * The recovery_fd and state_fd of each member device are closed
+ * unconditionally; the array-level descriptors are closed only when
+ * they are non-negative.
+ */
+static void close_aa(struct active_array *aa)
+{
+	struct mdinfo *dev = aa->info.devs;
+
+	while (dev) {
+		close(dev->recovery_fd);
+		close(dev->state_fd);
+		dev = dev->next;
+	}
+
+	if (aa->action_fd >= 0)
+		close(aa->action_fd);
+	if (aa->info.state_fd >= 0)
+		close(aa->info.state_fd);
+	if (aa->resync_start_fd >= 0)
+		close(aa->resync_start_fd);
+	if (aa->metadata_fd >= 0)
+		close(aa->metadata_fd);
+	if (aa->sync_completed_fd >= 0)
+		close(aa->sync_completed_fd);
+}
+
+/*
+ * Free 'aa' together with its list of member devices.
+ *
+ * The file descriptors are closed only when aa->container is NULL:
+ * ->container is set for a clone, which may still be using them.
+ */
+static void free_aa(struct active_array *aa)
+{
+	struct mdinfo *dev;
+
+	dprintf("%s: sys_name: %s\n", __func__, aa->info.sys_name);
+	if (aa->container == NULL)
+		close_aa(aa);
+
+	while ((dev = aa->info.devs) != NULL) {
+		aa->info.devs = dev->next;
+		free(dev);
+	}
+	free(aa);
+}
+
+/*
+ * Make a copy of 'aa' that can be linked in as its replacement.
+ *
+ * The array structure is copied by assignment (so the copy holds the
+ * same file descriptor values), with its 'next', 'replaces' and
+ * 'info.next' links cleared.  The device list is rebuilt from fresh
+ * copies, in the same order, leaving out any device whose state_fd is
+ * negative.
+ */
+static struct active_array *duplicate_aa(struct active_array *aa)
+{
+	struct active_array *copy = xmalloc(sizeof(*copy));
+	struct mdinfo **tail;
+	struct mdinfo *src;
+
+	*copy = *aa;
+	copy->next = NULL;
+	copy->replaces = NULL;
+	copy->info.next = NULL;
+
+	/* 'tail' always addresses the link that receives the next copy */
+	tail = &copy->info.devs;
+	for (src = aa->info.devs; src; src = src->next) {
+		struct mdinfo *twin;
+
+		if (src->state_fd < 0)
+			continue;
+
+		twin = xmalloc(sizeof(*twin));
+		*twin = *src;
+		*tail = twin;
+		tail = &twin->next;
+	}
+	*tail = NULL;
+
+	return copy;
+}
+
+/*
+ * Send SIGUSR1 to the thread whose id is in mon_tid, within this
+ * process.  The raw system call is used; it is equivalent to
+ *	tgkill(getpid(), mon_tid, SIGUSR1);
+ */
+static void wakeup_monitor(void)
+{
+	syscall(SYS_tgkill, getpid(), mon_tid, SIGUSR1);
+}
+
+/*
+ * If an array has been left on 'discard_this', free it, clear
+ * 'pending_discard' if it referred to that same array, reset
+ * 'discard_this' and wake the monitor.  Does nothing when
+ * 'discard_this' is NULL.
+ */
+static void remove_old(void)
+{
+	if (!discard_this)
+		return;
+
+	discard_this->next = NULL;
+	free_aa(discard_this);
+	if (pending_discard == discard_this)
+		pending_discard = NULL;
+	discard_this = NULL;
+	wakeup_monitor();
+}
+
+/*
+ * Put 'new' at the head of container->arrays as the replacement for
+ * 'old' and wake the monitor.
+ *
+ * Only one replacement is outstanding at a time: while an earlier one
+ * is still recorded in 'pending_discard', this polls once a second
+ * until 'discard_this' becomes non-NULL and then disposes of it with
+ * remove_old().
+ */
+static void replace_array(struct supertype *container,
+ struct active_array *old,
+ struct active_array *new)
+{
+ /* To replace an array, we add it to the top of the list
+ * marked with ->replaces to point to the original.
+ * 'monitor' will take the original out of the list
+ * and put it on 'discard_this'. We take it from there
+ * and discard it.
+ */
+ remove_old();
+ while (pending_discard) {
+ while (discard_this == NULL)
+ sleep(1);
+ remove_old();
+ }
+ /* Record the hand-off first, then publish 'new' on the list */
+ pending_discard = old;
+ new->replaces = old;
+ new->next = container->arrays;
+ container->arrays = new;
+ wakeup_monitor();
+}
+
+/* Metadata update lists.
+ * update_queue_pending: queue_metadata_update() appends new updates here.
+ * update_queue: check_update_queue() moves the pending list here when
+ * this list is empty, then wakes the monitor.
+ * update_queue_handled: freed by check_update_queue() on every call.
+ */
+struct metadata_update *update_queue = NULL;
+struct metadata_update *update_queue_handled = NULL;
+struct metadata_update *update_queue_pending = NULL;
+
+/*
+ * Free every update on the list at *update, leaving *update NULL.
+ *
+ * For each update its 'buf' and 'space' are freed, and so is every
+ * block on its 'space_list': that list is chained through the first
+ * pointer-sized word of each block.
+ */
+static void free_updates(struct metadata_update **update)
+{
+	struct metadata_update *mu;
+
+	while ((mu = *update) != NULL) {
+		void **block = mu->space_list;
+
+		*update = mu->next;
+		free(mu->buf);
+		free(mu->space);
+		while (block) {
+			void **following = *block;
+
+			free(block);
+			block = following;
+		}
+		free(mu);
+	}
+}
+
+/*
+ * Housekeeping for the update lists: free everything on
+ * update_queue_handled and, if update_queue is empty while updates are
+ * pending, move the pending list to update_queue and wake the monitor.
+ * 'container' is not used.
+ */
+void check_update_queue(struct supertype *container)
+{
+	free_updates(&update_queue_handled);
+
+	if (update_queue != NULL || update_queue_pending == NULL)
+		return;
+
+	update_queue = update_queue_pending;
+	update_queue_pending = NULL;
+	wakeup_monitor();
+}
+
+/*
+ * Append 'mu' to the end of update_queue_pending.
+ * mu->next is left untouched, so anything already chained behind 'mu'
+ * is queued with it.
+ */
+static void queue_metadata_update(struct metadata_update *mu)
+{
+	struct metadata_update **link = &update_queue_pending;
+
+	/* advance to the NULL link that terminates the list */
+	while (*link != NULL)
+		link = &(*link)->next;
+	*link = mu;
+}
+
+/*
+ * Account for disk 'sd' having appeared in container 'st'.
+ *
+ * 'sd' is linked onto st->devs first.  If the disk can be opened and
+ * its metadata already shows it as a member of this container with a
+ * raid_disk role, nothing more is done.  Otherwise it is added to the
+ * metadata with raid_disk -1 and the resulting update is put on the
+ * pending update queue.
+ */
+static void add_disk_to_container(struct supertype *st, struct mdinfo *sd)
+{
+ int dfd;
+ char nm[20];
+ struct supertype *st2;
+ struct metadata_update *update = NULL;
+ struct mdinfo info;
+ mdu_disk_info_t dk = {
+ .number = -1,
+ .major = sd->disk.major,
+ .minor = sd->disk.minor,
+ .raid_disk = -1,
+ .state = 0,
+ };
+
+ dprintf("%s: add %d:%d to container\n",
+ __func__, sd->disk.major, sd->disk.minor);
+
+ sd->next = st->devs;
+ st->devs = sd;
+
+ sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
+ dfd = dev_open(nm, O_RDWR);
+ /* On failure 'sd' stays on st->devs but the metadata is not touched */
+ if (dfd < 0)
+ return;
+
+ /* Check the metadata and see if it is already part of this
+ * array
+ */
+ st2 = dup_super(st);
+ if (st2->ss->load_super(st2, dfd, NULL) == 0) {
+ st2->ss->getinfo_super(st2, &info, NULL);
+ if (st->ss->compare_super(st, st2) == 0 &&
+ info.disk.raid_disk >= 0) {
+ /* Looks like a good member of array.
+ * Just accept it.
+ * mdadm will incorporate any parts into
+ * active arrays.
+ */
+ /* NOTE(review): dfd is not closed on this path and st2
+ * itself is not freed - confirm whether load_super or
+ * free_super take ownership of either.
+ */
+ st2->ss->free_super(st2);
+ return;
+ }
+ }
+ st2->ss->free_super(st2);
+
+ /* While update_tail is set, the handler calls below leave their
+ * update records on 'update'.
+ * NOTE(review): presumably add_to_super keeps dfd for
+ * write_init_super - confirm against the metadata handlers.
+ */
+ st->update_tail = &update;
+ st->ss->add_to_super(st, &dk, dfd, NULL, INVALID_SECTORS);
+ st->ss->write_init_super(st);
+ queue_metadata_update(update);
+ st->update_tail = NULL;
+}
+
+/*
+ * Create and queue update structure about the removed disks.
+ * The update is prepared by super type handler and passed to the monitor
+ * thread.
+ *
+ * 'sd' is only read (for its major/minor numbers); unlinking it from
+ * the container's device list and freeing it is left to the caller.
+ */
+static void remove_disk_from_container(struct supertype *st, struct mdinfo *sd)
+{
+ struct metadata_update *update = NULL;
+ mdu_disk_info_t dk = {
+ .number = -1,
+ .major = sd->disk.major,
+ .minor = sd->disk.minor,
+ .raid_disk = -1,
+ .state = 0,
+ };
+ dprintf("%s: remove %d:%d from container\n",
+ __func__, sd->disk.major, sd->disk.minor);
+
+ /* While update_tail is set, the handler calls below leave their
+ * update records on 'update'.
+ */
+ st->update_tail = &update;
+ st->ss->remove_from_super(st, &dk);
+ /* FIXME this write_init_super shouldn't be here.
+ * We have it after add_to_super to write to new device,
+ * but with 'remove' we don't want to write to that device!
+ */
+ st->ss->write_init_super(st);
+ queue_metadata_update(update);
+ st->update_tail = NULL;
+}
+
+/*
+ * Bring the device list of 'container' in line with what sysfs reports.
+ *
+ * Nothing is done unless the device count in 'mdstat' differs from
+ * container->devcnt.  If the sysfs read fails, devcnt is set to -1 so
+ * that the next call tries again.
+ */
+static void manage_container(struct mdstat_ent *mdstat,
+ struct supertype *container)
+{
+ /* Of interest here are:
+ * - if a new device has been added to the container, we
+ * add it to the array ignoring any metadata on it.
+ * - if a device has been removed from the container, we
+ * remove it from the device list and update the metadata.
+ * FIXME should we look for compatible metadata and take hints
+ * about spare assignment.... probably not.
+ */
+ if (mdstat->devcnt != container->devcnt) {
+ struct mdinfo **cdp, *cd, *di, *mdi;
+ int found;
+
+ /* read /sys/block/NAME/md/dev-??/block/dev to find out
+ * what is there, and compare with container->info.devs
+ * To see what is removed and what is added.
+ * These need to be remove from, or added to, the array
+ */
+ mdi = sysfs_read(-1, mdstat->devnm, GET_DEVS);
+ if (!mdi) {
+ /* invalidate the current count so we can try again */
+ container->devcnt = -1;
+ return;
+ }
+
+ /* check for removals */
+ /* cdp addresses the link to the entry under test, so an entry
+ * can be unlinked without tracking its predecessor.
+ */
+ for (cdp = &container->devs; *cdp; ) {
+ found = 0;
+ for (di = mdi->devs; di; di = di->next)
+ if (di->disk.major == (*cdp)->disk.major &&
+ di->disk.minor == (*cdp)->disk.minor) {
+ found = 1;
+ break;
+ }
+ if (!found) {
+ cd = *cdp;
+ *cdp = (*cdp)->next;
+ remove_disk_from_container(container, cd);
+ free(cd);
+ } else
+ cdp = &(*cdp)->next;
+ }
+
+ /* check for additions */
+ for (di = mdi->devs; di; di = di->next) {
+ for (cd = container->devs; cd; cd = cd->next)
+ if (di->disk.major == cd->disk.major &&
+ di->disk.minor == cd->disk.minor)
+ break;
+ if (!cd) {
+ /* Not known yet: hand a private copy to
+ * add_disk_to_container(), which links it
+ * onto container->devs.
+ */
+ struct mdinfo *newd = xmalloc(sizeof(*newd));
+
+ *newd = *di;
+ add_disk_to_container(container, newd);
+ }
+ }
+ sysfs_free(mdi);
+ container->devcnt = mdstat->devcnt;
+ }
+}
+
+/*
+ * Initialise 'disk' as a copy of 'clone', open its "recovery_start"
+ * and "state" sysfs files, and push it on the front of aa->info.devs.
+ *
+ * Returns 0 on success.  Returns -1 if either pointer is NULL or either
+ * file cannot be opened; in that case 'disk' is not linked into the
+ * list and remains owned by the caller (its contents may already have
+ * been overwritten).
+ */
+static int disk_init_and_add(struct mdinfo *disk, struct mdinfo *clone,
+			     struct active_array *aa)
+{
+	if (disk == NULL || clone == NULL)
+		return -1;
+
+	*disk = *clone;
+
+	disk->recovery_fd = sysfs_open(aa->info.sys_name, disk->sys_name, "recovery_start");
+	if (disk->recovery_fd >= 0) {
+		disk->state_fd = sysfs_open(aa->info.sys_name, disk->sys_name, "state");
+		if (disk->state_fd >= 0) {
+			disk->prev_state = read_dev_state(disk->state_fd);
+			disk->curr_state = disk->prev_state;
+			disk->next = aa->info.devs;
+			aa->info.devs = disk;
+			return 0;
+		}
+		/* second open failed: do not keep the first descriptor */
+		close(disk->recovery_fd);
+	}
+	return -1;
+}
+
+/*
+ * Manager-side handling of one member array 'a', given its current
+ * /proc/mdstat entry.
+ *
+ * In order: refresh raid_disks and component_size, work out whether
+ * the array is 'frozen', react to a level change, set safe mode to 1
+ * once 'sigterm' is set, and then service the check_degraded and
+ * check_reshape flags of the array.
+ */
+static void manage_member(struct mdstat_ent *mdstat,
+ struct active_array *a)
+{
+ /* Compare mdstat info with known state of member array.
+ * We do not need to look for device state changes here, that
+ * is dealt with by the monitor.
+ *
+ * If a reshape is being requested, monitor will have noticed
+ * that sync_action changed and will have set check_reshape.
+ * We just need to see if new devices have appeared. All metadata
+ * updates will already have been processed.
+ *
+ * We also want to handle degraded arrays here by
+ * trying to find and assign a spare.
+ * We do that whenever the monitor tells us too.
+ */
+ char buf[64];
+ int frozen;
+ struct supertype *container = a->container;
+ unsigned long long int component_size = 0;
+
+ if (container == NULL)
+ /* Raced with something */
+ return;
+
+ if (mdstat->active) {
+ // FIXME
+ a->info.array.raid_disks = mdstat->raid_disks;
+ // MORE
+ }
+
+ /* NOTE(review): the value read is doubled before being stored -
+ * presumably a KiB to 512-byte-sector conversion; confirm units.
+ */
+ if (sysfs_get_ll(&a->info, NULL, "component_size", &component_size) >= 0)
+ a->info.component_size = component_size << 1;
+
+ /* honor 'frozen' */
+ /* NOTE(review): tests the character at index 9, presumably the one
+ * right after an "external:" prefix - confirm the string format.
+ */
+ if (sysfs_get_str(&a->info, NULL, "metadata_version", buf, sizeof(buf)) > 0)
+ frozen = buf[9] == '-';
+ else
+ frozen = 1; /* can't read metadata_version assume the worst */
+
+ /* If sync_action is not 'idle' then don't try recovery now */
+ if (!frozen
+ && sysfs_get_str(&a->info, NULL, "sync_action", buf, sizeof(buf)) > 0
+ && strncmp(buf, "idle", 4) != 0)
+ frozen = 1;
+
+ if (mdstat->level) {
+ int level = map_name(pers, mdstat->level);
+ /* Level 0 or linear: mark the array for removal */
+ if (level == 0 || level == LEVEL_LINEAR) {
+ a->to_remove = 1;
+ wakeup_monitor();
+ return;
+ }
+ else if (a->info.array.level != level && level > 0) {
+ /* Level changed: continue with a replacement copy */
+ struct active_array *newa = duplicate_aa(a);
+ if (newa) {
+ newa->info.array.level = level;
+ replace_array(container, a, newa);
+ a = newa;
+ }
+ }
+ }
+
+ /* we are after monitor kick,
+ * so container field can be cleared - check it again
+ */
+ if (a->container == NULL)
+ return;
+
+ if (sigterm && a->info.safe_mode_delay != 1) {
+ sysfs_set_safemode(&a->info, 1);
+ a->info.safe_mode_delay = 1;
+ }
+
+ /* We don't check the array while any update is pending, as it
+ * might container a change (such as a spare assignment) which
+ * could affect our decisions.
+ */
+ if (a->check_degraded && !frozen &&
+ update_queue == NULL && update_queue_pending == NULL) {
+ struct metadata_update *updates = NULL;
+ struct mdinfo *newdev = NULL;
+ struct active_array *newa;
+ struct mdinfo *d;
+
+ a->check_degraded = 0;
+
+ /* The array may not be degraded, this is just a good time
+ * to check.
+ */
+ newdev = container->ss->activate_spare(a, &updates);
+ if (!newdev)
+ return;
+
+ newa = duplicate_aa(a);
+ if (!newa)
+ goto out;
+ /* prevent the kernel from activating the disk(s) before we
+ * finish adding them
+ */
+ dprintf("%s: freezing %s\n", __func__, a->info.sys_name);
+ sysfs_set_str(&a->info, NULL, "sync_action", "frozen");
+
+ /* Add device to array and set offset/size/slot.
+ * and open files for each newdev */
+ for (d = newdev; d ; d = d->next) {
+ struct mdinfo *newd;
+
+ newd = xmalloc(sizeof(*newd));
+ if (sysfs_add_disk(&newa->info, d, 0) < 0) {
+ free(newd);
+ continue;
+ }
+ /* NOTE(review): the result is ignored; when
+ * disk_init_and_add() fails, newd is not linked
+ * anywhere and is not freed here.
+ */
+ disk_init_and_add(newd, d, newa);
+ }
+ queue_metadata_update(updates);
+ updates = NULL;
+ /* Wait, polling every 15ms, until both update queues are empty */
+ while (update_queue_pending || update_queue) {
+ check_update_queue(container);
+ usleep(15*1000);
+ }
+ replace_array(container, a, newa);
+ if (sysfs_set_str(&a->info, NULL, "sync_action", "recover")
+ == 0)
+ newa->prev_action = recover;
+ dprintf("%s: recovery started on %s\n", __func__,
+ a->info.sys_name);
+ out:
+ /* free the device list returned by activate_spare() */
+ while (newdev) {
+ d = newdev->next;
+ free(newdev);
+ newdev = d;
+ }
+ free_updates(&updates);
+ }
+
+ if (a->check_reshape) {
+ /* mdadm might have added some devices to the array.
+ * We want to disk_init_and_add any such device to a
+ * duplicate_aa and replace a with that.
+ * mdstat doesn't have enough info so we sysfs_read
+ * and look for new stuff.
+ */
+ struct mdinfo *info, *d, *d2, *newd;
+ unsigned long long array_size;
+ struct active_array *newa = NULL;
+ a->check_reshape = 0;
+ info = sysfs_read(-1, mdstat->devnm,
+ GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);
+ if (!info)
+ goto out2;
+ for (d = info->devs; d; d = d->next) {
+ /* devices without a slot in the array are not of interest */
+ if (d->disk.raid_disk < 0)
+ continue;
+ for (d2 = a->info.devs; d2; d2 = d2->next)
+ if (d2->disk.raid_disk ==
+ d->disk.raid_disk)
+ break;
+ if (d2)
+ /* already have this one */
+ continue;
+ /* the replacement copy is made only when first needed */
+ if (!newa) {
+ newa = duplicate_aa(a);
+ if (!newa)
+ break;
+ }
+ newd = xmalloc(sizeof(*newd));
+ /* NOTE(review): result ignored here as well; newd is
+ * not freed if disk_init_and_add() fails.
+ */
+ disk_init_and_add(newd, d, newa);
+ }
+ /* Write array_size (custom_array_size/2) when custom_array_size
+ * is larger than twice the value read back.
+ */
+ if (sysfs_get_ll(info, NULL, "array_size", &array_size) == 0
+ && a->info.custom_array_size > array_size*2) {
+ sysfs_set_num(info, NULL, "array_size",
+ a->info.custom_array_size/2);
+ }
+ out2:
+ /* NOTE(review): reached with info == NULL when sysfs_read()
+ * fails - assumes sysfs_free(NULL) is safe; confirm.
+ */
+ sysfs_free(info);
+ if (newa)
+ replace_array(container, a, newa);
+ }
+}
+
+/*
+ * Report whether 'aa' has everything needed to be monitored.
+ *
+ * Returns 1 only if every member device has a state_fd, the array has
+ * a state_fd, arrays with level > 0 also have action_fd and
+ * resync_start_fd, and a container is set.  Returns 0 otherwise.
+ */
+static int aa_ready(struct active_array *aa)
+{
+	int level = aa->info.array.level;
+	struct mdinfo *dev = aa->info.devs;
+
+	while (dev) {
+		if (dev->state_fd < 0)
+			return 0;
+		dev = dev->next;
+	}
+
+	if (aa->info.state_fd < 0)
+		return 0;
+
+	if (level > 0 && (aa->action_fd < 0 || aa->resync_start_fd < 0))
+		return 0;
+
+	return aa->container ? 1 : 0;
+}
+
+/* Start monitoring an array that has appeared in this container.
+ *
+ * mdstat:    the mdstat entry describing the new array
+ * container: the container the array belongs to
+ * victim:    an existing active_array entry to be replaced by the
+ *            new one, or NULL when the array is entirely new
+ *
+ * Nothing is done for inactive arrays or for raid0/linear arrays.
+ * Otherwise an active_array is built from sysfs, the sysfs attribute
+ * files are opened, and - if aa_ready() and the metadata handler's
+ * open_new() both accept it - the array is installed with
+ * replace_array().  On failure the new structure is released with
+ * free_aa().
+ */
+static void manage_new(struct mdstat_ent *mdstat,
+		       struct supertype *container,
+		       struct active_array *victim)
+{
+	/* A new array has appeared in this container.
+	 * Hopefully it is already recorded in the metadata.
+	 * Check, then create the new array to report it to
+	 * the monitor.
+	 */
+
+	struct active_array *new;
+	struct mdinfo *mdi, *di;
+	char *inst;
+	int i;
+	int failed = 0;
+	char buf[40];
+
+	/* check if array is ready to be monitored */
+	if (!mdstat->active || !mdstat->level)
+		return;
+	if (strcmp(mdstat->level, "raid0") == 0 ||
+	    strcmp(mdstat->level, "linear") == 0)
+		return;
+
+	mdi = sysfs_read(-1, mdstat->devnm,
+			 GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
+			 GET_DEGRADED|GET_SAFEMODE|
+			 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|GET_LAYOUT);
+
+	if (!mdi)
+		return;
+	new = xcalloc(1, sizeof(*new));
+
+	strcpy(new->info.sys_name, mdstat->devnm);
+
+	new->prev_state = new->curr_state = new->next_state = inactive;
+	new->prev_action= new->curr_action= new->next_action= idle;
+
+	new->container = container;
+
+	inst = to_subarray(mdstat, container->devnm);
+
+	new->info.array = mdi->array;
+	new->info.component_size = mdi->component_size;
+
+	/* Attach one mdinfo per raid slot; 'di' is left NULL when no
+	 * device currently occupies slot i.
+	 */
+	for (i = 0; i < new->info.array.raid_disks; i++) {
+		struct mdinfo *newd = xmalloc(sizeof(*newd));
+
+		for (di = mdi->devs; di; di = di->next)
+			if (i == di->disk.raid_disk)
+				break;
+
+		if (disk_init_and_add(newd, di, new) != 0) {
+			/* free(NULL) is a no-op, so no guard is needed */
+			free(newd);
+
+			failed++;
+			if (failed > new->info.array.failed_disks) {
+				/* we cannot properly monitor without all working disks */
+				new->container = NULL;
+				break;
+			}
+		}
+	}
+
+	new->action_fd = sysfs_open(new->info.sys_name, NULL, "sync_action");
+	new->info.state_fd = sysfs_open(new->info.sys_name, NULL, "array_state");
+	new->resync_start_fd = sysfs_open(new->info.sys_name, NULL, "resync_start");
+	new->metadata_fd = sysfs_open(new->info.sys_name, NULL, "metadata_version");
+	new->sync_completed_fd = sysfs_open(new->info.sys_name, NULL, "sync_completed");
+	dprintf("%s: inst: %s action: %d state: %d\n", __func__, inst,
+		new->action_fd, new->info.state_fd);
+
+	if (sigterm)
+		new->info.safe_mode_delay = 1;
+	else if (mdi->safe_mode_delay >= 50)
+		/* Normal start, mdadm set this. */
+		new->info.safe_mode_delay = mdi->safe_mode_delay;
+	else
+		/* Restart, just pick a number */
+		new->info.safe_mode_delay = 5000;
+	sysfs_set_safemode(&new->info, new->info.safe_mode_delay);
+
+	/* reshape_position is set by mdadm in sysfs
+	 * read this information for new arrays only (empty victim)
+	 */
+	if ((victim == NULL) &&
+	    (sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf)) > 0) &&
+	    (strncmp(buf, "reshape", 7) == 0)) {
+		if (sysfs_get_ll(mdi, NULL, "reshape_position",
+			&new->last_checkpoint) != 0)
+			new->last_checkpoint = 0;
+		else {
+			/* scale the position by the number of data disks */
+			int data_disks = mdi->array.raid_disks;
+			if (mdi->array.level == 4 || mdi->array.level == 5)
+				data_disks--;
+			if (mdi->array.level == 6)
+				data_disks -= 2;
+
+			new->last_checkpoint /= data_disks;
+		}
+		dprintf("mdmon: New monitored array is under reshape.\n"
+			"       Last checkpoint is: %llu\n",
+			new->last_checkpoint);
+	}
+
+	sysfs_free(mdi);
+
+	/* if everything checks out tell the metadata handler we want to
+	 * manage this instance
+	 */
+	if (!aa_ready(new) || container->ss->open_new(container, new, inst) < 0) {
+		pr_err("failed to monitor %s\n",
+			mdstat->metadata_version);
+		new->container = NULL;
+		free_aa(new);
+	} else {
+		replace_array(container, victim, new);
+		if (failed) {
+			/* some slots are empty: have manage_member look
+			 * at the degraded array straight away
+			 */
+			new->check_degraded = 1;
+			manage_member(mdstat, new);
+		}
+	}
+}
+
+/* Walk a freshly read mdstat list and reconcile it with the arrays
+ * this container already knows about.  The container's own entry is
+ * passed to manage_container(); members already known are passed to
+ * manage_member(); members that are unknown, or whose entry has lost
+ * its container pointer, are passed to manage_new().
+ */
+void manage(struct mdstat_ent *mdstat, struct supertype *container)
+{
+	/* We have just read mdstat and need to compare it with
+	 * the known active arrays.
+	 * Arrays with the wrong metadata are ignored.
+	 */
+	struct mdstat_ent *ent;
+
+	for (ent = mdstat; ent; ent = ent->next) {
+		struct active_array *aa;
+
+		if (strcmp(ent->devnm, container->devnm) == 0) {
+			manage_container(ent, container);
+			continue;
+		}
+		if (!is_container_member(ent, container->devnm))
+			/* Not for this array */
+			continue;
+
+		/* Looks like a member of this container: find its entry */
+		aa = container->arrays;
+		while (aa && strcmp(ent->devnm, aa->info.sys_name) != 0)
+			aa = aa->next;
+
+		if (aa && aa->container && aa->to_remove == 0)
+			manage_member(ent, aa);
+
+		/* container pointer is deliberately re-read here, after
+		 * manage_member() may have run
+		 */
+		if (aa == NULL || !aa->container)
+			manage_new(ent, container, aa);
+	}
+}
+
+/* Act on one message received on the container socket.
+ *   len == 0   ping_monitor: wait for the monitor loop to advance
+ *   len == -1  ping_manager: re-read mdstat and run manage()
+ *   otherwise  a metadata update, queued for the monitor unless
+ *              sigterm is set
+ * For the queued case ownership of msg->buf moves to the new update
+ * and msg->buf is cleared.
+ */
+static void handle_message(struct supertype *container, struct metadata_update *msg)
+{
+	/* queue this metadata update through to the monitor */
+
+	struct metadata_update *update;
+
+	/* pings first wait for all queued updates to be processed */
+	if (msg->len <= 0) {
+		while (update_queue_pending || update_queue) {
+			check_update_queue(container);
+			usleep(15*1000);
+		}
+	}
+
+	if (msg->len == 0) { /* ping_monitor */
+		int target = monitor_loop_cnt;
+
+		/* odd: wait until next pselect; even: wait for 2 pselects */
+		target += (target & 1) ? 2 : 3;
+		wakeup_monitor();
+
+		while (monitor_loop_cnt - target < 0)
+			usleep(10 * 1000);
+		return;
+	}
+
+	if (msg->len == -1) { /* ping_manager */
+		struct mdstat_ent *ents = mdstat_read(1, 0);
+
+		manage(ents, container);
+		free_mdstat(ents);
+		return;
+	}
+
+	if (sigterm)
+		return;
+
+	update = xmalloc(sizeof(*update));
+	update->len = msg->len;
+	update->buf = msg->buf;
+	msg->buf = NULL;
+	update->space = NULL;
+	update->space_list = NULL;
+	update->next = NULL;
+	if (container->ss->prepare_update &&
+	    !container->ss->prepare_update(container, update))
+		free_updates(&update);
+	queue_metadata_update(update);
+}
+
+/* Accept one connection on the container socket and service messages
+ * from it until a receive, reply or ack fails, then close it.
+ * A zero-length message (ping) is answered with the Version string;
+ * every other message is acknowledged with ack().
+ */
+void read_sock(struct supertype *container)
+{
+	int fd;
+	struct metadata_update msg;
+	int terminate = 0;
+	long fl;
+	int tmo = 3; /* 3 second timeout before hanging up the socket */
+
+	fd = accept(container->sock, NULL, NULL);
+	if (fd < 0)
+		return;
+
+	/* Only modify the flags if they could actually be read;
+	 * feeding a failed F_GETFL result (-1) to F_SETFL would try
+	 * to set every flag bit.
+	 */
+	fl = fcntl(fd, F_GETFL, 0);
+	if (fl >= 0)
+		fcntl(fd, F_SETFL, fl | O_NONBLOCK);
+
+	do {
+		msg.buf = NULL;
+
+		/* read and validate the message */
+		if (receive_message(fd, &msg, tmo) == 0) {
+			handle_message(container, &msg);
+			/* handle_message() clears msg.buf when it hands the
+			 * buffer to a queued update.  Anything still here
+			 * was not consumed (e.g. sigterm is set) and would
+			 * otherwise leak.
+			 * NOTE(review): assumes receive_message() allocates
+			 * msg.buf from the heap - confirm in msg.c.
+			 */
+			free(msg.buf);
+			msg.buf = NULL;
+			if (msg.len == 0) {
+				/* ping reply with version */
+				msg.buf = Version;
+				msg.len = strlen(Version) + 1;
+				if (send_message(fd, &msg, tmo) < 0)
+					terminate = 1;
+			} else if (ack(fd, tmo) < 0)
+				terminate = 1;
+		} else
+			terminate = 1;
+
+	} while (!terminate);
+
+	close(fd);
+}
+
+/* exit_now: when non-zero, do_manager() calls exit(0) at the top of
+ * its next iteration.
+ * manager_ready: set to 1 by do_manager() once an iteration's
+ * management work has been done.
+ */
+int exit_now = 0;
+int manager_ready = 0;
+
+/* Main loop of the manager: never returns.  Each iteration re-reads
+ * mdstat and services the socket (only while no update is queued),
+ * discards old arrays, pushes pending updates, then sleeps until
+ * there is something to do.  SIGUSR1 and SIGTERM are allowed to be
+ * delivered during the wait.
+ */
+void do_manager(struct supertype *container)
+{
+	sigset_t wait_mask;
+
+	sigprocmask(SIG_UNBLOCK, NULL, &wait_mask);
+	sigdelset(&wait_mask, SIGUSR1);
+	sigdelset(&wait_mask, SIGTERM);
+
+	for (;;) {
+		if (exit_now)
+			exit(0);
+
+		/* Can only 'manage' things if 'monitor' is not making
+		 * structural changes to metadata, so need to check
+		 * update_queue
+		 */
+		if (!update_queue) {
+			struct mdstat_ent *ents = mdstat_read(1, 0);
+
+			manage(ents, container);
+
+			read_sock(container);
+
+			free_mdstat(ents);
+		}
+		remove_old();
+
+		check_update_queue(container);
+
+		manager_ready = 1;
+
+		if (sigterm)
+			wakeup_monitor();
+
+		if (update_queue) {
+			/* If an update is happening, just wait for signal */
+			pselect(0, NULL, NULL, NULL, NULL, &wait_mask);
+		} else {
+			mdstat_wait_fd(container->sock, &wait_mask);
+		}
+	}
+}
diff --git a/mapfile.c b/mapfile.c
new file mode 100644
index 00000000..41599df0
--- /dev/null
+++ b/mapfile.c
@@ -0,0 +1,508 @@
+/*
+ * mapfile - keep track of uuid <-> array mapping. Part of:
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2006-2010 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ * Paper: Neil Brown
+ * Novell Inc
+ * GPO Box Q1283
+ * QVB Post Office, NSW 1230
+ * Australia
+ */
+
+/* The mapfile is used to track arrays being created in --incremental
+ * mode. It particularly allows lookup from UUID to array device, but
+ * also allows the array device name to be easily found.
+ *
+ * The map file is line based with space separated fields. The fields are:
+ * Device id - mdX or mdpX where X is a number.
+ * metadata - 0.90 1.0 1.1 1.2 ddf ...
+ * UUID - uuid of the array
+ * path - path where device created: /dev/md/home
+ *
+ * The best place for the mapfile is /run/mdadm/map. Distros and users
+ * which have not switched to /run yet can choose a different location
+ * at compile time via MAP_DIR and MAP_FILE.
+ */
+#include "mdadm.h"
+#include <sys/file.h>
+#include <ctype.h>
+
+/* Indices into mapname[]; the first three also index mapmode[] and
+ * mapsmode[].
+ */
+#define MAP_READ 0
+#define MAP_NEW 1
+#define MAP_LOCK 2
+#define MAP_DIRNAME 3
+
+char *mapname[4] = {
+	MAP_DIR "/" MAP_FILE,
+	MAP_DIR "/" MAP_FILE ".new",
+	MAP_DIR "/" MAP_FILE ".lock",
+	MAP_DIR
+};
+
+/* open(2) flags and fdopen(3) mode for each of MAP_READ, MAP_NEW and
+ * MAP_LOCK.  fdopen() with "w" does not truncate, so MAP_NEW must
+ * carry O_TRUNC itself: otherwise a longer stale ".new" file left by
+ * an interrupted run would keep its tail and that tail would be
+ * renamed into place as part of the map.
+ */
+int mapmode[3] = { O_RDONLY, O_RDWR|O_CREAT|O_TRUNC, O_RDWR|O_CREAT|O_TRUNC };
+char *mapsmode[3] = { "r", "w", "w"};
+
+/* Open one of the map files as a stdio stream.
+ * modenum is MAP_READ, MAP_NEW or MAP_LOCK and selects both the file
+ * name and the open mode.  The map directory is created first when
+ * the mode may create the file.
+ * Returns the stream, or NULL if the file could not be opened.
+ */
+FILE *open_map(int modenum)
+{
+	int fd;
+	FILE *f;
+
+	if ((mapmode[modenum] & O_CREAT))
+		/* Attempt to create directory, don't worry about
+		 * failure.
+		 */
+		(void)mkdir(mapname[MAP_DIRNAME], 0755);
+	fd = open(mapname[modenum], mapmode[modenum], 0600);
+	if (fd < 0)
+		return NULL;
+	f = fdopen(fd, mapsmode[modenum]);
+	if (!f)
+		/* don't leak the descriptor if no stream could be made */
+		close(fd);
+	return f;
+}
+
+/* Write the list 'mel' to the ".new" map file and rename it over the
+ * real map.  Entries flagged 'bad' are skipped.
+ * Returns 1 on success, 0 on any failure (in which case a partly
+ * written ".new" file is removed).
+ */
+int map_write(struct map_ent *mel)
+{
+	FILE *f;
+	int err;
+
+	f = open_map(MAP_NEW);
+
+	if (!f)
+		return 0;
+	for (; mel; mel = mel->next) {
+		if (mel->bad)
+			continue;
+		fprintf(f, "%s ", mel->devnm);
+		fprintf(f, "%s ", mel->metadata);
+		fprintf(f, "%08x:%08x:%08x:%08x ", mel->uuid[0],
+			mel->uuid[1], mel->uuid[2], mel->uuid[3]);
+		fprintf(f, "%s\n", mel->path?:"");
+	}
+	fflush(f);
+	err = ferror(f);
+	/* a failed close also means the data may not have been written */
+	if (fclose(f) != 0)
+		err = 1;
+	if (err) {
+		unlink(mapname[MAP_NEW]);
+		return 0;
+	}
+	return rename(mapname[MAP_NEW],
+		      mapname[MAP_READ]) == 0;
+}
+
+/* Stream holding the lock file while this process owns the map lock,
+ * NULL otherwise.
+ */
+static FILE *lf = NULL;
+
+/* Take the exclusive map lock (unless already held) and then (re)load
+ * the map into *melp, freeing any list already there.
+ * Returns 0 on success, -1 if the lock file cannot be opened or
+ * locked.
+ */
+int map_lock(struct map_ent **melp)
+{
+	while (!lf) {
+		struct stat st;
+		FILE *candidate = open_map(MAP_LOCK);
+
+		if (!candidate)
+			return -1;
+		if (flock(fileno(candidate), LOCK_EX) != 0) {
+			fclose(candidate);
+			return -1;
+		}
+		if (fstat(fileno(candidate), &st) != 0 ||
+		    st.st_nlink == 0) {
+			/* The owner of the lock unlinked it,
+			 * so we have a lock on a stale file,
+			 * try again
+			 */
+			fclose(candidate);
+			continue;
+		}
+		lf = candidate;
+	}
+
+	if (*melp)
+		map_free(*melp);
+	map_read(melp);
+	return 0;
+}
+
+/* Release the map lock if this process holds it.  'melp' is not
+ * used.
+ */
+void map_unlock(struct map_ent **melp)
+{
+	FILE *held = lf;
+
+	lf = NULL;
+	if (!held)
+		return;
+
+	/* must unlink before closing the file,
+	 * as only the owner of the lock may
+	 * unlink the file
+	 */
+	unlink(mapname[MAP_LOCK]);
+	fclose(held);
+}
+
+/* Drop the lock-file stream when forking. */
+void map_fork(void)
+{
+	/* We are forking, so must close the lock file.
+	 * Don't risk flushing anything though.
+	 */
+	if (!lf)
+		return;
+
+	/* close the descriptor first, then release the stream */
+	close(fileno(lf));
+	fclose(lf);
+	lf = NULL;
+}
+
+/* Prepend a new entry to the list at *melp.
+ * devnm and metadata are copied into the entry's embedded buffers
+ * (truncated if too long rather than overflowing them); 'path' is
+ * duplicated when non-NULL.  The new entry is not marked bad.
+ */
+void map_add(struct map_ent **melp,
+	     char * devnm, char *metadata, int uuid[4], char *path)
+{
+	struct map_ent *me = xmalloc(sizeof(*me));
+
+	/* bounded copies: callers may pass strings longer than the
+	 * fixed-size members
+	 */
+	snprintf(me->devnm, sizeof(me->devnm), "%s", devnm);
+	snprintf(me->metadata, sizeof(me->metadata), "%s", metadata);
+	memcpy(me->uuid, uuid, 16);
+	me->path = path ? xstrdup(path) : NULL;
+	me->next = *melp;
+	me->bad = 0;
+	*melp = me;
+}
+
+/* Load the map file into a new list stored at *melp (set to NULL
+ * when nothing could be read).  If the map file cannot be opened it
+ * is regenerated with RebuildMap() and opened again.
+ * Only lines that yield all seven fields are added.
+ */
+void map_read(struct map_ent **melp)
+{
+	FILE *f;
+	char buf[8192];
+	char path[200];
+	int uuid[4];
+	char devnm[32];
+	char metadata[30];
+
+	*melp = NULL;
+
+	f = open_map(MAP_READ);
+	if (!f) {
+		RebuildMap();
+		f = open_map(MAP_READ);
+	}
+	if (!f)
+		return;
+
+	while (fgets(buf, sizeof(buf), f)) {
+		path[0] = 0;
+		/* Field widths are one less than the buffer sizes above so
+		 * the terminating NUL always fits.
+		 * NOTE(review): ">= 7" drops entries written with an empty
+		 * path although path[] is pre-cleared - confirm whether
+		 * ">= 6" was intended.
+		 */
+		if (sscanf(buf, " %31s %29s %x:%x:%x:%x %199s",
+			   devnm, metadata, uuid, uuid+1,
+			   uuid+2, uuid+3, path) >= 7) {
+			map_add(melp, devnm, metadata, uuid, path);
+		}
+	}
+	fclose(f);
+}
+
+/* Free every entry of a map list, including each entry's path. */
+void map_free(struct map_ent *map)
+{
+	struct map_ent *next;
+
+	for (; map; map = next) {
+		next = map->next;
+		free(map->path);
+		free(map);
+	}
+}
+
+/* Record (devnm, metadata, uuid, path) in the map and write the map
+ * file.  An existing entry for devnm is updated in place, otherwise
+ * a new entry is added.
+ * If mpp points at a loaded list that list is used, consumed and
+ * *mpp is set to NULL; otherwise the map is read from disk.
+ * Returns the result of map_write().
+ */
+int map_update(struct map_ent **mpp, char *devnm, char *metadata,
+	       int *uuid, char *path)
+{
+	struct map_ent *map, *mp;
+	int rv;
+
+	if (mpp && *mpp)
+		map = *mpp;
+	else
+		map_read(&map);
+
+	for (mp = map ; mp ; mp=mp->next)
+		if (strcmp(mp->devnm, devnm) == 0) {
+			/* bounded copy: don't overflow the embedded buffer */
+			snprintf(mp->metadata, sizeof(mp->metadata),
+				 "%s", metadata);
+			memcpy(mp->uuid, uuid, 16);
+			free(mp->path);
+			mp->path = path ? xstrdup(path) : NULL;
+			mp->bad = 0;
+			break;
+		}
+	if (!mp)
+		map_add(&map, devnm, metadata, uuid, path);
+	if (mpp)
+		*mpp = NULL;
+	rv = map_write(map);
+	map_free(map);
+	return rv;
+}
+
+/* Remove every entry whose devnm matches from the list at *mapp,
+ * loading the map first if the list is empty.  The map file itself
+ * is not rewritten.
+ */
+void map_delete(struct map_ent **mapp, char *devnm)
+{
+	struct map_ent **link = mapp;
+
+	if (*mapp == NULL)
+		map_read(mapp);
+
+	while (*link) {
+		struct map_ent *cur = *link;
+
+		if (strcmp(cur->devnm, devnm) != 0) {
+			link = &cur->next;
+			continue;
+		}
+		/* unlink and release the matching entry */
+		*link = cur->next;
+		free(cur->path);
+		free(cur);
+	}
+}
+
+/* Remove devnm from the map and write the map file.  The list at
+ * *mapp is consumed: it is freed and *mapp is reset to NULL so the
+ * caller is not left holding a dangling pointer.
+ * An empty devnm is ignored.
+ */
+void map_remove(struct map_ent **mapp, char *devnm)
+{
+	if (devnm[0] == 0)
+		return;
+
+	map_delete(mapp, devnm);
+	map_write(*mapp);
+	map_free(*mapp);
+	*mapp = NULL;
+}
+
+/* Find the first entry with the given uuid whose device is busy
+ * according to mddev_busy().  The map is loaded first if *map is
+ * empty.  Matching entries whose device is not busy are flagged
+ * 'bad' and skipped.  Returns the entry or NULL.
+ */
+struct map_ent *map_by_uuid(struct map_ent **map, int uuid[4])
+{
+	struct map_ent *ent;
+
+	if (*map == NULL)
+		map_read(map);
+
+	ent = *map;
+	while (ent) {
+		if (memcmp(uuid, ent->uuid, 16) == 0) {
+			if (mddev_busy(ent->devnm))
+				return ent;
+			ent->bad = 1;
+		}
+		ent = ent->next;
+	}
+	return NULL;
+}
+
+/* Find the first entry for device name 'devnm' whose device is busy
+ * according to mddev_busy().  The map is loaded first if *map is
+ * empty.  Matching entries whose device is not busy are flagged
+ * 'bad' and skipped.  Returns the entry or NULL.
+ */
+struct map_ent *map_by_devnm(struct map_ent **map, char *devnm)
+{
+	struct map_ent *ent;
+
+	if (*map == NULL)
+		map_read(map);
+
+	ent = *map;
+	while (ent) {
+		if (strcmp(ent->devnm, devnm) == 0) {
+			if (mddev_busy(ent->devnm))
+				return ent;
+			ent->bad = 1;
+		}
+		ent = ent->next;
+	}
+	return NULL;
+}
+
+/* Find the first entry whose path is "/dev/md/" followed by 'name'
+ * and whose device is busy according to mddev_busy().  The map is
+ * loaded first if *map is empty.  Matching entries whose device is
+ * not busy are flagged 'bad' and skipped.  Returns the entry or NULL.
+ */
+struct map_ent *map_by_name(struct map_ent **map, char *name)
+{
+	struct map_ent *ent;
+
+	if (*map == NULL)
+		map_read(map);
+
+	for (ent = *map; ent; ent = ent->next) {
+		if (ent->path == NULL ||
+		    strncmp(ent->path, "/dev/md/", 8) != 0 ||
+		    strcmp(ent->path + 8, name) != 0)
+			continue;
+
+		if (mddev_busy(ent->devnm))
+			return ent;
+		ent->bad = 1;
+	}
+	return NULL;
+}
+
+/* Return the subarray name of a container member, i.e. the text
+ * after the last '/' of an "external:" metadata version string for
+ * which is_subarray() is true.  Returns NULL for anything else.
+ * super_by_fd does this automatically, this routine is meant as
+ * a supplement for guess_super()
+ */
+static char *get_member_info(struct mdstat_ent *ent)
+{
+	char *subarray;
+
+	if (ent->metadata_version == NULL ||
+	    strncmp(ent->metadata_version, "external:", 9) != 0)
+		return NULL;
+
+	if (!is_subarray(&ent->metadata_version[9]))
+		return NULL;
+
+	subarray = strrchr(ent->metadata_version, '/');
+	/* never hand back "NULL + 1" if the string has no '/' at all */
+	if (subarray == NULL)
+		return NULL;
+	return subarray + 1;
+}
+
+/* Regenerate the map file from the arrays currently listed in
+ * mdstat.  For each array the superblock of the first member device
+ * that can be opened and loaded supplies the metadata version and
+ * uuid; the path is the existing /dev/md/ name if there is one,
+ * otherwise a name chosen as described below.  If the new map was
+ * written, a "change" uevent is sent for every array.
+ */
+void RebuildMap(void)
+{
+	struct mdstat_ent *mdstat = mdstat_read(0, 0);
+	struct mdstat_ent *md;
+	struct map_ent *map = NULL;
+	int require_homehost;
+	char sys_hostname[256];
+	char *homehost = conf_get_homehost(&require_homehost);
+
+	if (homehost == NULL || strcmp(homehost, "<system>")==0) {
+		if (gethostname(sys_hostname, sizeof(sys_hostname)) == 0) {
+			sys_hostname[sizeof(sys_hostname)-1] = 0;
+			homehost = sys_hostname;
+		}
+	}
+
+	for (md = mdstat ; md ; md = md->next) {
+		struct mdinfo *sra = sysfs_read(-1, md->devnm, GET_DEVS);
+		struct mdinfo *sd;
+
+		if (!sra)
+			continue;
+
+		for (sd = sra->devs ; sd ; sd = sd->next) {
+			char namebuf[100];
+			char dn[30];
+			int dfd;
+			int ok;
+			int devid;
+			struct supertype *st;
+			char *subarray = NULL;
+			char *path;
+			struct mdinfo *info;
+
+			snprintf(dn, sizeof(dn), "%d:%d",
+				 sd->disk.major, sd->disk.minor);
+			dfd = dev_open(dn, O_RDONLY);
+			if (dfd < 0)
+				continue;
+			st = guess_super(dfd);
+			if ( st == NULL)
+				ok = -1;
+			else {
+				subarray = get_member_info(md);
+				ok = st->ss->load_super(st, dfd, NULL);
+			}
+			close(dfd);
+			/* NOTE(review): 'st' is not released on the two
+			 * 'continue' paths below, nor after free_super() -
+			 * confirm who owns it.
+			 */
+			if (ok != 0)
+				continue;
+			if (subarray)
+				info = st->ss->container_content(st, subarray);
+			else {
+				info = xmalloc(sizeof(*info));
+				st->ss->getinfo_super(st, info, NULL);
+			}
+			if (!info)
+				continue;
+
+			devid = devnm2devid(md->devnm);
+			path = map_dev(major(devid), minor(devid), 0);
+			if (path == NULL ||
+			    strncmp(path, "/dev/md/", 8) != 0) {
+				/* We would really like a name that provides
+				 * an MD_DEVNAME for udev.
+				 * The name needs to be unique both in /dev/md/
+				 * and in this mapfile.
+				 * It needs to match what -I or -As would come
+				 * up with.
+				 * That means:
+				 *   Check if array is in mdadm.conf
+				 *        - if so use that.
+				 *   determine trustworthy from homehost etc
+				 *   find a unique name based on metadata name.
+				 *
+				 */
+				struct mddev_ident *match = conf_match(st, info,
+								       NULL, 0,
+								       NULL);
+				struct stat stb;
+				if (match && match->devname && match->devname[0] == '/') {
+					/* absolute name from the config file:
+					 * use it as it is
+					 */
+					path = match->devname;
+				} else {
+					int unum = 0;
+					char *sep = "_";
+					const char *name;
+					int conflict = 1;
+					if ((homehost == NULL ||
+					     st->ss->match_home(st, homehost) != 1) &&
+					    st->ss->match_home(st, "any") != 1 &&
+					    (require_homehost
+					     || ! conf_name_is_free(info->name)))
+						/* require a numeric suffix */
+						unum = 0;
+					else
+						/* allow name to be used as-is if no conflict */
+						unum = -1;
+					name = info->name;
+					if (!*name) {
+						name = st->ss->name;
+						/* <ctype.h> functions need an
+						 * unsigned char value
+						 */
+						if (!isdigit((unsigned char)name[strlen(name)-1]) &&
+						    unum == -1) {
+							unum = 0;
+							sep = "";
+						}
+					}
+					if (strchr(name, ':')) {
+						/* Probably a uniquifying
+						 * hostname prefix.  Allow
+						 * without a suffix, and strip
+						 * hostname if it is us.
+						 */
+						if (homehost && unum == -1 &&
+						    strncmp(name, homehost,
+							    strlen(homehost)) == 0 &&
+						    name[strlen(homehost)] == ':')
+							name += strlen(homehost)+1;
+						unum = -1;
+					}
+
+					/* try name, name_0, name_1, ... until
+					 * one is unused both in /dev/md/ and in
+					 * the map built so far
+					 */
+					while (conflict) {
+						if (unum >= 0)
+							snprintf(namebuf, sizeof(namebuf),
+								 "/dev/md/%s%s%d",
+								 name, sep, unum);
+						else
+							snprintf(namebuf, sizeof(namebuf),
+								 "/dev/md/%s",
+								 name);
+						unum++;
+						if (lstat(namebuf, &stb) != 0 &&
+						    (map == NULL ||
+						     !map_by_name(&map, namebuf+8)))
+							conflict = 0;
+					}
+					path = namebuf;
+				}
+			}
+			map_add(&map, md->devnm,
+				info->text_version,
+				info->uuid, path);
+			st->ss->free_super(st);
+			free(info);
+			/* one usable member device is enough for this array */
+			break;
+		}
+		sysfs_free(sra);
+	}
+	/* Only trigger a change if we wrote a new map file */
+	if (map_write(map))
+		for (md = mdstat ; md ; md = md->next) {
+			struct mdinfo *sra = sysfs_read(-1, md->devnm,
+							GET_VERSION);
+			if (sra)
+				sysfs_uevent(sra, "change");
+			sysfs_free(sra);
+		}
+	map_free(map);
+	free_mdstat(mdstat);
+}
diff --git a/maps.c b/maps.c
new file mode 100644
index 00000000..64f1df2c
--- /dev/null
+++ b/maps.c
@@ -0,0 +1,150 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2011 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+
+/* name/number mappings */
+
+/* Layout name <-> ALGORITHM_* number table (presumably for RAID5,
+ * going by the array name - confirm against callers).
+ * Several names share a number; map_num() returns the first entry
+ * that matches a number, so entry order is significant.
+ * The NULL name marks the end of the table for map_num()/map_name().
+ */
+mapping_t r5layout[] = {
+ { "left-asymmetric", ALGORITHM_LEFT_ASYMMETRIC},
+ { "right-asymmetric", ALGORITHM_RIGHT_ASYMMETRIC},
+ { "left-symmetric", ALGORITHM_LEFT_SYMMETRIC},
+ { "right-symmetric", ALGORITHM_RIGHT_SYMMETRIC},
+
+ { "default", ALGORITHM_LEFT_SYMMETRIC},
+ { "la", ALGORITHM_LEFT_ASYMMETRIC},
+ { "ra", ALGORITHM_RIGHT_ASYMMETRIC},
+ { "ls", ALGORITHM_LEFT_SYMMETRIC},
+ { "rs", ALGORITHM_RIGHT_SYMMETRIC},
+
+ { "parity-first", ALGORITHM_PARITY_0},
+ { "parity-last", ALGORITHM_PARITY_N},
+ { "ddf-zero-restart", ALGORITHM_RIGHT_ASYMMETRIC},
+ { "ddf-N-restart", ALGORITHM_LEFT_ASYMMETRIC},
+ { "ddf-N-continue", ALGORITHM_LEFT_SYMMETRIC},
+
+ { NULL, 0}
+};
+/* Layout name <-> ALGORITHM_* number table (presumably for RAID6,
+ * going by the array name - confirm against callers).
+ * Unlike r5layout, the "ddf-*" names here map to the
+ * ALGORITHM_ROTATING_* values, and the "-6" names are added.
+ * map_num() returns the first entry that matches a number, so entry
+ * order is significant.  The NULL name marks the end of the table.
+ */
+mapping_t r6layout[] = {
+ { "left-asymmetric", ALGORITHM_LEFT_ASYMMETRIC},
+ { "right-asymmetric", ALGORITHM_RIGHT_ASYMMETRIC},
+ { "left-symmetric", ALGORITHM_LEFT_SYMMETRIC},
+ { "right-symmetric", ALGORITHM_RIGHT_SYMMETRIC},
+
+ { "default", ALGORITHM_LEFT_SYMMETRIC},
+ { "la", ALGORITHM_LEFT_ASYMMETRIC},
+ { "ra", ALGORITHM_RIGHT_ASYMMETRIC},
+ { "ls", ALGORITHM_LEFT_SYMMETRIC},
+ { "rs", ALGORITHM_RIGHT_SYMMETRIC},
+
+ { "parity-first", ALGORITHM_PARITY_0},
+ { "parity-last", ALGORITHM_PARITY_N},
+ { "ddf-zero-restart", ALGORITHM_ROTATING_ZERO_RESTART},
+ { "ddf-N-restart", ALGORITHM_ROTATING_N_RESTART},
+ { "ddf-N-continue", ALGORITHM_ROTATING_N_CONTINUE},
+
+ { "left-asymmetric-6", ALGORITHM_LEFT_ASYMMETRIC_6},
+ { "right-asymmetric-6", ALGORITHM_RIGHT_ASYMMETRIC_6},
+ { "left-symmetric-6", ALGORITHM_LEFT_SYMMETRIC_6},
+ { "right-symmetric-6", ALGORITHM_RIGHT_SYMMETRIC_6},
+ { "parity-first-6", ALGORITHM_PARITY_0_6},
+
+ { NULL, 0}
+};
+
+/* Level (personality) name <-> number table.
+ * Each level has several accepted spellings; map_num() returns the
+ * first entry that matches a number, so the first spelling listed
+ * for a level is the one reported back.
+ * The NULL name marks the end of the table.
+ */
+mapping_t pers[] = {
+ { "linear", LEVEL_LINEAR},
+ { "raid0", 0},
+ { "0", 0},
+ { "stripe", 0},
+ { "raid1", 1},
+ { "1", 1},
+ { "mirror", 1},
+ { "raid4", 4},
+ { "4", 4},
+ { "raid5", 5},
+ { "5", 5},
+ { "multipath", LEVEL_MULTIPATH},
+ { "mp", LEVEL_MULTIPATH},
+ { "raid6", 6},
+ { "6", 6},
+ { "raid10", 10},
+ { "10", 10},
+ { "faulty", LEVEL_FAULTY},
+ { "container", LEVEL_CONTAINER},
+ { NULL, 0}
+};
+
+/* Operating mode name <-> number table.
+ * Like the other tables in this file it must end with a NULL name:
+ * map_num() and map_name() walk a table until they see one, so
+ * without the terminator a lookup that finds no match reads past the
+ * end of the array.
+ */
+mapping_t modes[] = {
+	{ "assemble", ASSEMBLE},
+	{ "build", BUILD},
+	{ "create", CREATE},
+	{ "manage", MANAGE},
+	{ "misc", MISC},
+	{ "monitor", MONITOR},
+	{ "grow", GROW},
+	{ "incremental", INCREMENTAL},
+	{ "auto-detect", AUTODETECT},
+	{ NULL, 0}
+};
+
+/* Fault-injection mode name <-> number table (presumably for the
+ * FAULTY personality, going by the array name - confirm against
+ * callers).  Each mode has a long and a two-letter spelling;
+ * "clear", "none" and "default" all map to ClearErrors.
+ * map_num() returns the first entry that matches a number, so entry
+ * order is significant.  The NULL name marks the end of the table.
+ */
+mapping_t faultylayout[] = {
+ { "write-transient", WriteTransient },
+ { "wt", WriteTransient },
+ { "read-transient", ReadTransient },
+ { "rt", ReadTransient },
+ { "write-persistent", WritePersistent },
+ { "wp", WritePersistent },
+ { "read-persistent", ReadPersistent },
+ { "rp", ReadPersistent },
+ { "write-all", WriteAll },
+ { "wa", WriteAll },
+ { "read-fixable", ReadFixable },
+ { "rf", ReadFixable },
+
+ { "clear", ClearErrors},
+ { "flush", ClearFaults},
+ { "none", ClearErrors},
+ { "default", ClearErrors},
+ { NULL, 0}
+};
+
+/* Return the name of the first entry in 'map' whose number is 'num',
+ * or NULL if the NULL-name terminator is reached first.
+ */
+char *map_num(mapping_t *map, int num)
+{
+	mapping_t *ent;
+
+	for (ent = map; ent->name; ent++)
+		if (ent->num == num)
+			return ent->name;
+	return NULL;
+}
+
+/* Return the number of the first entry in 'map' whose name equals
+ * 'name', or UnSet if the NULL-name terminator is reached first.
+ */
+int map_name(mapping_t *map, char *name)
+{
+	mapping_t *ent;
+
+	for (ent = map; ent->name; ent++)
+		if (strcmp(ent->name, name) == 0)
+			return ent->num;
+	return UnSet;
+}
diff --git a/md.4 b/md.4
new file mode 100644
index 00000000..e222237d
--- /dev/null
+++ b/md.4
@@ -0,0 +1,1125 @@
+.\" Copyright Neil Brown and others.
+.\" This program is free software; you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation; either version 2 of the License, or
+.\" (at your option) any later version.
+.\" See file COPYING in distribution for details.
+.TH MD 4
+.SH NAME
+md \- Multiple Device driver aka Linux Software RAID
+.SH SYNOPSIS
+.BI /dev/md n
+.br
+.BI /dev/md/ n
+.br
+.BR /dev/md/ name
+.SH DESCRIPTION
+The
+.B md
+driver provides virtual devices that are created from one or more
+independent underlying devices. This array of devices often contains
+redundancy and the devices are often disk drives, hence the acronym RAID
+which stands for a Redundant Array of Independent Disks.
+.PP
+.B md
+supports RAID levels
+1 (mirroring),
+4 (striped array with parity device),
+5 (striped array with distributed parity information),
+6 (striped array with distributed dual redundancy information), and
+10 (striped and mirrored).
+If some number of underlying devices fails while using one of these
+levels, the array will continue to function; this number is one for
+RAID levels 4 and 5, two for RAID level 6, all but one (N-1) for
+RAID level 1, and dependent on configuration for level 10.
+.PP
+.B md
+also supports a number of pseudo RAID (non-redundant) configurations
+including RAID0 (striped array), LINEAR (catenated array),
+MULTIPATH (a set of different interfaces to the same device),
+and FAULTY (a layer over a single device into which errors can be injected).
+
+.SS MD METADATA
+Each device in an array may have some
+.I metadata
+stored in the device. This metadata is sometimes called a
+.BR superblock .
+The metadata records information about the structure and state of the array.
+This allows the array to be reliably re-assembled after a shutdown.
+
+From Linux kernel version 2.6.10,
+.B md
+provides support for two different formats of metadata, and
+other formats can be added.  Prior to this release, only one format was
+supported.
+
+The common format \(em known as version 0.90 \(em has
+a superblock that is 4K long and is written into a 64K aligned block that
+starts at least 64K and less than 128K from the end of the device
+(i.e. to get the address of the superblock round the size of the
+device down to a multiple of 64K and then subtract 64K).
+The available size of each device is the amount of space before the
+super block, so between 64K and 128K is lost when a device is
+incorporated into an MD array.
+This superblock stores multi-byte fields in a processor-dependent
+manner, so arrays cannot easily be moved between computers with
+different processors.
+
+The new format \(em known as version 1 \(em has a superblock that is
+normally 1K long, but can be longer. It is normally stored between 8K
+and 12K from the end of the device, on a 4K boundary, though
+variations can be stored at the start of the device (version 1.1) or 4K from
+the start of the device (version 1.2).
+This metadata format stores multibyte data in a
+processor-independent format and supports up to hundreds of
+component devices (version 0.90 only supports 28).
+
+The metadata contains, among other things:
+.TP
+LEVEL
+The manner in which the devices are arranged into the array
+(LINEAR, RAID0, RAID1, RAID4, RAID5, RAID6, RAID10, MULTIPATH).
+.TP
+UUID
+a 128 bit Universally Unique Identifier that identifies the array that
+contains this device.
+
+.PP
+When a version 0.90 array is being reshaped (e.g. adding extra devices
+to a RAID5), the version number is temporarily set to 0.91. This
+ensures that if the reshape process is stopped in the middle (e.g. by
+a system crash) and the machine boots into an older kernel that does
+not support reshaping, then the array will not be assembled (which
+would cause data corruption) but will be left untouched until a kernel
+that can complete the reshape process is used.
+
+.SS ARRAYS WITHOUT METADATA
+While it is usually best to create arrays with superblocks so that
+they can be assembled reliably, there are some circumstances when an
+array without superblocks is preferred. These include:
+.TP
+LEGACY ARRAYS
+Early versions of the
+.B md
+driver only supported LINEAR and RAID0 configurations and did not use
+a superblock (which is less critical with these configurations).
+While such arrays should be rebuilt with superblocks if possible,
+.B md
+continues to support them.
+.TP
+FAULTY
+Being a largely transparent layer over a different device, the FAULTY
+personality doesn't gain anything from having a superblock.
+.TP
+MULTIPATH
+It is often possible to detect devices which are different paths to
+the same storage directly rather than having a distinctive superblock
+written to the device and searched for on all paths. In this case,
+a MULTIPATH array with no superblock makes sense.
+.TP
+RAID1
+In some configurations it might be desired to create a RAID1
+configuration that does not use a superblock, and to maintain the state of
+the array elsewhere. While not encouraged for general use, it does
+have special-purpose uses and is supported.
+
+.SS ARRAYS WITH EXTERNAL METADATA
+
+From release 2.6.28, the
+.I md
+driver supports arrays with externally managed metadata. That is,
+the metadata is not managed by the kernel but rather by a user-space
+program which is external to the kernel. This allows support for a
+variety of metadata formats without cluttering the kernel with lots of
+details.
+.PP
+.I md
+is able to communicate with the user-space program through various
+sysfs attributes so that it can make appropriate changes to the
+metadata \- for example to mark a device as faulty. When necessary,
+.I md
+will wait for the program to acknowledge the event by writing to a
+sysfs attribute.
+The manual page for
+.IR mdmon (8)
+contains more detail about this interaction.
+
+.SS CONTAINERS
+Many metadata formats use a single block of metadata to describe a
+number of different arrays which all use the same set of devices.
+In this case it is helpful for the kernel to know about the full set
+of devices as a whole. This set is known to md as a
+.IR container .
+A container is an
+.I md
+array with externally managed metadata and with device offset and size
+so that it just covers the metadata part of the devices. The
+remainder of each device is available to be incorporated into various
+arrays.
+
+.SS LINEAR
+
+A LINEAR array simply catenates the available space on each
+drive to form one large virtual drive.
+
+One advantage of this arrangement over the more common RAID0
+arrangement is that the array may be reconfigured at a later time with
+an extra drive, so the array is made bigger without disturbing the
+data that is on the array. This can even be done on a live
+array.
+
+If a chunksize is given with a LINEAR array, the usable space on each
+device is rounded down to a multiple of this chunksize.
+
+.SS RAID0
+
+A RAID0 array (which has zero redundancy) is also known as a
+striped array.
+A RAID0 array is configured at creation with a
+.B "Chunk Size"
+which must be a power of two (prior to Linux 2.6.31), and at least 4
+kibibytes.
+
+The RAID0 driver assigns the first chunk of the array to the first
+device, the second chunk to the second device, and so on until all
+drives have been assigned one chunk. This collection of chunks forms a
+.BR stripe .
+Further chunks are gathered into stripes in the same way, and are
+assigned to the remaining space in the drives.
+
+If devices in the array are not all the same size, then once the
+smallest device has been exhausted, the RAID0 driver starts
+collecting chunks into smaller stripes that only span the drives which
+still have remaining space.
+
+
+.SS RAID1
+
+A RAID1 array is also known as a mirrored set (though mirrors tend to
+provide reflected images, which RAID1 does not) or a plex.
+
+Once initialised, each device in a RAID1 array contains exactly the
+same data. Changes are written to all devices in parallel. Data is
+read from any one device. The driver attempts to distribute read
+requests across all devices to maximise performance.
+
+All devices in a RAID1 array should be the same size. If they are
+not, then only the amount of space available on the smallest device is
+used (any extra space on other devices is wasted).
+
+Note that the read balancing done by the driver does not make the RAID1
+performance profile be the same as for RAID0; a single stream of
+sequential input will not be accelerated (e.g. a single dd), but
+multiple sequential streams or a random workload will use more than one
+spindle. In theory, having an N-disk RAID1 will allow N sequential
+threads to read from all disks.
+
+Individual devices in a RAID1 can be marked as "write-mostly".
+These drives are excluded from the normal read balancing and will only
+be read from when there is no other option. This can be useful for
+devices connected over a slow link.
+
+.SS RAID4
+
+A RAID4 array is like a RAID0 array with an extra device for storing
+parity. This device is the last of the active devices in the
+array. Unlike RAID0, RAID4 also requires that all stripes span all
+drives, so extra space on devices that are larger than the smallest is
+wasted.
+
+When any block in a RAID4 array is modified, the parity block for that
+stripe (i.e. the block in the parity device at the same device offset
+as the stripe) is also modified so that the parity block always
+contains the "parity" for the whole stripe. I.e. its content is
+equivalent to the result of performing an exclusive-or operation
+between all the data blocks in the stripe.
+
+This allows the array to continue to function if one device fails.
+The data that was on that device can be calculated as needed from the
+parity block and the other data blocks.
+
+.SS RAID5
+
+RAID5 is very similar to RAID4. The difference is that the parity
+blocks for each stripe, instead of being on a single device, are
+distributed across all devices. This allows more parallelism when
+writing, as two different block updates will quite possibly affect
+parity blocks on different devices so there is less contention.
+
+This also allows more parallelism when reading, as read requests are
+distributed over all the devices in the array instead of all but one.
+
+.SS RAID6
+
+RAID6 is similar to RAID5, but can handle the loss of any \fItwo\fP
+devices without data loss. Accordingly, it requires N+2 drives to
+store N drives worth of data.
+
+The performance for RAID6 is slightly lower but comparable to RAID5 in
+normal mode and single disk failure mode. It is very slow in dual
+disk failure mode, however.
+
+.SS RAID10
+
+RAID10 provides a combination of RAID1 and RAID0, and is sometimes known
+as RAID1+0. Every datablock is duplicated some number of times, and
+the resulting collection of datablocks is distributed over multiple
+drives.
+
+When configuring a RAID10 array, it is necessary to specify the number
+of replicas of each data block that are required (this will usually
+be\ 2) and whether their layout should be "near", "far" or "offset"
+(with "offset" being available since Linux\ 2.6.18).
+
+.B About the RAID10 Layout Examples:
+.br
+The examples below visualise the chunk distribution on the underlying
+devices for the respective layout.
+
+For simplicity it is assumed that the size of the chunks equals the
+size of the blocks of the underlying devices as well as those of the
+RAID10 device exported by the kernel (for example \fB/dev/md/\fPname).
+.br
+Therefore the chunks\ /\ chunk numbers map directly to the blocks\ /
+block addresses of the exported RAID10 device.
+
+Decimal numbers (0,\ 1, 2,\ ...) are the chunks of the RAID10 and due
+to the above assumption also the blocks and block addresses of the
+exported RAID10 device.
+.br
+Repeated numbers mean copies of a chunk\ /\ block (obviously on
+different underlying devices).
+.br
+Hexadecimal numbers (0x00,\ 0x01, 0x02,\ ...) are the block addresses
+of the underlying devices.
+
+.TP
+\fB "near" Layout\fP
+When "near" replicas are chosen, the multiple copies of a given chunk are laid
+out consecutively ("as close to each other as possible") across the stripes of
+the array.
+
+With an even number of devices, they will likely (unless some misalignment is
+present) lie at the very same offset on the different devices.
+.br
+This is the same as the "classic" RAID1+0; that is two groups of mirrored devices (in the
+example below the groups Device\ #1\ /\ #2 and Device\ #3\ /\ #4 are each a
+RAID1) both in turn forming a striped RAID0.
+
+.ne 10
+.B Example with 2\ copies per chunk and an even number\ (4) of devices:
+.TS
+tab(;);
+ C - - - -
+ C | C | C | C | C |
+| - | - | - | - | - |
+| C | C | C | C | C |
+| C | C | C | C | C |
+| C | C | C | C | C |
+| C | C | C | C | C |
+| C | C | C | C | C |
+| C | C | C | C | C |
+| - | - | - | - | - |
+ C C S C S
+ C C S C S
+ C C S S S
+ C C S S S.
+;
+;Device #1;Device #2;Device #3;Device #4
+0x00;0;0;1;1
+0x01;2;2;3;3
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.
+:;:;:;:;:
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.
+0x80;254;254;255;255
+;\\---------v---------/;\\---------v---------/
+;RAID1;RAID1
+;\\---------------------v---------------------/
+;RAID0
+.TE
+
+.ne 10
+.B Example with 2\ copies per chunk and an odd number\ (5) of devices:
+.TS
+tab(;);
+ C - - - - -
+ C | C | C | C | C | C |
+| - | - | - | - | - | - |
+| C | C | C | C | C | C |
+| C | C | C | C | C | C |
+| C | C | C | C | C | C |
+| C | C | C | C | C | C |
+| C | C | C | C | C | C |
+| C | C | C | C | C | C |
+| - | - | - | - | - | - |
+C.
+;
+;Dev #1;Dev #2;Dev #3;Dev #4;Dev #5
+0x00;0;0;1;1;2
+0x01;2;3;3;4;4
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.
+:;:;:;:;:;:
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.
+0x80;317;318;318;319;319
+;
+.TE
+
+.TP
+\fB "far" Layout\fP
+When "far" replicas are chosen, the multiple copies of a given chunk
+are laid out quite distant ("as far as reasonably possible") from each
+other.
+
+First a complete sequence of all data blocks (that is all the data one
+sees on the exported RAID10 block device) is striped over the
+devices. Then another (though "shifted") complete sequence of all data
+blocks; and so on (in the case of more than 2\ copies per chunk).
+
+The "shift" needed to prevent placing copies of the same chunks on the
+same devices is actually a cyclic permutation with offset\ 1 of each
+of the stripes within a complete sequence of chunks.
+.br
+The offset\ 1 is relative to the previous complete sequence of chunks,
+so in case of more than 2\ copies per chunk one gets the following
+offsets:
+.br
+1.\ complete sequence of chunks: offset\ =\ \ 0
+.br
+2.\ complete sequence of chunks: offset\ =\ \ 1
+.br
+3.\ complete sequence of chunks: offset\ =\ \ 2
+.br
+ :
+.br
+n.\ complete sequence of chunks: offset\ =\ n-1
+
+.ne 10
+.B Example with 2\ copies per chunk and an even number\ (4) of devices:
+.TS
+tab(;);
+ C - - - -
+ C | C | C | C | C |
+| - | - | - | - | - |
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| - | - | - | - | - |
+C.
+;
+;Device #1;Device #2;Device #3;Device #4
+;
+0x00;0;1;2;3;\\
+0x01;4;5;6;7;> [#]
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;:
+:;:;:;:;:;:
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;:
+0x40;252;253;254;255;/
+0x41;3;0;1;2;\\
+0x42;7;4;5;6;> [#]~
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;:
+:;:;:;:;:;:
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;:
+0x80;255;252;253;254;/
+;
+.TE
+
+.ne 10
+.B Example with 2\ copies per chunk and an odd number\ (5) of devices:
+.TS
+tab(;);
+ C - - - - -
+ C | C | C | C | C | C |
+| - | - | - | - | - | - |
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| - | - | - | - | - | - |
+C.
+;
+;Dev #1;Dev #2;Dev #3;Dev #4;Dev #5
+;
+0x00;0;1;2;3;4;\\
+0x01;5;6;7;8;9;> [#]
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;:
+:;:;:;:;:;:;:
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;:
+0x40;315;316;317;318;319;/
+0x41;4;0;1;2;3;\\
+0x42;9;5;6;7;8;> [#]~
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;:
+:;:;:;:;:;:;:
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;:
+0x80;319;315;316;317;318;/
+;
+.TE
+
+With [#]\ being the complete sequence of chunks and [#]~\ the cyclic permutation
+with offset\ 1 thereof (in the case of more than 2 copies per chunk there would
+be ([#]~)~,\ (([#]~)~)~,\ ...).
+
+The advantage of this layout is that MD can easily spread sequential reads over
+the devices, making them similar to RAID0 in terms of speed.
+.br
+The cost is more seeking for writes, making them substantially slower.
+
+.TP
+\fB"offset" Layout\fP
+When "offset" replicas are chosen, all the copies of a given chunk are
+striped consecutively ("offset by the stripe length after each other")
+over the devices.
+
+Explained in detail, <number of devices> consecutive chunks are
+striped over the devices, immediately followed by a "shifted" copy of
+these chunks (and by further such "shifted" copies in the case of more
+than 2\ copies per chunk).
+.br
+This pattern repeats for all further consecutive chunks of the
+exported RAID10 device (in other words: all further data blocks).
+
+The "shift" needed to prevent placing copies of the same chunks on the
+same devices is actually a cyclic permutation with offset\ 1 of each
+of the striped copies of <number of devices> consecutive chunks.
+.br
+The offset\ 1 is relative to the previous striped copy of <number of
+devices> consecutive chunks, so in case of more than 2\ copies per
+chunk one gets the following offsets:
+.br
+1.\ <number of devices> consecutive chunks: offset\ =\ \ 0
+.br
+2.\ <number of devices> consecutive chunks: offset\ =\ \ 1
+.br
+3.\ <number of devices> consecutive chunks: offset\ =\ \ 2
+.br
+ :
+.br
+n.\ <number of devices> consecutive chunks: offset\ =\ n-1
+
+.ne 10
+.B Example with 2\ copies per chunk and an even number\ (4) of devices:
+.TS
+tab(;);
+ C - - - -
+ C | C | C | C | C |
+| - | - | - | - | - |
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| C | C | C | C | C | L
+| - | - | - | - | - |
+C.
+;
+;Device #1;Device #2;Device #3;Device #4
+;
+0x00;0;1;2;3;) AA
+0x01;3;0;1;2;) AA~
+0x02;4;5;6;7;) AB
+0x03;7;4;5;6;) AB~
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;) \.\.\.
+:;:;:;:;:; :
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;) \.\.\.
+0x79;251;252;253;254;) EX
+0x80;254;251;252;253;) EX~
+;
+.TE
+
+.ne 10
+.B Example with 2\ copies per chunk and an odd number\ (5) of devices:
+.TS
+tab(;);
+ C - - - - -
+ C | C | C | C | C | C |
+| - | - | - | - | - | - |
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| C | C | C | C | C | C | L
+| - | - | - | - | - | - |
+C.
+;
+;Dev #1;Dev #2;Dev #3;Dev #4;Dev #5
+;
+0x00;0;1;2;3;4;) AA
+0x01;4;0;1;2;3;) AA~
+0x02;5;6;7;8;9;) AB
+0x03;9;5;6;7;8;) AB~
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;) \.\.\.
+:;:;:;:;:;:; :
+\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;\.\.\.;) \.\.\.
+0x79;314;315;316;317;318;) EX
+0x80;318;314;315;316;317;) EX~
+;
+.TE
+
+With AA,\ AB,\ ..., AZ,\ BA,\ ... being the sets of <number of devices> consecutive
+chunks and AA~,\ AB~,\ ..., AZ~,\ BA~,\ ... the cyclic permutations with offset\ 1
+thereof (in the case of more than 2 copies per chunk there would be (AA~)~,\ ...
+as well as ((AA~)~)~,\ ... and so on).
+
+This should give similar read characteristics to "far" if a suitably large chunk
+size is used, but without as much seeking for writes.
+.PP
+
+
+It should be noted that the number of devices in a RAID10 array need
+not be a multiple of the number of replicas of each data block; however,
+there must be at least as many devices as replicas.
+
+If, for example, an array is created with 5 devices and 2 replicas,
+then space equivalent to 2.5 of the devices will be available, and
+every block will be stored on two different devices.
+
+Finally, it is possible to have an array with both "near" and "far"
+copies. If an array is configured with 2 near copies and 2 far
+copies, then there will be a total of 4 copies of each block, each on
+a different drive. This is an artifact of the implementation and is
+unlikely to be of real value.
+
+.SS MULTIPATH
+
+MULTIPATH is not really a RAID at all as there is only one real device
+in a MULTIPATH md array. However there are multiple access points
+(paths) to this device, and one of these paths might fail, so there
+are some similarities.
+
+A MULTIPATH array is composed of a number of logically different
+devices, often fibre channel interfaces, that all refer to the same
+real device. If one of these interfaces fails (e.g. due to cable
+problems), the MULTIPATH driver will attempt to redirect requests to
+another interface.
+
+The MULTIPATH driver is not receiving any ongoing development and
+should be considered a legacy driver. The device-mapper based
+multipath drivers should be preferred for new installations.
+
+.SS FAULTY
+The FAULTY md module is provided for testing purposes. A FAULTY array
+has exactly one component device and is normally assembled without a
+superblock, so the md array created provides direct access to all of
+the data in the component device.
+
+The FAULTY module may be requested to simulate faults to allow testing
+of other md levels or of filesystems. Faults can be chosen to trigger
+on read requests or write requests, and can be transient (a subsequent
+read/write at the address will probably succeed) or persistent
+(subsequent read/write of the same address will fail). Further, read
+faults can be "fixable" meaning that they persist until a write
+request at the same address.
+
+Fault types can be requested with a period. In this case, the fault
+will recur repeatedly after the given number of requests of the
+relevant type. For example if persistent read faults have a period of
+100, then every 100th read request would generate a fault, and the
+faulty sector would be recorded so that subsequent reads on that
+sector would also fail.
+
+There is a limit to the number of faulty sectors that are remembered.
+Faults generated after this limit is exhausted are treated as
+transient.
+
+The list of faulty sectors can be flushed, and the active list of
+failure modes can be cleared.
+
+.SS UNCLEAN SHUTDOWN
+
+When changes are made to a RAID1, RAID4, RAID5, RAID6, or RAID10 array
+there is a possibility of inconsistency for short periods of time as
+each update requires at least two blocks to be written to different
+devices, and these writes probably won't happen at exactly the same
+time. Thus if a system with one of these arrays is shutdown in the
+middle of a write operation (e.g. due to power failure), the array may
+not be consistent.
+
+To handle this situation, the md driver marks an array as "dirty"
+before writing any data to it, and marks it as "clean" when the array
+is being disabled, e.g. at shutdown. If the md driver finds an array
+to be dirty at startup, it proceeds to correct any possible
+inconsistency. For RAID1, this involves copying the contents of the
+first drive onto all other drives. For RAID4, RAID5 and RAID6 this
+involves recalculating the parity for each stripe and making sure that
+the parity block has the correct data. For RAID10 it involves copying
+one of the replicas of each block onto all the others. This process,
+known as "resynchronising" or "resync" is performed in the background.
+The array can still be used, though possibly with reduced performance.
+
+If a RAID4, RAID5 or RAID6 array is degraded (missing at least one
+drive, two for RAID6) when it is restarted after an unclean shutdown, it cannot
+recalculate parity, and so it is possible that data might be
+undetectably corrupted. The 2.4 md driver
+.B does not
+alert the operator to this condition. The 2.6 md driver will fail to
+start an array in this condition without manual intervention, though
+this behaviour can be overridden by a kernel parameter.
+
+.SS RECOVERY
+
+If the md driver detects a write error on a device in a RAID1, RAID4,
+RAID5, RAID6, or RAID10 array, it immediately disables that device
+(marking it as faulty) and continues operation on the remaining
+devices. If there are spare drives, the driver will start recreating
+on one of the spare drives the data which was on that failed drive,
+either by copying a working drive in a RAID1 configuration, or by
+doing calculations with the parity block on RAID4, RAID5 or RAID6, or
+by finding and copying originals for RAID10.
+
+In kernels prior to about 2.6.15, a read error would cause the same
+effect as a write error. In later kernels, a read-error will instead
+cause md to attempt a recovery by overwriting the bad block. I.e. it
+will find the correct data from elsewhere, write it over the block
+that failed, and then try to read it back again. If either the write
+or the re-read fail, md will treat the error the same way that a write
+error is treated, and will fail the whole device.
+
+While this recovery process is happening, the md driver will monitor
+accesses to the array and will slow down the rate of recovery if other
+activity is happening, so that normal access to the array will not be
+unduly affected. When no other activity is happening, the recovery
+process proceeds at full speed. The actual speed targets for the two
+different situations can be controlled by the
+.B speed_limit_min
+and
+.B speed_limit_max
+control files mentioned below.
+
+.SS SCRUBBING AND MISMATCHES
+
+As storage devices can develop bad blocks at any time it is valuable
+to regularly read all blocks on all devices in an array so as to catch
+such bad blocks early. This process is called
+.IR scrubbing .
+
+md arrays can be scrubbed by writing either
+.I check
+or
+.I repair
+to the file
+.I md/sync_action
+in the
+.I sysfs
+directory for the device.
+
+Requesting a scrub will cause
+.I md
+to read every block on every device in the array, and check that the
+data is consistent. For RAID1 and RAID10, this means checking that the copies
+are identical. For RAID4, RAID5 and RAID6 this means checking that the
+parity block is (or blocks are) correct.
+
+If a read error is detected during this process, the normal read-error
+handling causes correct data to be found from other devices and to be
+written back to the faulty device. In many cases this will
+effectively
+.I fix
+the bad block.
+
+If all blocks read successfully but are found to not be consistent,
+then this is regarded as a
+.IR mismatch .
+
+If
+.I check
+was used, then no action is taken to handle the mismatch, it is simply
+recorded.
+If
+.I repair
+was used, then a mismatch will be repaired in the same way that
+.I resync
+repairs arrays. For RAID5/RAID6 new parity blocks are written. For RAID1/RAID10,
+all but one block are overwritten with the content of that one block.
+
+A count of mismatches is recorded in the
+.I sysfs
+file
+.IR md/mismatch_cnt .
+This is set to zero when a
+scrub starts and is incremented whenever a sector is
+found that is a mismatch.
+.I md
+normally works in units much larger than a single sector and when it
+finds a mismatch, it does not determine exactly how many actual sectors were
+affected but simply adds the number of sectors in the IO unit that was
+used. So a value of 128 could simply mean that a single 64KB check
+found an error (128 x 512bytes = 64KB).
+
+If an array is created by
+.I mdadm
+with
+.I \-\-assume\-clean
+then a subsequent check could be expected to find some mismatches.
+
+On a truly clean RAID5 or RAID6 array, any mismatches should indicate
+a hardware problem at some level - software issues should never cause
+such a mismatch.
+
+However on RAID1 and RAID10 it is possible for software issues to
+cause a mismatch to be reported. This does not necessarily mean that
+the data on the array is corrupted. It could simply be that the
+system does not care what is stored on that part of the array - it is
+unused space.
+
+The most likely cause for an unexpected mismatch on RAID1 or RAID10
+occurs if a swap partition or swap file is stored on the array.
+
+When the swap subsystem wants to write a page of memory out, it flags
+the page as 'clean' in the memory manager and requests the swap device
+to write it out. It is quite possible that the memory will be
+changed while the write-out is happening. In that case the 'clean'
+flag will be found to be clear when the write completes and so the
+swap subsystem will simply forget that the swapout had been attempted,
+and will possibly choose a different page to write out.
+
+If the swap device was on RAID1 (or RAID10), then the data is sent
+from memory to a device twice (or more depending on the number of
+devices in the array). Thus it is possible that the memory gets changed
+between the times it is sent, so different data can be written to
+the different devices in the array. This will be detected by
+.I check
+as a mismatch. However it does not reflect any corruption as the
+block where this mismatch occurs is being treated by the swap system as
+being empty, and the data will never be read from that block.
+
+It is conceivable for a similar situation to occur on non-swap files,
+though it is less likely.
+
+Thus the
+.I mismatch_cnt
+value can not be interpreted very reliably on RAID1 or RAID10,
+especially when the device is used for swap.
+
+
+.SS BITMAP WRITE-INTENT LOGGING
+
+From Linux 2.6.13,
+.I md
+supports a bitmap based write-intent log. If configured, the bitmap
+is used to record which blocks of the array may be out of sync.
+Before any write request is honoured, md will make sure that the
+corresponding bit in the log is set. After a period of time with no
+writes to an area of the array, the corresponding bit will be cleared.
+
+This bitmap is used for two optimisations.
+
+Firstly, after an unclean shutdown, the resync process will consult
+the bitmap and only resync those blocks that correspond to bits in the
+bitmap that are set. This can dramatically reduce resync time.
+
+Secondly, when a drive fails and is removed from the array, md stops
+clearing bits in the intent log. If that same drive is re-added to
+the array, md will notice and will only recover the sections of the
+drive that are covered by bits in the intent log that are set. This
+can allow a device to be temporarily removed and reinserted without
+causing an enormous recovery cost.
+
+The intent log can be stored in a file on a separate device, or it can
+be stored near the superblocks of an array which has superblocks.
+
+It is possible to add an intent log to an active array, or remove an
+intent log if one is present.
+
+In 2.6.13, intent bitmaps are only supported with RAID1. Other levels
+with redundancy are supported from 2.6.15.
+
+.SS BAD BLOCK LIST
+
+From Linux 3.5 each device in an
+.I md
+array can store a list of known-bad-blocks. This list is 4K in size
+and usually positioned at the end of the space between the superblock
+and the data.
+
+When a block cannot be read and cannot be repaired by writing data
+recovered from other devices, the address of the block is stored in
+the bad block list. Similarly if an attempt to write a block fails,
+the address will be recorded as a bad block. If attempting to record
+the bad block fails, the whole device will be marked faulty.
+
+Attempting to read from a known bad block will cause a read error.
+Attempting to write to a known bad block will be ignored if any write
+errors have been reported by the device. If there have been no write
+errors then the data will be written to the known bad block and if
+that succeeds, the address will be removed from the list.
+
+This allows an array to fail more gracefully - a few blocks on
+different devices can be faulty without taking the whole array out of
+action.
+
+The list is particularly useful when recovering to a spare. If a few blocks
+cannot be read from the other devices, the bulk of the recovery can
+complete and those few bad blocks will be recorded in the bad block list.
+
+.SS WRITE-BEHIND
+
+From Linux 2.6.14,
+.I md
+supports WRITE-BEHIND on RAID1 arrays.
+
+This allows certain devices in the array to be flagged as
+.IR write-mostly .
+MD will only read from such devices if there is no
+other option.
+
+If a write-intent bitmap is also provided, write requests to
+write-mostly devices will be treated as write-behind requests and md
+will not wait for those requests to complete before
+reporting the write as complete to the filesystem.
+
+This allows for a RAID1 with WRITE-BEHIND to be used to mirror data
+over a slow link to a remote computer (providing the link isn't too
+slow). The extra latency of the remote link will not slow down normal
+operations, but the remote system will still have a reasonably
+up-to-date copy of all data.
+
+.SS RESTRIPING
+
+.IR Restriping ,
+also known as
+.IR Reshaping ,
+is the process of re-arranging the data stored in each stripe into a
+new layout. This might involve changing the number of devices in the
+array (so the stripes are wider), changing the chunk size (so stripes
+are deeper or shallower), or changing the arrangement of data and
+parity (possibly changing the RAID level, e.g. 1 to 5 or 5 to 6).
+
+As of Linux 2.6.35, md can reshape a RAID4, RAID5, or RAID6 array to
+have a different number of devices (more or fewer) and to have a
+different layout or chunk size. It can also convert between these
+different RAID levels. It can also convert between RAID0 and RAID10,
+and between RAID0 and RAID4 or RAID5.
+Other possibilities may follow in future kernels.
+
+During any restripe process there is a 'critical section' during which
+live data is being overwritten on disk. For the operation of
+increasing the number of drives in a RAID5, this critical section
+covers the first few stripes (the number being the product of the old
+and new number of devices). After this critical section is passed,
+data is only written to areas of the array which no longer hold live
+data \(em the live data has already been relocated away.
+
+For a reshape which reduces the number of devices, the 'critical
+section' is at the end of the reshape process.
+
+md is not able to ensure data preservation if there is a crash
+(e.g. power failure) during the critical section. If md is asked to
+start an array which failed during a critical section of restriping,
+it will fail to start the array.
+
+To deal with this possibility, a user-space program must
+.IP \(bu 4
+Disable writes to that section of the array (using the
+.B sysfs
+interface),
+.IP \(bu 4
+take a copy of the data somewhere (i.e. make a backup),
+.IP \(bu 4
+allow the process to continue and invalidate the backup and restore
+write access once the critical section is passed, and
+.IP \(bu 4
+provide for restoring the critical data before restarting the array
+after a system crash.
+.PP
+
+.B mdadm
+versions from 2.4 do this for growing a RAID5 array.
+
+For operations that do not change the size of the array, like simply
+increasing chunk size, or converting RAID5 to RAID6 with one extra
+device, the entire process is the critical section. In this case, the
+restripe will need to progress in stages, as a section is suspended,
+backed up, restriped, and released.
+
+.SS SYSFS INTERFACE
+Each block device appears as a directory in
+.I sysfs
+(which is usually mounted at
+.BR /sys ).
+For MD devices, this directory will contain a subdirectory called
+.B md
+which contains various files for providing access to information about
+the array.
+
+This interface is documented more fully in the file
+.B Documentation/md.txt
+which is distributed with the kernel sources. That file should be
+consulted for full documentation. The following are just a selection
+of attribute files that are available.
+
+.TP
+.B md/sync_speed_min
+This value, if set, overrides the system-wide setting in
+.B /proc/sys/dev/raid/speed_limit_min
+for this array only.
+Writing the value
+.B "system"
+to this file will cause the system-wide setting to have effect.
+
+.TP
+.B md/sync_speed_max
+This is the partner of
+.B md/sync_speed_min
+and overrides
+.B /proc/sys/dev/raid/speed_limit_max
+described below.
+
+.TP
+.B md/sync_action
+This can be used to monitor and control the resync/recovery process of
+MD.
+In particular, writing "check" here will cause the array to read all
+data blocks and check that they are consistent (e.g. parity is correct,
+or all mirror replicas are the same). Any discrepancies found are
+.B NOT
+corrected.
+
+A count of problems found will be stored in
+.BR md/mismatch_cnt .
+
+Alternately, "repair" can be written which will cause the same check
+to be performed, but any errors will be corrected.
+
+Finally, "idle" can be written to stop the check/repair process.
+
+.TP
+.B md/stripe_cache_size
+This is only available on RAID5 and RAID6. It records the size (in
+pages per device) of the stripe cache which is used for synchronising
+all write operations to the array and all read operations if the array
+is degraded. The default is 256. Valid values are 17 to 32768.
+Increasing this number can increase performance in some situations, at
+some cost in system memory. Note, setting this value too high can
+result in an "out of memory" condition for the system.
+
+memory_consumed = system_page_size * nr_disks * stripe_cache_size
+
+.TP
+.B md/preread_bypass_threshold
+This is only available on RAID5 and RAID6. This variable sets the
+number of times MD will service a full-stripe-write before servicing a
+stripe that requires some "prereading". For fairness this defaults to
+1. Valid values are 0 to stripe_cache_size. Setting this to 0
+maximizes sequential-write throughput at the cost of fairness to threads
+doing small or random writes.
+
+.SS KERNEL PARAMETERS
+
+The md driver recognises several different kernel parameters.
+.TP
+.B raid=noautodetect
+This will disable the normal detection of md arrays that happens at
+boot time. If a drive is partitioned with MS-DOS style partitions,
+then if any of the 4 main partitions has a partition type of 0xFD,
+then that partition will normally be inspected to see if it is part of
+an MD array, and if any full arrays are found, they are started. This
+kernel parameter disables this behaviour.
+
+.TP
+.B raid=partitionable
+.TP
+.B raid=part
+These are available in 2.6 and later kernels only. They indicate that
+autodetected MD arrays should be created as partitionable arrays, with
+a different major device number to the original non-partitionable md
+arrays. The device number is listed as
+.I mdp
+in
+.IR /proc/devices .
+
+.TP
+.B md_mod.start_ro=1
+.TP
+.B /sys/module/md_mod/parameters/start_ro
+This tells md to start all arrays in read-only mode. This is a soft
+read-only that will automatically switch to read-write on the first
+write request. However until that write request, nothing is written
+to any device by md, and in particular, no resync or recovery
+operation is started.
+
+.TP
+.B md_mod.start_dirty_degraded=1
+.TP
+.B /sys/module/md_mod/parameters/start_dirty_degraded
+As mentioned above, md will not normally start a RAID4, RAID5, or
+RAID6 that is both dirty and degraded as this situation can imply
+hidden data loss. This can be awkward if the root filesystem is
+affected. Using this module parameter allows such arrays to be started
+at boot time. It should be understood that there is a real (though
+small) risk of data corruption in this situation.
+
+.TP
+.BI md= n , dev , dev ,...
+.TP
+.BI md=d n , dev , dev ,...
+This tells the md driver to assemble
+.B /dev/md n
+from the listed devices. It is only necessary to start the device
+holding the root filesystem this way. Other arrays are best started
+once the system is booted.
+
+In 2.6 kernels, the
+.B d
+immediately after the
+.B =
+indicates that a partitionable device (e.g.
+.BR /dev/md/d0 )
+should be created rather than the original non-partitionable device.
+
+.TP
+.BI md= n , l , c , i , dev...
+This tells the md driver to assemble a legacy RAID0 or LINEAR array
+without a superblock.
+.I n
+gives the md device number,
+.I l
+gives the level, 0 for RAID0 or \-1 for LINEAR,
+.I c
+gives the chunk size as a base-2 logarithm offset by twelve, so 0
+means 4K, 1 means 8K.
+.I i
+is ignored (legacy support).
+
+.SH FILES
+.TP
+.B /proc/mdstat
+Contains information about the status of currently running arrays.
+.TP
+.B /proc/sys/dev/raid/speed_limit_min
+A readable and writable file that reflects the current "goal" rebuild
+speed for times when non-rebuild activity is current on an array.
+The speed is in Kibibytes per second, and is a per-device rate, not a
+per-array rate (which means that an array with more disks will shuffle
+more data for a given speed). The default is 1000.
+
+.TP
+.B /proc/sys/dev/raid/speed_limit_max
+A readable and writable file that reflects the current "goal" rebuild
+speed for times when no non-rebuild activity is current on an array.
+The default is 200,000.
+
+.SH SEE ALSO
+.BR mdadm (8)
diff --git a/md5.h b/md5.h
new file mode 100644
index 00000000..145970d4
--- /dev/null
+++ b/md5.h
@@ -0,0 +1,136 @@
+/* Declaration of functions and data types used for MD5 sum computing
+ library functions.
+ Copyright (C) 1995-1997,1999-2005 Free Software Foundation, Inc.
+
+ NOTE: The canonical source of this file is maintained with the GNU C
+ Library. Bugs can be reported to bug-glibc@prep.ai.mit.edu.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _MD5_H
+#define _MD5_H 1
+
+#include <stdio.h>
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#if HAVE_STDINT_H || _LIBC || defined __UCLIBC__
+# include <stdint.h>
+#endif
+
+#ifndef __GNUC_PREREQ
+# if defined __GNUC__ && defined __GNUC_MINOR__
+# define __GNUC_PREREQ(maj, min) \
+ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+# else
+# define __GNUC_PREREQ(maj, min) 0
+# endif
+#endif
+
+#ifndef __THROW
+# if defined __cplusplus && __GNUC_PREREQ (2,8)
+# define __THROW throw ()
+# else
+# define __THROW
+# endif
+#endif
+
+#ifndef __attribute__
+# if ! __GNUC_PREREQ (2,8) || __STRICT_ANSI__
+# define __attribute__(x)
+# endif
+#endif
+
+#ifndef _LIBC
+# define __md5_buffer md5_buffer
+# define __md5_finish_ctx md5_finish_ctx
+# define __md5_init_ctx md5_init_ctx
+# define __md5_process_block md5_process_block
+# define __md5_process_bytes md5_process_bytes
+# define __md5_read_ctx md5_read_ctx
+# define __md5_stream md5_stream
+#endif
+
+typedef uint32_t md5_uint32;
+
+/* Structure to save state of computation between the single steps. */
+struct md5_ctx
+{
+ md5_uint32 A;
+ md5_uint32 B;
+ md5_uint32 C;
+ md5_uint32 D;
+
+ md5_uint32 total[2];
+ md5_uint32 buflen;
+ char buffer[128] __attribute__ ((__aligned__ (__alignof__ (md5_uint32))));
+};
+
+/*
+ * The following three functions make up the low-level interface used by
+ * the functions `md5_stream' and `md5_buffer'.
+ */
+
+/* Initialize structure containing state of computation.
+ (RFC 1321, 3.3: Step 3) */
+extern void __md5_init_ctx (struct md5_ctx *ctx) __THROW;
+
+/* Starting with the result of former calls of this function (or the
+ initialization function) update the context for the next LEN bytes
+ starting at BUFFER.
+ It is necessary that LEN is a multiple of 64!!! */
+extern void __md5_process_block (const void *buffer, size_t len,
+ struct md5_ctx *ctx) __THROW;
+
+/* Starting with the result of former calls of this function (or the
+ initialization function) update the context for the next LEN bytes
+ starting at BUFFER.
+ It is NOT required that LEN is a multiple of 64. */
+extern void __md5_process_bytes (const void *buffer, size_t len,
+ struct md5_ctx *ctx) __THROW;
+
+/* Process the remaining bytes in the buffer and put result from CTX
+ in first 16 bytes following RESBUF. The result is always in little
+ endian byte order, so that a byte-wise output yields to the wanted
+ ASCII representation of the message digest.
+
+ IMPORTANT: On some systems it is required that RESBUF be correctly
+ aligned for a 32 bits value. */
+extern void *__md5_finish_ctx (struct md5_ctx *ctx, void *resbuf) __THROW;
+
+
+/* Put result from CTX in first 16 bytes following RESBUF. The result is
+ always in little endian byte order, so that a byte-wise output yields
+ to the wanted ASCII representation of the message digest.
+
+ IMPORTANT: On some systems it is required that RESBUF is correctly
+ aligned for a 32 bits value. */
+extern void *__md5_read_ctx (const struct md5_ctx *ctx, void *resbuf) __THROW;
+
+
+/* Compute MD5 message digest for bytes read from STREAM. The
+ resulting message digest number will be written into the 16 bytes
+ beginning at RESBLOCK. */
+extern int __md5_stream (FILE *stream, void *resblock) __THROW;
+
+/* Compute MD5 message digest for LEN bytes beginning at BUFFER. The
+ result is always in little endian byte order, so that a byte-wise
+ output yields to the wanted ASCII representation of the message
+ digest. */
+extern void *__md5_buffer (const char *buffer, size_t len,
+ void *resblock) __THROW;
+
+#endif /* md5.h */
diff --git a/md_p.h b/md_p.h
new file mode 100644
index 00000000..c4846bab
--- /dev/null
+++ b/md_p.h
@@ -0,0 +1,198 @@
+/*
+ md_p.h : physical layout of Linux RAID devices
+ Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ You should have received a copy of the GNU General Public License
+ (for example /usr/src/linux/COPYING); if not, write to the Free
+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef _MD_P_H
+#define _MD_P_H
+
+/*
+ * RAID superblock.
+ *
+ * The RAID superblock maintains some statistics on each RAID configuration.
+ * Each real device in the RAID set contains it near the end of the device.
+ * Some of the ideas are copied from the ext2fs implementation.
+ *
+ * We currently use 4096 bytes as follows:
+ *
+ * word offset function
+ *
+ * 0 - 31 Constant generic RAID device information.
+ * 32 - 63 Generic state information.
+ * 64 - 127 Personality specific information.
+ * 128 - 991 27 32-word descriptors of the disks in the raid set.
+ * (none) Reserved (MD_SB_RESERVED_WORDS is 0).
+ * 992 - 1023 Disk specific descriptor.
+ */
+
+/*
+ * If x is the real device size in bytes, we return an apparent size of:
+ *
+ * y = (x & ~(MD_RESERVED_BYTES - 1)) - MD_RESERVED_BYTES
+ *
+ * and place the 4kB superblock at offset y.
+ */
+#define MD_RESERVED_BYTES (64 * 1024)
+#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512)
+#define MD_RESERVED_BLOCKS (MD_RESERVED_BYTES / BLOCK_SIZE)
+
+#define MD_NEW_SIZE_SECTORS(x) ((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS)
+#define MD_NEW_SIZE_BLOCKS(x) ((x & ~(MD_RESERVED_BLOCKS - 1)) - MD_RESERVED_BLOCKS)
+
+#define MD_SB_BYTES 4096
+#define MD_SB_WORDS (MD_SB_BYTES / 4)
+#define MD_SB_BLOCKS (MD_SB_BYTES / BLOCK_SIZE)
+#define MD_SB_SECTORS (MD_SB_BYTES / 512)
+
+/*
+ * The following are counted in 32-bit words
+ */
+#define MD_SB_GENERIC_OFFSET 0
+#define MD_SB_PERSONALITY_OFFSET 64
+#define MD_SB_DISKS_OFFSET 128
+#define MD_SB_DESCRIPTOR_OFFSET 992
+
+#define MD_SB_GENERIC_CONSTANT_WORDS 32
+#define MD_SB_GENERIC_STATE_WORDS 32
+#define MD_SB_GENERIC_WORDS (MD_SB_GENERIC_CONSTANT_WORDS + MD_SB_GENERIC_STATE_WORDS)
+#define MD_SB_PERSONALITY_WORDS 64
+#define MD_SB_DESCRIPTOR_WORDS 32
+#define MD_SB_DISKS 27
+#define MD_SB_DISKS_WORDS (MD_SB_DISKS*MD_SB_DESCRIPTOR_WORDS)
+#define MD_SB_RESERVED_WORDS (1024 - MD_SB_GENERIC_WORDS - MD_SB_PERSONALITY_WORDS - MD_SB_DISKS_WORDS - MD_SB_DESCRIPTOR_WORDS)
+#define MD_SB_EQUAL_WORDS (MD_SB_GENERIC_WORDS + MD_SB_PERSONALITY_WORDS + MD_SB_DISKS_WORDS)
+
+/*
+ * Device "operational" state bits
+ */
+#define MD_DISK_FAULTY 0 /* disk is faulty / operational */
+#define MD_DISK_ACTIVE 1 /* disk is running but may not be in sync */
+#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */
+#define MD_DISK_REMOVED 3 /* disk has been removed from the raid set */
+
+#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" in RAID1 config.
+ * read requests will only be sent here in
+ * dire need
+ */
+
+#define MD_DISK_REPLACEMENT 17
+
+typedef struct mdp_device_descriptor_s {
+ __u32 number; /* 0 Device number in the entire set */
+ __u32 major; /* 1 Device major number */
+ __u32 minor; /* 2 Device minor number */
+ __u32 raid_disk; /* 3 The role of the device in the raid set */
+ __u32 state; /* 4 Operational state */
+ __u32 reserved[MD_SB_DESCRIPTOR_WORDS - 5];
+} mdp_disk_t;
+
+#define MD_SB_MAGIC 0xa92b4efc
+
+/*
+ * Superblock state bits
+ */
+#define MD_SB_CLEAN 0
+#define MD_SB_ERRORS 1
+#define MD_SB_BBM_ERRORS 2
+#define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */
+#define MD_SB_BLOCK_VOLUME 4 /* block activation of array, other arrays
+ * in container can be activated */
+#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */
+
+typedef struct mdp_superblock_s {
+ /*
+ * Constant generic information
+ */
+ __u32 md_magic; /* 0 MD identifier */
+ __u32 major_version; /* 1 major version to which the set conforms */
+ __u32 minor_version; /* 2 minor version ... */
+ __u32 patch_version; /* 3 patchlevel version ... */
+ __u32 gvalid_words; /* 4 Number of used words in this section */
+ __u32 set_uuid0; /* 5 Raid set identifier */
+ __u32 ctime; /* 6 Creation time */
+ __u32 level; /* 7 Raid personality */
+ __u32 size; /* 8 Apparent size of each individual disk */
+ __u32 nr_disks; /* 9 total disks in the raid set */
+ __u32 raid_disks; /* 10 disks in a fully functional raid set */
+ __u32 md_minor; /* 11 preferred MD minor device number */
+ __u32 not_persistent; /* 12 does it have a persistent superblock */
+ __u32 set_uuid1; /* 13 Raid set identifier #2 */
+ __u32 set_uuid2; /* 14 Raid set identifier #3 */
+ __u32 set_uuid3; /* 15 Raid set identifier #4 */
+ __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 16];
+
+ /*
+ * Generic state information
+ */
+ __u32 utime; /* 0 Superblock update time */
+ __u32 state; /* 1 State bits (clean, ...) */
+ __u32 active_disks; /* 2 Number of currently active disks */
+ __u32 working_disks; /* 3 Number of working disks */
+ __u32 failed_disks; /* 4 Number of failed disks */
+ __u32 spare_disks; /* 5 Number of spare disks */
+ __u32 sb_csum; /* 6 checksum of the whole superblock */
+#if __BYTE_ORDER == __BIG_ENDIAN
+ __u32 events_hi; /* 7 high-order of superblock update count */
+ __u32 events_lo; /* 8 low-order of superblock update count */
+ __u32 cp_events_hi; /* 9 high-order of checkpoint update count */
+ __u32 cp_events_lo; /* 10 low-order of checkpoint update count */
+#else
+ __u32 events_lo; /* 7 low-order of superblock update count */
+ __u32 events_hi; /* 8 high-order of superblock update count */
+ __u32 cp_events_lo; /* 9 low-order of checkpoint update count */
+ __u32 cp_events_hi; /* 10 high-order of checkpoint update count */
+#endif
+ __u32 recovery_cp; /* 11 recovery checkpoint sector count */
+ /* These are only valid for minor_version > 90 */
+ __u64 reshape_position; /* 12,13 next address in array-space for reshape */
+ __u32 new_level; /* 14 new level we are reshaping to */
+ __u32 delta_disks; /* 15 change in number of raid_disks */
+ __u32 new_layout; /* 16 new layout */
+ __u32 new_chunk; /* 17 new chunk size (bytes) */
+ __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 18];
+
+ /*
+ * Personality information
+ */
+ __u32 layout; /* 0 the array's physical layout */
+ __u32 chunk_size; /* 1 chunk size in bytes */
+ __u32 root_pv; /* 2 LV root PV */
+ __u32 root_block; /* 3 LV root block */
+ __u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 4];
+
+ /*
+ * Disks information
+ */
+ mdp_disk_t disks[MD_SB_DISKS];
+
+ /*
+ * Reserved
+ */
+ __u32 reserved[MD_SB_RESERVED_WORDS];
+
+ /*
+ * Active descriptor
+ */
+ mdp_disk_t this_disk;
+
+} mdp_super_t;
+
+#ifdef __TINYC__
+typedef unsigned long long __u64;
+#endif
+
+static inline __u64 md_event(mdp_super_t *sb) {
+ __u64 ev = sb->events_hi;
+ return (ev<<32)| sb->events_lo;
+}
+
+#endif
diff --git a/md_u.h b/md_u.h
new file mode 100644
index 00000000..be9868a7
--- /dev/null
+++ b/md_u.h
@@ -0,0 +1,122 @@
+/*
+ md_u.h : user <=> kernel API between Linux raidtools and RAID drivers
+ Copyright (C) 1998 Ingo Molnar
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ You should have received a copy of the GNU General Public License
+ (for example /usr/src/linux/COPYING); if not, write to the Free
+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef _MD_U_H
+#define _MD_U_H
+
+/* ioctls */
+
+/* status */
+#define RAID_VERSION _IOR (MD_MAJOR, 0x10, mdu_version_t)
+#define GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, mdu_array_info_t)
+#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t)
+#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13)
+#define RAID_AUTORUN _IO (MD_MAJOR, 0x14)
+#define GET_BITMAP_FILE _IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t)
+
+/* configuration */
+#define CLEAR_ARRAY _IO (MD_MAJOR, 0x20)
+#define ADD_NEW_DISK _IOW (MD_MAJOR, 0x21, mdu_disk_info_t)
+#define HOT_REMOVE_DISK _IO (MD_MAJOR, 0x22)
+#define SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, mdu_array_info_t)
+#define SET_DISK_INFO _IO (MD_MAJOR, 0x24)
+#define WRITE_RAID_INFO _IO (MD_MAJOR, 0x25)
+#define UNPROTECT_ARRAY _IO (MD_MAJOR, 0x26)
+#define PROTECT_ARRAY _IO (MD_MAJOR, 0x27)
+#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28)
+#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29)
+#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int)
+
+/* usage */
+#define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t)
+#define START_ARRAY _IO (MD_MAJOR, 0x31)
+#define STOP_ARRAY _IO (MD_MAJOR, 0x32)
+#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33)
+#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34)
+
+typedef struct mdu_version_s {
+ int major;
+ int minor;
+ int patchlevel;
+} mdu_version_t;
+
+typedef struct mdu_array_info_s {
+ /*
+ * Generic constant information
+ */
+ int major_version;
+ int minor_version;
+ int patch_version;
+ int ctime;
+ int level;
+ int size;
+ int nr_disks;
+ int raid_disks;
+ int md_minor;
+ int not_persistent;
+
+ /*
+ * Generic state information
+ */
+ int utime; /* 0 Superblock update time */
+ int state; /* 1 State bits (clean, ...) */
+ int active_disks; /* 2 Number of currently active disks */
+ int working_disks; /* 3 Number of working disks */
+ int failed_disks; /* 4 Number of failed disks */
+ int spare_disks; /* 5 Number of spare disks */
+
+ /*
+ * Personality information
+ */
+ int layout; /* 0 the array's physical layout */
+ int chunk_size; /* 1 chunk size in bytes */
+
+} mdu_array_info_t;
+
+typedef struct mdu_disk_info_s {
+ /*
+ * configuration/status of one particular disk
+ */
+ int number;
+ int major;
+ int minor;
+ int raid_disk;
+ int state;
+
+} mdu_disk_info_t;
+
+typedef struct mdu_start_info_s {
+ /*
+ * configuration/status of one particular disk
+ */
+ int major;
+ int minor;
+ int raid_disk;
+ int state;
+
+} mdu_start_info_t;
+
+typedef struct mdu_bitmap_file_s
+{
+ char pathname[4096];
+} mdu_bitmap_file_t;
+
+typedef struct mdu_param_s
+{
+ int personality; /* 1,2,3,4 */
+ int chunk_size; /* in bytes */
+ int max_fault; /* unused for now */
+} mdu_param_t;
+
+#endif
diff --git a/mdadm.8.in b/mdadm.8.in
new file mode 100644
index 00000000..bc67a681
--- /dev/null
+++ b/mdadm.8.in
@@ -0,0 +1,3205 @@
+.\" -*- nroff -*-
+.\" Copyright Neil Brown and others.
+.\" This program is free software; you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation; either version 2 of the License, or
+.\" (at your option) any later version.
+.\" See file COPYING in distribution for details.
+.TH MDADM 8 "" v3.3.2
+.SH NAME
+mdadm \- manage MD devices
+.I aka
+Linux Software RAID
+
+.SH SYNOPSIS
+
+.BI mdadm " [mode] <raiddevice> [options] <component-devices>"
+
+.SH DESCRIPTION
+RAID devices are virtual devices created from two or more
+real block devices. This allows multiple devices (typically disk
+drives or partitions thereof) to be combined into a single device to
+hold (for example) a single filesystem.
+Some RAID levels include redundancy and so can survive some degree of
+device failure.
+
+Linux Software RAID devices are implemented through the md (Multiple
+Devices) device driver.
+
+Currently, Linux supports
+.B LINEAR
+md devices,
+.B RAID0
+(striping),
+.B RAID1
+(mirroring),
+.BR RAID4 ,
+.BR RAID5 ,
+.BR RAID6 ,
+.BR RAID10 ,
+.BR MULTIPATH ,
+.BR FAULTY ,
+and
+.BR CONTAINER .
+
+.B MULTIPATH
+is not a Software RAID mechanism, but does involve
+multiple devices:
+each device is a path to one common physical storage device.
+New installations should not use md/multipath as it is not well
+supported and has no ongoing development. Use the Device Mapper based
+multipath-tools instead.
+
+.B FAULTY
+is also not true RAID, and it only involves one device. It
+provides a layer over a true device that can be used to inject faults.
+
+.B CONTAINER
+is different again. A
+.B CONTAINER
+is a collection of devices that are
+managed as a set. This is similar to the set of devices connected to
+a hardware RAID controller. The set of devices may contain a number
+of different RAID arrays each utilising some (or all) of the blocks from a
+number of the devices in the set. For example, two devices in a 5-device set
+might form a RAID1 using the whole devices. The remaining three might
+have a RAID5 over the first half of each device, and a RAID0 over the
+second half.
+
+With a
+.BR CONTAINER ,
+there is one set of metadata that describes all of
+the arrays in the container. So when
+.I mdadm
+creates a
+.B CONTAINER
+device, the device just represents the metadata. Other normal arrays (RAID1
+etc) can be created inside the container.
+
+.SH MODES
+mdadm has several major modes of operation:
+.TP
+.B Assemble
+Assemble the components of a previously created
+array into an active array. Components can be explicitly given
+or can be searched for.
+.I mdadm
+checks that the components
+do form a bona fide array, and can, on request, fiddle superblock
+information so as to assemble a faulty array.
+
+.TP
+.B Build
+Build an array that doesn't have per-device metadata (superblocks). For these
+sorts of arrays,
+.I mdadm
+cannot differentiate between initial creation and subsequent assembly
+of an array. It also cannot perform any checks that appropriate
+components have been requested. Because of this, the
+.B Build
+mode should only be used together with a complete understanding of
+what you are doing.
+
+.TP
+.B Create
+Create a new array with per-device metadata (superblocks).
+Appropriate metadata is written to each device, and then the array
+comprising those devices is activated. A 'resync' process is started
+to make sure that the array is consistent (e.g. both sides of a mirror
+contain the same data) but the content of the device is left otherwise
+untouched.
+The array can be used as soon as it has been created. There is no
+need to wait for the initial resync to finish.
+
+.TP
+.B "Follow or Monitor"
+Monitor one or more md devices and act on any state changes. This is
+only meaningful for RAID1, 4, 5, 6, 10 or multipath arrays, as
+only these have interesting state. RAID0 or Linear never have
+missing, spare, or failed drives, so there is nothing to monitor.
+
+.TP
+.B "Grow"
+Grow (or shrink) an array, or otherwise reshape it in some way.
+Currently supported growth options include changing the active size
+of component devices and changing the number of active devices in
+Linear and RAID levels 0/1/4/5/6,
+changing the RAID level between 0, 1, 5, and 6, and between 0 and 10,
+changing the chunk size and layout for RAID 0,4,5,6,10 as well as adding or
+removing a write-intent bitmap.
+
+.TP
+.B "Incremental Assembly"
+Add a single device to an appropriate array. If the addition of the
+device makes the array runnable, the array will be started.
+This provides a convenient interface to a
+.I hot-plug
+system. As each device is detected,
+.I mdadm
+has a chance to include it in some array as appropriate.
+Optionally, when the
+.I \-\-fail
+flag is passed in we will remove the device from any active array
+instead of adding it.
+
+If a
+.B CONTAINER
+is passed to
+.I mdadm
+in this mode, then any arrays within that container will be assembled
+and started.
+
+.TP
+.B Manage
+This is for doing things to specific components of an array such as
+adding new spares and removing faulty devices.
+
+.TP
+.B Misc
+This is an 'everything else' mode that supports operations on active
+arrays, operations on component devices such as erasing old superblocks, and
+information gathering operations.
+.\"This mode allows operations on independent devices such as examine MD
+.\"superblocks, erasing old superblocks and stopping active arrays.
+
+.TP
+.B Auto-detect
+This mode does not act on a specific device or array, but rather it
+requests the Linux Kernel to activate any auto-detected arrays.
+.SH OPTIONS
+
+.SH Options for selecting a mode are:
+
+.TP
+.BR \-A ", " \-\-assemble
+Assemble a pre-existing array.
+
+.TP
+.BR \-B ", " \-\-build
+Build a legacy array without superblocks.
+
+.TP
+.BR \-C ", " \-\-create
+Create a new array.
+
+.TP
+.BR \-F ", " \-\-follow ", " \-\-monitor
+Select
+.B Monitor
+mode.
+
+.TP
+.BR \-G ", " \-\-grow
+Change the size or shape of an active array.
+
+.TP
+.BR \-I ", " \-\-incremental
+Add/remove a single device to/from an appropriate array, and possibly start the array.
+
+.TP
+.B \-\-auto-detect
+Request that the kernel starts any auto-detected arrays. This can only
+work if
+.I md
+is compiled into the kernel \(em not if it is a module.
+Arrays can be auto-detected by the kernel if all the components are in
+primary MS-DOS partitions with partition type
+.BR FD ,
+and all use v0.90 metadata.
+In-kernel autodetect is not recommended for new installations. Using
+.I mdadm
+to detect and assemble arrays \(em possibly in an
+.I initrd
+\(em is substantially more flexible and should be preferred.
+
+.P
+If a device is given before any options, or if the first option is
+one of
+.BR \-\-add ,
+.BR \-\-re\-add ,
+.BR \-\-add\-spare ,
+.BR \-\-fail ,
+.BR \-\-remove ,
+or
+.BR \-\-replace ,
+then the MANAGE mode is assumed.
+Anything other than these will cause the
+.B Misc
+mode to be assumed.
+
+.SH Options that are not mode-specific are:
+
+.TP
+.BR \-h ", " \-\-help
+Display general help message or, after one of the above options, a
+mode-specific help message.
+
+.TP
+.B \-\-help\-options
+Display more detailed help about command line parsing and some commonly
+used options.
+
+.TP
+.BR \-V ", " \-\-version
+Print version information for mdadm.
+
+.TP
+.BR \-v ", " \-\-verbose
+Be more verbose about what is happening. This can be used twice to be
+extra-verbose.
+The extra verbosity currently only affects
+.B \-\-detail \-\-scan
+and
+.BR "\-\-examine \-\-scan" .
+
+.TP
+.BR \-q ", " \-\-quiet
+Avoid printing purely informative messages. With this,
+.I mdadm
+will be silent unless there is something really important to report.
+
+
+.TP
+.BR \-f ", " \-\-force
+Be more forceful about certain operations. See the various modes for
+the exact meaning of this option in different contexts.
+
+.TP
+.BR \-c ", " \-\-config=
+Specify the config file or directory. Default is to use
+.B /etc/mdadm/mdadm.conf
+and
+.BR /etc/mdadm/mdadm.conf.d ,
+or if those are missing then
+.B /etc/mdadm.conf
+and
+.BR /etc/mdadm.conf.d .
+If the config file given is
+.B "partitions"
+then nothing will be read, but
+.I mdadm
+will act as though the config file contained exactly
+.br
+.B " DEVICE partitions containers"
+.br
+and will read
+.B /proc/partitions
+to find a list of devices to scan, and
+.B /proc/mdstat
+to find a list of containers to examine.
+If the word
+.B "none"
+is given for the config file, then
+.I mdadm
+will act as though the config file were empty.
+
+If the name given is of a directory, then
+.I mdadm
+will collect all the files contained in the directory with a name ending
+in
+.BR .conf ,
+sort them lexically, and process all of those files as config files.
+
+.TP
+.BR \-s ", " \-\-scan
+Scan config file or
+.B /proc/mdstat
+for missing information.
+In general, this option gives
+.I mdadm
+permission to get any missing information (like component devices,
+array devices, array identities, and alert destination) from the
+configuration file (see previous option);
+one exception is MISC mode when using
+.B \-\-detail
+or
+.BR \-\-stop ,
+in which case
+.B \-\-scan
+says to get a list of array devices from
+.BR /proc/mdstat .
+
+.TP
+.BR \-e ", " \-\-metadata=
+Declare the style of RAID metadata (superblock) to be used. The
+default is {DEFAULT_METADATA} for
+.BR \-\-create ,
+and to guess for other operations.
+The default can be overridden by setting the
+.B metadata
+value for the
+.B CREATE
+keyword in
+.BR mdadm.conf .
+
+Options are:
+.RS
+.ie '{DEFAULT_METADATA}'0.90'
+.IP "0, 0.90, default"
+.el
+.IP "0, 0.90"
+Use the original 0.90 format superblock. This format limits arrays to
+28 component devices and limits component devices of levels 1 and
+greater to 2 terabytes. It is also possible for there to be confusion
+about whether the superblock applies to a whole device or just the
+last partition, if that partition starts on a 64K boundary.
+.ie '{DEFAULT_METADATA}'0.90'
+.IP "1, 1.0, 1.1, 1.2"
+.el
+.IP "1, 1.0, 1.1, 1.2 default"
+Use the new version-1 format superblock. This has fewer restrictions.
+It can easily be moved between hosts with different endian-ness, and a
+recovery operation can be checkpointed and restarted. The different
+sub-versions store the superblock at different locations on the
+device, either at the end (for 1.0), at the start (for 1.1) or 4K from
+the start (for 1.2). "1" is equivalent to "1.2" (the commonly
+preferred 1.x format).
+'if '{DEFAULT_METADATA}'1.2' "default" is equivalent to "1.2".
+.IP ddf
+Use the "Industry Standard" DDF (Disk Data Format) format defined by
+SNIA.
+When creating a DDF array a
+.B CONTAINER
+will be created, and normal arrays can be created in that container.
+.IP imsm
+Use the Intel(R) Matrix Storage Manager metadata format. This creates a
+.B CONTAINER
+which is managed in a similar manner to DDF, and is supported by an
+option-rom on some platforms:
+.IP
+.B http://www.intel.com/design/chipsets/matrixstorage_sb.htm
+.PP
+.RE
+
+.TP
+.B \-\-homehost=
+This will override any
+.B HOMEHOST
+setting in the config file and provides the identity of the host which
+should be considered the home for any arrays.
+
+When creating an array, the
+.B homehost
+will be recorded in the metadata. For version-1 superblocks, it will
+be prefixed to the array name. For version-0.90 superblocks, part of
+the SHA1 hash of the hostname will be stored in the later half of the
+UUID.
+
+When reporting information about an array, any array which is tagged
+for the given homehost will be reported as such.
+
+When using Auto-Assemble, only arrays tagged for the given homehost
+will be allowed to use 'local' names (i.e. not ending in '_' followed
+by a digit string). See below under
+.BR "Auto Assembly" .
+
+.TP
+.B \-\-prefer=
+When
+.I mdadm
+needs to print the name for a device it normally finds the name in
+.B /dev
+which refers to the device and is shortest. When a path component is
+given with
+.B \-\-prefer
+.I mdadm
+will prefer a longer name if it contains that component. For example
+.B \-\-prefer=by-uuid
+will prefer a name in a subdirectory of
+.B /dev
+called
+.BR by-uuid .
+
+This functionality is currently only provided by
+.B \-\-detail
+and
+.BR \-\-monitor .
+
+.SH For create, build, or grow:
+
+.TP
+.BR \-n ", " \-\-raid\-devices=
+Specify the number of active devices in the array. This, plus the
+number of spare devices (see below) must equal the number of
+.I component-devices
+(including "\fBmissing\fP" devices)
+that are listed on the command line for
+.BR \-\-create .
+Setting a value of 1 is probably
+a mistake and so requires that
+.B \-\-force
+be specified first. A value of 1 will then be allowed for linear,
+multipath, RAID0 and RAID1. It is never allowed for RAID4, RAID5 or RAID6.
+.br
+This number can only be changed using
+.B \-\-grow
+for RAID1, RAID4, RAID5 and RAID6 arrays, and only on kernels which provide
+the necessary support.
+
+.TP
+.BR \-x ", " \-\-spare\-devices=
+Specify the number of spare (eXtra) devices in the initial array.
+Spares can also be added
+and removed later. The number of component devices listed
+on the command line must equal the number of RAID devices plus the
+number of spare devices.
+
+.TP
+.BR \-z ", " \-\-size=
+Amount (in Kibibytes) of space to use from each drive in RAID levels 1/4/5/6.
+This must be a multiple of the chunk size, and must leave about 128Kb
+of space at the end of the drive for the RAID superblock.
+If this is not specified
+(as it normally is not) the smallest drive (or partition) sets the
+size, though if there is a variance among the drives of greater than 1%, a warning is
+issued.
+
+A suffix of 'M' or 'G' can be given to indicate Megabytes or
+Gigabytes respectively.
+
+Sometimes a replacement drive can be a little smaller than the
+original drives though this should be minimised by IDEMA standards.
+Such a replacement drive will be rejected by
+.IR md .
+To guard against this it can be useful to set the initial size
+slightly smaller than the smaller device with the aim that it will
+still be larger than any replacement.
+
+This value can be set with
+.B \-\-grow
+for RAID level 1/4/5/6 though
+.B CONTAINER
+based arrays such as those with IMSM metadata may not be able to
+support this.
+If the array was created with a size smaller than the currently
+active drives, the extra space can be accessed using
+.BR \-\-grow .
+The size can be given as
+.B max
+which means to choose the largest size that fits on all current drives.
+
+Before reducing the size of the array (with
+.BR "\-\-grow \-\-size=" )
+you should make sure that space isn't needed. If the device holds a
+filesystem, you would need to resize the filesystem to use less space.
+
+After reducing the array size you should check that the data stored in
+the device is still available. If the device holds a filesystem, then
+an 'fsck' of the filesystem is a minimum requirement. If there are
+problems the array can be made bigger again with no loss with another
+.B "\-\-grow \-\-size="
+command.
+
+This value cannot be used when creating a
+.B CONTAINER
+such as with DDF and IMSM metadata, though it is perfectly valid when
+creating an array inside a container.
+
+.TP
+.BR \-Z ", " \-\-array\-size=
+This is only meaningful with
+.B \-\-grow
+and its effect is not persistent: when the array is stopped and
+restarted the default array size will be restored.
+
+Setting the array-size causes the array to appear smaller to programs
+that access the data. This is particularly needed before reshaping an
+array so that it will be smaller. As the reshape is not reversible,
+but setting the size with
+.B \-\-array-size
+is, it is required that the array size is reduced as appropriate
+before the number of devices in the array is reduced.
+
+Before reducing the size of the array you should make sure that space
+isn't needed. If the device holds a filesystem, you would need to
+resize the filesystem to use less space.
+
+After reducing the array size you should check that the data stored in
+the device is still available. If the device holds a filesystem, then
+an 'fsck' of the filesystem is a minimum requirement. If there are
+problems the array can be made bigger again with no loss with another
+.B "\-\-grow \-\-array\-size="
+command.
+
+A suffix of 'M' or 'G' can be given to indicate Megabytes or
+Gigabytes respectively.
+A value of
+.B max
+restores the apparent size of the array to be whatever the real
+amount of available space is.
+
+.TP
+.BR \-c ", " \-\-chunk=
+Specify chunk size in kibibytes. The default when creating an
+array is 512KB. To ensure compatibility with earlier versions, the
+default when building an array with no persistent metadata is 64KB.
+This is only meaningful for RAID0, RAID4, RAID5, RAID6, and RAID10.
+
+RAID4, RAID5, RAID6, and RAID10 require the chunk size to be a power
+of 2. In any case it must be a multiple of 4KB.
+
+A suffix of 'M' or 'G' can be given to indicate Megabytes or
+Gigabytes respectively.
+
+.TP
+.BR \-\-rounding=
+Specify rounding factor for a Linear array. The size of each
+component will be rounded down to a multiple of this size.
+This is a synonym for
+.B \-\-chunk
+but highlights the different meaning for Linear as compared to other
+RAID levels. The default is 64K if a kernel earlier than 2.6.16 is in
+use, and is 0K (i.e. no rounding) in later kernels.
+
+.TP
+.BR \-l ", " \-\-level=
+Set RAID level. When used with
+.BR \-\-create ,
+options are: linear, raid0, 0, stripe, raid1, 1, mirror, raid4, 4,
+raid5, 5, raid6, 6, raid10, 10, multipath, mp, faulty, container.
+Obviously some of these are synonymous.
+
+When a
+.B CONTAINER
+metadata type is requested, only the
+.B container
+level is permitted, and it does not need to be explicitly given.
+
+When used with
+.BR \-\-build ,
+only linear, stripe, raid0, 0, raid1, multipath, mp, and faulty are valid.
+
+Can be used with
+.B \-\-grow
+to change the RAID level in some cases. See LEVEL CHANGES below.
+
+.TP
+.BR \-p ", " \-\-layout=
+This option configures the fine details of data layout for RAID5, RAID6,
+and RAID10 arrays, and controls the failure modes for
+.IR faulty .
+
+The layout of the RAID5 parity block can be one of
+.BR left\-asymmetric ,
+.BR left\-symmetric ,
+.BR right\-asymmetric ,
+.BR right\-symmetric ,
+.BR la ", " ra ", " ls ", " rs .
+The default is
+.BR left\-symmetric .
+
+It is also possible to cause RAID5 to use a RAID4-like layout by
+choosing
+.BR parity\-first ,
+or
+.BR parity\-last .
+
+Finally for RAID5 there are DDF\-compatible layouts,
+.BR ddf\-zero\-restart ,
+.BR ddf\-N\-restart ,
+and
+.BR ddf\-N\-continue .
+
+These same layouts are available for RAID6. There are also 4 layouts
+that will provide an intermediate stage for converting between RAID5
+and RAID6. These provide a layout which is identical to the
+corresponding RAID5 layout on the first N\-1 devices, and has the 'Q'
+syndrome (the second 'parity' block used by RAID6) on the last device.
+These layouts are:
+.BR left\-symmetric\-6 ,
+.BR right\-symmetric\-6 ,
+.BR left\-asymmetric\-6 ,
+.BR right\-asymmetric\-6 ,
+and
+.BR parity\-first\-6 .
+
+When setting the failure mode for level
+.I faulty,
+the options are:
+.BR write\-transient ", " wt ,
+.BR read\-transient ", " rt ,
+.BR write\-persistent ", " wp ,
+.BR read\-persistent ", " rp ,
+.BR write\-all ,
+.BR read\-fixable ", " rf ,
+.BR clear ", " flush ", " none .
+
+Each failure mode can be followed by a number, which is used as a period
+between fault generation. Without a number, the fault is generated
+once on the first relevant request. With a number, the fault will be
+generated after that many requests, and will continue to be generated
+every time the period elapses.
+
+Multiple failure modes can be current simultaneously by using the
+.B \-\-grow
+option to set subsequent failure modes.
+
+"clear" or "none" will remove any pending or periodic failure modes,
+and "flush" will clear any persistent faults.
+
+Finally, the layout options for RAID10 are one of 'n', 'o' or 'f' followed
+by a small number. The default is 'n2'. The supported options are:
+
+.I 'n'
+signals 'near' copies. Multiple copies of one data block are at
+similar offsets in different devices.
+
+.I 'o'
+signals 'offset' copies. Rather than the chunks being duplicated
+within a stripe, whole stripes are duplicated but are rotated by one
+device so duplicate blocks are on different devices. Thus subsequent
+copies of a block are in the next drive, and are one chunk further
+down.
+
+.I 'f'
+signals 'far' copies
+(multiple copies have very different offsets).
+See md(4) for more detail about 'near', 'offset', and 'far'.
+
+The number is the number of copies of each datablock. 2 is normal, 3
+can be useful. This number can be at most equal to the number of
+devices in the array. It does not need to divide evenly into that
+number (e.g. it is perfectly legal to have an 'n2' layout for an array
+with an odd number of devices).
+
+When an array is converted between RAID5 and RAID6 an intermediate
+RAID6 layout is used in which the second parity block (Q) is always on
+the last device. To convert a RAID5 to RAID6 and leave it in this new
+layout (which does not require re-striping) use
+.BR \-\-layout=preserve .
+This will try to avoid any restriping.
+
+The converse of this is
+.B \-\-layout=normalise
+which will change a non-standard RAID6 layout into a more standard
+arrangement.
+
+.TP
+.BR \-\-parity=
+same as
+.B \-\-layout
+(thus explaining the p of
+.BR \-p ).
+
+.TP
+.BR \-b ", " \-\-bitmap=
+Specify a file to store a write-intent bitmap in. The file should not
+exist unless
+.B \-\-force
+is also given. The same file should be provided
+when assembling the array. If the word
+.B "internal"
+is given, then the bitmap is stored with the metadata on the array,
+and so is replicated on all devices. If the word
+.B "none"
+is given with
+.B \-\-grow
+mode, then any bitmap that is present is removed.
+
+To help catch typing errors, the filename must contain at least one
+slash ('/') if it is a real file (not 'internal' or 'none').
+
+Note: external bitmaps are only known to work on ext2 and ext3.
+Storing bitmap files on other filesystems may result in serious problems.
+
+When creating an array on devices which are 100G or larger,
+.I mdadm
+automatically adds an internal bitmap as it will usually be
+beneficial. This can be suppressed with
+.BR "\-\-bitmap=none" .
+
+.TP
+.BR \-\-bitmap\-chunk=
+Set the chunksize of the bitmap. Each bit corresponds to that many
+Kilobytes of storage.
+When using a file based bitmap, the default is to use the smallest
+size that is at least 4 and requires no more than 2^21 chunks.
+When using an
+.B internal
+bitmap, the chunksize defaults to 64Meg, or larger if necessary to
+fit the bitmap into the available space.
+
+A suffix of 'M' or 'G' can be given to indicate Megabytes or
+Gigabytes respectively.
+
+.TP
+.BR \-W ", " \-\-write\-mostly
+subsequent devices listed in a
+.BR \-\-build ,
+.BR \-\-create ,
+or
+.B \-\-add
+command will be flagged as 'write-mostly'. This is valid for RAID1
+only and means that the 'md' driver will avoid reading from these
+devices if at all possible. This can be useful if mirroring over a
+slow link.
+
+.TP
+.BR \-\-write\-behind=
+Specify that write-behind mode should be enabled (valid for RAID1
+only). If an argument is specified, it will set the maximum number
+of outstanding writes allowed. The default value is 256.
+A write-intent bitmap is required in order to use write-behind
+mode, and write-behind is only attempted on drives marked as
+.IR write-mostly .
+
+.TP
+.BR \-\-assume\-clean
+Tell
+.I mdadm
+that the array pre-existed and is known to be clean. It can be useful
+when trying to recover from a major failure as you can be sure that no
+data will be affected unless you actually write to the array. It can
+also be used when creating a RAID1 or RAID10 if you want to avoid the
+initial resync, however this practice \(em while normally safe \(em is not
+recommended. Use this only if you really know what you are doing.
+.IP
+When the devices that will be part of a new array were filled
+with zeros before creation the operator knows the array is
+actually clean. If that is the case, such as after running
+badblocks, this argument can be used to tell mdadm the
+facts the operator knows.
+.IP
+When an array is resized to a larger size with
+.B "\-\-grow \-\-size="
+the new space is normally resynced in the same way that the whole
+array is resynced at creation. From Linux version 3.0,
+.B \-\-assume\-clean
+can be used with that command to avoid the automatic resync.
+
+.TP
+.BR \-\-backup\-file=
+This is needed when
+.B \-\-grow
+is used to increase the number of raid-devices in a RAID5 or RAID6 if
+there are no spare devices available, or to shrink, change RAID level
+or layout. See the GROW MODE section below on RAID\-DEVICES CHANGES.
+The file must be stored on a separate device, not on the RAID array
+being reshaped.
+
+.TP
+.B \-\-data\-offset=
+Arrays with 1.x metadata can leave a gap between the start of the
+device and the start of array data. This gap can be used for various
+metadata. The start of data is known as the
+.IR data\-offset .
+Normally an appropriate data offset is computed automatically.
+However it can be useful to set it explicitly such as when re-creating
+an array which was originally created using a different version of
+.I mdadm
+which computed a different offset.
+
+Setting the offset explicitly over-rides the default. The value given
+is in Kilobytes unless an 'M' or 'G' suffix is given.
+
+Since Linux 3.4,
+.B \-\-data\-offset
+can also be used with
+.B \-\-grow
+for some RAID levels (initially on RAID10). This allows the
+data\-offset to be changed as part of the reshape process. When the
+data offset is changed, no backup file is required as the difference
+in offsets is used to provide the same functionality.
+
+When the new offset is earlier than the old offset, the number of
+devices in the array cannot shrink. When it is after the old offset,
+the number of devices in the array cannot increase.
+
+When creating an array,
+.B \-\-data\-offset
+can be specified as
+.BR variable .
+In that case each member device is expected to have an offset appended
+to the name, separated by a colon. This makes it possible to recreate
+exactly an array which has varying data offsets (as can happen when
+different versions of
+.I mdadm
+are used to add different devices).
+
+.TP
+.BR \-\-continue
+This option is complementary to the
+.B \-\-freeze-reshape
+option for assembly. It is needed when
+.B \-\-grow
+operation is interrupted and it is not restarted automatically due to
+.B \-\-freeze-reshape
+usage during array assembly. This option is used together with
+.BR \-G " (" \-\-grow )
+command and device for a pending reshape to be continued.
+All parameters required for reshape continuation will be read from array metadata.
+If initial
+.BR \-\-grow
+command had required
+.BR \-\-backup\-file=
+option to be set, continuation option will require to have exactly the same
+backup file given as well.
+.IP
+Any other parameter passed together with
+.BR \-\-continue
+option will be ignored.
+
+.TP
+.BR \-N ", " \-\-name=
+Set a
+.B name
+for the array. This is currently only effective when creating an
+array with a version-1 superblock, or an array in a DDF container.
+The name is a simple textual string that can be used to identify array
+components when assembling. If name is needed but not specified, it
+is taken from the basename of the device that is being created.
+e.g. when creating
+.I /dev/md/home
+the
+.B name
+will default to
+.IR home .
+
+.TP
+.BR \-R ", " \-\-run
+Insist that
+.I mdadm
+run the array, even if some of the components
+appear to be active in another array or filesystem. Normally
+.I mdadm
+will ask for confirmation before including such components in an
+array. This option causes that question to be suppressed.
+
+.TP
+.BR \-f ", " \-\-force
+Insist that
+.I mdadm
+accept the geometry and layout specified without question. Normally
+.I mdadm
+will not allow creation of an array with only one device, and will try
+to create a RAID5 array with one missing drive (as this makes the
+initial resync work faster). With
+.BR \-\-force ,
+.I mdadm
+will not try to be so clever.
+
+.TP
+.BR \-o ", " \-\-readonly
+Start the array
+.B read only
+rather than read-write as normal. No writes will be allowed to the
+array, and no resync, recovery, or reshape will be started.
+
+.TP
+.BR \-a ", " "\-\-auto{=yes,md,mdp,part,p}{NN}"
+Instruct mdadm how to create the device file if needed, possibly allocating
+an unused minor number. "md" causes a non-partitionable array
+to be used (though since Linux 2.6.28, these array devices are in fact
+partitionable). "mdp", "part" or "p" causes a partitionable array (2.6 and
+later) to be used. "yes" requires the named md device to have
+a 'standard' format, and the type and minor number will be determined
+from this. With mdadm 3.0, device creation is normally left up to
+.I udev
+so this option is unlikely to be needed.
+See DEVICE NAMES below.
+
+The argument can also come immediately after
+"\-a". e.g. "\-ap".
+
+If
+.B \-\-auto
+is not given on the command line or in the config file, then
+the default will be
+.BR \-\-auto=yes .
+
+If
+.B \-\-scan
+is also given, then any
+.I auto=
+entries in the config file will override the
+.B \-\-auto
+instruction given on the command line.
+
+For partitionable arrays,
+.I mdadm
+will create the device file for the whole array and for the first 4
+partitions. A different number of partitions can be specified at the
+end of this option (e.g.
+.BR \-\-auto=p7 ).
+If the device name ends with a digit, the partition names add a 'p',
+and a number, e.g.
+.IR /dev/md/home1p3 .
+If there is no trailing digit, then the partition names just have a
+number added, e.g.
+.IR /dev/md/scratch3 .
+
+If the md device name is in a 'standard' format as described in DEVICE
+NAMES, then it will be created, if necessary, with the appropriate
+device number based on that name. If the device name is not in one of these
+formats, then an unused device number will be allocated. The device
+number will be considered unused if there is no active array for that
+number, and there is no entry in /dev for that number and with a
+non-standard name. Names that are not in 'standard' format are only
+allowed in "/dev/md/".
+
+This is meaningful with
+.B \-\-create
+or
+.BR \-\-build .
+
+.TP
+.BR \-a ", " "\-\-add"
+This option can be used in Grow mode in two cases.
+
+If the target array is a Linear array, then
+.B \-\-add
+can be used to add one or more devices to the array. They
+are simply catenated on to the end of the array. Once added, the
+devices cannot be removed.
+
+If the
+.B \-\-raid\-disks
+option is being used to increase the number of devices in an array,
+then
+.B \-\-add
+can be used to add some extra devices to be included in the array.
+In most cases this is not needed as the extra devices can be added as
+spares first, and then the number of raid-disks can be changed.
+However for RAID0, it is not possible to add spares. So to increase
+the number of devices in a RAID0, it is necessary to set the new
+number of devices, and to add the new devices, in the same command.
+
+.SH For assemble:
+
+.TP
+.BR \-u ", " \-\-uuid=
+uuid of array to assemble. Devices which don't have this uuid are
+excluded.
+
+.TP
+.BR \-m ", " \-\-super\-minor=
+Minor number of device that array was created for. Devices which
+don't have this minor number are excluded. If you create an array as
+/dev/md1, then all superblocks will contain the minor number 1, even if
+the array is later assembled as /dev/md2.
+
+Giving the literal word "dev" for
+.B \-\-super\-minor
+will cause
+.I mdadm
+to use the minor number of the md device that is being assembled.
+e.g. when assembling
+.BR /dev/md0 ,
+.B \-\-super\-minor=dev
+will look for super blocks with a minor number of 0.
+
+.B \-\-super\-minor
+is only relevant for v0.90 metadata, and should not normally be used.
+Using
+.B \-\-uuid
+is much safer.
+
+.TP
+.BR \-N ", " \-\-name=
+Specify the name of the array to assemble. This must be the name
+that was specified when creating the array. It must either match
+the name stored in the superblock exactly, or it must match
+with the current
+.I homehost
+prefixed to the start of the given name.
+
+.TP
+.BR \-f ", " \-\-force
+Assemble the array even if the metadata on some devices appears to be
+out-of-date. If
+.I mdadm
+cannot find enough working devices to start the array, but can find
+some devices that are recorded as having failed, then it will mark
+those devices as working so that the array can be started.
+An array which requires
+.B \-\-force
+to be started may contain data corruption. Use it carefully.
+
+.TP
+.BR \-R ", " \-\-run
+Attempt to start the array even if fewer drives were given than were
+present last time the array was active. Normally if not all the
+expected drives are found and
+.B \-\-scan
+is not used, then the array will be assembled but not started.
+With
+.B \-\-run
+an attempt will be made to start it anyway.
+
+.TP
+.B \-\-no\-degraded
+This is the reverse of
+.B \-\-run
+in that it inhibits the startup of array unless all expected drives
+are present. This is only needed with
+.B \-\-scan,
+and can be used if the physical connections to devices are
+not as reliable as you would like.
+
+.TP
+.BR \-a ", " "\-\-auto{=no,yes,md,mdp,part}"
+See this option under Create and Build options.
+
+.TP
+.BR \-b ", " \-\-bitmap=
+Specify the bitmap file that was given when the array was created. If
+an array has an
+.B internal
+bitmap, there is no need to specify this when assembling the array.
+
+.TP
+.BR \-\-backup\-file=
+If
+.B \-\-backup\-file
+was used while reshaping an array (e.g. changing number of devices or
+chunk size) and the system crashed during the critical section, then the same
+.B \-\-backup\-file
+must be presented to
+.B \-\-assemble
+to allow possibly corrupted data to be restored, and the reshape
+to be completed.
+
+.TP
+.BR \-\-invalid\-backup
+If the file needed for the above option is not available for any
+reason an empty file can be given together with this option to
+indicate that the backup file is invalid. In this case the data that
+was being rearranged at the time of the crash could be irrecoverably
+lost, but the rest of the array may still be recoverable. This option
+should only be used as a last resort if there is no way to recover the
+backup file.
+
+
+.TP
+.BR \-U ", " \-\-update=
+Update the superblock on each device while assembling the array. The
+argument given to this flag can be one of
+.BR sparc2.2 ,
+.BR summaries ,
+.BR uuid ,
+.BR name ,
+.BR homehost ,
+.BR resync ,
+.BR byteorder ,
+.BR devicesize ,
+.BR no\-bitmap ,
+.BR bbl ,
+.BR no\-bbl ,
+.BR metadata ,
+or
+.BR super\-minor .
+
+The
+.B sparc2.2
+option will adjust the superblock of an array that was created on a Sparc
+machine running a patched 2.2 Linux kernel. This kernel got the
+alignment of part of the superblock wrong. You can use the
+.B "\-\-examine \-\-sparc2.2"
+option to
+.I mdadm
+to see what effect this would have.
+
+The
+.B super\-minor
+option will update the
+.B "preferred minor"
+field on each superblock to match the minor number of the array being
+assembled.
+This can be useful if
+.B \-\-examine
+reports a different "Preferred Minor" to
+.BR \-\-detail .
+In some cases this update will be performed automatically
+by the kernel driver. In particular the update happens automatically
+at the first write to an array with redundancy (RAID level 1 or
+greater) on a 2.6 (or later) kernel.
+
+The
+.B uuid
+option will change the uuid of the array. If a UUID is given with the
+.B \-\-uuid
+option that UUID will be used as a new UUID and will
+.B NOT
+be used to help identify the devices in the array.
+If no
+.B \-\-uuid
+is given, a random UUID is chosen.
+
+The
+.B name
+option will change the
+.I name
+of the array as stored in the superblock. This is only supported for
+version-1 superblocks.
+
+The
+.B homehost
+option will change the
+.I homehost
+as recorded in the superblock. For version-0 superblocks, this is the
+same as updating the UUID.
+For version-1 superblocks, this involves updating the name.
+
+The
+.B resync
+option will cause the array to be marked
+.I dirty
+meaning that any redundancy in the array (e.g. parity for RAID5,
+copies for RAID1) may be incorrect. This will cause the RAID system
+to perform a "resync" pass to make sure that all redundant information
+is correct.
+
+The
+.B byteorder
+option allows arrays to be moved between machines with different
+byte-order.
+When assembling such an array for the first time after a move, giving
+.B "\-\-update=byteorder"
+will cause
+.I mdadm
+to expect superblocks to have their byteorder reversed, and will
+correct that order before assembling the array. This is only valid
+with original (Version 0.90) superblocks.
+
+The
+.B summaries
+option will correct the summaries in the superblock. That is the
+counts of total, working, active, failed, and spare devices.
+
+The
+.B devicesize
+option will rarely be of use. It applies to version 1.1 and 1.2 metadata
+only (where the metadata is at the start of the device) and is only
+useful when the component device has changed size (typically become
+larger). The version 1 metadata records the amount of the device that
+can be used to store data, so if a device in a version 1.1 or 1.2
+array becomes larger, the metadata will still be visible, but the
+extra space will not. In this case it might be useful to assemble the
+array with
+.BR \-\-update=devicesize .
+This will cause
+.I mdadm
+to determine the maximum usable amount of space on each device and
+update the relevant field in the metadata.
+
+The
+.B metadata
+option only works on v0.90 metadata arrays and will convert them to
+v1.0 metadata. The array must not be dirty (i.e. it must not need a
+sync) and it must not have a write-intent bitmap.
+
+The old metadata will remain on the devices, but will appear older
+than the new metadata and so will usually be ignored. The old metadata
+(or indeed the new metadata) can be removed by giving the appropriate
+.B \-\-metadata=
+option to
+.BR \-\-zero\-superblock .
+
+The
+.B no\-bitmap
+option can be used when an array has an internal bitmap which is
+corrupt in some way so that assembling the array normally fails. It
+will cause any internal bitmap to be ignored.
+
+The
+.B bbl
+option will reserve space in each device for a bad block list. This
+will be 4K in size and positioned near the end of any free space
+between the superblock and the data.
+
+The
+.B no\-bbl
+option will cause any reservation of space for a bad block list to be
+removed. If the bad block list contains entries, this will fail, as
+removing the list could cause data corruption.
+
+.TP
+.BR \-\-freeze\-reshape
+Option is intended to be used in start-up scripts during initrd boot phase.
+When an array under reshape is assembled during initrd phase, this option
+stops reshape after the reshape critical section has been restored. This happens
+before file system pivot operation and avoids loss of file system context.
+Losing file system context would cause reshape to be broken.
+
+Reshape can be continued later using the
+.B \-\-continue
+option for the grow command.
+
+.SH For Manage mode:
+
+.TP
+.BR \-t ", " \-\-test
+Unless a more serious error occurred,
+.I mdadm
+will exit with a status of 2 if no changes were made to the array and
+0 if at least one change was made.
+This can be useful when an indirect specifier such as
+.BR missing ,
+.B detached
+or
+.B faulty
+is used in requesting an operation on the array.
+.B \-\-test
+will report failure if these specifiers didn't find any match.
+
+.TP
+.BR \-a ", " \-\-add
+hot-add listed devices.
+If a device appears to have recently been part of the array
+(possibly it failed or was removed) the device is re\-added as described
+in the next point.
+If that fails or the device was never part of the array, the device is
+added as a hot-spare.
+If the array is degraded, it will immediately start to rebuild data
+onto that spare.
+
+Note that this and the following options are only meaningful on arrays
+with redundancy. They don't apply to RAID0 or Linear.
+
+.TP
+.BR \-\-re\-add
+re\-add a device that was previously removed from an array.
+If the metadata on the device reports that it is a member of the
+array, and the slot that it used is still vacant, then the device will
+be added back to the array in the same position. This will normally
+cause the data for that device to be recovered. However based on the
+event count on the device, the recovery may only require sections that
+are flagged by a write-intent bitmap to be recovered or may not require
+any recovery at all.
+
+When used on an array that has no metadata (i.e. it was built with
+.BR \-\-build )
+it will be assumed that bitmap-based recovery is enough to make the
+device fully consistent with the array.
+
+When used with v1.x metadata,
+.B \-\-re\-add
+can be accompanied by
+.BR \-\-update=devicesize ,
+.BR \-\-update=bbl ", or"
+.BR \-\-update=no\-bbl .
+See the description of these options when used in Assemble mode for an
+explanation of their use.
+
+If the device name given is
+.B missing
+then
+.I mdadm
+will try to find any device that looks like it should be
+part of the array but isn't and will try to re\-add all such devices.
+
+If the device name given is
+.B faulty
+then
+.I mdadm
+will find all devices in the array that are marked
+.BR faulty ,
+remove them and attempt to immediately re\-add them. This can be
+useful if you are certain that the reason for failure has been
+resolved.
+
+.TP
+.B \-\-add\-spare
+Add a device as a spare. This is similar to
+.B \-\-add
+except that it does not attempt
+.B \-\-re\-add
+first. The device will be added as a spare even if it looks like it
+could be a recent member of the array.
+
+.TP
+.BR \-r ", " \-\-remove
+remove listed devices. They must not be active. i.e. they should
+be failed or spare devices.
+
+As well as the name of a device file
+(e.g.
+.BR /dev/sda1 )
+the words
+.BR failed ,
+.B detached
+and names like
+.B set-A
+can be given to
+.BR \-\-remove .
+The first causes all failed devices to be removed. The second causes
+any device which is no longer connected to the system (i.e. an 'open'
+returns
+.BR ENXIO )
+to be removed.
+The third will remove a set as described below under
+.BR \-\-fail .
+
+.TP
+.BR \-f ", " \-\-fail
+Mark listed devices as faulty.
+As well as the name of a device file, the word
+.B detached
+or a set name like
+.B set\-A
+can be given. The former will cause any device that has been detached from
+the system to be marked as failed. It can then be removed.
+
+For RAID10 arrays where the number of copies evenly divides the number
+of devices, the devices can be conceptually divided into sets where
+each set contains a single complete copy of the data on the array.
+Sometimes a RAID10 array will be configured so that these sets are on
+separate controllers. In this case all the devices in one set can be
+failed by giving a name like
+.B set\-A
+or
+.B set\-B
+to
+.BR \-\-fail .
+The appropriate set names are reported by
+.BR \-\-detail .
+
+.TP
+.BR \-\-set\-faulty
+same as
+.BR \-\-fail .
+
+.TP
+.B \-\-replace
+Mark listed devices as requiring replacement. As soon as a spare is
+available, it will be rebuilt and will replace the marked device.
+This is similar to marking a device as faulty, but the device remains
+in service during the recovery process to increase resilience against
+multiple failures. When the replacement process finishes, the
+replaced device will be marked as faulty.
+
+.TP
+.B \-\-with
+This can follow a list of
+.B \-\-replace
+devices. The devices listed after
+.B \-\-with
+will be preferentially used to replace the devices listed after
+.BR \-\-replace .
+These devices must already be spare devices in the array.
+
+.TP
+.BR \-\-write\-mostly
+Subsequent devices that are added or re\-added will have the 'write-mostly'
+flag set. This is only valid for RAID1 and means that the 'md' driver
+will avoid reading from these devices if possible.
+.TP
+.BR \-\-readwrite
+Subsequent devices that are added or re\-added will have the 'write-mostly'
+flag cleared.
+
+.P
+Each of these options requires that the first device listed is the array
+to be acted upon, and the remainder are component devices to be added,
+removed, marked as faulty, etc. Several different operations can be
+specified for different devices, e.g.
+.in +5
+mdadm /dev/md0 \-\-add /dev/sda1 \-\-fail /dev/sdb1 \-\-remove /dev/sdb1
+.in -5
+Each operation applies to all devices listed until the next
+operation.
+
+If an array is using a write-intent bitmap, then devices which have
+been removed can be re\-added in a way that avoids a full
+reconstruction but instead just updates the blocks that have changed
+since the device was removed. For arrays with persistent metadata
+(superblocks) this is done automatically. For arrays created with
+.B \-\-build
+mdadm needs to be told that this device was removed recently with
+.BR \-\-re\-add .
+
+Devices can only be removed from an array if they are not in active
+use, i.e. they must be spares or failed devices. To remove an active
+device, it must first be marked as
+.B faulty.
+
+.SH For Misc mode:
+
+.TP
+.BR \-Q ", " \-\-query
+Examine a device to see
+(1) if it is an md device and (2) if it is a component of an md
+array.
+Information about what is discovered is presented.
+
+.TP
+.BR \-D ", " \-\-detail
+Print details of one or more md devices.
+
+.TP
+.BR \-\-detail\-platform
+Print details of the platform's RAID capabilities (firmware / hardware
+topology) for a given metadata format. If used without argument, mdadm
+will scan all controllers looking for their capabilities. Otherwise, mdadm
+will only look at the controller specified by the argument in form of an
+absolute filepath or a link, e.g.
+.IR /sys/devices/pci0000:00/0000:00:1f.2 .
+
+.TP
+.BR \-Y ", " \-\-export
+When used with
+.BR \-\-detail ,
+.BR \-\-detail-platform ,
+.BR \-\-examine ,
+or
+.B \-\-incremental
+output will be formatted as
+.B key=value
+pairs for easy import into the environment.
+
+With
+.B \-\-incremental
+the value
+.B MD_STARTED
+indicates whether an array was started
+.RB ( yes )
+or not, which may include a reason
+.RB ( unsafe ", " nothing ", " no ).
+Also the value
+.B MD_FOREIGN
+indicates if the array is expected on this host
+.RB ( no ),
+or seems to be from elsewhere
+.RB ( yes ).
+
+.TP
+.BR \-E ", " \-\-examine
+Print contents of the metadata stored on the named device(s).
+Note the contrast between
+.B \-\-examine
+and
+.BR \-\-detail .
+.B \-\-examine
+applies to devices which are components of an array, while
+.B \-\-detail
+applies to a whole array which is currently active.
+.TP
+.B \-\-sparc2.2
+If an array was created on a SPARC machine with a 2.2 Linux kernel
+patched with RAID support, the superblock will have been created
+incorrectly, or at least incompatibly with 2.4 and later kernels.
+Using the
+.B \-\-sparc2.2
+flag with
+.B \-\-examine
+will fix the superblock before displaying it. If this appears to do
+the right thing, then the array can be successfully assembled using
+.BR "\-\-assemble \-\-update=sparc2.2" .
+
+.TP
+.BR \-X ", " \-\-examine\-bitmap
+Report information about a bitmap file.
+The argument is either an external bitmap file or an array component
+in case of an internal bitmap. Note that running this on an array
+device (e.g.
+.BR /dev/md0 )
+does not report the bitmap for that array.
+
+.TP
+.B \-\-examine\-badblocks
+List the bad-blocks recorded for the device, if a bad-blocks list has
+been configured. Currently only
+.B 1.x
+metadata supports bad-blocks lists.
+
+.TP
+.BI \-\-dump= directory
+.TP
+.BI \-\-restore= directory
+Save metadata from listed devices, or restore metadata to listed devices.
+
+.TP
+.BR \-R ", " \-\-run
+start a partially assembled array. If
+.B \-\-assemble
+did not find enough devices to fully start the array, it might leave
+it partially assembled. If you wish, you can then use
+.B \-\-run
+to start the array in degraded mode.
+
+.TP
+.BR \-S ", " \-\-stop
+deactivate array, releasing all resources.
+
+.TP
+.BR \-o ", " \-\-readonly
+mark array as readonly.
+
+.TP
+.BR \-w ", " \-\-readwrite
+mark array as readwrite.
+
+.TP
+.B \-\-zero\-superblock
+If the device contains a valid md superblock, the block is
+overwritten with zeros. With
+.B \-\-force
+the block where the superblock would be is overwritten even if it
+doesn't appear to be valid.
+
+.TP
+.B \-\-kill\-subarray=
+If the device is a container and the argument to \-\-kill\-subarray
+specifies an inactive subarray in the container, then the subarray is
+deleted. Deleting all subarrays will leave an 'empty-container' or
+spare superblock on the drives. See
+.B \-\-zero\-superblock
+for completely
+removing a superblock. Note that some formats depend on the subarray
+index for generating a UUID, this command will fail if it would change
+the UUID of an active subarray.
+
+.TP
+.B \-\-update\-subarray=
+If the device is a container and the argument to \-\-update\-subarray
+specifies a subarray in the container, then attempt to update the given
+superblock field in the subarray. See below in
+.B MISC MODE
+for details.
+
+.TP
+.BR \-t ", " \-\-test
+When used with
+.BR \-\-detail ,
+the exit status of
+.I mdadm
+is set to reflect the status of the device. See below in
+.B MISC MODE
+for details.
+
+.TP
+.BR \-W ", " \-\-wait
+For each md device given, wait for any resync, recovery, or reshape
+activity to finish before returning.
+.I mdadm
+will return with success if it actually waited for every device
+listed, otherwise it will return failure.
+
+.TP
+.BR \-\-wait\-clean
+For each md device given, or each device in /proc/mdstat if
+.B \-\-scan
+is given, arrange for the array to be marked clean as soon as possible.
+.I mdadm
+will return with success if the array uses external metadata and we
+successfully waited. For native arrays this returns immediately as the
+kernel handles dirty-clean transitions at shutdown. No action is taken
+if safe-mode handling is disabled.
+
+.TP
+.B \-\-action=
+Set the "sync_action" for all md devices given to one of
+.BR idle ,
+.BR frozen ,
+.BR check ,
+.BR repair .
+Setting to
+.B idle
+will abort any currently running action though some actions will
+automatically restart.
+Setting to
+.B frozen
+will abort any current action and ensure no other action starts
+automatically.
+
+Details of
+.B check
+and
+.B repair
+can be found in
+.IR md (4)
+under
+.BR "SCRUBBING AND MISMATCHES" .
+
+.SH For Incremental Assembly mode:
+.TP
+.BR \-\-rebuild\-map ", " \-r
+Rebuild the map file
+.RB ( {MAP_PATH} )
+that
+.I mdadm
+uses to help track which arrays are currently being assembled.
+
+.TP
+.BR \-\-run ", " \-R
+Run any array assembled as soon as a minimal number of devices are
+available, rather than waiting until all expected devices are present.
+
+.TP
+.BR \-\-scan ", " \-s
+Only meaningful with
+.B \-R
+this will scan the
+.B map
+file for arrays that are being incrementally assembled and will try to
+start any that are not already started. If any such array is listed
+in
+.B mdadm.conf
+as requiring an external bitmap, that bitmap will be attached first.
+
+.TP
+.BR \-\-fail ", " \-f
+This allows the hot-plug system to remove devices that have fully disappeared
+from the kernel. It will first fail and then remove the device from any
+array it belongs to.
+The device name given should be a kernel device name such as "sda",
+not a name in
+.IR /dev .
+
+.TP
+.BR \-\-path=
+Only used with \-\-fail. The 'path' given will be recorded so that if
+a new device appears at the same location it can be automatically
+added to the same array. This allows the failed device to be
+automatically replaced by a new device without metadata if it appears
+at specified path. This option is normally only set by a
+.I udev
+script.
+
+.SH For Monitor mode:
+.TP
+.BR \-m ", " \-\-mail
+Give a mail address to send alerts to.
+
+.TP
+.BR \-p ", " \-\-program ", " \-\-alert
+Give a program to be run whenever an event is detected.
+
+.TP
+.BR \-y ", " \-\-syslog
+Cause all events to be reported through 'syslog'. The messages have
+facility of 'daemon' and varying priorities.
+
+.TP
+.BR \-d ", " \-\-delay
+Give a delay in seconds.
+.I mdadm
+polls the md arrays and then waits this many seconds before polling
+again. The default is 60 seconds. Since 2.6.16, there is no need to
+reduce this as the kernel alerts
+.I mdadm
+immediately when there is any change.
+
+.TP
+.BR \-r ", " \-\-increment
+Give a percentage increment.
+.I mdadm
+will generate RebuildNN events with the given percentage increment.
+
+.TP
+.BR \-f ", " \-\-daemonise
+Tell
+.I mdadm
+to run as a background daemon if it decides to monitor anything. This
+causes it to fork and run in the child, and to disconnect from the
+terminal. The process id of the child is written to stdout.
+This is useful with
+.B \-\-scan
+which will only continue monitoring if a mail address or alert program
+is found in the config file.
+
+.TP
+.BR \-i ", " \-\-pid\-file
+When
+.I mdadm
+is running in daemon mode, write the pid of the daemon process to
+the specified file, instead of printing it on standard output.
+
+.TP
+.BR \-1 ", " \-\-oneshot
+Check arrays only once. This will generate
+.B NewArray
+events and more significantly
+.B DegradedArray
+and
+.B SparesMissing
+events. Running
+.in +5
+.B " mdadm \-\-monitor \-\-scan \-1"
+.in -5
+from a cron script will ensure regular notification of any degraded arrays.
+
+.TP
+.BR \-t ", " \-\-test
+Generate a
+.B TestMessage
+alert for every array found at startup. This alert gets mailed and
+passed to the alert program. This can be used for testing that alert
+messages do get through successfully.
+
+.TP
+.BR \-\-no\-sharing
+This inhibits the functionality for moving spares between arrays.
+Only one monitoring process started with
+.B \-\-scan
+but without this flag is allowed, otherwise the two could interfere
+with each other.
+
+.SH ASSEMBLE MODE
+
+.HP 12
+Usage:
+.B mdadm \-\-assemble
+.I md-device options-and-component-devices...
+.HP 12
+Usage:
+.B mdadm \-\-assemble \-\-scan
+.I md-devices-and-options...
+.HP 12
+Usage:
+.B mdadm \-\-assemble \-\-scan
+.I options...
+
+.PP
+This usage assembles one or more RAID arrays from pre-existing components.
+For each array, mdadm needs to know the md device, the identity of the
+array, and a number of component-devices. These can be found in a number of ways.
+
+In the first usage example (without the
+.BR \-\-scan )
+the first device given is the md device.
+In the second usage example, all devices listed are treated as md
+devices and assembly is attempted.
+In the third (where no devices are listed) all md devices that are
+listed in the configuration file are assembled. If no arrays are
+described by the configuration file, then any arrays that
+can be found on unused devices will be assembled.
+
+If precisely one device is listed, but
+.B \-\-scan
+is not given, then
+.I mdadm
+acts as though
+.B \-\-scan
+was given and identity information is extracted from the configuration file.
+
+The identity can be given with the
+.B \-\-uuid
+option, the
+.B \-\-name
+option, or the
+.B \-\-super\-minor
+option, will be taken from the md-device record in the config file, or
+will be taken from the super block of the first component-device
+listed on the command line.
+
+Devices can be given on the
+.B \-\-assemble
+command line or in the config file. Only devices which have an md
+superblock which contains the right identity will be considered for
+any array.
+
+The config file is only used if explicitly named with
+.B \-\-config
+or requested with (a possibly implicit)
+.BR \-\-scan .
+In the latter case,
+.B /etc/mdadm/mdadm.conf
+or
+.B /etc/mdadm.conf
+is used.
+
+If
+.B \-\-scan
+is not given, then the config file will only be used to find the
+identity of md arrays.
+
+Normally the array will be started after it is assembled. However if
+.B \-\-scan
+is not given and not all expected drives were listed, then the array
+is not started (to guard against usage errors). To insist that the
+array be started in this case (as may work for RAID1, 4, 5, 6, or 10),
+give the
+.B \-\-run
+flag.
+
+If
+.I udev
+is active,
+.I mdadm
+does not create any entries in
+.B /dev
+but leaves that to
+.IR udev .
+It does record information in
+.B {MAP_PATH}
+which will allow
+.I udev
+to choose the correct name.
+
+If
+.I mdadm
+detects that udev is not configured, it will create the devices in
+.B /dev
+itself.
+
+In Linux kernels prior to version 2.6.28 there were two distinctly
+different types of md devices that could be created: one that could be
+partitioned using standard partitioning tools and one that could not.
+Since 2.6.28 that distinction is no longer relevant as both types of
+devices can be partitioned.
+.I mdadm
+will normally create the type that originally could not be partitioned
+as it has a well defined major number (9).
+
+Prior to 2.6.28, it is important that mdadm chooses the correct type
+of array device to use. This can be controlled with the
+.B \-\-auto
+option. In particular, a value of "mdp" or "part" or "p" tells mdadm
+to use a partitionable device rather than the default.
+
+In the no-udev case, the value given to
+.B \-\-auto
+can be suffixed by a number. This tells
+.I mdadm
+to create that number of partition devices rather than the default of 4.
+
+The value given to
+.B \-\-auto
+can also be given in the configuration file as a word starting
+.B auto=
+on the ARRAY line for the relevant array.
+
+.SS Auto Assembly
+When
+.B \-\-assemble
+is used with
+.B \-\-scan
+and no devices are listed,
+.I mdadm
+will first attempt to assemble all the arrays listed in the config
+file.
+
+If no arrays are listed in the config (other than those marked
+.BR <ignore> )
+it will look through the available devices for possible arrays and
+will try to assemble anything that it finds. Arrays which are tagged
+as belonging to the given homehost will be assembled and started
+normally. Arrays which do not obviously belong to this host are given
+names that are expected not to conflict with anything local, and are
+started "read-auto" so that nothing is written to any device until the
+array is written to. i.e. automatic resync etc is delayed.
+
+If
+.I mdadm
+finds a consistent set of devices that look like they should comprise
+an array, and if the superblock is tagged as belonging to the given
+home host, it will automatically choose a device name and try to
+assemble the array. If the array uses version-0.90 metadata, then the
+.B minor
+number as recorded in the superblock is used to create a name in
+.B /dev/md/
+so for example
+.BR /dev/md/3 .
+If the array uses version-1 metadata, then the
+.B name
+from the superblock is used to similarly create a name in
+.B /dev/md/
+(the name will have any 'host' prefix stripped first).
+
+This behaviour can be modified by the
+.I AUTO
+line in the
+.I mdadm.conf
+configuration file. This line can indicate that specific metadata
+type should, or should not, be automatically assembled. If an array
+is found which is not listed in
+.I mdadm.conf
+and has a metadata format that is denied by the
+.I AUTO
+line, then it will not be assembled.
+The
+.I AUTO
+line can also request that all arrays identified as being for this
+homehost should be assembled regardless of their metadata type.
+See
+.IR mdadm.conf (5)
+for further details.
+
+Note: Auto assembly cannot be used for assembling and activating some
+arrays which are undergoing reshape. In particular as the
+.B backup\-file
+cannot be given, any reshape which requires a backup-file to continue
+cannot be started by auto assembly. An array which is growing to more
+devices and has passed the critical section can be assembled using
+auto-assembly.
+
+.SH BUILD MODE
+
+.HP 12
+Usage:
+.B mdadm \-\-build
+.I md-device
+.BI \-\-chunk= X
+.BI \-\-level= Y
+.BI \-\-raid\-devices= Z
+.I devices
+
+.PP
+This usage is similar to
+.BR \-\-create .
+The difference is that it creates an array without a superblock. With
+these arrays there is no difference between initially creating the array and
+subsequently assembling the array, except that hopefully there is useful
+data there in the second case.
+
+The level may be raid0, linear, raid1, raid10, multipath, or faulty, or
+one of their synonyms. All devices must be listed and the array will
+be started once complete. It will often be appropriate to use
+.B \-\-assume\-clean
+with levels raid1 or raid10.
+
+.SH CREATE MODE
+
+.HP 12
+Usage:
+.B mdadm \-\-create
+.I md-device
+.BI \-\-chunk= X
+.BI \-\-level= Y
+.br
+.BI \-\-raid\-devices= Z
+.I devices
+
+.PP
+This usage will initialise a new md array, associate some devices with
+it, and activate the array.
+
+The named device will normally not exist when
+.I "mdadm \-\-create"
+is run, but will be created by
+.I udev
+once the array becomes active.
+
+As devices are added, they are checked to see if they contain RAID
+superblocks or filesystems. They are also checked to see if the variance in
+device size exceeds 1%.
+
+If any discrepancy is found, the array will not automatically be run, though
+the presence of a
+.B \-\-run
+can override this caution.
+
+To create a "degraded" array in which some devices are missing, simply
+give the word "\fBmissing\fP"
+in place of a device name. This will cause
+.I mdadm
+to leave the corresponding slot in the array empty.
+For a RAID4 or RAID5 array at most one slot can be
+"\fBmissing\fP"; for a RAID6 array at most two slots.
+For a RAID1 array, only one real device needs to be given. All of the
+others can be
+"\fBmissing\fP".
+
+When creating a RAID5 array,
+.I mdadm
+will automatically create a degraded array with an extra spare drive.
+This is because building the spare into a degraded array is in general
+faster than resyncing the parity on a non-degraded, but not clean,
+array. This feature can be overridden with the
+.B \-\-force
+option.
+
+When creating an array with version-1 metadata a name for the array is
+required.
+If this is not given with the
+.B \-\-name
+option,
+.I mdadm
+will choose a name based on the last component of the name of the
+device being created. So if
+.B /dev/md3
+is being created, then the name
+.B 3
+will be chosen.
+If
+.B /dev/md/home
+is being created, then the name
+.B home
+will be used.
+
+When creating a partition based array, using
+.I mdadm
+with version-1.x metadata, the partition type should be set to
+.B 0xDA
+(non fs-data). This type selection allows for greater precision since
+using any other [RAID auto-detect (0xFD) or a GNU/Linux partition (0x83)],
+might create problems in the event of array recovery through a live cdrom.
+
+A new array will normally get a randomly assigned 128bit UUID which is
+very likely to be unique. If you have a specific need, you can choose
+a UUID for the array by giving the
+.B \-\-uuid=
+option. Be warned that creating two arrays with the same UUID is a
+recipe for disaster. Also, using
+.B \-\-uuid=
+when creating a v0.90 array will silently override any
+.B \-\-homehost=
+setting.
+.\"If the
+.\".B \-\-size
+.\"option is given, it is not necessary to list any component-devices in this command.
+.\"They can be added later, before a
+.\".B \-\-run.
+.\"If no
+.\".B \-\-size
+.\"is given, the apparent size of the smallest drive given is used.
+
+If the array type supports a write-intent bitmap, and if the devices
+in the array exceed 100G in size, an internal write-intent bitmap
+will automatically be added unless some other option is explicitly
+requested with the
+.B \-\-bitmap
+option. In any case space for a bitmap will be reserved so that one
+can be added later with
+.BR "\-\-grow \-\-bitmap=internal" .
+
+If the metadata type supports it (currently only 1.x metadata), space
+will be allocated to store a bad block list. This allows a modest
+number of bad blocks to be recorded, allowing the drive to remain in
+service while only partially functional.
+
+When creating an array within a
+.B CONTAINER
+.I mdadm
+can be given either the list of devices to use, or simply the name of
+the container. The former case gives control over which devices in
+the container will be used for the array. The latter case allows
+.I mdadm
+to automatically choose which devices to use based on how much spare
+space is available.
+
+The General Management options that are valid with
+.B \-\-create
+are:
+.TP
+.B \-\-run
+insist on running the array even if some devices look like they might
+be in use.
+
+.TP
+.B \-\-readonly
+start the array readonly \(em not supported yet.
+
+.SH MANAGE MODE
+.HP 12
+Usage:
+.B mdadm
+.I device
+.I options... devices...
+.PP
+
+This usage will allow individual devices in an array to be failed,
+removed or added. It is possible to perform multiple operations with
+one command. For example:
+.br
+.B " mdadm /dev/md0 \-f /dev/hda1 \-r /dev/hda1 \-a /dev/hda1"
+.br
+will firstly mark
+.B /dev/hda1
+as faulty in
+.B /dev/md0
+and will then remove it from the array and finally add it back
+in as a spare. However only one md array can be affected by a single
+command.
+
+When a device is added to an active array, mdadm checks to see if it
+has metadata on it which suggests that it was recently a member of the
+array. If it does, it tries to "re\-add" the device. If there have
+been no changes since the device was removed, or if the array has a
+write-intent bitmap which has recorded whatever changes there were,
+then the device will immediately become a full member of the array and
+those differences recorded in the bitmap will be resolved.
+
+.SH MISC MODE
+.HP 12
+Usage:
+.B mdadm
+.I options ...
+.I devices ...
+.PP
+
+MISC mode includes a number of distinct operations that
+operate on distinct devices. The operations are:
+.TP
+.B \-\-query
+The device is examined to see if it is
+(1) an active md array, or
+(2) a component of an md array.
+The information discovered is reported.
+
+.TP
+.B \-\-detail
+The device should be an active md device.
+.B mdadm
+will display a detailed description of the array.
+.B \-\-brief
+or
+.B \-\-scan
+will cause the output to be less detailed and the format to be
+suitable for inclusion in
+.BR mdadm.conf .
+The exit status of
+.I mdadm
+will normally be 0 unless
+.I mdadm
+failed to get useful information about the device(s); however, if the
+.B \-\-test
+option is given, then the exit status will be:
+.RS
+.TP
+0
+The array is functioning normally.
+.TP
+1
+The array has at least one failed device.
+.TP
+2
+The array has multiple failed devices such that it is unusable.
+.TP
+4
+There was an error while trying to get information about the device.
+.RE
+
+.TP
+.B \-\-detail\-platform
+Print detail of the platform's RAID capabilities (firmware / hardware
+topology). If the metadata is specified with
+.B \-e
+or
+.B \-\-metadata=
+then the return status will be:
+.RS
+.TP
+0
+metadata successfully enumerated its platform components on this system
+.TP
+1
+metadata is platform independent
+.TP
+2
+metadata failed to find its platform components on this system
+.RE
+
+.TP
+.B \-\-update\-subarray=
+If the device is a container and the argument to \-\-update\-subarray
+specifies a subarray in the container, then attempt to update the given
+superblock field in the subarray. Similar to updating an array in
+"assemble" mode, the field to update is selected by
+.B \-U
+or
+.B \-\-update=
+option. Currently only
+.B name
+is supported.
+
+The
+.B name
+option updates the subarray name in the metadata, it may not affect the
+device node name or the device node symlink until the subarray is
+re\-assembled. If updating
+.B name
+would change the UUID of an active subarray this operation is blocked,
+and the command will end in an error.
+
+.TP
+.B \-\-examine
+The device should be a component of an md array.
+.I mdadm
+will read the md superblock of the device and display the contents.
+If
+.B \-\-brief
+or
+.B \-\-scan
+is given, then multiple devices that are components of the one array
+are grouped together and reported in a single entry suitable
+for inclusion in
+.BR mdadm.conf .
+
+Having
+.B \-\-scan
+without listing any devices will cause all devices listed in the
+config file to be examined.
+
+.TP
+.BI \-\-dump= directory
+If the device contains RAID metadata, a file will be created in the
+.I directory
+and the metadata will be written to it. The file will be the same
+size as the device and have the metadata written in the file at the
+same location that it exists in the device. However the file will be "sparse" so
+that only those blocks containing metadata will be allocated. The
+total space used will be small.
+
+The file name used in the
+.I directory
+will be the base name of the device. Further if any links appear in
+.I /dev/disk/by-id
+which point to the device, then hard links to the file will be created
+in
+.I directory
+based on these
+.I by-id
+names.
+
+Multiple devices can be listed and their metadata will all be stored
+in the one directory.
+
+.TP
+.BI \-\-restore= directory
+This is the reverse of
+.BR \-\-dump .
+.I mdadm
+will locate a file in the directory that has a name appropriate for
+the given device and will restore metadata from it. Names that match
+.I /dev/disk/by-id
+names are preferred, however if two of those refer to different files,
+.I mdadm
+will not choose between them but will abort the operation.
+
+If a file name is given instead of a
+.I directory
+then
+.I mdadm
+will restore from that file to a single device, always provided the
+size of the file matches that of the device, and the file contains
+valid metadata.
+.TP
+.B \-\-stop
+The devices should be active md arrays which will be deactivated, as
+long as they are not currently in use.
+
+.TP
+.B \-\-run
+This will fully activate a partially assembled md array.
+
+.TP
+.B \-\-readonly
+This will mark an active array as read-only, providing that it is
+not currently being used.
+
+.TP
+.B \-\-readwrite
+This will change a
+.B readonly
+array back to being read/write.
+
+.TP
+.B \-\-scan
+For all operations except
+.BR \-\-examine ,
+.B \-\-scan
+will cause the operation to be applied to all arrays listed in
+.BR /proc/mdstat .
+For
+.BR \-\-examine ,
+.B \-\-scan
+causes all devices listed in the config file to be examined.
+
+.TP
+.BR \-b ", " \-\-brief
+Be less verbose. This is used with
+.B \-\-detail
+and
+.BR \-\-examine .
+Using
+.B \-\-brief
+with
+.B \-\-verbose
+gives an intermediate level of verbosity.
+
+.SH MONITOR MODE
+
+.HP 12
+Usage:
+.B mdadm \-\-monitor
+.I options... devices...
+
+.PP
+This usage causes
+.I mdadm
+to periodically poll a number of md arrays and to report on any events
+noticed.
+.I mdadm
+will never exit once it decides that there are arrays to be checked,
+so it should normally be run in the background.
+
+As well as reporting events,
+.I mdadm
+may move a spare drive from one array to another if they are in the
+same
+.B spare-group
+or
+.B domain
+and if the destination array has a failed drive but no spares.
+
+If any devices are listed on the command line,
+.I mdadm
+will only monitor those devices. Otherwise all arrays listed in the
+configuration file will be monitored. Further, if
+.B \-\-scan
+is given, then any other md devices that appear in
+.B /proc/mdstat
+will also be monitored.
+
+The result of monitoring the arrays is the generation of events.
+These events are passed to a separate program (if specified) and may
+be mailed to a given E-mail address.
+
+When passing events to a program, the program is run once for each event,
+and is given 2 or 3 command-line arguments: the first is the
+name of the event (see below), the second is the name of the
+md device which is affected, and the third is the name of a related
+device if relevant (such as a component device that has failed).
+
+If
+.B \-\-scan
+is given, then a program or an E-mail address must be specified on the
+command line or in the config file. If neither are available, then
+.I mdadm
+will not monitor anything.
+Without
+.BR \-\-scan ,
+.I mdadm
+will continue monitoring as long as something was found to monitor. If
+no program or email is given, then each event is reported to
+.BR stdout .
+
+The different events are:
+
+.RS 4
+.TP
+.B DeviceDisappeared
+An md array which previously was configured appears to no longer be
+configured. (syslog priority: Critical)
+
+If
+.I mdadm
+was told to monitor an array which is RAID0 or Linear, then it will
+report
+.B DeviceDisappeared
+with the extra information
+.BR Wrong-Level .
+This is because RAID0 and Linear do not support the device-failed,
+hot-spare and resync operations which are monitored.
+
+.TP
+.B RebuildStarted
+An md array started reconstruction (e.g. recovery, resync, reshape,
+check, repair). (syslog priority: Warning)
+
+.TP
+.BI Rebuild NN
+Where
+.I NN
+is a two-digit number (e.g. 05, 48). This indicates that rebuild
+has passed that many percent of the total. The events are generated
+with fixed increment since 0. Increment size may be specified with
+a commandline option (default is 20). (syslog priority: Warning)
+
+.TP
+.B RebuildFinished
+An md array that was rebuilding, isn't any more, either because it
+finished normally or was aborted. (syslog priority: Warning)
+
+.TP
+.B Fail
+An active component device of an array has been marked as
+faulty. (syslog priority: Critical)
+
+.TP
+.B FailSpare
+A spare component device which was being rebuilt to replace a faulty
+device has failed. (syslog priority: Critical)
+
+.TP
+.B SpareActive
+A spare component device which was being rebuilt to replace a faulty
+device has been successfully rebuilt and has been made active.
+(syslog priority: Info)
+
+.TP
+.B NewArray
+A new md array has been detected in the
+.B /proc/mdstat
+file. (syslog priority: Info)
+
+.TP
+.B DegradedArray
+A newly noticed array appears to be degraded. This message is not
+generated when
+.I mdadm
+notices a drive failure which causes degradation, but only when
+.I mdadm
+notices that an array is degraded when it first sees the array.
+(syslog priority: Critical)
+
+.TP
+.B MoveSpare
+A spare drive has been moved from one array in a
+.B spare-group
+or
+.B domain
+to another to allow a failed drive to be replaced.
+(syslog priority: Info)
+
+.TP
+.B SparesMissing
+If
+.I mdadm
+has been told, via the config file, that an array should have a certain
+number of spare devices, and
+.I mdadm
+detects that it has fewer than this number when it first sees the
+array, it will report a
+.B SparesMissing
+message.
+(syslog priority: Warning)
+
+.TP
+.B TestMessage
+An array was found at startup, and the
+.B \-\-test
+flag was given.
+(syslog priority: Info)
+.RE
+
+Only
+.BR Fail ,
+.BR FailSpare ,
+.BR DegradedArray ,
+.B SparesMissing
+and
+.B TestMessage
+cause Email to be sent. All events cause the program to be run.
+The program is run with two or three arguments: the event
+name, the array device and possibly a second device.
+
+Each event has an associated array device (e.g.
+.BR /dev/md1 )
+and possibly a second device. For
+.BR Fail ,
+.BR FailSpare ,
+and
+.B SpareActive
+the second device is the relevant component device.
+For
+.B MoveSpare
+the second device is the array that the spare was moved from.
+
+For
+.I mdadm
+to move spares from one array to another, the different arrays need to
+be labeled with the same
+.B spare-group
+or the spares must be allowed to migrate through matching POLICY domains
+in the configuration file. The
+.B spare-group
+name can be any string; it is only necessary that different spare
+groups use different names.
+
+When
+.I mdadm
+detects that an array in a spare group has fewer active
+devices than necessary for the complete array, and has no spare
+devices, it will look for another array in the same spare group that
+has a full complement of working drives and a spare. It will then
+attempt to remove the spare from the second array and add it to the
+first.
+If the removal succeeds but the adding fails, then it is added back to
+the original array.
+
+If the spare group for a degraded array is not defined,
+.I mdadm
+will look at the rules of spare migration specified by POLICY lines in
+.B mdadm.conf
+and then follow similar steps as above if a matching spare is found.
+
+.SH GROW MODE
+The GROW mode is used for changing the size or shape of an active
+array.
+For this to work, the kernel must support the necessary change.
+Various types of growth are being added during 2.6 development.
+
+Currently the supported changes include
+.IP \(bu 4
+change the "size" attribute for RAID1, RAID4, RAID5 and RAID6.
+.IP \(bu 4
+increase or decrease the "raid\-devices" attribute of RAID0, RAID1, RAID4,
+RAID5, and RAID6.
+.IP \(bu 4
+change the chunk-size and layout of RAID0, RAID4, RAID5, RAID6 and RAID10.
+.IP \(bu 4
+convert between RAID1 and RAID5, between RAID5 and RAID6, between
+RAID0, RAID4, and RAID5, and between RAID0 and RAID10 (in the near-2 mode).
+.IP \(bu 4
+add a write-intent bitmap to any array which supports these bitmaps, or
+remove a write-intent bitmap from such an array.
+.PP
+
+Using GROW on containers is currently supported only for Intel's IMSM
+container format. The number of devices in a container can be
+increased - which affects all arrays in the container - or an array
+in a container can be converted between levels where those levels are
+supported by the container, and the conversion is one of those listed
+above. Resizing arrays in an IMSM container with
+.B "--grow --size"
+is not yet supported.
+
+Grow functionality (e.g. expand a number of raid devices) for Intel's
+IMSM container format has an experimental status. It is guarded by the
+.B MDADM_EXPERIMENTAL
+environment variable which must be set to '1' for a GROW command to
+succeed.
+This is for the following reasons:
+
+.IP 1.
+Intel's native IMSM check-pointing is not fully tested yet.
+This can cause IMSM incompatibility during the grow process: an array
+which is growing cannot roam between Microsoft Windows(R) and Linux
+systems.
+
+.IP 2.
+Interrupting a grow operation is not recommended, because it
+has not been fully tested for Intel's IMSM container format yet.
+
+.PP
+Note: Intel's native checkpointing doesn't use the
+.B \-\-backup\-file
+option and it is transparent to the assembly feature.
+
+.SS SIZE CHANGES
+Normally when an array is built the "size" is taken from the smallest
+of the drives. If all the small drives in an array are, one at a
+time, removed and replaced with larger drives, then you could have an
+array of large drives with only a small amount used. In this
+situation, changing the "size" with "GROW" mode will allow the extra
+space to start being used. If the size is increased in this way, a
+"resync" process will start to make sure the new parts of the array
+are synchronised.
+
+Note that when an array changes size, any filesystem that may be
+stored in the array will not automatically grow or shrink to use or
+vacate the space. The
+filesystem will need to be explicitly told to use the extra space
+after growing, or to reduce its size
+.B prior
+to shrinking the array.
+
+Also the size of an array cannot be changed while it has an active
+bitmap. If an array has a bitmap, it must be removed before the size
+can be changed. Once the change is complete a new bitmap can be created.
+
+.SS RAID\-DEVICES CHANGES
+
+A RAID1 array can work with any number of devices from 1 upwards
+(though 1 is not very useful). There may be times when you want to
+increase or decrease the number of active devices. Note that this is
+different to hot-add or hot-remove which changes the number of
+inactive devices.
+
+When reducing the number of devices in a RAID1 array, the slots which
+are to be removed from the array must already be vacant. That is, the
+devices which were in those slots must be failed and removed.
+
+When the number of devices is increased, any hot spares that are
+present will be activated immediately.
+
+Changing the number of active devices in a RAID5 or RAID6 is much more
+effort. Every block in the array will need to be read and written
+back to a new location. From 2.6.17, the Linux Kernel is able to
+increase the number of devices in a RAID5 safely, including restarting
+an interrupted "reshape". From 2.6.31, the Linux Kernel is able to
+increase or decrease the number of devices in a RAID5 or RAID6.
+
+From 2.6.35, the Linux Kernel is able to convert a RAID0 into a RAID4
+or RAID5.
+.I mdadm
+uses this functionality and the ability to add
+devices to a RAID4 to allow devices to be added to a RAID0. When
+requested to do this,
+.I mdadm
+will convert the RAID0 to a RAID4, add the necessary disks and make
+the reshape happen, and then convert the RAID4 back to RAID0.
+
+When decreasing the number of devices, the size of the array will also
+decrease. If there was data in the array, it could get destroyed and
+this is not reversible, so you should firstly shrink the filesystem on
+the array to fit within the new size. To help prevent accidents,
+.I mdadm
+requires that the size of the array be decreased first with
+.BR "mdadm --grow --array-size" .
+This is a reversible change which simply makes the end of the array
+inaccessible. The integrity of any data can then be checked before
+the non-reversible reduction in the number of devices is requested.
+
+When relocating the first few stripes on a RAID5 or RAID6, it is not
+possible to keep the data on disk completely consistent and
+crash-proof. To provide the required safety, mdadm disables writes to
+the array while this "critical section" is reshaped, and takes a
+backup of the data that is in that section. For grows, this backup may be
+stored in any spare devices that the array has, however it can also be
+stored in a separate file specified with the
+.B \-\-backup\-file
+option, and is required to be specified for shrinks, RAID level
+changes and layout changes. If this option is used, and the system
+does crash during the critical period, the same file must be passed to
+.B \-\-assemble
+to restore the backup and reassemble the array. When shrinking rather
+than growing the array, the reshape is done from the end towards the
+beginning, so the "critical section" is at the end of the reshape.
+
+.SS LEVEL CHANGES
+
+Changing the RAID level of any array happens instantaneously. However
+in the RAID5 to RAID6 case this requires a non-standard layout of the
+RAID6 data, and in the RAID6 to RAID5 case that non-standard layout is
+required before the change can be accomplished. So while the level
+change is instant, the accompanying layout change can take quite a
+long time. A
+.B \-\-backup\-file
+is required. If the array is not simultaneously being grown or
+shrunk, so that the array size will remain the same - for example,
+reshaping a 3-drive RAID5 into a 4-drive RAID6 - the backup file will
+be used not just for a "critical section" but throughout the reshape
+operation, as described below under CHUNK-SIZE AND LAYOUT CHANGES.
+
+.SS CHUNK-SIZE AND LAYOUT CHANGES
+
+Changing the chunk-size or layout without also changing the number of
+devices at the same time will involve re-writing all blocks in-place.
+To ensure against data loss in the case of a crash, a
+.B \-\-backup\-file
+must be provided for these changes. Small sections of the array will
+be copied to the backup file while they are being rearranged. This
+means that all the data is copied twice, once to the backup and once
+to the new layout on the array, so this type of reshape will go very
+slowly.
+
+If the reshape is interrupted for any reason, this backup file must be
+made available to
+.B "mdadm --assemble"
+so the array can be reassembled. Consequently the file cannot be
+stored on the device being reshaped.
+
+
+.SS BITMAP CHANGES
+
+A write-intent bitmap can be added to, or removed from, an active
+array. Either internal bitmaps, or bitmaps stored in a separate file,
+can be added. Note that if you add a bitmap stored in a file which is
+in a filesystem that is on the RAID array being affected, the system
+will deadlock. The bitmap must be on a separate filesystem.
+
+.SH INCREMENTAL MODE
+
+.HP 12
+Usage:
+.B mdadm \-\-incremental
+.RB [ \-\-run ]
+.RB [ \-\-quiet ]
+.I component-device
+.RI [ optional-aliases-for-device ]
+.HP 12
+Usage:
+.B mdadm \-\-incremental \-\-fail
+.I component-device
+.HP 12
+Usage:
+.B mdadm \-\-incremental \-\-rebuild\-map
+.HP 12
+Usage:
+.B mdadm \-\-incremental \-\-run \-\-scan
+
+.PP
+This mode is designed to be used in conjunction with a device
+discovery system. As devices are found in a system, they can be
+passed to
+.B "mdadm \-\-incremental"
+to be conditionally added to an appropriate array.
+
+Conversely, it can also be used with the
+.B \-\-fail
+flag to do just the opposite and find whatever array a particular device
+is part of and remove the device from that array.
+
+If the device passed is a
+.B CONTAINER
+device created by a previous call to
+.IR mdadm ,
+then rather than trying to add that device to an array, all the arrays
+described by the metadata of the container will be started.
+
+.I mdadm
+performs a number of tests to determine if the device is part of an
+array, and which array it should be part of. If an appropriate array
+is found, or can be created,
+.I mdadm
+adds the device to the array and conditionally starts the array.
+
+Note that
+.I mdadm
+will normally only add devices to an array which were previously working
+(active or spare) parts of that array. The support for automatic
+inclusion of a new drive as a spare in some array requires
+a configuration through POLICY in config file.
+
+The tests that
+.I mdadm
+makes are as follows:
+.IP +
+Is the device permitted by
+.BR mdadm.conf ?
+That is, is it listed in a
+.B DEVICES
+line in that file. If
+.B DEVICES
+is absent then the default is to allow any device. Similarly if
+.B DEVICES
+contains the special word
+.B partitions
+then any device is allowed. Otherwise the device name given to
+.IR mdadm ,
+or one of the aliases given, or an alias found in the filesystem,
+must match one of the names or patterns in a
+.B DEVICES
+line.
+
+This is the only context where the aliases are used. They are
+usually provided by
+.I udev
+rules mentioning
+.BR ${DEVLINKS} .
+
+.IP +
+Does the device have a valid md superblock? If a specific metadata
+version is requested with
+.B \-\-metadata
+or
+.B \-e
+then only that style of metadata is accepted, otherwise
+.I mdadm
+finds any known version of metadata. If no
+.I md
+metadata is found, the device may still be added to an array
+as a spare if POLICY allows.
+
+.ig
+.IP +
+Does the metadata match an expected array?
+The metadata can match in two ways. Either there is an array listed
+in
+.B mdadm.conf
+which identifies the array (either by UUID, by name, by device list,
+or by minor-number), or the array was created with a
+.B homehost
+specified and that
+.B homehost
+matches the one in
+.B mdadm.conf
+or on the command line.
+If
+.I mdadm
+is not able to positively identify the array as belonging to the
+current host, the device will be rejected.
+..
+
+.PP
+.I mdadm
+keeps a list of arrays that it has partially assembled in
+.BR {MAP_PATH} .
+If no array exists which matches
+the metadata on the new device,
+.I mdadm
+must choose a device name and unit number. It does this based on any
+name given in
+.B mdadm.conf
+or any name information stored in the metadata. If this name
+suggests a unit number, that number will be used, otherwise a free
+unit number will be chosen. Normally
+.I mdadm
+will prefer to create a partitionable array, however if the
+.B CREATE
+line in
+.B mdadm.conf
+suggests that a non-partitionable array is preferred, that will be
+honoured.
+
+If the array is not found in the config file and its metadata does not
+identify it as belonging to the "homehost", then
+.I mdadm
+will choose a name for the array which is certain not to conflict with
+any array which does belong to this host. It does this by adding an
+underscore and a small number to the name preferred by the metadata.
+
+Once an appropriate array is found or created and the device is added,
+.I mdadm
+must decide if the array is ready to be started. It will
+normally compare the number of available (non-spare) devices to the
+number of devices that the metadata suggests need to be active. If
+there are at least that many, the array will be started. This means
+that if any devices are missing the array will not be restarted.
+
+As an alternative,
+.B \-\-run
+may be passed to
+.I mdadm
+in which case the array will be run as soon as there are enough
+devices present for the data to be accessible. For a RAID1, that
+means one device will start the array. For a clean RAID5, the array
+will be started as soon as all but one drive is present.
+
+Note that neither of these approaches is really ideal. If it can
+be known that all device discovery has completed, then
+.br
+.B " mdadm \-IRs"
+.br
+can be run which will try to start all arrays that are being
+incrementally assembled. They are started in "read-auto" mode in
+which they are read-only until the first write request. This means
+that no metadata updates are made and no attempt at resync or recovery
+happens. Further devices that are found before the first write can
+still be added safely.
+
+.SH ENVIRONMENT
+This section describes environment variables that affect how mdadm
+operates.
+
+.TP
+.B MDADM_NO_MDMON
+Setting this value to 1 will prevent mdadm from automatically launching
+mdmon. This variable is intended primarily for debugging mdadm/mdmon.
+
+.TP
+.B MDADM_NO_UDEV
+Normally,
+.I mdadm
+does not create any device nodes in /dev, but leaves that task to
+.IR udev .
+If
+.I udev
+appears not to be configured, or if this environment variable is set
+to '1', then
+.I mdadm
+will create any devices that are needed.
+
+.TP
+.B MDADM_NO_SYSTEMCTL
+If
+.I mdadm
+detects that
+.I systemd
+is in use it will normally request
+.I systemd
+to start various background tasks (particularly
+.IR mdmon )
+rather than forking and running them in the background. This can be
+suppressed by setting
+.BR MDADM_NO_SYSTEMCTL=1 .
+
+.TP
+.B IMSM_NO_PLATFORM
+A key value of IMSM metadata is that it allows interoperability with
+boot ROMs on Intel platforms, and with other major operating systems.
+Consequently,
+.I mdadm
+will only allow an IMSM array to be created or modified if it detects
+that it is running on an Intel platform which supports IMSM, and
+supports the particular configuration of IMSM that is being requested
+(some functionality requires newer OROM support).
+
+These checks can be suppressed by setting IMSM_NO_PLATFORM=1 in the
+environment. This can be useful for testing or for disaster
+recovery. You should be aware that interoperability may be
+compromised by setting this value.
+
+.TP
+.B MDADM_GROW_ALLOW_OLD
+If an array is stopped while it is performing a reshape and that
+reshape was making use of a backup file, then when the array is
+re-assembled
+.I mdadm
+will sometimes complain that the backup file is too old. If this
+happens and you are certain it is the right backup file, you can
+over-ride this check by setting
+.B MDADM_GROW_ALLOW_OLD=1
+in the environment.
+
+.TP
+.B MDADM_CONF_AUTO
+Any string given in this variable is added to the start of the
+.B AUTO
+line in the config file, or treated as the whole
+.B AUTO
+line if none is given. It can be used to disable certain metadata
+types when
+.I mdadm
+is called from a boot script. For example
+.br
+.B " export MDADM_CONF_AUTO='-ddf -imsm'"
+.br
+will make sure that
+.I mdadm
+does not automatically assemble any DDF or
+IMSM arrays that are found. This can be useful on systems configured
+to manage such arrays with
+.BR dmraid .
+
+
+.SH EXAMPLES
+
+.B " mdadm \-\-query /dev/name-of-device"
+.br
+This will find out if a given device is a RAID array, or is part of
+one, and will provide brief information about the device.
+
+.B " mdadm \-\-assemble \-\-scan"
+.br
+This will assemble and start all arrays listed in the standard config
+file. This command will typically go in a system startup file.
+
+.B " mdadm \-\-stop \-\-scan"
+.br
+This will shut down all arrays that can be shut down (i.e. are not
+currently in use). This will typically go in a system shutdown script.
+
+.B " mdadm \-\-follow \-\-scan \-\-delay=120"
+.br
+If (and only if) there is an Email address or program given in the
+standard config file, then
+monitor the status of all arrays listed in that file by
+polling them every 2 minutes.
+
+.B " mdadm \-\-create /dev/md0 \-\-level=1 \-\-raid\-devices=2 /dev/hd[ac]1"
+.br
+Create /dev/md0 as a RAID1 array consisting of /dev/hda1 and /dev/hdc1.
+
+.br
+.B " echo 'DEVICE /dev/hd*[0\-9] /dev/sd*[0\-9]' > mdadm.conf"
+.br
+.B " mdadm \-\-detail \-\-scan >> mdadm.conf"
+.br
+This will create a prototype config file that describes currently
+active arrays that are known to be made from partitions of IDE or SCSI drives.
+This file should be reviewed before being used as it may
+contain unwanted detail.
+
+.B " echo 'DEVICE /dev/hd[a\-z] /dev/sd*[a\-z]' > mdadm.conf"
+.br
+.B " mdadm \-\-examine \-\-scan \-\-config=mdadm.conf >> mdadm.conf"
+.br
+This will find arrays which could be assembled from existing IDE and
+SCSI whole drives (not partitions), and store the information in the
+format of a config file.
+This file is very likely to contain unwanted detail, particularly
+the
+.B devices=
+entries. It should be reviewed and edited before being used as an
+actual config file.
+
+.B " mdadm \-\-examine \-\-brief \-\-scan \-\-config=partitions"
+.br
+.B " mdadm \-Ebsc partitions"
+.br
+Create a list of devices by reading
+.BR /proc/partitions ,
+scan these for RAID superblocks, and print out a brief listing of all
+that were found.
+
+.B " mdadm \-Ac partitions \-m 0 /dev/md0"
+.br
+Scan all partitions and devices listed in
+.BR /proc/partitions
+and assemble
+.B /dev/md0
+out of all such devices with a RAID superblock with a minor number of 0.
+
+.B " mdadm \-\-monitor \-\-scan \-\-daemonise > /run/mdadm/mon.pid"
+.br
+If config file contains a mail address or alert program, run mdadm in
+the background in monitor mode monitoring all md devices. Also write
+pid of mdadm daemon to
+.BR /run/mdadm/mon.pid .
+
+.B " mdadm \-Iq /dev/somedevice"
+.br
+Try to incorporate newly discovered device into some array as
+appropriate.
+
+.B " mdadm \-\-incremental \-\-rebuild\-map \-\-run \-\-scan"
+.br
+Rebuild the array map from any current arrays, and then start any that
+can be started.
+
+.B " mdadm /dev/md4 --fail detached --remove detached"
+.br
+Any devices which are components of /dev/md4 will be marked as faulty
+and then removed from the array.
+
+.B " mdadm --grow /dev/md4 --level=6 --backup-file=/root/backup-md4"
+.br
+The array
+.B /dev/md4
+which is currently a RAID5 array will be converted to RAID6. There
+should normally already be a spare drive attached to the array as a
+RAID6 needs one more drive than a matching RAID5.
+
+.B " mdadm --create /dev/md/ddf --metadata=ddf --raid-disks 6 /dev/sd[a-f]"
+.br
+Create a DDF array over 6 devices.
+
+.B " mdadm --create /dev/md/home -n3 -l5 -z 30000000 /dev/md/ddf"
+.br
+Create a RAID5 array over any 3 devices in the given DDF set. Use
+only 30 gigabytes of each device.
+
+.B " mdadm -A /dev/md/ddf1 /dev/sd[a-f]"
+.br
+Assemble a pre-existing DDF array.
+
+.B " mdadm -I /dev/md/ddf1"
+.br
+Assemble all arrays contained in the ddf array, assigning names as
+appropriate.
+
+.B " mdadm \-\-create \-\-help"
+.br
+Provide help about the Create mode.
+
+.B " mdadm \-\-config \-\-help"
+.br
+Provide help about the format of the config file.
+
+.B " mdadm \-\-help"
+.br
+Provide general help.
+
+.SH FILES
+
+.SS /proc/mdstat
+
+If you're using the
+.B /proc
+filesystem,
+.B /proc/mdstat
+lists all active md devices with information about them.
+.I mdadm
+uses this to find arrays when
+.B \-\-scan
+is given in Misc mode, and to monitor array reconstruction
+in Monitor mode.
+
+.SS /etc/mdadm/mdadm.conf (or /etc/mdadm.conf)
+
+The config file lists which devices may be scanned to see if
+they contain an MD superblock, and gives identifying information
+(e.g. UUID) about known MD arrays. See
+.BR mdadm.conf (5)
+for more details.
+
+.SS /etc/mdadm/mdadm.conf.d (or /etc/mdadm.conf.d)
+
+A directory containing configuration files which are read in lexical
+order.
+
+.SS {MAP_PATH}
+When
+.B \-\-incremental
+mode is used, this file gets a list of arrays currently being created.
+
+.SH DEVICE NAMES
+
+.I mdadm
+understands two sorts of names for array devices.
+
+The first is the so-called 'standard' format name, which matches the
+names used by the kernel and which appear in
+.IR /proc/mdstat .
+
+The second sort can be freely chosen, but must reside in
+.IR /dev/md/ .
+When giving a device name to
+.I mdadm
+to create or assemble an array, either a full path name such as
+.I /dev/md0
+or
+.I /dev/md/home
+can be given, or just the suffix of the second sort of name, such as
+.I home
+can be given.
+
+When
+.I mdadm
+chooses device names during auto-assembly or incremental assembly, it
+will sometimes add a small sequence number to the end of the name to
+avoid conflicts between multiple arrays that have the same name. If
+.I mdadm
+can reasonably determine that the array really is meant for this host,
+either by a hostname in the metadata, or by the presence of the array
+in
+.BR mdadm.conf ,
+then it will leave off the suffix if possible.
+Also if the homehost is specified as
+.B <ignore>
+.I mdadm
+will only use a suffix if a different array of the same name already
+exists or is listed in the config file.
+
+The standard names for non-partitioned arrays (the only sort of md
+array available in 2.4 and earlier) are of the form
+.IP
+.RB /dev/md NN
+.PP
+where NN is a number.
+The standard names for partitionable arrays (as available from 2.6
+onwards) are of the form:
+.IP
+.RB /dev/md_d NN
+.PP
+Partition numbers should be indicated by adding "pMM" to these, thus "/dev/md/d1p2".
+.PP
+From kernel version 2.6.28 the "non-partitioned array" can actually
+be partitioned. So the "md_d\fBNN\fP"
+names are no longer needed, and
+partitions such as "/dev/md\fBNN\fPp\fBXX\fP"
+are possible.
+.PP
+From kernel version 2.6.29 standard names can be non-numeric following
+the form:
+.IP
+.RB /dev/md_ XXX
+.PP
+where
+.B XXX
+is any string. These names are supported by
+.I mdadm
+since version 3.3 provided they are enabled in
+.IR mdadm.conf .
+
+.SH NOTE
+.I mdadm
+was previously known as
+.IR mdctl .
+
+.SH SEE ALSO
+For further information on mdadm usage, MD and the various levels of
+RAID, see:
+.IP
+.B http://raid.wiki.kernel.org/
+.PP
+(based upon Jakob \(/Ostergaard's Software\-RAID.HOWTO)
+.PP
+The latest version of
+.I mdadm
+should always be available from
+.IP
+.B http://www.kernel.org/pub/linux/utils/raid/mdadm/
+.PP
+Related man pages:
+.PP
+.IR mdmon (8),
+.IR mdadm.conf (5),
+.IR md (4).
diff --git a/mdadm.c b/mdadm.c
new file mode 100644
index 00000000..be990b8a
--- /dev/null
+++ b/mdadm.c
@@ -0,0 +1,1873 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ *
+ * Additions for bitmap and write-behind RAID options, Copyright (C) 2003-2004,
+ * Paul Clements, SteelEye Technology, Inc.
+ */
+
+#include "mdadm.h"
+#include "md_p.h"
+#include <ctype.h>
+
+static int scan_assemble(struct supertype *ss,
+ struct context *c,
+ struct mddev_ident *ident);
+static int misc_scan(char devmode, struct context *c);
+static int stop_scan(int verbose);
+static int misc_list(struct mddev_dev *devlist,
+ struct mddev_ident *ident,
+ char *dump_directory,
+ struct supertype *ss, struct context *c);
+
+int main(int argc, char *argv[])
+{
+ int mode = 0;
+ int opt;
+ int option_index;
+ int rv;
+ int i;
+
+ unsigned long long array_size = 0;
+ unsigned long long data_offset = INVALID_SECTORS;
+ struct mddev_ident ident;
+ char *configfile = NULL;
+ int devmode = 0;
+ int bitmap_fd = -1;
+ struct mddev_dev *devlist = NULL;
+ struct mddev_dev **devlistend = & devlist;
+ struct mddev_dev *dv;
+ int devs_found = 0;
+ char *symlinks = NULL;
+ int grow_continue = 0;
+ /* autof indicates whether and how to create device node.
+ * bottom 3 bits are style. Rest (when shifted) are number of parts
+ * 0 - unset
+ * 1 - don't create (no)
+ * 2 - if is_standard, then create (yes)
+ * 3 - create as 'md' - reject is_standard mdp (md)
+ * 4 - create as 'mdp' - reject is_standard md (mdp)
+ * 5 - default to md if not is_standard (md in config file)
+ * 6 - default to mdp if not is_standard (part, or mdp in config file)
+ */
+ struct context c = {
+ .require_homehost = 1,
+ };
+ struct shape s = {
+ .level = UnSet,
+ .layout = UnSet,
+ .bitmap_chunk = UnSet,
+ };
+
+ char sys_hostname[256];
+ char *mailaddr = NULL;
+ char *program = NULL;
+ int increments = 20;
+ int daemonise = 0;
+ char *pidfile = NULL;
+ int oneshot = 0;
+ int spare_sharing = 1;
+ struct supertype *ss = NULL;
+ int writemostly = 0;
+ char *shortopt = short_options;
+ int dosyslog = 0;
+ int rebuild_map = 0;
+ char *remove_path = NULL;
+ char *udev_filename = NULL;
+ char *dump_directory = NULL;
+
+ int print_help = 0;
+ FILE *outf;
+
+ int mdfd = -1;
+
+ srandom(time(0) ^ getpid());
+
+ ident.uuid_set=0;
+ ident.level = UnSet;
+ ident.raid_disks = UnSet;
+ ident.super_minor= UnSet;
+ ident.devices=0;
+ ident.spare_group = NULL;
+ ident.autof = 0;
+ ident.st = NULL;
+ ident.bitmap_fd = -1;
+ ident.bitmap_file = NULL;
+ ident.name[0] = 0;
+ ident.container = NULL;
+ ident.member = NULL;
+
+ while ((option_index = -1) ,
+ (opt=getopt_long(argc, argv,
+ shortopt, long_options,
+ &option_index)) != -1) {
+ int newmode = mode;
+ /* firstly, some mode-independent options */
+ switch(opt) {
+ case HelpOptions:
+ print_help = 2;
+ continue;
+ case 'h':
+ print_help = 1;
+ continue;
+
+ case 'V':
+ fputs(Version, stderr);
+ exit(0);
+
+ case 'v': c.verbose++;
+ continue;
+
+ case 'q': c.verbose--;
+ continue;
+
+ case 'b':
+ if (mode == ASSEMBLE || mode == BUILD || mode == CREATE
+ || mode == GROW || mode == INCREMENTAL
+ || mode == MANAGE)
+ break; /* b means bitmap */
+ case Brief:
+ c.brief = 1;
+ continue;
+
+ case 'Y': c.export++;
+ continue;
+
+ case HomeHost:
+ if (strcasecmp(optarg, "<ignore>") == 0)
+ c.require_homehost = 0;
+ else
+ c.homehost = optarg;
+ continue;
+
+ case OffRootOpt:
+ /* Silently ignore old option */
+ continue;
+
+ case Prefer:
+ if (c.prefer)
+ free(c.prefer);
+ if (asprintf(&c.prefer, "/%s/", optarg) <= 0)
+ c.prefer = NULL;
+ continue;
+
+ case ':':
+ case '?':
+ fputs(Usage, stderr);
+ exit(2);
+ }
+ /* second, figure out the mode.
+ * Some options force the mode. Others
+ * set the mode if it isn't already
+ */
+
+ switch(opt) {
+ case ManageOpt:
+ newmode = MANAGE;
+ shortopt = short_bitmap_options;
+ break;
+ case 'a':
+ case Add:
+ case AddSpare:
+ case 'r':
+ case Remove:
+ case Replace:
+ case With:
+ case 'f':
+ case Fail:
+ case ReAdd: /* re-add */
+ if (!mode) {
+ newmode = MANAGE;
+ shortopt = short_bitmap_options;
+ }
+ break;
+
+ case 'A': newmode = ASSEMBLE;
+ shortopt = short_bitmap_auto_options;
+ break;
+ case 'B': newmode = BUILD;
+ shortopt = short_bitmap_auto_options;
+ break;
+ case 'C': newmode = CREATE;
+ shortopt = short_bitmap_auto_options;
+ break;
+ case 'F': newmode = MONITOR;
+ break;
+ case 'G': newmode = GROW;
+ shortopt = short_bitmap_options;
+ break;
+ case 'I': newmode = INCREMENTAL;
+ shortopt = short_bitmap_auto_options;
+ break;
+ case AutoDetect:
+ newmode = AUTODETECT;
+ break;
+
+ case MiscOpt:
+ case 'D':
+ case 'E':
+ case 'X':
+ case 'Q':
+ case ExamineBB:
+ case Dump:
+ case Restore:
+ case Action:
+ newmode = MISC;
+ break;
+
+ case 'R':
+ case 'S':
+ case 'o':
+ case 'w':
+ case 'W':
+ case WaitOpt:
+ case Waitclean:
+ case DetailPlatform:
+ case KillSubarray:
+ case UpdateSubarray:
+ case UdevRules:
+ case KillOpt:
+ if (!mode)
+ newmode = MISC;
+ break;
+
+ case NoSharing:
+ newmode = MONITOR;
+ break;
+ }
+ if (mode && newmode == mode) {
+ /* everybody happy ! */
+ } else if (mode && newmode != mode) {
+ /* not allowed.. */
+ pr_err("");
+ if (option_index >= 0)
+ fprintf(stderr, "--%s", long_options[option_index].name);
+ else
+ fprintf(stderr, "-%c", opt);
+ fprintf(stderr, " would set mdadm mode to \"%s\", but it is already set to \"%s\".\n",
+ map_num(modes, newmode),
+ map_num(modes, mode));
+ exit(2);
+ } else if (!mode && newmode) {
+ mode = newmode;
+ if (mode == MISC && devs_found) {
+ pr_err("No action given for %s in --misc mode\n",
+ devlist->devname);
+ cont_err("Action options must come before device names\n");
+ exit(2);
+ }
+ } else {
+ /* special case of -c --help */
+ if ((opt == 'c' || opt == ConfigFile) &&
+ ( strncmp(optarg, "--h", 3)==0 ||
+ strncmp(optarg, "-h", 2)==0)) {
+ fputs(Help_config, stdout);
+ exit(0);
+ }
+
+ /* If first option is a device, don't force the mode yet */
+ if (opt == 1) {
+ if (devs_found == 0) {
+ dv = xmalloc(sizeof(*dv));
+ dv->devname = optarg;
+ dv->disposition = devmode;
+ dv->writemostly = writemostly;
+ dv->used = 0;
+ dv->next = NULL;
+ *devlistend = dv;
+ devlistend = &dv->next;
+
+ devs_found++;
+ continue;
+ }
+ /* No mode yet, and this is the second device ... */
+ pr_err("An option must be given to set the mode before a second device\n"
+ " (%s) is listed\n", optarg);
+ exit(2);
+ }
+ if (option_index >= 0)
+ pr_err("--%s", long_options[option_index].name);
+ else
+ pr_err("-%c", opt);
+ fprintf(stderr, " does not set the mode, and so cannot be the first option.\n");
+ exit(2);
+ }
+
+ /* if we just set the mode, then done */
+ switch(opt) {
+ case ManageOpt:
+ case MiscOpt:
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'F':
+ case 'G':
+ case 'I':
+ case AutoDetect:
+ continue;
+ }
+ if (opt == 1) {
+ /* an undecorated option - must be a device name.
+ */
+
+ if (devs_found > 0 && devmode == DetailPlatform) {
+ pr_err("controller may only be specified once. %s ignored\n",
+ optarg);
+ continue;
+ }
+
+ if (devs_found > 0 && mode == MANAGE && !devmode) {
+ pr_err("Must give one of -a/-r/-f"
+ " for subsequent devices at %s\n", optarg);
+ exit(2);
+ }
+ if (devs_found > 0 && mode == GROW && !devmode) {
+ pr_err("Must give -a/--add for"
+ " devices to add: %s\n", optarg);
+ exit(2);
+ }
+ dv = xmalloc(sizeof(*dv));
+ dv->devname = optarg;
+ dv->disposition = devmode;
+ dv->writemostly = writemostly;
+ dv->used = 0;
+ dv->next = NULL;
+ *devlistend = dv;
+ devlistend = &dv->next;
+
+ devs_found++;
+ continue;
+ }
+
+ /* We've got a mode, and opt is now something else which
+ * could depend on the mode */
+#define O(a,b) ((a<<16)|b)
+ switch (O(mode,opt)) {
+ case O(GROW,'c'):
+ case O(GROW,ChunkSize):
+ case O(CREATE,'c'):
+ case O(CREATE,ChunkSize):
+ case O(BUILD,'c'): /* chunk or rounding */
+ case O(BUILD,ChunkSize): /* chunk or rounding */
+ if (s.chunk) {
+ pr_err("chunk/rounding may only be specified once. "
+ "Second value is %s.\n", optarg);
+ exit(2);
+ }
+ s.chunk = parse_size(optarg);
+ if (s.chunk == INVALID_SECTORS ||
+ s.chunk < 8 || (s.chunk&1)) {
+ pr_err("invalid chunk/rounding value: %s\n",
+ optarg);
+ exit(2);
+ }
+ /* Convert sectors to K */
+ s.chunk /= 2;
+ continue;
+
+ case O(INCREMENTAL, 'e'):
+ case O(CREATE,'e'):
+ case O(ASSEMBLE,'e'):
+ case O(MISC,'e'): /* set metadata (superblock) information */
+ if (ss) {
+ pr_err("metadata information already given\n");
+ exit(2);
+ }
+ for(i=0; !ss && superlist[i]; i++)
+ ss = superlist[i]->match_metadata_desc(optarg);
+
+ if (!ss) {
+ pr_err("unrecognised metadata identifier: %s\n", optarg);
+ exit(2);
+ }
+ continue;
+
+ case O(MANAGE,'W'):
+ case O(MANAGE,WriteMostly):
+ case O(BUILD,'W'):
+ case O(BUILD,WriteMostly):
+ case O(CREATE,'W'):
+ case O(CREATE,WriteMostly):
+ /* set write-mostly for following devices */
+ writemostly = 1;
+ continue;
+
+ case O(MANAGE,'w'):
+ /* clear write-mostly for following devices */
+ writemostly = 2;
+ continue;
+
+ case O(GROW,'z'):
+ case O(CREATE,'z'):
+ case O(BUILD,'z'): /* size */
+ if (s.size > 0) {
+ pr_err("size may only be specified once. "
+ "Second value is %s.\n", optarg);
+ exit(2);
+ }
+ if (strcmp(optarg, "max")==0)
+ s.size = MAX_SIZE;
+ else {
+ s.size = parse_size(optarg);
+ if (s.size == INVALID_SECTORS ||
+ s.size < 8) {
+ pr_err("invalid size: %s\n",
+ optarg);
+ exit(2);
+ }
+ /* convert sectors to K */
+ s.size /= 2;
+ }
+ continue;
+
+ case O(GROW,'Z'): /* array size */
+ if (array_size > 0) {
+ pr_err("array-size may only be specified once. "
+ "Second value is %s.\n", optarg);
+ exit(2);
+ }
+ if (strcmp(optarg, "max") == 0)
+ array_size = MAX_SIZE;
+ else {
+ array_size = parse_size(optarg);
+ if (array_size == 0 ||
+ array_size == INVALID_SECTORS) {
+ pr_err("invalid array size: %s\n",
+ optarg);
+ exit(2);
+ }
+ }
+ continue;
+
+ case O(CREATE,DataOffset):
+ case O(GROW,DataOffset):
+ if (data_offset != INVALID_SECTORS) {
+ pr_err("data-offset may only be specified one. "
+ "Second value is %s.\n", optarg);
+ exit(2);
+ }
+ if (mode == CREATE &&
+ strcmp(optarg, "variable") == 0)
+ data_offset = VARIABLE_OFFSET;
+ else
+ data_offset = parse_size(optarg);
+ if (data_offset == INVALID_SECTORS) {
+ pr_err("invalid data-offset: %s\n",
+ optarg);
+ exit(2);
+ }
+ continue;
+
+ case O(GROW,'l'):
+ case O(CREATE,'l'):
+ case O(BUILD,'l'): /* set raid level*/
+ if (s.level != UnSet) {
+ pr_err("raid level may only be set once. "
+ "Second value is %s.\n", optarg);
+ exit(2);
+ }
+ s.level = map_name(pers, optarg);
+ if (s.level == UnSet) {
+ pr_err("invalid raid level: %s\n",
+ optarg);
+ exit(2);
+ }
+ if (s.level != 0 && s.level != LEVEL_LINEAR && s.level != 1 &&
+ s.level != LEVEL_MULTIPATH && s.level != LEVEL_FAULTY &&
+ s.level != 10 &&
+ mode == BUILD) {
+ pr_err("Raid level %s not permitted with --build.\n",
+ optarg);
+ exit(2);
+ }
+ if (s.sparedisks > 0 && s.level < 1 && s.level >= -1) {
+ pr_err("raid level %s is incompatible with spare-devices setting.\n",
+ optarg);
+ exit(2);
+ }
+ ident.level = s.level;
+ continue;
+
+ case O(GROW, 'p'): /* new layout */
+ case O(GROW, Layout):
+ if (s.layout_str) {
+ pr_err("layout may only be sent once. "
+ "Second value was %s\n", optarg);
+ exit(2);
+ }
+ s.layout_str = optarg;
+ /* 'Grow' will parse the value */
+ continue;
+
+ case O(CREATE,'p'): /* raid5 layout */
+ case O(CREATE,Layout):
+ case O(BUILD,'p'): /* faulty layout */
+ case O(BUILD,Layout):
+ if (s.layout != UnSet) {
+ pr_err("layout may only be sent once. "
+ "Second value was %s\n", optarg);
+ exit(2);
+ }
+ switch(s.level) {
+ default:
+ pr_err("layout not meaningful for %s arrays.\n",
+ map_num(pers, s.level));
+ exit(2);
+ case UnSet:
+ pr_err("raid level must be given before layout.\n");
+ exit(2);
+
+ case 5:
+ s.layout = map_name(r5layout, optarg);
+ if (s.layout==UnSet) {
+ pr_err("layout %s not understood for raid5.\n",
+ optarg);
+ exit(2);
+ }
+ break;
+ case 6:
+ s.layout = map_name(r6layout, optarg);
+ if (s.layout==UnSet) {
+ pr_err("layout %s not understood for raid6.\n",
+ optarg);
+ exit(2);
+ }
+ break;
+
+ case 10:
+ s.layout = parse_layout_10(optarg);
+ if (s.layout < 0) {
+ pr_err("layout for raid10 must be 'nNN', 'oNN' or 'fNN' where NN is a number, not %s\n", optarg);
+ exit(2);
+ }
+ break;
+ case LEVEL_FAULTY:
+ /* Faulty
+ * modeNNN
+ */
+ s.layout = parse_layout_faulty(optarg);
+ if (s.layout == -1) {
+ pr_err("layout %s not understood for faulty.\n",
+ optarg);
+ exit(2);
+ }
+ break;
+ }
+ continue;
+
+ case O(CREATE,AssumeClean):
+ case O(BUILD,AssumeClean): /* assume clean */
+ case O(GROW,AssumeClean):
+ s.assume_clean = 1;
+ continue;
+
+ case O(GROW,'n'):
+ case O(CREATE,'n'):
+ case O(BUILD,'n'): /* number of raid disks */
+ if (s.raiddisks) {
+ pr_err("raid-devices set twice: %d and %s\n",
+ s.raiddisks, optarg);
+ exit(2);
+ }
+ s.raiddisks = parse_num(optarg);
+ if (s.raiddisks <= 0) {
+ pr_err("invalid number of raid devices: %s\n",
+ optarg);
+ exit(2);
+ }
+ ident.raid_disks = s.raiddisks;
+ continue;
+
+ case O(CREATE,'x'): /* number of spare (eXtra) disks */
+ if (s.sparedisks) {
+ pr_err("spare-devices set twice: %d and %s\n",
+ s.sparedisks, optarg);
+ exit(2);
+ }
+ if (s.level != UnSet && s.level <= 0 && s.level >= -1) {
+ pr_err("spare-devices setting is incompatible with raid level %d\n",
+ s.level);
+ exit(2);
+ }
+ s.sparedisks = parse_num(optarg);
+ if (s.sparedisks < 0) {
+ pr_err("invalid number of spare-devices: %s\n",
+ optarg);
+ exit(2);
+ }
+ continue;
+
+ case O(CREATE,'a'):
+ case O(CREATE,Auto):
+ case O(BUILD,'a'):
+ case O(BUILD,Auto):
+ case O(INCREMENTAL,'a'):
+ case O(INCREMENTAL,Auto):
+ case O(ASSEMBLE,'a'):
+ case O(ASSEMBLE,Auto): /* auto-creation of device node */
+ c.autof = parse_auto(optarg, "--auto flag", 0);
+ continue;
+
+ case O(CREATE,Symlinks):
+ case O(BUILD,Symlinks):
+ case O(ASSEMBLE,Symlinks): /* auto creation of symlinks in /dev to /dev/md */
+ symlinks = optarg;
+ continue;
+
+ case O(BUILD,'f'): /* force honouring '-n 1' */
+ case O(BUILD,Force): /* force honouring '-n 1' */
+ case O(GROW,'f'): /* ditto */
+ case O(GROW,Force): /* ditto */
+ case O(CREATE,'f'): /* force honouring of device list */
+ case O(CREATE,Force): /* force honouring of device list */
+ case O(ASSEMBLE,'f'): /* force assembly */
+ case O(ASSEMBLE,Force): /* force assembly */
+ case O(MISC,'f'): /* force zero */
+ case O(MISC,Force): /* force zero */
+ case O(MANAGE,Force): /* add device which is too large */
+ c.force=1;
+ continue;
+ /* now for the Assemble options */
+ case O(ASSEMBLE, FreezeReshape): /* Freeze reshape during
+ * initrd phase */
+ case O(INCREMENTAL, FreezeReshape):
+ c.freeze_reshape = 1;
+ continue;
+ case O(CREATE,'u'): /* uuid of array */
+ case O(ASSEMBLE,'u'): /* uuid of array */
+ if (ident.uuid_set) {
+ pr_err("uuid cannot be set twice. "
+ "Second value %s.\n", optarg);
+ exit(2);
+ }
+ if (parse_uuid(optarg, ident.uuid))
+ ident.uuid_set = 1;
+ else {
+ pr_err("Bad uuid: %s\n", optarg);
+ exit(2);
+ }
+ continue;
+
+ case O(CREATE,'N'):
+ case O(ASSEMBLE,'N'):
+ case O(MISC,'N'):
+ if (ident.name[0]) {
+ pr_err("name cannot be set twice. "
+ "Second value %s.\n", optarg);
+ exit(2);
+ }
+ if (mode == MISC && !c.subarray) {
+ pr_err("-N/--name only valid with --update-subarray in misc mode\n");
+ exit(2);
+ }
+ if (strlen(optarg) > 32) {
+ pr_err("name '%s' is too long, 32 chars max.\n",
+ optarg);
+ exit(2);
+ }
+ strcpy(ident.name, optarg);
+ continue;
+
+ case O(ASSEMBLE,'m'): /* super-minor for array */
+ case O(ASSEMBLE,SuperMinor):
+ if (ident.super_minor != UnSet) {
+ pr_err("super-minor cannot be set twice. "
+ "Second value: %s.\n", optarg);
+ exit(2);
+ }
+ if (strcmp(optarg, "dev")==0)
+ ident.super_minor = -2;
+ else {
+ ident.super_minor = parse_num(optarg);
+ if (ident.super_minor < 0) {
+ pr_err("Bad super-minor number: %s.\n", optarg);
+ exit(2);
+ }
+ }
+ continue;
+
+ case O(ASSEMBLE,'o'):
+ case O(MANAGE,'o'):
+ case O(CREATE,'o'):
+ c.readonly = 1;
+ continue;
+
+ case O(ASSEMBLE,'U'): /* update the superblock */
+ case O(MISC,'U'):
+ if (c.update) {
+ pr_err("Can only update one aspect"
+ " of superblock, both %s and %s given.\n",
+ c.update, optarg);
+ exit(2);
+ }
+ if (mode == MISC && !c.subarray) {
+ pr_err("Only subarrays can be"
+ " updated in misc mode\n");
+ exit(2);
+ }
+ c.update = optarg;
+ if (strcmp(c.update, "sparc2.2")==0)
+ continue;
+ if (strcmp(c.update, "super-minor") == 0)
+ continue;
+ if (strcmp(c.update, "summaries")==0)
+ continue;
+ if (strcmp(c.update, "resync")==0)
+ continue;
+ if (strcmp(c.update, "uuid")==0)
+ continue;
+ if (strcmp(c.update, "name")==0)
+ continue;
+ if (strcmp(c.update, "homehost")==0)
+ continue;
+ if (strcmp(c.update, "devicesize")==0)
+ continue;
+ if (strcmp(c.update, "no-bitmap")==0)
+ continue;
+ if (strcmp(c.update, "bbl") == 0)
+ continue;
+ if (strcmp(c.update, "no-bbl") == 0)
+ continue;
+ if (strcmp(c.update, "metadata") == 0)
+ continue;
+ if (strcmp(c.update, "revert-reshape") == 0)
+ continue;
+ if (strcmp(c.update, "byteorder")==0) {
+ if (ss) {
+ pr_err("must not set metadata"
+ " type with --update=byteorder.\n");
+ exit(2);
+ }
+ for(i=0; !ss && superlist[i]; i++)
+ ss = superlist[i]->match_metadata_desc(
+ "0.swap");
+ if (!ss) {
+ pr_err("INTERNAL ERROR"
+ " cannot find 0.swap\n");
+ exit(2);
+ }
+
+ continue;
+ }
+ if (strcmp(c.update,"?") == 0 ||
+ strcmp(c.update, "help") == 0) {
+ outf = stdout;
+ fprintf(outf, Name ": ");
+ } else {
+ outf = stderr;
+ fprintf(outf,
+ Name ": '--update=%s' is invalid. ",
+ c.update);
+ }
+ fprintf(outf, "Valid --update options are:\n"
+ " 'sparc2.2', 'super-minor', 'uuid', 'name', 'resync',\n"
+ " 'summaries', 'homehost', 'byteorder', 'devicesize',\n"
+ " 'no-bitmap', 'metadata', 'revert-reshape'\n");
+ exit(outf == stdout ? 0 : 2);
+
+ case O(MANAGE,'U'):
+ /* update=devicesize is allowed with --re-add */
+ if (devmode != 'A') {
+ pr_err("--update in Manage mode only"
+ " allowed with --re-add.\n");
+ exit(1);
+ }
+ if (c.update) {
+ pr_err("Can only update one aspect"
+ " of superblock, both %s and %s given.\n",
+ c.update, optarg);
+ exit(2);
+ }
+ c.update = optarg;
+ if (strcmp(c.update, "devicesize") != 0 &&
+ strcmp(c.update, "bbl") != 0 &&
+ strcmp(c.update, "no-bbl") != 0) {
+ pr_err("only 'devicesize', 'bbl' and 'no-bbl' can be"
+ " updated with --re-add\n");
+ exit(2);
+ }
+ continue;
+
+ case O(INCREMENTAL,NoDegraded):
+ pr_err("--no-degraded is deprecated in Incremental mode\n");
+ case O(ASSEMBLE,NoDegraded): /* --no-degraded */
+ c.runstop = -1; /* --stop isn't allowed for --assemble,
+ * so we overload slightly */
+ continue;
+
+ case O(ASSEMBLE,'c'):
+ case O(ASSEMBLE,ConfigFile):
+ case O(INCREMENTAL, 'c'):
+ case O(INCREMENTAL, ConfigFile):
+ case O(MISC, 'c'):
+ case O(MISC, ConfigFile):
+ case O(MONITOR,'c'):
+ case O(MONITOR,ConfigFile):
+ case O(CREATE,ConfigFile):
+ if (configfile) {
+ pr_err("configfile cannot be set twice. "
+ "Second value is %s.\n", optarg);
+ exit(2);
+ }
+ configfile = optarg;
+ set_conffile(configfile);
+ /* FIXME possibly check that config file exists. Even parse it */
+ continue;
+ case O(ASSEMBLE,'s'): /* scan */
+ case O(MISC,'s'):
+ case O(MONITOR,'s'):
+ case O(INCREMENTAL,'s'):
+ c.scan = 1;
+ continue;
+
+ case O(MONITOR,'m'): /* mail address */
+ case O(MONITOR,EMail):
+ if (mailaddr)
+ pr_err("only specify one mailaddress. %s ignored.\n",
+ optarg);
+ else
+ mailaddr = optarg;
+ continue;
+
+ case O(MONITOR,'p'): /* alert program */
+ case O(MONITOR,ProgramOpt): /* alert program */
+ if (program)
+ pr_err("only specify one alter program. %s ignored.\n",
+ optarg);
+ else
+ program = optarg;
+ continue;
+
+ case O(MONITOR,'r'): /* rebuild increments */
+ case O(MONITOR,Increment):
+ increments = atoi(optarg);
+ if (increments > 99 || increments < 1) {
+ pr_err("please specify positive integer between 1 and 99 as rebuild increments.\n");
+ exit(2);
+ }
+ continue;
+
+ case O(MONITOR,'d'): /* delay in seconds */
+ case O(GROW, 'd'):
+ case O(BUILD,'d'): /* delay for bitmap updates */
+ case O(CREATE,'d'):
+ if (c.delay)
+ pr_err("only specify delay once. %s ignored.\n",
+ optarg);
+ else {
+ c.delay = parse_num(optarg);
+ if (c.delay < 1) {
+ pr_err("invalid delay: %s\n",
+ optarg);
+ exit(2);
+ }
+ }
+ continue;
+ case O(MONITOR,'f'): /* daemonise */
+ case O(MONITOR,Fork):
+ daemonise = 1;
+ continue;
+ case O(MONITOR,'i'): /* pid */
+ if (pidfile)
+ pr_err("only specify one pid file. %s ignored.\n",
+ optarg);
+ else
+ pidfile = optarg;
+ continue;
+ case O(MONITOR,'1'): /* oneshot */
+ oneshot = 1;
+ spare_sharing = 0;
+ continue;
+ case O(MONITOR,'t'): /* test */
+ c.test = 1;
+ continue;
+ case O(MONITOR,'y'): /* log messages to syslog */
+ openlog("mdadm", LOG_PID, SYSLOG_FACILITY);
+ dosyslog = 1;
+ continue;
+ case O(MONITOR, NoSharing):
+ spare_sharing = 0;
+ continue;
+
+ /* now the general management options. Some are applicable
+ * to other modes. None have arguments.
+ */
+ case O(GROW,'a'):
+ case O(GROW,Add):
+ case O(MANAGE,'a'):
+ case O(MANAGE,Add): /* add a drive */
+ devmode = 'a';
+ continue;
+ case O(MANAGE,AddSpare): /* add drive - never re-add */
+ devmode = 'S';
+ continue;
+ case O(MANAGE,ReAdd):
+ devmode = 'A';
+ continue;
+ case O(MANAGE,'r'): /* remove a drive */
+ case O(MANAGE,Remove):
+ devmode = 'r';
+ continue;
+ case O(MANAGE,'f'): /* set faulty */
+ case O(MANAGE,Fail):
+ case O(INCREMENTAL,'f'):
+ case O(INCREMENTAL,Remove):
+ case O(INCREMENTAL,Fail): /* r for incremental is taken, use f
+ * even though we will both fail and
+ * remove the device */
+ devmode = 'f';
+ continue;
+ case O(MANAGE,Replace):
+ /* Mark these devices for replacement */
+ devmode = 'R';
+ continue;
+ case O(MANAGE,With):
+ /* These are the replacements to use */
+ if (devmode != 'R') {
+ pr_err("--with must follow --replace\n");
+ exit(2);
+ }
+ devmode = 'W';
+ continue;
+ case O(INCREMENTAL,'R'):
+ case O(MANAGE,'R'):
+ case O(ASSEMBLE,'R'):
+ case O(BUILD,'R'):
+ case O(CREATE,'R'): /* Run the array */
+ if (c.runstop < 0) {
+ pr_err("Cannot both Stop and Run an array\n");
+ exit(2);
+ }
+ c.runstop = 1;
+ continue;
+ case O(MANAGE,'S'):
+ if (c.runstop > 0) {
+ pr_err("Cannot both Run and Stop an array\n");
+ exit(2);
+ }
+ c.runstop = -1;
+ continue;
+ case O(MANAGE,'t'):
+ c.test = 1;
+ continue;
+
+ case O(MISC,'Q'):
+ case O(MISC,'D'):
+ case O(MISC,'E'):
+ case O(MISC,KillOpt):
+ case O(MISC,'R'):
+ case O(MISC,'S'):
+ case O(MISC,'X'):
+ case O(MISC, ExamineBB):
+ case O(MISC,'o'):
+ case O(MISC,'w'):
+ case O(MISC,'W'):
+ case O(MISC, WaitOpt):
+ case O(MISC, Waitclean):
+ case O(MISC, DetailPlatform):
+ case O(MISC, KillSubarray):
+ case O(MISC, UpdateSubarray):
+ case O(MISC, Dump):
+ case O(MISC, Restore):
+ case O(MISC ,Action):
+ if (opt == KillSubarray || opt == UpdateSubarray) {
+ if (c.subarray) {
+ pr_err("subarray can only"
+ " be specified once\n");
+ exit(2);
+ }
+ c.subarray = optarg;
+ }
+ if (opt == Action) {
+ if (c.action) {
+ pr_err("Only one --action can be specified\n");
+ exit(2);
+ }
+ if (strcmp(optarg, "idle") == 0 ||
+ strcmp(optarg, "frozen") == 0 ||
+ strcmp(optarg, "check") == 0 ||
+ strcmp(optarg, "repair") == 0)
+ c.action = optarg;
+ else {
+ pr_err("action must be one of idle, frozen, check, repair\n");
+ exit(2);
+ }
+ }
+ if (devmode && devmode != opt &&
+ (devmode == 'E' || (opt == 'E' && devmode != 'Q'))) {
+ pr_err("--examine/-E cannot be given with ");
+ if (devmode == 'E') {
+ if (option_index >= 0)
+ fprintf(stderr, "--%s\n",
+ long_options[option_index].name);
+ else
+ fprintf(stderr, "-%c\n", opt);
+ } else if (isalpha(devmode))
+ fprintf(stderr, "-%c\n", devmode);
+ else
+ fprintf(stderr, "previous option\n");
+ exit(2);
+ }
+ devmode = opt;
+ if (opt == Dump || opt == Restore) {
+ if (dump_directory != NULL) {
+ pr_err("dump/restore directory specified twice: %s and %s\n",
+ dump_directory, optarg);
+ exit(2);
+ }
+ dump_directory = optarg;
+ }
+ continue;
+ case O(MISC, UdevRules):
+ if (devmode && devmode != opt) {
+ pr_err("--udev-rules must"
+ " be the only option.\n");
+ } else {
+ if (udev_filename)
+ pr_err("only specify one udev "
+ "rule filename. %s ignored.\n",
+ optarg);
+ else
+ udev_filename = optarg;
+ }
+ devmode = opt;
+ continue;
+ case O(MISC,'t'):
+ c.test = 1;
+ continue;
+
+ case O(MISC, Sparc22):
+ if (devmode != 'E') {
+ pr_err("--sparc2.2 only allowed with --examine\n");
+ exit(2);
+ }
+ c.SparcAdjust = 1;
+ continue;
+
+ case O(ASSEMBLE,'b'): /* here we simply set the bitmap file */
+ case O(ASSEMBLE,Bitmap):
+ if (!optarg) {
+ pr_err("bitmap file needed with -b in --assemble mode\n");
+ exit(2);
+ }
+ if (strcmp(optarg, "internal")==0) {
+ pr_err("there is no need to specify --bitmap when assembling arrays with internal bitmaps\n");
+ continue;
+ }
+ bitmap_fd = open(optarg, O_RDWR);
+ if (!*optarg || bitmap_fd < 0) {
+ pr_err("cannot open bitmap file %s: %s\n", optarg, strerror(errno));
+ exit(2);
+ }
+ ident.bitmap_fd = bitmap_fd; /* for Assemble */
+ continue;
+
+ case O(ASSEMBLE, BackupFile):
+ case O(GROW, BackupFile):
+ /* Specify a file into which grow might place a backup,
+ * or from which assemble might recover a backup
+ */
+ if (c.backup_file) {
+ pr_err("backup file already specified, rejecting %s\n", optarg);
+ exit(2);
+ }
+ c.backup_file = optarg;
+ continue;
+
+ case O(GROW, Continue):
+ /* Continue interrupted grow
+ */
+ grow_continue = 1;
+ continue;
+ case O(ASSEMBLE, InvalidBackup):
+ /* Acknowledge that the backupfile is invalid, but ask
+ * to continue anyway
+ */
+ c.invalid_backup = 1;
+ continue;
+
+ case O(BUILD,'b'):
+ case O(BUILD,Bitmap):
+ case O(CREATE,'b'):
+ case O(CREATE,Bitmap): /* here we create the bitmap */
+ case O(GROW,'b'):
+ case O(GROW,Bitmap):
+ if (strcmp(optarg, "internal")== 0 ||
+ strcmp(optarg, "none")== 0 ||
+ strchr(optarg, '/') != NULL) {
+ s.bitmap_file = optarg;
+ continue;
+ }
+ /* probable typo */
+ pr_err("bitmap file must contain a '/', or be 'internal', or 'none'\n"
+ " not '%s'\n", optarg);
+ exit(2);
+
+ case O(GROW,BitmapChunk):
+ case O(BUILD,BitmapChunk):
+ case O(CREATE,BitmapChunk): /* bitmap chunksize */
+ s.bitmap_chunk = parse_size(optarg);
+ if (s.bitmap_chunk == 0 ||
+ s.bitmap_chunk == INVALID_SECTORS ||
+ s.bitmap_chunk & (s.bitmap_chunk - 1)) {
+ pr_err("invalid bitmap chunksize: %s\n",
+ optarg);
+ exit(2);
+ }
+ s.bitmap_chunk = s.bitmap_chunk * 512;
+ continue;
+
+ case O(GROW, WriteBehind):
+ case O(BUILD, WriteBehind):
+ case O(CREATE, WriteBehind): /* write-behind mode */
+ s.write_behind = DEFAULT_MAX_WRITE_BEHIND;
+ if (optarg) {
+ s.write_behind = parse_num(optarg);
+ if (s.write_behind < 0 ||
+ s.write_behind > 16383) {
+ pr_err("Invalid value for maximum outstanding write-behind writes: %s.\n\tMust be between 0 and 16383.\n", optarg);
+ exit(2);
+ }
+ }
+ continue;
+
+ case O(INCREMENTAL, 'r'):
+ case O(INCREMENTAL, RebuildMapOpt):
+ rebuild_map = 1;
+ continue;
+ case O(INCREMENTAL, IncrementalPath):
+ remove_path = optarg;
+ continue;
+ }
+ /* We have now processed all the valid options. Anything else is
+ * an error
+ */
+ if (option_index > 0)
+ pr_err(":option --%s not valid in %s mode\n",
+ long_options[option_index].name,
+ map_num(modes, mode));
+ else
+ pr_err("option -%c not valid in %s mode\n",
+ opt, map_num(modes, mode));
+ exit(2);
+
+ }
+
+ if (print_help) {
+ char *help_text;
+ if (print_help == 2)
+ help_text = OptionHelp;
+ else
+ help_text = mode_help[mode];
+ if (help_text == NULL)
+ help_text = Help;
+ fputs(help_text,stdout);
+ exit(0);
+ }
+
+ if (!mode && devs_found) {
+ mode = MISC;
+ devmode = 'Q';
+ if (devlist->disposition == 0)
+ devlist->disposition = devmode;
+ }
+ if (!mode) {
+ fputs(Usage, stderr);
+ exit(2);
+ }
+
+ if (symlinks) {
+ struct createinfo *ci = conf_get_create_info();
+
+ if (strcasecmp(symlinks, "yes") == 0)
+ ci->symlinks = 1;
+ else if (strcasecmp(symlinks, "no") == 0)
+ ci->symlinks = 0;
+ else {
+ pr_err("option --symlinks must be 'no' or 'yes'\n");
+ exit(2);
+ }
+ }
+ /* Ok, got the option parsing out of the way
+ * hopefully it's mostly right but there might be some stuff
+ * missing
+ *
+ * That is mosty checked in the per-mode stuff but...
+ *
+ * For @,B,C and A without -s, the first device listed must be
+ * an md device. We check that here and open it.
+ */
+
+ if (mode == MANAGE || mode == BUILD || mode == CREATE
+ || mode == GROW
+ || (mode == ASSEMBLE && ! c.scan)) {
+ if (devs_found < 1) {
+ pr_err("an md device must be given in this mode\n");
+ exit(2);
+ }
+ if ((int)ident.super_minor == -2 && c.autof) {
+ pr_err("--super-minor=dev is incompatible with --auto\n");
+ exit(2);
+ }
+ if (mode == MANAGE || mode == GROW) {
+ mdfd = open_mddev(devlist->devname, 1);
+ if (mdfd < 0)
+ exit(1);
+ } else
+ /* non-existent device is OK */
+ mdfd = open_mddev(devlist->devname, 0);
+ if (mdfd == -2) {
+ pr_err("device %s exists but is not an "
+ "md array.\n", devlist->devname);
+ exit(1);
+ }
+ if ((int)ident.super_minor == -2) {
+ struct stat stb;
+ if (mdfd < 0) {
+ pr_err("--super-minor=dev given, and "
+ "listed device %s doesn't exist.\n",
+ devlist->devname);
+ exit(1);
+ }
+ fstat(mdfd, &stb);
+ ident.super_minor = minor(stb.st_rdev);
+ }
+ if (mdfd >= 0 && mode != MANAGE && mode != GROW) {
+ /* We don't really want this open yet, we just might
+ * have wanted to check some things
+ */
+ close(mdfd);
+ mdfd = -1;
+ }
+ }
+
+ if (s.raiddisks) {
+ if (s.raiddisks == 1 && !c.force && s.level != LEVEL_FAULTY) {
+ pr_err("'1' is an unusual number of drives for an array, so it is probably\n"
+ " a mistake. If you really mean it you will need to specify --force before\n"
+ " setting the number of drives.\n");
+ exit(2);
+ }
+ }
+
+ if (c.homehost == NULL)
+ c.homehost = conf_get_homehost(&c.require_homehost);
+ if (c.homehost == NULL || strcasecmp(c.homehost, "<system>")==0) {
+ if (gethostname(sys_hostname, sizeof(sys_hostname)) == 0) {
+ sys_hostname[sizeof(sys_hostname)-1] = 0;
+ c.homehost = sys_hostname;
+ }
+ }
+ if (c.homehost && (!c.homehost[0] || strcasecmp(c.homehost, "<none>") == 0)) {
+ c.homehost = NULL;
+ c.require_homehost = 0;
+ }
+
+ if (c.backup_file && data_offset != INVALID_SECTORS) {
+ pr_err("--backup-file and --data-offset are incompatible\n");
+ exit(2);
+ }
+
+ if ((mode == MISC && devmode == 'E')
+ || (mode == MONITOR && spare_sharing == 0))
+ /* Anyone may try this */;
+ else if (geteuid() != 0) {
+ pr_err("must be super-user to perform this action\n");
+ exit(1);
+ }
+
+ ident.autof = c.autof;
+
+ if (c.scan && c.verbose < 2)
+ /* --scan implied --brief unless -vv */
+ c.brief = 1;
+
+ rv = 0;
+ switch(mode) {
+ case MANAGE:
+ /* readonly, add/remove, readwrite, runstop */
+ if (c.readonly > 0)
+ rv = Manage_ro(devlist->devname, mdfd, c.readonly);
+ if (!rv && devs_found>1)
+ rv = Manage_subdevs(devlist->devname, mdfd,
+ devlist->next, c.verbose, c.test,
+ c.update, c.force);
+ if (!rv && c.readonly < 0)
+ rv = Manage_ro(devlist->devname, mdfd, c.readonly);
+ if (!rv && c.runstop > 0)
+ rv = Manage_run(devlist->devname, mdfd, &c);
+ if (!rv && c.runstop < 0)
+ rv = Manage_stop(devlist->devname, mdfd, c.verbose, 0);
+ break;
+ case ASSEMBLE:
+ if (devs_found == 1 && ident.uuid_set == 0 &&
+ ident.super_minor == UnSet && ident.name[0] == 0 && !c.scan ) {
+ /* Only a device has been given, so get details from config file */
+ struct mddev_ident *array_ident = conf_get_ident(devlist->devname);
+ if (array_ident == NULL) {
+ pr_err("%s not identified in config file.\n",
+ devlist->devname);
+ rv |= 1;
+ if (mdfd >= 0)
+ close(mdfd);
+ } else {
+ if (array_ident->autof == 0)
+ array_ident->autof = c.autof;
+ rv |= Assemble(ss, devlist->devname, array_ident,
+ NULL, &c);
+ }
+ } else if (!c.scan)
+ rv = Assemble(ss, devlist->devname, &ident,
+ devlist->next, &c);
+ else if (devs_found > 0) {
+ if (c.update && devs_found > 1) {
+ pr_err("can only update a single array at a time\n");
+ exit(1);
+ }
+ if (c.backup_file && devs_found > 1) {
+ pr_err("can only assemble a single array when providing a backup file.\n");
+ exit(1);
+ }
+ for (dv = devlist ; dv ; dv=dv->next) {
+ struct mddev_ident *array_ident = conf_get_ident(dv->devname);
+ if (array_ident == NULL) {
+ pr_err("%s not identified in config file.\n",
+ dv->devname);
+ rv |= 1;
+ continue;
+ }
+ if (array_ident->autof == 0)
+ array_ident->autof = c.autof;
+ rv |= Assemble(ss, dv->devname, array_ident,
+ NULL, &c);
+ }
+ } else {
+ if (c.update) {
+ pr_err("--update not meaningful with a --scan assembly.\n");
+ exit(1);
+ }
+ if (c.backup_file) {
+ pr_err("--backup_file not meaningful with a --scan assembly.\n");
+ exit(1);
+ }
+ rv = scan_assemble(ss, &c, &ident);
+ }
+
+ break;
+ case BUILD:
+ if (c.delay == 0)
+ c.delay = DEFAULT_BITMAP_DELAY;
+ if (s.write_behind && !s.bitmap_file) {
+ pr_err("write-behind mode requires a bitmap.\n");
+ rv = 1;
+ break;
+ }
+ if (s.raiddisks == 0) {
+ pr_err("no raid-devices specified.\n");
+ rv = 1;
+ break;
+ }
+
+ if (s.bitmap_file) {
+ if (strcmp(s.bitmap_file, "internal")==0) {
+ pr_err("'internal' bitmaps not supported with --build\n");
+ rv |= 1;
+ break;
+ }
+ }
+ rv = Build(devlist->devname, devlist->next, &s, &c);
+ break;
+ case CREATE:
+ if (c.delay == 0)
+ c.delay = DEFAULT_BITMAP_DELAY;
+ if (s.write_behind && !s.bitmap_file) {
+ pr_err("write-behind mode requires a bitmap.\n");
+ rv = 1;
+ break;
+ }
+ if (s.raiddisks == 0) {
+ pr_err("no raid-devices specified.\n");
+ rv = 1;
+ break;
+ }
+
+ rv = Create(ss, devlist->devname,
+ ident.name, ident.uuid_set ? ident.uuid : NULL,
+ devs_found-1, devlist->next,
+ &s, &c, data_offset);
+ break;
+ case MISC:
+ if (devmode == 'E') {
+ if (devlist == NULL && !c.scan) {
+ pr_err("No devices to examine\n");
+ exit(2);
+ }
+ if (devlist == NULL)
+ devlist = conf_get_devs();
+ if (devlist == NULL) {
+ pr_err("No devices listed in %s\n", configfile?configfile:DefaultConfFile);
+ exit(1);
+ }
+ rv = Examine(devlist, &c, ss);
+ } else if (devmode == DetailPlatform) {
+ rv = Detail_Platform(ss ? ss->ss : NULL, ss ? c.scan : 1,
+ c.verbose, c.export,
+ devlist ? devlist->devname : NULL);
+ } else if (devlist == NULL) {
+ if (devmode == 'S' && c.scan)
+ rv = stop_scan(c.verbose);
+ else if ((devmode == 'D' || devmode == Waitclean) && c.scan)
+ rv = misc_scan(devmode, &c);
+ else if (devmode == UdevRules)
+ rv = Write_rules(udev_filename);
+ else {
+ pr_err("No devices given.\n");
+ exit(2);
+ }
+ } else
+ rv = misc_list(devlist, &ident, dump_directory, ss, &c);
+ break;
+ case MONITOR:
+ if (!devlist && !c.scan) {
+ pr_err("Cannot monitor: need --scan or at least one device\n");
+ rv = 1;
+ break;
+ }
+ if (pidfile && !daemonise) {
+ pr_err("Cannot write a pid file when not in daemon mode\n");
+ rv = 1;
+ break;
+ }
+ if (c.delay == 0) {
+ if (get_linux_version() > 2006016)
+ /* mdstat responds to poll */
+ c.delay = 1000;
+ else
+ c.delay = 60;
+ }
+ if (c.delay == 0)
+ c.delay = 60;
+ rv= Monitor(devlist, mailaddr, program,
+ &c, daemonise, oneshot,
+ dosyslog, pidfile, increments,
+ spare_sharing);
+ break;
+
+ case GROW:
+ if (array_size > 0) {
+ /* alway impose array size first, independent of
+ * anything else
+ * Do not allow level or raid_disks changes at the
+ * same time as that can be irreversibly destructive.
+ */
+ struct mdinfo sra;
+ int err;
+ if (s.raiddisks || s.level != UnSet) {
+ pr_err("cannot change array size in same operation "
+ "as changing raiddisks or level.\n"
+ " Change size first, then check that data is still intact.\n");
+ rv = 1;
+ break;
+ }
+ sysfs_init(&sra, mdfd, NULL);
+ if (array_size == MAX_SIZE)
+ err = sysfs_set_str(&sra, NULL, "array_size", "default");
+ else
+ err = sysfs_set_num(&sra, NULL, "array_size", array_size / 2);
+ if (err < 0) {
+ if (errno == E2BIG)
+ pr_err("--array-size setting"
+ " is too large.\n");
+ else
+ pr_err("current kernel does"
+ " not support setting --array-size\n");
+ rv = 1;
+ break;
+ }
+ }
+ if (devs_found > 1 && s.raiddisks == 0 && s.level == UnSet) {
+ /* must be '-a'. */
+ if (s.size > 0 || s.chunk || s.layout_str != NULL || s.bitmap_file) {
+ pr_err("--add cannot be used with "
+ "other geometry changes in --grow mode\n");
+ rv = 1;
+ break;
+ }
+ for (dv=devlist->next; dv ; dv=dv->next) {
+ rv = Grow_Add_device(devlist->devname, mdfd,
+ dv->devname);
+ if (rv)
+ break;
+ }
+ } else if (s.bitmap_file) {
+ if (s.size > 0 || s.raiddisks || s.chunk ||
+ s.layout_str != NULL || devs_found > 1) {
+ pr_err("--bitmap changes cannot be "
+ "used with other geometry changes "
+ "in --grow mode\n");
+ rv = 1;
+ break;
+ }
+ if (c.delay == 0)
+ c.delay = DEFAULT_BITMAP_DELAY;
+ rv = Grow_addbitmap(devlist->devname, mdfd, &c, &s);
+ } else if (grow_continue)
+ rv = Grow_continue_command(devlist->devname,
+ mdfd, c.backup_file,
+ c.verbose);
+ else if (s.size > 0 || s.raiddisks || s.layout_str != NULL
+ || s.chunk != 0 || s.level != UnSet
+ || data_offset != INVALID_SECTORS) {
+ rv = Grow_reshape(devlist->devname, mdfd,
+ devlist->next,
+ data_offset, &c, &s);
+ } else if (array_size == 0)
+ pr_err("no changes to --grow\n");
+ break;
+ case INCREMENTAL:
+ if (rebuild_map) {
+ RebuildMap();
+ }
+ if (c.scan) {
+ rv = 1;
+ if (devlist) {
+ pr_err("In --incremental mode, a device cannot be given with --scan.\n");
+ break;
+ }
+ if (c.runstop <= 0) {
+ pr_err("--incremental --scan meaningless without --run.\n");
+ break;
+ }
+ if (devmode == 'f') {
+ pr_err("--incremental --scan --fail not supported.\n");
+ break;
+ }
+ rv = IncrementalScan(&c, NULL);
+ }
+ if (!devlist) {
+ if (!rebuild_map && !c.scan) {
+ pr_err("--incremental requires a device.\n");
+ rv = 1;
+ }
+ break;
+ }
+ if (devmode == 'f') {
+ if (devlist->next) {
+ pr_err("'--incremental --fail' can only handle one device.\n");
+ rv = 1;
+ break;
+ }
+ rv = IncrementalRemove(devlist->devname, remove_path,
+ c.verbose);
+ } else
+ rv = Incremental(devlist, &c, ss);
+ break;
+ case AUTODETECT:
+ autodetect();
+ break;
+ }
+ exit(rv);
+}
+
+static int scan_assemble(struct supertype *ss,
+ struct context *c,
+ struct mddev_ident *ident)
+{ /* Assemble arrays for "--assemble --scan" when no devices were
+ * named on the command line (this is the branch of the caller's
+ * ASSEMBLE case that runs with c.scan set and devs_found == 0).
+ * Every array identity returned by conf_get_ident(NULL) is tried
+ * first; if not a single Assemble() call was made and a homehost
+ * is set, Assemble() is then run in auto-assemble mode.
+ * Returns 1 if a precondition fails or nothing was found, 0 once
+ * auto-assembly assembled something, otherwise the OR of the
+ * Assemble() results from the final pass over the config list.
+ */
+ struct mddev_ident *a, *array_list = conf_get_ident(NULL);
+ struct mddev_dev *devlist = conf_get_devs(); /* only tested for NULL below */
+ struct map_ent *map = NULL;
+ int cnt = 0; /* Assemble() attempts in the config passes, plus auto-assembly successes */
+ int rv = 0;
+ int failures, successes;
+
+ if (conf_verify_devnames(array_list)) {
+ pr_err("Duplicate MD device names in "
+ "conf file were found.\n");
+ return 1;
+ }
+ if (devlist == NULL) {
+ pr_err("No devices listed in conf file were found.\n");
+ return 1;
+ }
+ for (a = array_list; a ; a = a->next) { /* reset state; inherit --auto where the entry has none */
+ a->assembled = 0;
+ if (a->autof == 0)
+ a->autof = c->autof;
+ }
+ if (map_lock(&map)) /* a lock failure is reported but is not fatal */
+ pr_err("%s: failed to get "
+ "exclusive lock on mapfile\n",
+ __func__);
+ do { /* another pass is made while a pass had both successes and failures */
+ failures = 0;
+ successes = 0;
+ rv = 0; /* so only the last pass contributes to the result */
+ for (a = array_list; a ; a = a->next) {
+ int r;
+ if (a->assembled)
+ continue;
+ if (a->devname &&
+ strcasecmp(a->devname, "<ignore>") == 0)
+ continue;
+
+ r = Assemble(ss, a->devname,
+ a, NULL, c);
+ if (r == 0) {
+ a->assembled = 1;
+ successes++;
+ } else
+ failures++;
+ rv |= r;
+ cnt++;
+ }
+ } while (failures && successes);
+ if (c->homehost && cnt == 0) {
+ /* Maybe we can auto-assemble something.
+ * Repeatedly call Assemble in auto-assemble mode
+ * until it fails
+ */
+ int rv2;
+ int acnt;
+ ident->autof = c->autof;
+ do {
+ struct mddev_dev *devlist = conf_get_devs(); /* shadows the outer devlist */
+ acnt = 0;
+ do {
+ rv2 = Assemble(ss, NULL,
+ ident,
+ devlist, c);
+ if (rv2==0) {
+ cnt++;
+ acnt++;
+ }
+ } while (rv2!=2); /* NOTE(review): 2 presumably means "nothing left to assemble" - confirm against Assemble() */
+ /* In case there are stacked devices, we need to go around again */
+ } while (acnt);
+ if (cnt == 0 && rv == 0) {
+ pr_err("No arrays found in config file or automatically\n");
+ rv = 1;
+ } else if (cnt)
+ rv = 0;
+ } else if (cnt == 0 && rv == 0) {
+ pr_err("No arrays found in config file\n");
+ rv = 1;
+ }
+ map_unlock(&map);
+ return rv;
+}
+
+static int misc_scan(char devmode, struct context *c)
+{
+	/* Run --detail (devmode 'D') or --wait-clean (any other devmode)
+	 * on every array reported by mdstat_read().  Two passes are made:
+	 * pass 0 handles arrays whose metadata_version does not start
+	 * with "external:/", pass 1 handles those where it does.
+	 * Returns the OR of all the per-array results.
+	 */
+	struct mdstat_ent *stat_list = mdstat_read(0, 1);
+	struct mdstat_ent *ent;
+	struct map_ent *map = NULL;
+	int result = 0;
+	int pass;
+
+	for (pass = 0; pass < 2; pass++) {
+		for (ent = stat_list; ent; ent = ent->next) {
+			struct map_ent *mapped;
+			struct stat sb;
+			char *path = NULL;
+			int is_member = 0;
+
+			if (ent->metadata_version &&
+			    strncmp(ent->metadata_version,
+				    "external:/", 10) == 0)
+				is_member = 1;
+			if (is_member != pass)
+				continue;
+
+			/* Prefer the path recorded in the map file, as long
+			 * as it is a real name that still exists.
+			 */
+			mapped = map_by_devnm(&map, ent->devnm);
+			if (mapped && mapped->path &&
+			    strcmp(mapped->path, "/unknown") != 0)
+				path = mapped->path;
+			if (path == NULL || stat(path, &sb) != 0)
+				path = get_md_name(ent->devnm);
+
+			if (path == NULL) {
+				pr_err("cannot find device file for %s\n",
+				       ent->dev);
+				continue;
+			}
+
+			if (devmode == 'D')
+				result |= Detail(path, c);
+			else
+				result |= WaitClean(path, -1, c->verbose);
+			put_md_name(path);
+		}
+	}
+	free_mdstat(stat_list);
+	return result;
+}
+
+static int stop_scan(int verbose)
+{
+	/* Apply --stop to every array listed by mdstat_read().
+	 * Arrays can be stacked, so the list is re-read and walked again
+	 * for as long as something failed to stop.  Once a pass stops
+	 * nothing at all, exactly one more (final) pass is made, with the
+	 * last argument of Manage_stop() cleared.
+	 * Returns 1 if the last pass still had a failure, otherwise 0.
+	 */
+	int made_progress = 1;
+	int final_pass = 0;
+	int failed;
+
+	do {
+		struct mdstat_ent *list = mdstat_read(0, 0);
+		struct mdstat_ent *ent;
+
+		if (!made_progress)
+			final_pass = 1;
+		made_progress = 0;
+		failed = 0;
+
+		for (ent = list; ent; ent = ent->next) {
+			char *path = get_md_name(ent->devnm);
+			int fd;
+
+			if (path == NULL) {
+				pr_err("cannot find device file for %s\n",
+				       ent->dev);
+				continue;
+			}
+
+			fd = open_mddev(path, 1);
+			if (fd >= 0) {
+				if (Manage_stop(path, fd, verbose, !final_pass) == 0)
+					made_progress = 1;
+				else
+					failed = 1;
+				close(fd);
+			}
+
+			put_md_name(path);
+		}
+		free_mdstat(list);
+	} while (failed && !final_pass);
+
+	return failed ? 1 : 0;
+}
+
+static int misc_list(struct mddev_dev *devlist,
+ struct mddev_ident *ident,
+ char *dump_directory,
+ struct supertype *ss, struct context *c)
+{ /* Walk 'devlist' and perform, for each device, the misc-mode
+ * operation selected by that device's 'disposition'.
+ * Most operations work from the device name alone; 'R', 'S', 'o'
+ * and 'w' first open the device and act on the descriptor.
+ * Returns the OR of all the per-device results.
+ */
+ struct mddev_dev *dv;
+ int rv = 0;
+
+ for (dv=devlist ; dv; dv=(rv & 16) ? NULL : dv->next) { /* stops early once bit 16 is set in rv */
+ int mdfd;
+
+ switch(dv->disposition) {
+ case 'D':
+ rv |= Detail(dv->devname, c);
+ continue;
+ case KillOpt: /* Zero superblock */
+ if (ss)
+ rv |= Kill(dv->devname, ss, c->force, c->verbose,0);
+ else {
+ int v = c->verbose; /* only the first Kill() call uses the requested verbosity */
+ do {
+ rv |= Kill(dv->devname, NULL, c->force, v, 0);
+ v = -1;
+ } while (rv == 0); /* tests the accumulated rv, not just this device's result */
+ rv &= ~2; /* NOTE(review): 2 presumably is Kill()'s "no superblock found" result, ending the loop without being an error - confirm */
+ }
+ continue;
+ case 'Q':
+ rv |= Query(dv->devname); continue;
+ case 'X':
+ rv |= ExamineBitmap(dv->devname, c->brief, ss); continue;
+ case ExamineBB:
+ rv |= ExamineBadblocks(dv->devname, c->brief, ss); continue;
+ case 'W':
+ case WaitOpt:
+ rv |= Wait(dv->devname); continue;
+ case Waitclean:
+ rv |= WaitClean(dv->devname, -1, c->verbose); continue;
+ case KillSubarray:
+ rv |= Kill_subarray(dv->devname, c->subarray, c->verbose);
+ continue;
+ case UpdateSubarray:
+ if (c->update == NULL) {
+ pr_err("-U/--update must be specified with --update-subarray\n");
+ rv |= 1;
+ continue;
+ }
+ rv |= Update_subarray(dv->devname, c->subarray,
+ c->update, ident, c->verbose);
+ continue;
+ case Dump:
+ rv |= Dump_metadata(dv->devname, dump_directory, c, ss);
+ continue;
+ case Restore:
+ rv |= Restore_metadata(dv->devname, dump_directory, c, ss,
+ (dv == devlist && dv->next == NULL)); /* true when this is the only device listed */
+ continue;
+ case Action:
+ rv |= SetAction(dv->devname, c->action);
+ continue;
+ }
+ if (dv->devname[0] == '/') /* any disposition not handled above needs an open descriptor */
+ mdfd = open_mddev(dv->devname, 1);
+ else {
+ mdfd = open_dev(dv->devname);
+ if (mdfd < 0)
+ pr_err("Cannot open %s\n", dv->devname);
+ }
+ if (mdfd>=0) {
+ switch(dv->disposition) {
+ case 'R':
+ c->runstop = 1;
+ rv |= Manage_run(dv->devname, mdfd, c); break;
+ case 'S':
+ rv |= Manage_stop(dv->devname, mdfd, c->verbose, 0); break;
+ case 'o':
+ rv |= Manage_ro(dv->devname, mdfd, 1); break;
+ case 'w':
+ rv |= Manage_ro(dv->devname, mdfd, -1); break;
+ }
+ close(mdfd);
+ } else
+ rv |= 1; /* the device could not be opened */
+ }
+ return rv;
+}
+
+int SetAction(char *dev, char *action)
+{
+	/* Write 'action' to the "sync_action" sysfs attribute of the md
+	 * array whose device node is 'dev'.
+	 * Returns 0 on success.  Returns 1, after reporting the problem
+	 * with pr_err(), if 'dev' cannot be opened, if sysfs_init()
+	 * leaves sys_name empty, or if the sysfs write fails.
+	 */
+	int fd = open(dev, O_RDONLY);
+	struct mdinfo mdi;
+
+	if (fd < 0) {
+		pr_err("Couldn't open %s: %s\n", dev, strerror(errno));
+		return 1;
+	}
+	/* The descriptor is only needed while sysfs_init() fills in 'mdi' */
+	sysfs_init(&mdi, fd, NULL);
+	close(fd);
+	if (!mdi.sys_name[0]) {
+		pr_err("%s is not an md array\n", dev);
+		return 1;
+	}
+
+	if (sysfs_set_str(&mdi, NULL, "sync_action", action) < 0) {
+		pr_err("Could not set action for %s to %s: %s\n",
+		       dev, action, strerror(errno));
+		return 1;
+	}
+	return 0;
+}
diff --git a/mdadm.conf-example b/mdadm.conf-example
new file mode 100644
index 00000000..d9d8d396
--- /dev/null
+++ b/mdadm.conf-example
@@ -0,0 +1,65 @@
+# mdadm configuration file
+#
+# mdadm will function properly without the use of a configuration file,
+# but this file is useful for keeping track of arrays and member disks.
+# In general, a mdadm.conf file is created, and updated, after arrays
+# are created. This is the opposite behavior of /etc/raidtab which is
+# created prior to array construction.
+#
+#
+# the config file takes two types of lines:
+#
+# DEVICE lines specify a list of devices of where to look for
+# potential member disks
+#
+# ARRAY lines specify information about how to identify arrays
+# so that they can be activated
+#
+# You can have more than one device line and use wild cards. The first
+# example includes the first partition of SCSI disks /dev/sdb,
+# /dev/sdc, /dev/sdd, /dev/sdj, /dev/sdk, and /dev/sdl. The second
+# line looks for array slices on IDE disks.
+#
+#DEVICE /dev/sd[bcdjkl]1
+#DEVICE /dev/hda1 /dev/hdb1
+#
+# If you mount devfs on /dev, then a suitable way to list all devices is:
+#DEVICE /dev/discs/*/*
+#
+#
+# The AUTO line can control which arrays get assembled by auto-assembly,
+# meaning either "mdadm -As" when there are no 'ARRAY' lines in this file,
+# or "mdadm --incremental" when the array found is not listed in this file.
+# By default, all arrays that are found are assembled.
+# If you want to ignore all DDF arrays (maybe they are managed by dmraid),
+# and only assemble 1.x arrays which are marked for 'this' homehost,
+# but assemble all others, then use
+#AUTO -ddf homehost -1.x +all
+#
+# ARRAY lines specify an array to assemble and a method of identification.
+# Arrays can currently be identified by using a UUID, superblock minor number,
+# or a listing of devices.
+#
+# super-minor is usually the minor number of the metadevice
+# UUID is the Universally Unique Identifier for the array
+# Each can be obtained using
+#
+# mdadm -D <md>
+#
+#ARRAY /dev/md0 UUID=3aaa0122:29827cfa:5331ad66:ca767371
+#ARRAY /dev/md1 super-minor=1
+#ARRAY /dev/md2 devices=/dev/hda1,/dev/hdb1
+#
+# ARRAY lines can also specify a "spare-group" for each array. mdadm --monitor
+# will then move a spare between arrays in a spare-group if one array has a failed
+# drive but no spare
+#ARRAY /dev/md4 uuid=b23f3c6d:aec43a9f:fd65db85:369432df spare-group=group1
+#ARRAY /dev/md5 uuid=19464854:03f71b1b:e0df2edd:246cc977 spare-group=group1
+#
+# When used in --follow (aka --monitor) mode, mdadm needs a
+# mail address and/or a program. This can be given with "mailaddr"
+# and "program" lines so that monitoring can be started using
+# mdadm --follow --scan & echo $! > /run/mdadm/mon.pid
+# If the lines are not found, mdadm will exit quietly
+#MAILADDR root@mydomain.tld
+#PROGRAM /usr/sbin/handle-mdadm-events
diff --git a/mdadm.conf.5 b/mdadm.conf.5
new file mode 100644
index 00000000..542e2635
--- /dev/null
+++ b/mdadm.conf.5
@@ -0,0 +1,641 @@
+.\" Copyright Neil Brown and others.
+.\" This program is free software; you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation; either version 2 of the License, or
+.\" (at your option) any later version.
+.\" See file COPYING in distribution for details.
+.TH MDADM.CONF 5
+.SH NAME
+mdadm.conf \- configuration for management of Software RAID with mdadm
+.SH SYNOPSIS
+/etc/mdadm/mdadm.conf
+.SH DESCRIPTION
+.PP
+.I mdadm
+is a tool for creating, managing, and monitoring RAID devices using the
+.B md
+driver in Linux.
+.PP
+Some common tasks, such as assembling all arrays, can be simplified
+by describing the devices and arrays in this configuration file.
+
+.SS SYNTAX
+The file should be seen as a collection of words separated by white
+space (space, tab, or newline).
+Any word that begins with a hash sign (#) starts a comment and that
+word together with the remainder of the line is ignored.
+
+Spaces can be included in a word using quotation characters. Either
+single quotes
+.RB ( ' )
+or double quotes (\fB"\fP)
+may be used. All the characters from one quotation character to
+the next identical character are protected and will not be used to
+separate words or to start new quoted strings. To include a single quote
+it must be between double quotes. To include a double quote it must
+be between single quotes.
+
+Any line that starts with white space (space or tab) is treated as
+though it were a continuation of the previous line.
+
+Empty lines are ignored, but otherwise each (non continuation) line
+must start with a keyword as listed below. The keywords are case
+insensitive and can be abbreviated to 3 characters.
+
+The keywords are:
+.TP
+.B DEVICE
+A
+.B device
+line lists the devices (whole devices or partitions) that might contain
+a component of an MD array. When looking for the components of an
+array,
+.I mdadm
+will scan these devices (or any devices listed on the command line).
+
+The
+.B device
+line may contain a number of different devices (separated by spaces)
+and each device name can contain wild cards as defined by
+.BR glob (7).
+
+Also, there may be several device lines present in the file.
+
+Alternatively, a
+.B device
+line can contain either or both of the words
+.B containers
+and
+.BR partitions .
+The word
+.B containers
+will cause
+.I mdadm
+to look for assembled CONTAINER arrays and include them as a source
+for assembling further arrays.
+
+The word
+.I partitions
+will cause
+.I mdadm
+to read
+.I /proc/partitions
+and include all devices and partitions found therein.
+.I mdadm
+does not use the names from
+.I /proc/partitions
+but only the major and minor device numbers. It scans
+.I /dev
+to find the name that matches the numbers.
+
+If no DEVICE line is present, then "DEVICE partitions containers" is assumed.
+
+For example:
+.IP
+DEVICE /dev/hda* /dev/hdc*
+.br
+DEV /dev/sd*
+.br
+DEVICE /dev/disk/by-path/pci*
+.br
+DEVICE partitions
+
+.TP
+.B ARRAY
+The ARRAY lines identify actual arrays. The second word on the line
+may be the name of the device where the array is normally
+assembled, such as
+.B /dev/md1
+or
+.BR /dev/md/backup .
+If the name does not start with a slash
+.RB (' / '),
+it is treated as being in
+.BR /dev/md/ .
+Alternately the word
+.B <ignore>
+(complete with angle brackets) can be given in which case any array
+which matches the rest of the line will never be automatically assembled.
+If no device name is given,
+.I mdadm
+will use various heuristics to determine an appropriate name.
+
+Subsequent words identify the array, or identify the array as a member
+of a group. If multiple identities are given,
+then a component device must match ALL identities to be considered a
+match. Each identity word has a tag, an equals sign, and some value.
+The tags are:
+.RS 4
+.TP
+.B uuid=
+The value should be a 128 bit uuid in hexadecimal, with punctuation
+interspersed if desired. This must match the uuid stored in the
+superblock.
+.TP
+.B name=
+The value should be a simple textual name as was given to
+.I mdadm
+when the array was created. This must match the name stored in the
+superblock on a device for that device to be included in the array.
+Not all superblock formats support names.
+.TP
+.B super\-minor=
+The value is an integer which indicates the minor number that was
+stored in the superblock when the array was created. When an array is
+created as /dev/mdX, then the minor number X is stored.
+.TP
+.B devices=
+The value is a comma separated list of device names or device name
+patterns.
+Only devices with names which match one entry in the list will be used
+to assemble the array. Note that the devices
+listed there must also be listed on a DEVICE line.
+.TP
+.B level=
+The value is a RAID level. This is not normally used to
+identify an array, but is supported so that the output of
+
+.B "mdadm \-\-examine \-\-scan"
+
+can be used directly in the configuration file.
+.TP
+.B num\-devices=
+The value is the number of devices in a complete active array. As with
+.B level=
+this is mainly for compatibility with the output of
+
+.BR "mdadm \-\-examine \-\-scan" .
+
+.TP
+.B spares=
+The value is a number of spare devices to expect the array to have.
+The sole use of this keyword and value is as follows:
+.B mdadm \-\-monitor
+will report an array if it is found to have fewer than this number of
+spares when
+.B \-\-monitor
+starts or when
+.B \-\-oneshot
+is used.
+
+.TP
+.B spare\-group=
+The value is a textual name for a group of arrays. All arrays with
+the same
+.B spare\-group
+name are considered to be part of the same group. The significance of
+a group of arrays is that
+.I mdadm
+will, when monitoring the arrays, move a spare drive from one array in
+a group to another array in that group if the first array had a failed
+or missing drive but no spare.
+
+.TP
+.B auto=
+This option is rarely needed with mdadm-3.0, particularly if used with
+the Linux kernel v2.6.28 or later.
+It tells
+.I mdadm
+whether to use partitionable arrays or non-partitionable arrays and,
+in the absence of
+.IR udev ,
+how many partition devices to create. From 2.6.28 all md array
+devices are partitionable, hence this option is not needed.
+
+The value of this option can be "yes" or "md" to indicate that a
+traditional, non-partitionable md array should be created, or "mdp",
+"part" or "partition" to indicate that a partitionable md array (only
+available in linux 2.6 and later) should be used. This latter set can
+also have a number appended to indicate how many partitions to create
+device files for, e.g.
+.BR auto=mdp5 .
+The default is 4.
+
+.TP
+.B bitmap=
+The option specifies a file in which a write-intent bitmap should be
+found. When assembling the array,
+.I mdadm
+will provide this file to the
+.B md
+driver as the bitmap file. This has the same function as the
+.B \-\-bitmap\-file
+option to
+.BR \-\-assemble .
+
+.TP
+.B metadata=
+Specify the metadata format that the array has. This is mainly
+recognised for compatibility with the output of
+.BR "mdadm \-Es" .
+
+.TP
+.B container=
+Specify that this array is a member array of some container. The
+value given can be either a path name in /dev, or a UUID of the
+container array.
+
+.TP
+.B member=
+Specify that this array is a member array of some container. Each
+type of container has some way to enumerate member arrays, often a
+simple sequence number. The value identifies which member of a
+container the array is. It will usually accompany a "container=" word.
+.RE
+
+.TP
+.B MAILADDR
+The
+.B mailaddr
+line gives an E-mail address that alerts should be
+sent to when
+.I mdadm
+is running in
+.B \-\-monitor
+mode (and was given the
+.B \-\-scan
+option). There should only be one
+.B MAILADDR
+line and it should have only one address. Any subsequent addresses
+are silently ignored.
+
+.TP
+.B MAILFROM
+The
+.B mailfrom
+line (which can only be abbreviated to at least 5 characters) gives an
+address to appear in the "From" address for alert mails. This can be
+useful if you want to explicitly set a domain, as the default from
+address is "root" with no domain. All words on this line are
+catenated with spaces to form the address.
+
+Note that this value cannot be set via the
+.I mdadm
+commandline. It is only settable via the config file.
+
+.TP
+.B PROGRAM
+The
+.B program
+line gives the name of a program to be run when
+.B "mdadm \-\-monitor"
+detects potentially interesting events on any of the arrays that it
+is monitoring. This program gets run with two or three arguments, they
+being the Event, the md device, and possibly the related component
+device.
+
+There should only be one
+.B program
+line and it should be given only one program.
+
+
+.TP
+.B CREATE
+The
+.B create
+line gives default values to be used when creating arrays, new members
+of arrays, and device entries for arrays.
+These include:
+
+.RS 4
+.TP
+.B owner=
+.TP
+.B group=
+These can give user/group ids or names to use instead of system
+defaults (root/wheel or root/disk).
+.TP
+.B mode=
+An octal file mode such as 0660 can be given to override the default
+of 0600.
+.TP
+.B auto=
+This corresponds to the
+.B \-\-auto
+flag to mdadm. Give
+.BR yes ,
+.BR md ,
+.BR mdp ,
+.B part
+\(em possibly followed by a number of partitions \(em to indicate how
+missing device entries should be created.
+
+.TP
+.B metadata=
+The name of the metadata format to use if none is explicitly given.
+This can be useful to impose a system-wide default of version-1 superblocks.
+
+.TP
+.B symlinks=no
+Normally when creating devices in
+.B /dev/md/
+.I mdadm
+will create a matching symlink from
+.B /dev/
+with a name starting
+.B md
+or
+.BR md_ .
+Give
+.B symlinks=no
+to suppress this symlink creation.
+
+.TP
+.B names=yes
+Since Linux 2.6.29 it has been possible to create
+.B md
+devices with a name like
+.B md_home
+rather than just a number, like
+.BR md3 .
+.I mdadm
+will use the numeric alternative by default as other tools that interact
+with md arrays may expect only numbers.
+If
+.B names=yes
+is given in
+.I mdadm.conf
+then
+.I mdadm
+will use a name when appropriate.
+If
+.B names=no
+is given, then non-numeric
+.I md
+device names will not be used even if the default changes in a future
+release of
+.IR mdadm .
+
+.TP
+.B bbl=no
+By default,
+.I mdadm
+will reserve space for a bad block list (bbl) on all devices
+included in or added to any array that supports them. Setting
+.B bbl=no
+will prevent this, so newly added devices will not have a bad
+block log.
+.RE
+
+.TP
+.B HOMEHOST
+The
+.B homehost
+line gives a default value for the
+.B \-\-homehost=
+option to mdadm. There should normally be only one other word on the line.
+It should either be a host name, or one of the special words
+.BR <system>,
+.B <none>
+and
+.BR <ignore> .
+If
+.B <system>
+is given, then the
+.BR gethostname ( 2 )
+system call is used to get the host name. This is the default.
+
+If
+.B <ignore>
+is given, then a flag is set so that when arrays are being
+auto-assembled the checking of the recorded
+.I homehost
+is disabled.
+If
+.B <ignore>
+is given it is also possible to give an explicit name which will be
+used when creating arrays. This is the only case when there can be
+more than one other word on the
+.B HOMEHOST
+line. If there are other words, or other
+.B HOMEHOST
+lines, they are silently ignored.
+
+If
+.B <none>
+is given, then the default of using
+.BR gethostname ( 2 )
+is over-ridden and no homehost name is assumed.
+
+When arrays are created, this host name will be stored in the
+metadata. When arrays are assembled using auto-assembly, arrays which
+do not record the correct homehost name in their metadata will be
+assembled using a "foreign" name. A "foreign" name always ends with a
+digit string preceded by an underscore to differentiate it
+from any possible local name. e.g.
+.B /dev/md/1_1
+or
+.BR /dev/md/home_0 .
+.TP
+.B AUTO
+A list of names of metadata formats can be given, each preceded by a
+plus or minus sign. Also the word
+.I homehost
+is allowed as is
+.I all
+preceded by plus or minus sign.
+.I all
+is usually last.
+
+When
+.I mdadm
+is auto-assembling an array, either via
+.I \-\-assemble
+or
+.I \-\-incremental
+and it finds metadata of a given type, it checks that metadata type
+against those listed in this line. The first match wins, where
+.I all
+matches anything.
+If a match is found that was preceded by a plus sign, the auto
+assembly is allowed. If the match was preceded by a minus sign, the
+auto assembly is disallowed. If no match is found, the auto assembly
+is allowed.
+
+If the metadata indicates that the array was created for
+.I this
+host, and the word
+.I homehost
+appears before any other match, then the array is treated as a valid
+candidate for auto-assembly.
+
+This can be used to disable all auto-assembly (so that only arrays
+explicitly listed in mdadm.conf or on the command line are assembled),
+or to disable assembly of certain metadata types which might be
+handled by other software. It can also be used to disable assembly of
+all foreign arrays - normally such arrays are assembled but given a
+non-deterministic name in
+.BR /dev/md/ .
+
+The known metadata types are
+.BR 0.90 ,
+.BR 1.x ,
+.BR ddf ,
+.BR imsm .
+
+.B AUTO
+should be given at most once. Subsequent lines are silently ignored.
+Thus an earlier config file in a config directory will over-ride
+the setting in a later config file.
+
+.TP
+.B POLICY
+This is used to specify what automatic behavior is allowed on devices
+newly appearing in the system and provides a way of marking spares that can
+be moved to other arrays as well as the migration domains.
+.I Domain
+can be defined through
+.I policy
+line by specifying a domain name for a number of paths from
+.BR /dev/disk/by-path/ .
+A device may belong to several domains. The domain of an array is a union
+of domains of all devices in that array. A spare can be automatically
+moved from one array to another if the set of the destination array's
+.I domains
+contains all the
+.I domains
+of the new disk or if both arrays have the same
+.IR spare-group .
+
+To update hot plug configuration it is necessary to execute
+.B mdadm \-\-udev\-rules
+command after changing the config file.
+
+Key words used in the
+.I POLICY
+line and supported values are:
+
+.RS 7
+.TP
+.B domain=
+any arbitrary string
+.TP
+.B metadata=
+0.90, 1.x, ddf, or imsm
+.TP
+.B path=
+file glob matching anything from
+.B /dev/disk/by-path
+.TP
+.B type=
+either
+.B disk
+or
+.BR part .
+.TP
+.B action=
+include, re-add, spare, spare-same-slot, or force-spare
+.TP
+.B auto=
+yes, no, or homehost.
+
+.P
+The
+.I action
+item determines the automatic behavior allowed for devices matching the
+.I path
+and
+.I type
+in the same line. If a device matches several lines with different
+.I actions
+then the most permissive will apply. The ordering of policy lines
+is irrelevant to the end result.
+.TP
+.B include
+allows adding a disk to an array if metadata on that disk matches that array
+.TP
+.B re\-add
+will include the device in the array if it appears to be a current member
+or a member that was recently removed and the array has a
+write-intent-bitmap to allow the
+.B re\-add
+functionality.
+.TP
+.B spare
+as above and additionally: if the device is bare it can
+become a spare if there is any array that it is a candidate for based
+on domains and metadata.
+.TP
+.B spare\-same\-slot
+as above and additionally if given slot was used by an array that went
+degraded recently and the device plugged in has no metadata then it will
+be automatically added to that array (or its container)
+.TP
+.B force\-spare
+as above and the disk will become a spare in remaining cases
+.RE
+
+.SH EXAMPLE
+DEVICE /dev/sd[bcdjkl]1
+.br
+DEVICE /dev/hda1 /dev/hdb1
+
+# /dev/md0 is known by its UUID.
+.br
+ARRAY /dev/md0 UUID=3aaa0122:29827cfa:5331ad66:ca767371
+.br
+# /dev/md1 contains all devices with a minor number of
+.br
+# 1 in the superblock.
+.br
+ARRAY /dev/md1 super\-minor=1
+.br
+# /dev/md2 is made from precisely these two devices
+.br
+ARRAY /dev/md2 devices=/dev/hda1,/dev/hdb1
+
+# /dev/md4 and /dev/md5 are a spare-group and spares
+.br
+# can be moved between them
+.br
+ARRAY /dev/md4 uuid=b23f3c6d:aec43a9f:fd65db85:369432df
+.br
+ spare\-group=group1
+.br
+ARRAY /dev/md5 uuid=19464854:03f71b1b:e0df2edd:246cc977
+.br
+ spare\-group=group1
+.br
+# /dev/md/home is created, if need be, as a partitionable md array;
+.br
+# any spare device number is allocated.
+.br
+ARRAY /dev/md/home UUID=9187a482:5dde19d9:eea3cc4a:d646ab8b
+.br
+ auto=part
+.br
+# The name of this array contains a space.
+.br
+ARRAY /dev/md9 name='Data Storage'
+.sp
+POLICY domain=domain1 metadata=imsm path=pci-0000:00:1f.2-scsi-*
+.br
+ action=spare
+.br
+POLICY domain=domain1 metadata=imsm path=pci-0000:04:00.0-scsi-[01]*
+.br
+ action=include
+.br
+# One domain comprising devices attached to specified paths is defined.
+.br
+# Bare device matching first path will be made an imsm spare on hot plug.
+.br
+# If more than one array is created on devices belonging to domain1 and
+.br
+# one of them becomes degraded, then any imsm spare matching any path for
+.br
+# given domain name can be migrated.
+.br
+MAILADDR root@mydomain.tld
+.br
+PROGRAM /usr/sbin/handle\-mdadm\-events
+.br
+CREATE group=system mode=0640 auto=part\-8
+.br
+HOMEHOST <system>
+.br
+AUTO +1.x homehost \-all
+
+.SH SEE ALSO
+.BR mdadm (8),
+.BR md (4).
diff --git a/mdadm.h b/mdadm.h
new file mode 100644
index 00000000..fc1fd318
--- /dev/null
+++ b/mdadm.h
@@ -0,0 +1,1577 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#define _GNU_SOURCE
+#define _FILE_OFFSET_BITS 64
+#include <unistd.h>
+#ifdef __GLIBC__
+extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
+#elif !defined(lseek64)
+# if defined(__NO_STAT64) || __WORDSIZE != 32
+# define lseek64 lseek
+# endif
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <syslog.h>
+#ifdef __dietlibc__
+#include <strings.h>
+/* dietlibc has deprecated random and srandom!! */
+#define random rand
+#define srandom srand
+#endif
+
+#include <linux/kdev_t.h>
+/*#include <linux/fs.h> */
+#include <sys/mount.h>
+#include <asm/types.h>
+#include <sys/ioctl.h>
+#define MD_MAJOR 9
+#define MdpMinorShift 6
+
+#ifndef BLKGETSIZE64
+#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */
+#endif
+
+#define DEFAULT_CHUNK 512
+#define DEFAULT_BITMAP_CHUNK 4096
+#define DEFAULT_BITMAP_DELAY 5
+#define DEFAULT_MAX_WRITE_BEHIND 256
+
+/* MAP_DIR should be somewhere that persists across the pivotroot
+ * from early boot to late boot.
+ * /run seems to have emerged as the best standard.
+ */
+#ifndef MAP_DIR
+#define MAP_DIR "/run/mdadm"
+#endif /* MAP_DIR */
+/* MAP_FILE is what we name the map file we put in MAP_DIR, in case you
+ * want something other than the default of "map"
+ */
+#ifndef MAP_FILE
+#define MAP_FILE "map"
+#endif /* MAP_FILE */
+/* MDMON_DIR is where pid and socket files used for communicating
+ * with mdmon normally live. /var/run/mdadm would be the obvious
+ * choice, but if mdmon is needed at early boot then it needs to write
+ * there prior to /var/run being mounted read/write, and it also then
+ * needs to persist beyond when /var/run is mounted read-only. So, to
+ * be safe, the default is somewhere that is read/write early in the
+ * boot process and stays up as long as possible during shutdown.
+ */
+#ifndef MDMON_DIR
+#define MDMON_DIR "/run/mdadm"
+#endif /* MDMON_DIR */
+
+/* FAILED_SLOTS_DIR is where to save files storing recent removal of array
+ * member in order to allow future reuse of disk inserted in the same
+ * slot for array recovery
+ */
+#ifndef FAILED_SLOTS_DIR
+#define FAILED_SLOTS_DIR "/run/mdadm/failed-slots"
+#endif /* FAILED_SLOTS */
+
+#include "md_u.h"
+#include "md_p.h"
+#include "bitmap.h"
+#include "msg.h"
+
+#include <endian.h>
+/* Redhat don't like to #include <asm/byteorder.h>, and
+ * some time include <linux/byteorder/xxx_endian.h> isn't enough,
+ * and there is no standard conversion function so... */
+/* And dietlibc doesn't think byteswap is ok, so..
+ * bswap_16/32/64 are defined here by hand: each one masks out every
+ * byte of its argument and shifts it to the mirrored position.
+ * The argument is expanded once per byte, so do not pass an
+ * expression with side effects.
+ */
+/* #include <byteswap.h> */
+#define bswap_16(x) (((x) & 0x00ffU) << 8 | \
+ ((x) & 0xff00U) >> 8)
+#define bswap_32(x) (((x) & 0x000000ffU) << 24 | \
+ ((x) & 0xff000000U) >> 24 | \
+ ((x) & 0x0000ff00U) << 8 | \
+ ((x) & 0x00ff0000U) >> 8)
+#define bswap_64(x) (((x) & 0x00000000000000ffULL) << 56 | \
+ ((x) & 0xff00000000000000ULL) >> 56 | \
+ ((x) & 0x000000000000ff00ULL) << 40 | \
+ ((x) & 0x00ff000000000000ULL) >> 40 | \
+ ((x) & 0x0000000000ff0000ULL) << 24 | \
+ ((x) & 0x0000ff0000000000ULL) >> 24 | \
+ ((x) & 0x00000000ff000000ULL) << 8 | \
+ ((x) & 0x000000ff00000000ULL) >> 8)
+
+#if !defined(__KLIBC__) /* NOTE(review): presumably klibc supplies these conversions itself - confirm */
+#if BYTE_ORDER == LITTLE_ENDIAN /* little-endian host: le conversions are plain casts, be conversions swap bytes */
+#define __cpu_to_le16(_x) (unsigned int)(_x) /* note: the 16-bit forms cast to unsigned int, not to a 16-bit type */
+#define __cpu_to_le32(_x) (unsigned int)(_x)
+#define __cpu_to_le64(_x) (unsigned long long)(_x)
+#define __le16_to_cpu(_x) (unsigned int)(_x)
+#define __le32_to_cpu(_x) (unsigned int)(_x)
+#define __le64_to_cpu(_x) (unsigned long long)(_x)
+
+#define __cpu_to_be16(_x) bswap_16(_x)
+#define __cpu_to_be32(_x) bswap_32(_x)
+#define __cpu_to_be64(_x) bswap_64(_x)
+#define __be16_to_cpu(_x) bswap_16(_x)
+#define __be32_to_cpu(_x) bswap_32(_x)
+#define __be64_to_cpu(_x) bswap_64(_x)
+#elif BYTE_ORDER == BIG_ENDIAN /* big-endian host: the roles are reversed, le conversions swap bytes */
+#define __cpu_to_le16(_x) bswap_16(_x)
+#define __cpu_to_le32(_x) bswap_32(_x)
+#define __cpu_to_le64(_x) bswap_64(_x)
+#define __le16_to_cpu(_x) bswap_16(_x)
+#define __le32_to_cpu(_x) bswap_32(_x)
+#define __le64_to_cpu(_x) bswap_64(_x)
+
+#define __cpu_to_be16(_x) (unsigned int)(_x)
+#define __cpu_to_be32(_x) (unsigned int)(_x)
+#define __cpu_to_be64(_x) (unsigned long long)(_x)
+#define __be16_to_cpu(_x) (unsigned int)(_x)
+#define __be32_to_cpu(_x) (unsigned int)(_x)
+#define __be64_to_cpu(_x) (unsigned long long)(_x)
+#else
+# error "unknown endianness."
+#endif
+#endif /* __KLIBC__ */
+
+/*
+ * min()/max()/clamp() macros that also do
+ * strict type-checking.. See the
+ * "unnecessary" pointer comparison.
+ *
+ * Only min() and max() are defined right here.  Each argument is
+ * copied into a typeof() temporary, so it is evaluated exactly once.
+ * The (void) comparison of the two temporaries' addresses has no
+ * runtime effect; it exists so that the compiler complains about a
+ * comparison of distinct pointer types when x and y differ in type.
+ * Both macros rely on the GNU statement-expression and typeof
+ * extensions.
+ */
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+
+#define max(x, y) ({ \
+ typeof(x) _max1 = (x); \
+ typeof(y) _max2 = (y); \
+ (void) (&_max1 == &_max2); \
+ _max1 > _max2 ? _max1 : _max2; })
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) /* element count of x; x must be a true array, not a pointer */
+
+/* general information that might be extracted from a superblock.
+ * Instances can be chained through the 'next' and 'devs' pointers
+ * near the end of the structure.
+ */
+struct mdinfo {
+ mdu_array_info_t array;
+ mdu_disk_info_t disk;
+ __u64 events;
+ int uuid[4];
+ char name[33];
+ unsigned long long data_offset;
+ unsigned long long new_data_offset;
+ unsigned long long component_size; /* same as array.size, except in
+ * sectors and up to 64bits.
+ */
+ unsigned long long custom_array_size; /* size for non-default sized
+ * arrays (in sectors)
+ */
+#define NO_RESHAPE 0
+#define VOLUME_RESHAPE 1
+#define CONTAINER_RESHAPE 2
+#define RESHAPE_NO_BACKUP 16 /* Mask 'or'ed in */
+ int reshape_active; /* presumably one of the *_RESHAPE values above,
+ * possibly with RESHAPE_NO_BACKUP or'ed in
+ * - confirm at the users
+ */
+ unsigned long long reshape_progress;
+ int recovery_blocked; /* for external metadata it
+ * indicates that there is
+ * reshape in progress in
+ * container,
+ * for native metadata it is
+ * reshape_active field mirror
+ */
+ /* During reshape we can sometimes change the data_offset to avoid
+ * over-writing still-valid data. We need to know if there is space.
+ * So getinfo_super will fill in space_before and space_after in sectors.
+ * data_offset can be increased or decreased by this amount.
+ */
+ unsigned long long space_before, space_after;
+ union { /* anonymous: the two positions below share storage */
+ unsigned long long resync_start; /* per-array resync position */
+ unsigned long long recovery_start; /* per-device rebuild position */
+ #define MaxSector (~0ULL) /* resync/recovery complete position */
+ };
+ long bitmap_offset; /* 0 == none, 1 == a file */
+ unsigned long safe_mode_delay; /* ms delay to mark clean */
+ int new_level, delta_disks, new_layout, new_chunk;
+ int errors;
+ unsigned long cache_size; /* size of raid456 stripe cache*/
+ int mismatch_cnt;
+ char text_version[50];
+
+ int container_member; /* for assembling external-metadata arrays
+ * This is to be used internally by metadata
+ * handler only */
+ int container_enough; /* flag external handlers can set to
+ * indicate that subarrays have not enough (-1),
+ * enough to start (0), or all expected disks (1) */
+ char sys_name[20]; /* SetAction() treats an empty sys_name after
+ * sysfs_init() as "not an md array"
+ */
+ struct mdinfo *devs;
+ struct mdinfo *next;
+
+ /* Device info for mdmon: */
+ int recovery_fd;
+ int state_fd;
+ #define DS_FAULTY 1
+ #define DS_INSYNC 2
+ #define DS_WRITE_MOSTLY 4
+ #define DS_SPARE 8
+ #define DS_BLOCKED 16
+ #define DS_REMOVE 1024
+ #define DS_UNBLOCK 2048
+ int prev_state, curr_state, next_state; /* presumably masks of the DS_* bits above - confirm */
+
+};
+
+struct createinfo { /* NOTE(review): the fields appear to mirror the
+ * CREATE keywords documented in mdadm.conf(5)
+ * (owner=, group=, auto=, mode=, symlinks=,
+ * names=, bbl=, metadata=) - confirm against
+ * the config parser
+ */
+ int uid;
+ int gid;
+ int autof;
+ int mode;
+ int symlinks;
+ int names;
+ int bblist;
+ struct supertype *supertype;
+};
+
+#define Name "mdadm"
+
+enum mode { /* operating modes; numbering starts at 1, so 0 names no mode */
+ ASSEMBLE=1,
+ BUILD,
+ CREATE,
+ MANAGE,
+ MISC,
+ MONITOR,
+ GROW,
+ INCREMENTAL,
+ AUTODETECT,
+ mode_count /* not a mode: one more than the last real value (AUTODETECT) */
+};
+
+extern char short_options[];
+extern char short_bitmap_options[];
+extern char short_bitmap_auto_options[];
+extern struct option long_options[];
+extern char Version[], Usage[], Help[], OptionHelp[],
+ *mode_help[],
+ Help_create[], Help_build[], Help_assemble[], Help_grow[],
+ Help_incr[],
+ Help_manage[], Help_misc[], Help_monitor[], Help_config[];
+
+/* for options that don't have short equivalents, we assign arbitrary
+ * numbers later than any 'short' character option.
+ * Starting at 300 keeps every value clear of the single-character
+ * option codes.  The enumerators are numbered implicitly, so their
+ * order determines their values.
+ * WaitOpt, Waitclean, KillSubarray, UpdateSubarray, Dump, Restore and
+ * Action also appear as case labels, next to character codes, in the
+ * misc-mode per-device loop (presumably a switch on
+ * mddev_dev.disposition - confirm).
+ */
+enum special_options {
+ AssumeClean = 300,
+ BitmapChunk,
+ WriteBehind,
+ ReAdd,
+ NoDegraded,
+ Sparc22,
+ BackupFile,
+ HomeHost,
+ AutoHomeHost,
+ Symlinks,
+ AutoDetect,
+ Waitclean,
+ DetailPlatform,
+ KillSubarray,
+ UpdateSubarray,
+ IncrementalPath,
+ NoSharing,
+ HelpOptions,
+ Brief,
+ ManageOpt,
+ Add,
+ AddSpare,
+ Remove,
+ Fail,
+ Replace,
+ With,
+ MiscOpt,
+ WaitOpt,
+ ConfigFile,
+ ChunkSize,
+ WriteMostly,
+ Layout,
+ Auto,
+ Force,
+ SuperMinor,
+ EMail,
+ ProgramOpt,
+ Increment,
+ Fork,
+ Bitmap,
+ RebuildMapOpt,
+ InvalidBackup,
+ UdevRules,
+ FreezeReshape,
+ Continue,
+ OffRootOpt,
+ Prefer,
+ KillOpt,
+ DataOffset,
+ ExamineBB,
+ Dump,
+ Restore,
+ Action,
+};
+
+enum prefix_standard { /* NOTE(review): presumably selects JEDEC vs IEC unit prefixes when sizes are printed - confirm at the users */
+ JEDEC,
+ IEC
+};
+
+/* structures read from config file */
+/* List of mddevice names and identifiers
+ * Identifiers can be:
+ * uuid=128-hex-uuid
+ * super-minor=decimal-minor-number-from-superblock
+ * devices=comma,separated,list,of,device,names,with,wildcards
+ *
+ * If multiple fields are present, the intersection of all matching
+ * devices is considered
+ *
+ * mdadm.conf(5) documents further ARRAY tags (name=, level=,
+ * num-devices=, spares=, spare-group=, auto=, bitmap=, metadata=,
+ * container=, member=); the remaining fields below presumably hold
+ * those values - confirm against the config parser.
+ * Entries form a singly linked list through 'next'.
+ */
+#define UnSet (0xfffe)
+struct mddev_ident {
+ char *devname;
+
+ int uuid_set;
+ int uuid[4];
+ char name[33];
+
+ int super_minor;
+
+ char *devices; /* comma separated list of device
+ * names with wild cards
+ */
+ int level;
+ int raid_disks;
+ int spare_disks;
+ struct supertype *st;
+ int autof; /* 1 for normal, 2 for partitioned */
+ char *spare_group;
+ char *bitmap_file;
+ int bitmap_fd;
+
+ char *container; /* /dev/whatever name of container, or
+ * uuid of container. You would expect
+ * this to be the 'devname' or UUID
+ * of some other entry.
+ */
+ char *member; /* subarray within a container */
+
+ struct mddev_ident *next;
+ union {
+ /* fields needed by different users of this structure */
+ int assembled; /* set when assembly succeeds */
+ };
+};
+
+struct context { /* option state handed around as one pointer; the
+ * misc-mode loop in mdadm.c reads it as 'c'
+ */
+ int readonly;
+ int runstop; /* misc mode sets this to 1 before calling Manage_run() */
+ int verbose;
+ int brief;
+ int force;
+ char *homehost;
+ int require_homehost;
+ char *prefer;
+ int export;
+ int test;
+ char *subarray; /* passed to Kill_subarray() and Update_subarray() */
+ char *update; /* -U/--update argument; --update-subarray is
+ * rejected while this is NULL
+ */
+ int scan;
+ int SparcAdjust;
+ int autof;
+ int delay;
+ int freeze_reshape;
+ char *backup_file;
+ int invalid_backup;
+ char *action; /* handed to SetAction(), which writes it to the
+ * array's "sync_action" sysfs attribute
+ */
+};
+
+struct shape { /* NOTE(review): looks like the array geometry and bitmap
+ * settings requested for an array, gathered in one
+ * place - confirm at the callers
+ */
+ int raiddisks;
+ int sparedisks;
+ int level;
+ int layout;
+ char *layout_str;
+ int chunk;
+ int bitmap_chunk;
+ char *bitmap_file;
+ int assume_clean;
+ int write_behind;
+ unsigned long long size;
+};
+
+/* List of device names - wildcards expanded */
+struct mddev_dev {
+ char *devname;
+ int disposition; /* 'a' for add, 'r' for remove, 'f' for fail,
+ * 'A' for re_add.
+ * Not set for names read from .config
+ */
+ char writemostly; /* 1 for 'set writemostly', 2 for 'clear writemostly' */
+ int used; /* set when used */
+ long long data_offset;
+ struct mddev_dev *next; /* next device in the singly linked list */
+};
+
+typedef struct mapping { /* one name/number pair; tables of these (r5layout[], r6layout[], pers[], modes[], faultylayout[]) are declared in this header and taken by map_num()/map_name() */
+ char *name;
+ int num;
+} mapping_t;
+
+struct mdstat_ent { /* one entry of the list (linked through 'next') returned by mdstat_read() */
+ char *dev;
+ char devnm[32];
+ int active;
+ char *level;
+ char *pattern; /* U for up, _ for down */
+ int percent; /* -1 if no resync */
+ int resync; /* 3 if check, 2 if reshape, 1 if resync, 0 if recovery */
+ int devcnt;
+ int raid_disks;
+ char * metadata_version;
+ struct dev_member { /* singly linked list of member names */
+ char *name;
+ struct dev_member *next;
+ } *members;
+ struct mdstat_ent *next;
+};
+
+extern struct mdstat_ent *mdstat_read(int hold, int start); /* returns a list linked through ->next; NOTE(review): meaning of 'hold' and 'start' is not visible in this header */
+extern void mdstat_close(void);
+extern void free_mdstat(struct mdstat_ent *ms); /* NOTE(review): presumably releases a list obtained from mdstat_read() - confirm */
+extern void mdstat_wait(int seconds);
+extern void mdstat_wait_fd(int fd, const sigset_t *sigmask);
+extern int mddev_busy(char *devnm);
+extern struct mdstat_ent *mdstat_by_component(char *name);
+extern struct mdstat_ent *mdstat_by_subdev(char *subdev, char *container);
+
+struct map_ent { /* one entry of the list handled by the map_*() functions declared below */
+ struct map_ent *next; /* next entry in the singly linked list */
+ char devnm[32]; /* same size as mdstat_ent.devnm and supertype.devnm */
+ char metadata[20];
+ int uuid[4]; /* same int[4] form used by mddev_ident.uuid and parse_uuid() */
+ int bad;
+ char *path;
+};
+extern int map_update(struct map_ent **mpp, char *devnm, char *metadata,
+ int uuid[4], char *path);
+extern void map_remove(struct map_ent **map, char *devnm);
+extern struct map_ent *map_by_uuid(struct map_ent **map, int uuid[4]);
+#ifdef MDASSEMBLE /* MDASSEMBLE builds get inline stubs: lookup by devnm always returns NULL and map_free() does nothing */
+static inline struct map_ent *map_by_devnm(struct map_ent **map, char *name)
+{
+ return NULL;
+}
+static inline void map_free(struct map_ent *map)
+{
+}
+#else /* all other builds use the external implementations */
+extern struct map_ent *map_by_devnm(struct map_ent **map, char *devnm);
+extern void map_free(struct map_ent *map);
+#endif
+extern struct map_ent *map_by_name(struct map_ent **map, char *name);
+extern void map_read(struct map_ent **melp);
+extern int map_write(struct map_ent *mel);
+extern void map_delete(struct map_ent **mapp, char *devnm);
+extern void map_add(struct map_ent **melp,
+ char *devnm, char *metadata, int uuid[4], char *path);
+extern int map_lock(struct map_ent **melp);
+extern void map_unlock(struct map_ent **melp);
+extern void map_fork(void);
+
+/* various details can be requested; each flag is a distinct bit so
+ * several can be OR-ed together.
+ * NOTE(review): presumably passed as 'options' to sysfs_read() - confirm. */
+enum sysfs_read_flags {
+ GET_LEVEL = (1 << 0),
+ GET_LAYOUT = (1 << 1),
+ GET_COMPONENT = (1 << 2),
+ GET_CHUNK = (1 << 3),
+ GET_CACHE = (1 << 4),
+ GET_MISMATCH = (1 << 5),
+ GET_VERSION = (1 << 6),
+ GET_DISKS = (1 << 7),
+ GET_DEGRADED = (1 << 8),
+ GET_SAFEMODE = (1 << 9),
+ GET_BITMAP_LOCATION = (1 << 10),
+
+ GET_DEVS = (1 << 20), /* gets role, major, minor */
+ GET_OFFSET = (1 << 21),
+ GET_SIZE = (1 << 22),
+ GET_STATE = (1 << 23),
+ GET_ERROR = (1 << 24),
+};
+
+/* If fd >= 0, get the array it is open on,
+ * else use devnm.
+ * NOTE(review): this text matches sysfs_init()/sysfs_read(), which take
+ * both 'fd' and 'devnm'; sysfs_open() directly below has no fd parameter.
+ */
+extern int sysfs_open(char *devnm, char *devname, char *attr);
+extern void sysfs_init(struct mdinfo *mdi, int fd, char *devnm);
+extern void sysfs_free(struct mdinfo *sra);
+extern struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options);
+extern int sysfs_attr_match(const char *attr, const char *str);
+extern int sysfs_match_word(const char *word, char **list);
+extern int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
+ char *name, char *val);
+extern int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev,
+ char *name, unsigned long long val);
+extern int sysfs_set_num_signed(struct mdinfo *sra, struct mdinfo *dev,
+ char *name, long long val); /* signed counterpart of sysfs_set_num() */
+extern int sysfs_uevent(struct mdinfo *sra, char *event);
+extern int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev,
+ char *name);
+extern int sysfs_fd_get_ll(int fd, unsigned long long *val);
+extern int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
+ char *name, unsigned long long *val);
+extern int sysfs_fd_get_two(int fd, unsigned long long *v1, unsigned long long *v2);
+extern int sysfs_get_two(struct mdinfo *sra, struct mdinfo *dev,
+ char *name, unsigned long long *v1, unsigned long long *v2);
+extern int sysfs_fd_get_str(int fd, char *val, int size);
+extern int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev,
+ char *name);
+extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
+ char *name, char *val, int size);
+extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms);
+extern int sysfs_set_array(struct mdinfo *info, int vers);
+extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume);
+extern int sysfs_disk_to_scsi_id(int fd, __u32 *id);
+extern int sysfs_unique_holder(char *devnm, long rdev);
+extern int sysfs_freeze_array(struct mdinfo *sra);
+extern int sysfs_wait(int fd, int *msec);
+extern int load_sys(char *path, char *buf); /* NOTE(review): no buffer size is passed; the required size of 'buf' is not visible here */
+extern int reshape_prepare_fdlist(char *devname,
+ struct mdinfo *sra,
+ int raid_disks,
+ int nrdisks,
+ unsigned long blocks,
+ char *backup_file,
+ int *fdlist,
+ unsigned long long *offsets);
+extern void reshape_free_fdlist(int *fdlist,
+ unsigned long long *offsets,
+ int size);
+extern int reshape_open_backup_file(char *backup,
+ int fd,
+ char *devname,
+ long blocks,
+ int *fdlist,
+ unsigned long long *offsets,
+ char *sysfs_name,
+ int restart);
+extern unsigned long compute_backup_blocks(int nchunk, int ochunk,
+ unsigned int ndata, unsigned int odata);
+extern char *locate_backup(char *name);
+extern char *make_backup(char *name);
+
+extern int save_stripes(int *source, unsigned long long *offsets,
+ int raid_disks, int chunk_size, int level, int layout,
+ int nwrites, int *dest,
+ unsigned long long start, unsigned long long length,
+ char *buf); /* takes the same geometry parameters (raid_disks, chunk_size, level, layout) and start/length range as restore_stripes() */
+extern int restore_stripes(int *dest, unsigned long long *offsets,
+ int raid_disks, int chunk_size, int level, int layout,
+ int source, unsigned long long read_offset,
+ unsigned long long start, unsigned long long length,
+ char *src_buf);
+
+#ifndef Sendmail /* default only: a definition supplied earlier (e.g. on the compiler command line) takes precedence */
+#define Sendmail "/usr/lib/sendmail -t"
+#endif
+
+#define SYSLOG_FACILITY LOG_DAEMON
+
+extern char *map_num(mapping_t *map, int num);
+extern int map_name(mapping_t *map, char *name);
+extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[];
+
+extern char *map_dev_preferred(int major, int minor, int create,
+ char *prefer);
+static inline char *map_dev(int major, int minor, int create) /* shorthand for map_dev_preferred() with prefer == NULL */
+{
+ return map_dev_preferred(major, minor, create, NULL);
+}
+
+struct active_array;
+struct metadata_update;
+
+/* 'struct reshape' records the intermediate states of
+ * a general reshape.
+ * The starting geometry is converted to the 'before' geometry
+ * by at most an atomic level change. They could be the same.
+ * Similarly the 'after' geometry is converted to the final
+ * geometry by at most a level change.
+ * Note that 'before' and 'after' must have the same level.
+ * 'blocks' is the minimum number of sectors for a reshape unit.
+ * This will be a multiple of the stripe size in each of the
+ * 'before' and 'after' geometries.
+ * If 'blocks' is 0, no restriping is necessary.
+ * 'min_offset_change' is the minimum change to data_offset to
+ * allow the reshape to happen. It is at least the larger of
+ * the old and new chunk sizes, and typically the same as 'blocks'
+ * divided by number of data disks.
+ * NOTE(review): the struct has no field named 'blocks'; this text
+ * presumably refers to 'backup_blocks' - confirm.
+ */
+struct reshape {
+ int level;
+ int parity; /* number of parity blocks/devices */
+ struct {
+ int layout;
+ int data_disks;
+ } before, after;
+ unsigned long long backup_blocks;
+ unsigned long long min_offset_change;
+ unsigned long long stripes; /* number of old stripes that comprise 'blocks'*/
+ unsigned long long new_size; /* New size of array in sectors */
+};
+
+/* A superswitch provides the entry points to a metadata handler.
+ *
+ * The superswitch primarily operates on some "metadata" that
+ * is accessed via the 'supertype'.
+ * This metadata has one of three possible sources.
+ * 1/ It is read from a single device. In this case it may not completely
+ * describe the array or arrays as some information might be on other
+ * devices.
+ * 2/ It is read from all devices in a container. In this case all
+ * information is present.
+ * 3/ It is created by ->init_super / ->add_to_super. In this case it will
+ * be complete once enough ->add_to_super calls have completed.
+ *
+ * When creating an array inside a container, the metadata will be
+ * formed by a combination of 2 and 3. The metadata or the array is read,
+ * then new information is added.
+ *
+ * The metadata must sometimes have a concept of a 'current' array
+ * and a 'current' device.
+ * The 'current' array is set by init_super to be the newly created array,
+ * or is set by super_by_fd when it finds it is looking at an array inside
+ * a container.
+ *
+ * The 'current' device is either the device that the metadata was read from
+ * in case 1, or the last device added by add_to_super in case 3.
+ * Case 2 does not identify a 'current' device.
+ */
+extern struct superswitch {
+
+ /* Used to report details of metadata read from a component
+ * device. ->load_super has been called.
+ */
+ void (*examine_super)(struct supertype *st, char *homehost);
+ void (*brief_examine_super)(struct supertype *st, int verbose);
+ void (*brief_examine_subarrays)(struct supertype *st, int verbose);
+ void (*export_examine_super)(struct supertype *st);
+ int (*examine_badblocks)(struct supertype *st, int fd, char *devname);
+ int (*copy_metadata)(struct supertype *st, int from, int to);
+
+ /* Used to report details of an active array.
+ * ->load_super was possibly given a 'component' string.
+ */
+ void (*detail_super)(struct supertype *st, char *homehost);
+ void (*brief_detail_super)(struct supertype *st);
+ void (*export_detail_super)(struct supertype *st);
+
+ /* Optional: platform hardware / firmware details */
+ int (*detail_platform)(int verbose, int enumerate_only, char *controller_path);
+ int (*export_detail_platform)(int verbose, char *controller_path);
+
+ /* Used:
+ * to get the uuid to store in bitmap metadata
+ * and 'reshape' backup-data metadata
+ * To see if a device is being re-added to an array it was part of.
+ */
+ void (*uuid_from_super)(struct supertype *st, int uuid[4]);
+
+ /* Extract generic details from metadata. This could be details about
+ * the container, or about an individual array within the container.
+ * The determination is made either by:
+ * load_super being given a 'component' string.
+ * validate_geometry determining what to create.
+ * The info includes both array information and device information.
+ * The particular device should be:
+ * The last device added by add_to_super
+ * The device the metadata was loaded from by load_super
+ * If 'map' is present, then it is an array raid_disks long
+ * (raid_disk must already be set and correct) and it is filled
+ * with 1 for slots that are thought to be active and 0 for slots which
+ * appear to be failed/missing.
+ * *info is zeroed out before data is added.
+ */
+ void (*getinfo_super)(struct supertype *st, struct mdinfo *info, char *map);
+ struct mdinfo *(*getinfo_super_disks)(struct supertype *st);
+ /* Check if the given metadata is flagged as belonging to "this"
+ * host. 0 for 'no', 1 for 'yes', -1 for "Don't record homehost"
+ */
+ int (*match_home)(struct supertype *st, char *homehost);
+
+ /* Make one of several generic modifications to metadata
+ * prior to assembly (or other times).
+ * sparc2.2 - first bug in early 0.90 metadata
+ * super-minor - change name of 0.90 metadata
+ * summaries - 'correct' any redundant data
+ * resync - mark array as dirty to trigger a resync.
+ * uuid - set new uuid - only 0.90 or 1.x
+ * name - change the name of the array (where supported)
+ * homehost - change which host this array is tied to.
+ * devicesize - If metadata is at start of device, change recorded
+ * device size to match actual device size
+ * byteorder - swap bytes for 0.90 metadata
+ *
+ * force-one - mark that device as uptodate, not old or failed.
+ * force-array - mark array as clean if it would not otherwise
+ * assemble
+ * assemble - not sure how this is different from force-one...
+ * linear-grow-new - add a new device to a linear array, but don't
+ * change the size: so superblock still matches
+ * linear-grow-update - now change the size of the array.
+ * writemostly - set the WriteMostly1 bit in the superblock devflags
+ * readwrite - clear the WriteMostly1 bit in the superblock devflags
+ * no-bitmap - clear any record that a bitmap is present.
+ * bbl - add a bad-block-log if possible
+ * no-bbl - remove any bad-block-log if it is empty.
+ * revert-reshape - If a reshape is in progress, modify metadata so
+ * it will resume going in the opposite direction.
+ */
+ int (*update_super)(struct supertype *st, struct mdinfo *info,
+ char *update,
+ char *devname, int verbose,
+ int uuid_set, char *homehost);
+
+ /* Create new metadata for new array as described. This could
+ * be a new container, or an array in a pre-existing container.
+ * Also used to zero metadata prior to writing it to invalidate old
+ * metadata.
+ */
+ int (*init_super)(struct supertype *st, mdu_array_info_t *info,
+ unsigned long long size, char *name,
+ char *homehost, int *uuid,
+ unsigned long long data_offset);
+
+ /* update the metadata to include new device, either at create or
+ * when hot-adding a spare.
+ */
+ int (*add_to_super)(struct supertype *st, mdu_disk_info_t *dinfo,
+ int fd, char *devname,
+ unsigned long long data_offset);
+ /* update the metadata to delete a device,
+ * when hot-removing.
+ */
+ int (*remove_from_super)(struct supertype *st, mdu_disk_info_t *dinfo);
+
+ /* Write metadata to one device when fixing problems or adding
+ * a new device.
+ */
+ int (*store_super)(struct supertype *st, int fd);
+
+ /* Write all metadata for this array.
+ */
+ int (*write_init_super)(struct supertype *st);
+ /* Check if metadata read from one device is compatible with an array,
+ * used when assembling an array, or when pseudo-assembling with
+ * "--examine --brief"
+ * If "st" has not yet had a superblock loaded, "tst" is
+ * moved in, otherwise the superblock in 'st' is compared with
+ * 'tst'.
+ */
+ int (*compare_super)(struct supertype *st, struct supertype *tst);
+ /* Load metadata from a single device. If 'devname' is not NULL
+ * print error messages as appropriate */
+ int (*load_super)(struct supertype *st, int fd, char *devname);
+ /* 'fd' is a 'container' md array - load array metadata from the
+ * whole container.
+ */
+ int (*load_container)(struct supertype *st, int fd, char *devname);
+ /* If 'arg' is a valid name of this metadata type, allocate and
+ * return a 'supertype' for the particular minor version */
+ struct supertype * (*match_metadata_desc)(char *arg);
+ /* If a device has the given size, and the data_offset has been
+ * requested - work out how much space is available for data.
+ * This involves adjusting for reserved space (e.g. bitmaps)
+ * and for any rounding.
+ * 'mdadm' only calls this for existing arrays where a possible
+ * spare is being added. However some super-handlers call it
+ * internally from validate_geometry when creating an array.
+ */
+ __u64 (*avail_size)(struct supertype *st, __u64 size,
+ unsigned long long data_offset);
+ /* This is similar to 'avail_size' in purpose, but is used for
+ * containers for which there is no 'component size' to compare.
+ * This reports the minimum acceptable whole-device size.
+ */
+ unsigned long long (*min_acceptable_spare_size)(struct supertype *st);
+ /* Find somewhere to put a bitmap - possibly auto-size it - and
+ * update the metadata to record this. The array may be newly
+ * created, in which case data_size may be updated, or it might
+ * already exist. Metadata handler can know if init_super
+ * has been called, but not write_init_super.
+ */
+ int (*add_internal_bitmap)(struct supertype *st, int *chunkp,
+ int delay, int write_behind,
+ unsigned long long size, int may_change, int major);
+ /* Seek 'fd' to start of write-intent-bitmap. Must be an
+ * md-native format bitmap
+ */
+ void (*locate_bitmap)(struct supertype *st, int fd);
+ /* if add_internal_bitmap succeeded for existing array, this
+ * writes it out.
+ */
+ int (*write_bitmap)(struct supertype *st, int fd);
+ /* Free the superblock and any other allocated data */
+ void (*free_super)(struct supertype *st);
+
+ /* validate_geometry is called with an st returned by
+ * match_metadata_desc.
+ * It should check that the geometry described is compatible with
+ * the metadata type. It will be called repeatedly as devices
+ * are added, to validate changing size and new devices. If there are
+ * inter-device dependencies, it should record sufficient details
+ * so these can be validated.
+ * Both 'size' and '*freesize' are in sectors. chunk is KiB.
+ * Return value is:
+ * 1: everything is OK
+ * 0: not OK for some reason - if 'verbose', then error was reported.
+ * -1: st->sb was NULL, 'subdev' is a member of a container of this
+ * type, but array is not acceptable for some reason
+ * message was reported even if verbose is 0.
+ */
+ int (*validate_geometry)(struct supertype *st, int level, int layout,
+ int raiddisks,
+ int *chunk, unsigned long long size,
+ unsigned long long data_offset,
+ char *subdev, unsigned long long *freesize,
+ int verbose);
+
+ /* Return a linked list of 'mdinfo' structures for all arrays
+ * in the container. For non-containers, it is like
+ * getinfo_super with an allocated mdinfo.*/
+ struct mdinfo *(*container_content)(struct supertype *st, char *subarray);
+ /* query the supertype for default geometry */
+ void (*default_geometry)(struct supertype *st, int *level, int *layout, int *chunk); /* optional */
+ /* Permit subarrays to be deleted from inactive containers */
+ int (*kill_subarray)(struct supertype *st); /* optional */
+ /* Permit subarrays to be modified */
+ int (*update_subarray)(struct supertype *st, char *subarray,
+ char *update, struct mddev_ident *ident); /* optional */
+ /* Check if reshape is supported for this external format.
+ * st is obtained from super_by_fd() where st->subarray[0] is
+ * initialized to indicate if reshape is being performed at the
+ * container or subarray level
+ */
+#define APPLY_METADATA_CHANGES 1
+#define ROLLBACK_METADATA_CHANGES 0
+
+ int (*reshape_super)(struct supertype *st,
+ unsigned long long size, int level,
+ int layout, int chunksize, int raid_disks,
+ int delta_disks, char *backup, char *dev,
+ int direction,
+ int verbose); /* optional */
+ int (*manage_reshape)( /* optional */
+ int afd, struct mdinfo *sra, struct reshape *reshape,
+ struct supertype *st, unsigned long blocks,
+ int *fds, unsigned long long *offsets,
+ int dests, int *destfd, unsigned long long *destoffsets);
+
+/* for mdmon */
+ int (*open_new)(struct supertype *c, struct active_array *a,
+ char *inst);
+
+ /* Tell the metadata handler the current state of the array.
+ * This covers whether it is known to be consistent (no pending writes)
+ * and how far along a resync is known to have progressed
+ * (in a->resync_start).
+ * resync status is really irrelevant if the array is not consistent,
+ * but some metadata (DDF!) have a place to record the distinction.
+ * If 'consistent' is '2', then the array can mark it dirty if a
+ * resync/recovery/whatever is required, or leave it clean if not.
+ * Return value is 0 if dirty (not consistent) and 1 if clean.
+ * It is only really important if consistent is passed in as '2'.
+ */
+ int (*set_array_state)(struct active_array *a, int consistent);
+
+ /* When the state of a device might have changed, we call set_disk to
+ * tell the metadata what the current state is.
+ * Typically this happens on spare->in_sync and (spare|in_sync)->faulty
+ * transitions.
+ * set_disk might be called when the state of the particular disk has
+ * not in fact changed.
+ */
+ void (*set_disk)(struct active_array *a, int n, int state);
+ void (*sync_metadata)(struct supertype *st);
+ void (*process_update)(struct supertype *st,
+ struct metadata_update *update);
+ /* prepare_update allocates extra memory that might be
+ * needed. If the update cannot be understood, return 0.
+ */
+ int (*prepare_update)(struct supertype *st,
+ struct metadata_update *update);
+
+ /* activate_spare will check if the array is degraded and, if it
+ * is, try to find some spare space in the container.
+ * On success, it adds appropriate updates (for process_update)
+ * to the 'updates' list and returns a list of 'mdinfo' identifying
+ * the device, or devices as there might be multiple missing
+ * devices and multiple spares available.
+ */
+ struct mdinfo *(*activate_spare)(struct active_array *a,
+ struct metadata_update **updates);
+ /*
+ * Return statically allocated string that represents metadata specific
+ * controller domain of the disk. The domain is used in disk domain
+ * matching functions. Disks belong to the same domain if they have
+ * the same domain from mdadm.conf and belong to the same metadata domain.
+ * Returning NULL or not providing this handler means that metadata
+ * does not distinguish the differences between disks that belong to
+ * different controllers. They are in the domain specified by
+ * configuration file (mdadm.conf).
+ * In case when the metadata has the notion of domains based on disk
+ * it shall return NULL for disks that do not belong to the controller
+ * the supported domains. Such disks will form another domain and won't
+ * be mixed with supported ones.
+ */
+ const char *(*get_disk_controller_domain)(const char *path);
+
+ /* for external backup area */
+ int (*recover_backup)(struct supertype *st, struct mdinfo *info);
+
+ /* validate container after assemble */
+ int (*validate_container)(struct mdinfo *info);
+
+ int swapuuid; /* true if uuid is big-endian rather than host-endian */
+ int external;
+ const char *name; /* canonical metadata name */
+} *superlist[];
+
+extern struct superswitch super0, super1;
+extern struct superswitch super_imsm, super_ddf;
+extern struct superswitch mbr, gpt;
+
+struct metadata_update { /* one update record; referenced by supertype.updates/update_tail and taken by ->process_update / ->prepare_update */
+ int len; /* NOTE(review): presumably the length of 'buf' in bytes - confirm */
+ char *buf;
+ void *space; /* allocated space that monitor will use */
+ void **space_list; /* list of allocated spaces that monitor can
+ * use or that it returned.
+ */
+ struct metadata_update *next; /* next update in the singly linked list */
+};
+
+/* A supertype holds a particular collection of metadata.
+ * It identifies the metadata type by the superswitch, and the particular
+ * sub-version of that metadata type.
+ * metadata read in or created is stored in 'sb' and 'info'.
+ * There are also fields used by mdmon to track containers.
+ *
+ * A supertype may refer to:
+ * Just an array, possibly in a container
+ * A container, not identifying any particular array
+ * Info read from just one device, not yet fully describing the array/container.
+ *
+ *
+ * A supertype is created by:
+ * super_by_fd
+ * guess_super
+ * dup_super
+ */
+struct supertype {
+ struct superswitch *ss;
+ int minor_version;
+ int max_devs;
+ char container_devnm[32]; /* devnm of container */
+ void *sb;
+ void *info;
+ void *other; /* Hack used to convert v0.90 to v1.0 */
+ unsigned long long devsize;
+ unsigned long long data_offset; /* used by v1.x only */
+ int ignore_hw_compat; /* used to inform metadata handlers that it should ignore
+ HW/firmware related incompatibility to load metadata.
+ Used when examining metadata to display content of disk
+ when user has no hw/firmware compatible system.
+ */
+ struct metadata_update *updates;
+ struct metadata_update **update_tail;
+
+ /* extra stuff used by mdmon */
+ struct active_array *arrays;
+ int sock; /* listen to external programs */
+ char devnm[32]; /* e.g. md0. This appears in metadata_version:
+ * external:/md0/12
+ */
+ int devcnt;
+ int retry_soon;
+
+ struct mdinfo *devs;
+
+};
+
+extern struct supertype *super_by_fd(int fd, char **subarray);
+enum guess_types { guess_any, guess_array, guess_partitions };
+extern struct supertype *guess_super_type(int fd, enum guess_types guess_type);
+static inline struct supertype *guess_super(int fd) { /* shorthand for guess_super_type(fd, guess_any) */
+ return guess_super_type(fd, guess_any);
+}
+extern struct supertype *dup_super(struct supertype *st);
+extern int get_dev_size(int fd, char *dname, unsigned long long *sizep);
+extern int must_be_container(int fd);
+extern int dev_size_from_id(dev_t id, unsigned long long *size);
+void wait_for(char *dev, int fd); /* declared without 'extern', unlike its neighbours; linkage is the same */
+
+/*
+ * Data structures for policy management.
+ * Each device can have a policy structure that lists
+ * various name/value pairs, each possibly with associated metadata.
+ * The policy list is sorted by name/value/metadata
+ */
+struct dev_policy {
+ struct dev_policy *next;
+ char *name; /* None of these strings are allocated. They are
+ * all just references to strings which are known
+ * to exist elsewhere.
+ * name and metadata can be compared by address equality.
+ */
+ const char *metadata;
+ const char *value;
+};
+
+extern char pol_act[], pol_domain[], pol_metadata[], pol_auto[]; /* NOTE(review): presumably the shared strings that dev_policy.name points at (compared by address) - confirm */
+
+/* iterate over the sublist starting at list, having the same
+ * 'name' as 'list', and matching the given metadata (where
+ * NULL matches anything).
+ * The statement following the macro becomes the loop body: the
+ * trailing "if (...) ; else" skips entries whose metadata does not
+ * match. 'name' and 'metadata' are compared by pointer, not strcmp.
+ * NOTE(review): 'list' is re-evaluated on every iteration and the
+ * arguments are not parenthesised, so pass plain identifiers only.
+ */
+#define pol_for_each(item, list, _metadata) \
+ for (item = list; \
+ item && item->name == list->name; \
+ item = item->next) \
+ if (!(!_metadata || !item->metadata || _metadata == item->metadata)) \
+ ; else
+
+/*
+ * policy records read from mdadm are largely just name-value pairs.
+ * The names are constants, not strdupped
+ * NOTE(review): "read from mdadm" presumably means read from mdadm.conf - confirm.
+ */
+struct pol_rule {
+ struct pol_rule *next;
+ char *type; /* rule_policy or rule_part */
+ struct rule { /* singly linked list of name/value pairs belonging to this record */
+ struct rule *next;
+ char *name;
+ char *value;
+ char *dups; /* duplicates of 'value' with a partNN appended */
+ } *rule;
+};
+
+extern char rule_policy[], rule_part[]; /* the two values named in the comment on pol_rule.type */
+extern char rule_path[], rule_type[];
+extern char type_part[], type_disk[];
+
+extern void policyline(char *line, char *type);
+extern void policy_add(char *type, ...); /* variadic; NOTE(review): the expected argument list and its terminator are not visible in this header */
+extern void policy_free(void);
+
+extern struct dev_policy *path_policy(char *path, char *type);
+extern struct dev_policy *disk_policy(struct mdinfo *disk);
+extern struct dev_policy *devid_policy(int devid);
+extern void dev_policy_free(struct dev_policy *p);
+
+//extern void pol_new(struct dev_policy **pol, char *name, char *val, char *metadata);
+extern void pol_add(struct dev_policy **pol, char *name, char *val, char *metadata);
+extern struct dev_policy *pol_find(struct dev_policy *pol, char *name);
+
+enum policy_action {
+ act_default,
+ act_include,
+ act_re_add,
+ act_spare, /* This only applies to bare devices */
+ act_spare_same_slot, /* this allows non-bare devices,
+ * but only if recent removal */
+ act_force_spare, /* this allows non-bare devices in any case */
+ act_err
+};
+
+extern int policy_action_allows(struct dev_policy *plist, const char *metadata,
+ enum policy_action want);
+extern int disk_action_allows(struct mdinfo *disk, const char *metadata,
+ enum policy_action want);
+
+struct domainlist { /* singly linked list of domain names */
+ struct domainlist *next;
+ const char *dom;
+};
+
+extern int domain_test(struct domainlist *dom, struct dev_policy *pol,
+ const char *metadata);
+extern struct domainlist *domain_from_array(struct mdinfo *mdi,
+ const char *metadata);
+extern void domainlist_add_dev(struct domainlist **dom, int devid,
+ const char *metadata);
+extern void domain_free(struct domainlist *dl);
+extern void domain_merge(struct domainlist **domp, struct dev_policy *pol,
+ const char *metadata);
+void domain_add(struct domainlist **domp, char *domain); /* declared without 'extern', unlike its neighbours; linkage is the same */
+
+extern void policy_save_path(char *id_path, struct map_ent *array);
+extern int policy_check_path(struct mdinfo *disk, struct map_ent *array);
+
+#if __GNUC__ < 3
+struct stat64;
+#endif
+
+#define HAVE_NFTW we assume /* NOTE(review): expands to the tokens 'we assume'; the visible code only tests it with #ifndef, so the value is never used here */
+#define HAVE_FTW
+
+#ifdef __UCLIBC__ /* uClibc: features.h says whether LFS and ftw support were compiled in */
+# include <features.h>
+# ifndef __UCLIBC_HAS_LFS__
+# define lseek64 lseek
+# endif
+# ifndef __UCLIBC_HAS_FTW__
+# undef HAVE_FTW
+# undef HAVE_NFTW
+# endif
+#endif
+
+#ifdef __dietlibc__ /* dietlibc: treated as having ftw but not nftw */
+# undef HAVE_NFTW
+#endif
+
+#if defined(__KLIBC__) /* klibc: treated as having neither ftw nor nftw */
+# undef HAVE_NFTW
+# undef HAVE_FTW
+#endif
+
+#ifndef HAVE_NFTW
+# define FTW_PHYS 1
+# ifndef HAVE_FTW
+ struct FTW {}; /* placeholder so the add_dev() prototype below still compiles; an empty struct is a GNU extension, not ISO C */
+# endif
+#endif
+
+#ifdef HAVE_FTW
+# include <ftw.h>
+#endif
+
+extern int add_dev(const char *name, const struct stat *stb, int flag, struct FTW *s); /* parameter list matches the nftw() callback type */
+
+extern int Manage_ro(char *devname, int fd, int readonly);
+extern int Manage_run(char *devname, int fd, struct context *c);
+extern int Manage_stop(char *devname, int fd, int quiet,
+ int will_retry);
+extern int Manage_subdevs(char *devname, int fd,
+ struct mddev_dev *devlist, int verbose, int test,
+ char *update, int force);
+extern int autodetect(void);
+extern int Grow_Add_device(char *devname, int fd, char *newdev);
+extern int Grow_addbitmap(char *devname, int fd,
+ struct context *c, struct shape *s);
+extern int Grow_reshape(char *devname, int fd,
+ struct mddev_dev *devlist,
+ unsigned long long data_offset,
+ struct context *c, struct shape *s);
+extern int Grow_restart(struct supertype *st, struct mdinfo *info,
+ int *fdlist, int cnt, char *backup_file, int verbose);
+extern int Grow_continue(int mdfd, struct supertype *st,
+ struct mdinfo *info, char *backup_file,
+ int forked, int freeze_reshape);
+
+extern int restore_backup(struct supertype *st,
+ struct mdinfo *content,
+ int working_disks,
+ int spares,
+ char **backup_filep,
+ int verbose);
+extern int Grow_continue_command(char *devname, int fd,
+ char *backup_file, int verbose);
+
+extern int Assemble(struct supertype *st, char *mddev,
+ struct mddev_ident *ident,
+ struct mddev_dev *devlist,
+ struct context *c);
+
+extern int Build(char *mddev, struct mddev_dev *devlist,
+ struct shape *s, struct context *c);
+
+extern int Create(struct supertype *st, char *mddev,
+ char *name, int *uuid,
+ int subdevs, struct mddev_dev *devlist,
+ struct shape *s,
+ struct context *c,
+ unsigned long long data_offset);
+
+extern int Detail(char *dev, struct context *c);
+extern int Detail_Platform(struct superswitch *ss, int scan, int verbose, int export, char *controller_path); /* NOTE(review): presumably dispatches to ss->detail_platform / ss->export_detail_platform - confirm */
+extern int Query(char *dev);
+extern int ExamineBadblocks(char *devname, int brief, struct supertype *forcest);
+extern int Examine(struct mddev_dev *devlist, struct context *c,
+ struct supertype *forcest);
+extern int Monitor(struct mddev_dev *devlist,
+ char *mailaddr, char *alert_cmd,
+ struct context *c,
+ int daemonise, int oneshot,
+ int dosyslog, char *pidfile, int increments,
+ int share);
+
+extern int Kill(char *dev, struct supertype *st, int force, int verbose, int noexcl);
+extern int Kill_subarray(char *dev, char *subarray, int verbose);
+extern int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int quiet);
+extern int Wait(char *dev);
+extern int WaitClean(char *dev, int sock, int verbose);
+extern int SetAction(char *dev, char *action);
+
+extern int Incremental(struct mddev_dev *devlist, struct context *c,
+ struct supertype *st);
+extern void RebuildMap(void);
+extern int IncrementalScan(struct context *c, char *devnm);
+extern int IncrementalRemove(char *devname, char *path, int verbose);
+extern int CreateBitmap(char *filename, int force, char uuid[16],
+ unsigned long chunksize, unsigned long daemon_sleep,
+ unsigned long write_behind,
+ unsigned long long array_size,
+ int major); /* note: uuid is passed as 16 bytes here, not in the int[4] form used by parse_uuid() and the map_*() functions */
+extern int ExamineBitmap(char *filename, int brief, struct supertype *st);
+extern int Write_rules(char *rule_name);
+extern int bitmap_update_uuid(int fd, int *uuid, int swap);
+extern unsigned long bitmap_sectors(struct bitmap_super_s *bsb);
+extern int Dump_metadata(char *dev, char *dir, struct context *c,
+ struct supertype *st);
+extern int Restore_metadata(char *dev, char *dir, struct context *c,
+ struct supertype *st, int only);
+
+extern int md_get_version(int fd);
+extern int get_linux_version(void);
+extern int mdadm_version(char *version);
+extern unsigned long long parse_size(char *size);
+extern int parse_uuid(char *str, int uuid[4]); /* uuid in the int[4] form also used by mddev_ident.uuid and map_ent.uuid */
+extern int parse_layout_10(char *layout);
+extern int parse_layout_faulty(char *layout);
+extern long parse_num(char *num);
+extern int check_ext2(int fd, char *name);
+extern int check_reiser(int fd, char *name);
+extern int check_raid(int fd, char *name);
+extern int check_partitions(int fd, char *dname,
+ unsigned long long freesize,
+ unsigned long long size);
+
+extern int get_mdp_major(void);
+extern int get_maj_min(char *dev, int *major, int *minor);
+extern int dev_open(char *dev, int flags);
+extern int open_dev(char *devnm);
+extern void reopen_mddev(int mdfd);
+extern int open_dev_flags(char *devnm, int flags);
+extern int open_dev_excl(char *devnm);
+extern int is_standard(char *dev, int *nump);
+extern int same_dev(char *one, char *two);
+extern int compare_paths (char* path1,char* path2);
+extern void enable_fds(int devices);
+
+extern int parse_auto(char *str, char *msg, int config);
+extern struct mddev_ident *conf_get_ident(char *dev);
+extern struct mddev_dev *conf_get_devs(void);
+extern int conf_test_dev(char *devname);
+extern int conf_test_metadata(const char *version, struct dev_policy *pol, int is_homehost);
+extern struct createinfo *conf_get_create_info(void); /* 'struct createinfo' is not declared in the visible part of this header */
+extern void set_conffile(char *file);
+extern char *conf_get_mailaddr(void);
+extern char *conf_get_mailfrom(void);
+extern char *conf_get_program(void);
+extern char *conf_get_homehost(int *require_homehostp);
+extern char *conf_line(FILE *file);
+extern char *conf_word(FILE *file, int allow_key);
+extern void print_quoted(char *str);
+extern void print_escape(char *str);
+extern int use_udev(void);
+extern unsigned long GCD(unsigned long a, unsigned long b); /* NOTE(review): presumably greatest common divisor - confirm */
+extern int conf_name_is_free(char *name);
+extern int conf_verify_devnames(struct mddev_ident *array_list);
+extern int devname_matches(char *name, char *match);
+extern struct mddev_ident *conf_match(struct supertype *st,
+ struct mdinfo *info,
+ char *devname,
+ int verbose, int *rvp);
+extern int experimental(void);
+
+extern void free_line(char *line);
+extern int match_oneof(char *devices, char *devname);
+extern void uuid_from_super(int uuid[4], mdp_super_t *super);
+extern const int uuid_zero[4];
+extern int same_uuid(int a[4], int b[4], int swapuuid);
+extern void copy_uuid(void *a, int b[4], int swapuuid);
+extern char *__fname_from_uuid(int id[4], int swap, char *buf, char sep);
+extern char *fname_from_uuid(struct supertype *st,
+ struct mdinfo *info, char *buf, char sep);
+extern unsigned long calc_csum(void *super, int bytes);
+extern int enough(int level, int raid_disks, int layout, int clean,
+ char *avail);
+extern int enough_fd(int fd);
+extern int ask(char *mesg);
+extern unsigned long long get_component_size(int fd);
+extern void remove_partitions(int fd);
+extern int test_partition(int fd);
+extern int test_partition_from_id(dev_t id);
+extern int get_data_disks(int level, int layout, int raid_disks);
+extern unsigned long long calc_array_size(int level, int raid_disks, int layout,
+ int chunksize, unsigned long long devsize);
+extern int flush_metadata_updates(struct supertype *st);
+extern void append_metadata_update(struct supertype *st, void *buf, int len);
+extern int assemble_container_content(struct supertype *st, int mdfd,
+ struct mdinfo *content,
+ struct context *c,
+ char *chosen_name, int *result);
+#define INCR_NO 1
+#define INCR_UNSAFE 2
+#define INCR_ALREADY 4
+#define INCR_YES 8
+extern struct mdinfo *container_choose_spares(struct supertype *st,
+ unsigned long long min_size,
+ struct domainlist *domlist,
+ char *spare_group,
+ const char *metadata, int get_one);
+extern int move_spare(char *from_devname, char *to_devname, dev_t devid);
+extern int add_disk(int mdfd, struct supertype *st,
+ struct mdinfo *sra, struct mdinfo *info);
+extern int remove_disk(int mdfd, struct supertype *st,
+ struct mdinfo *sra, struct mdinfo *info);
+extern int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info);
+unsigned long long min_recovery_start(struct mdinfo *array);
+
+extern char *human_size(long long bytes);
+extern char *human_size_brief(long long bytes, int prefix);
+extern void print_r10_layout(int layout);
+
+extern char *find_free_devnm(int use_partitions);
+
+extern void put_md_name(char *name);
+extern char *devid2kname(int devid);
+extern char *devid2devnm(int devid);
+extern int devnm2devid(char *devnm);
+extern char *get_md_name(char *devnm);
+
+extern char DefaultConfFile[];
+
+extern int create_mddev(char *dev, char *name, int autof, int trustworthy,
+ char *chosen);
+/* values for 'trustworthy' */
+#define LOCAL 1
+#define LOCAL_ANY 10
+#define FOREIGN 2
+#define METADATA 3
+extern int open_mddev(char *dev, int report_errors);
+extern int open_container(int fd);
+extern int metadata_container_matches(char *metadata, char *devnm);
+extern int metadata_subdev_matches(char *metadata, char *devnm);
+extern int is_container_member(struct mdstat_ent *ent, char *devname);
+extern int is_subarray_active(char *subarray, char *devname);
+extern int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet);
+extern struct superswitch *version_to_superswitch(char *vers);
+
+extern int mdmon_running(char *devnm);
+extern int mdmon_pid(char *devnm);
+extern int check_env(char *name);
+extern __u32 random32(void);
+extern int start_mdmon(char *devnm);
+
+extern int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
+ struct supertype *st, unsigned long stripes,
+ int *fds, unsigned long long *offsets,
+ int dests, int *destfd, unsigned long long *destoffsets);
+void abort_reshape(struct mdinfo *sra);
+
+void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0);
+
+extern void fmt_devname(char *name, int num);
+extern char *stat2devnm(struct stat *st);
+extern char *fd2devnm(int fd);
+
+extern int in_initrd(void);
+
+/* Round val up to the next multiple of base; the mask arithmetic only
+ * works when base is a power of two.
+ * ROUND_UP casts base to the type of val before building the mask.
+ * ROUND_UP_PTR applies ROUND_UP to a pointer via unsigned long and casts
+ * the result back to the pointer's type.
+ * Every use of 'base' is parenthesised so that an expression argument
+ * (e.g. a | b) cannot be split by operator precedence.
+ */
+#define _ROUND_UP(val, base) (((val) + (base) - 1) & ~((base) - 1))
+#define ROUND_UP(val, base) _ROUND_UP(val, (typeof(val))(base))
+#define ROUND_UP_PTR(ptr, base) ((typeof(ptr)) \
+ (ROUND_UP((unsigned long)(ptr), base)))
+
+/* Report whether a metadata version string names a subarray: returns
+ * non-zero when its first character is '/' or '-', and 0 otherwise.
+ */
+static inline int is_subarray(char *vers)
+{
+ /* The version string for a 'subarray' (an array in a container)
+ * is
+ * /containername/componentname for normal read-write arrays
+ * -containername/componentname for arrays which mdmon must not
+ * reconfigure. They might be read-only
+ * or might be undergoing reshape etc.
+ * containername is e.g. md0, md_d1
+ * componentname is dependent on the metadata. e.g. '1' 'S1' ...
+ */
+ return (*vers == '/' || *vers == '-');
+}
+
+/* Return a pointer into ent->metadata_version, positioned after a fixed
+ * 10 characters, then strlen(container) characters, then one more.
+ * NOTE(review): the 10 presumably covers "external:" plus the leading
+ * '/' or '-', and the final 1 the '/' that follows the container name,
+ * leaving the component name - confirm against the callers.
+ * No check is made that metadata_version is actually that long.
+ */
+static inline char *to_subarray(struct mdstat_ent *ent, char *container)
+{
+ return &ent->metadata_version[10+strlen(container)+1];
+}
+
+/* dprintf(): debug output to stderr.
+ * With DEBUG defined it is a plain fprintf(stderr, ...).
+ * Otherwise the fprintf() sits under "if (0)", so the arguments are still
+ * compiled but nothing is printed, and the whole expression yields 0.
+ */
+#ifdef DEBUG
+#define dprintf(fmt, arg...) \
+ fprintf(stderr, fmt, ##arg)
+#else
+#define dprintf(fmt, arg...) \
+ ({ if (0) fprintf(stderr, fmt, ##arg); 0; })
+#endif
+#include <assert.h>
+#include <stdarg.h>
+/* printf-style wrapper around vasprintf(): the formatted string is stored
+ * through strp and vasprintf()'s return value is passed back.
+ * A negative result trips the assert(); note that this check is compiled
+ * out when NDEBUG is defined.
+ */
+static inline int xasprintf(char **strp, const char *fmt, ...) {
+ va_list ap;
+ int ret;
+ va_start(ap, fmt);
+ ret = vasprintf(strp, fmt, ap);
+ va_end(ap);
+ assert(ret >= 0);
+ return ret;
+}
+
+#define pr_err(fmt ...) fprintf(stderr, Name ": " fmt)
+#define cont_err(fmt ...) fprintf(stderr, " " fmt)
+
+void *xmalloc(size_t len);
+void *xrealloc(void *ptr, size_t len);
+void *xcalloc(size_t num, size_t size);
+char *xstrdup(const char *str);
+
+#define LEVEL_MULTIPATH (-4)
+#define LEVEL_LINEAR (-1)
+#define LEVEL_FAULTY (-5)
+
+/* kernel module doesn't know about these */
+#define LEVEL_CONTAINER (-100)
+#define LEVEL_UNSUPPORTED (-200)
+
+/* the kernel does know about this one ... */
+#define LEVEL_NONE (-1000000)
+
+/* faulty stuff */
+
+#define WriteTransient 0
+#define ReadTransient 1
+#define WritePersistent 2
+#define ReadPersistent 3
+#define WriteAll 4 /* doesn't go to device */
+#define ReadFixable 5
+#define Modes 6
+
+#define ClearErrors 31
+#define ClearFaults 30
+
+#define AllPersist 100 /* internal use only */
+#define NoPersist 101
+
+#define ModeMask 0x1f
+#define ModeShift 5
+
+#ifdef __TINYC__
+#undef minor
+#undef major
+#undef makedev
+#define minor(x) ((x)&0xff)
+#define major(x) (((x)>>8)&0xff)
+#define makedev(M,m) (((M)<<8) | (m))
+#endif
+
+/* for raid4/5/6 */
+#define ALGORITHM_LEFT_ASYMMETRIC 0
+#define ALGORITHM_RIGHT_ASYMMETRIC 1
+#define ALGORITHM_LEFT_SYMMETRIC 2
+#define ALGORITHM_RIGHT_SYMMETRIC 3
+
+/* Define non-rotating (raid4) algorithms. These allow
+ * conversion of raid4 to raid5.
+ */
+#define ALGORITHM_PARITY_0 4 /* P or P,Q are initial devices */
+#define ALGORITHM_PARITY_N 5 /* P or P,Q are final devices. */
+
+/* DDF RAID6 layouts differ from md/raid6 layouts in two ways.
+ * Firstly, the exact positioning of the parity block is slightly
+ * different between the 'LEFT_*' modes of md and the "_N_*" modes
+ * of DDF.
+ * Secondly, the order of datablocks over which the Q syndrome is computed
+ * is different.
+ * Consequently we have different layouts for DDF/raid6 than md/raid6.
+ * These layouts are from the DDFv1.2 spec.
+ * Interestingly DDFv1.2-Errata-A does not specify N_CONTINUE but
+ * leaves RLQ=3 as 'Vendor Specific'
+ */
+
+#define ALGORITHM_ROTATING_ZERO_RESTART 8 /* DDF PRL=6 RLQ=1 */
+#define ALGORITHM_ROTATING_N_RESTART 9 /* DDF PRL=6 RLQ=2 */
+#define ALGORITHM_ROTATING_N_CONTINUE 10 /*DDF PRL=6 RLQ=3 */
+
+/* For every RAID5 algorithm we define a RAID6 algorithm
+ * with exactly the same layout for data and parity, and
+ * with the Q block always on the last device (N-1).
+ * This allows trivial conversion from RAID5 to RAID6
+ */
+#define ALGORITHM_LEFT_ASYMMETRIC_6 16
+#define ALGORITHM_RIGHT_ASYMMETRIC_6 17
+#define ALGORITHM_LEFT_SYMMETRIC_6 18
+#define ALGORITHM_RIGHT_SYMMETRIC_6 19
+#define ALGORITHM_PARITY_0_6 20
+#define ALGORITHM_PARITY_N_6 ALGORITHM_PARITY_N
+
+/* Define PATH_MAX in case we don't use glibc or standard library does
+ * not have PATH_MAX defined. Assume max path length is 4K characters.
+ */
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#define RESYNC_NONE -1
+#define RESYNC_DELAYED -2
+#define RESYNC_PENDING -3
+#define RESYNC_UNKNOWN -4
+
+/* When using "GET_DISK_INFO" it isn't certain how high
+ * we need to check. So we impose an absolute limit of
+ * MAX_DISKS. This needs to be much more than the largest
+ * number of devices any metadata can support. Currently
+ * v1.x can support 1920
+ */
+#define MAX_DISKS 4096
+
+/* Sometimes the 'size' value passed needs to mean "Maximum".
+ * In those cases we use MAX_SIZE
+ */
+#define MAX_SIZE 1
+
+/* We want to use unsigned numbers for sector counts, but need
+ * a value for 'invalid'. Use '1'.
+ */
+#define INVALID_SECTORS 1
+/* And another special number needed for --data_offset=variable */
+#define VARIABLE_OFFSET 3
diff --git a/mdadm.spec b/mdadm.spec
new file mode 100644
index 00000000..384a1d89
--- /dev/null
+++ b/mdadm.spec
@@ -0,0 +1,45 @@
+Summary: mdadm is used for controlling Linux md devices (aka RAID arrays)
+Name: mdadm
+Version: 3.3.2
+Release: 1
+Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.gz
+URL: http://neil.brown.name/blog/mdadm
+License: GPL
+Group: Utilities/System
+BuildRoot: %{_tmppath}/%{name}-root
+Obsoletes: mdctl
+
+%description
+mdadm is a program that can be used to create, manage, and monitor
+Linux MD (Software RAID) devices.
+
+%prep
+%setup -q
+# we want to install in /sbin, not /usr/sbin...
+%define _exec_prefix %{nil}
+
+%build
+# This is a debatable issue. The author of this RPM spec file feels that
+# people who install RPMs (especially given that the default RPM options
+# will strip the binary) are not going to be running gdb against the
+# program.
+make CXFLAGS="$RPM_OPT_FLAGS" SYSCONFDIR="%{_sysconfdir}"
+
+%install
+make DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} BINDIR=%{_sbindir} install
+install -D -m644 mdadm.conf-example $RPM_BUILD_ROOT/%{_sysconfdir}/mdadm.conf
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files
+%defattr(-,root,root)
+%doc TODO ChangeLog mdadm.conf-example COPYING
+%{_sbindir}/mdadm
+%{_sbindir}/mdmon
+/usr/lib/udev/rules.d/63-md-raid-arrays.rules
+/usr/lib/udev/rules.d/64-md-raid-assembly.rules
+%config(noreplace,missingok)/%{_sysconfdir}/mdadm.conf
+%{_mandir}/man*/md*
+
+%changelog
diff --git a/mdassemble.8 b/mdassemble.8
new file mode 100644
index 00000000..0e640023
--- /dev/null
+++ b/mdassemble.8
@@ -0,0 +1,65 @@
+.\" -*- nroff -*-
+.TH MDASSEMBLE 8 "" v3.3.2
+.SH NAME
+mdassemble \- assemble MD devices
+.I aka
+Linux Software RAID
+
+.SH SYNOPSIS
+
+.BI mdassemble
+
+.SH DESCRIPTION
+.B mdassemble
+is a tiny program that can be used to assemble MD devices inside an
+initial ramdisk (initrd) or initramfs; it is meant to replace the in-kernel
+automatic RAID detection and activation.
+It can be built statically and linked against lightweight libc alternatives, like
+.B dietlibc,
+.B klibc
+or
+.B uClibc.
+
+.SH USAGE
+Invoking
+.B mdassemble
+has the same effect as invoking
+.B mdadm \-\-assemble \-\-scan.
+.PP
+Invoking
+.B mdassemble
+a second time will make all defined arrays readwrite; this is useful if
+using the
+.B start_ro
+module parameter.
+
+.SH OPTIONS
+
+There are no options to
+.B mdassemble.
+
+.SH FILES
+
+.SS /etc/mdadm/mdadm.conf
+
+The config file lists which devices may be scanned to see if
+they contain MD super block, and gives identifying information
+(e.g. UUID) about known MD arrays. See
+.BR mdadm.conf (5)
+for more details.
+
+.B mdassemble
+supports all configuration parameters defined in
+.B mdadm.conf
+with the exception of
+.B auto=
+which is supported only if mdadm was built with the
+.B \-DMDASSEMBLE_AUTO
+define.
+
+.SH SEE ALSO
+.PP
+.BR mdadm (8),
+.BR mdadm.conf (5),
+.BR md (4),
+.BR diet (1).
diff --git a/mdassemble.c b/mdassemble.c
new file mode 100644
index 00000000..674be11f
--- /dev/null
+++ b/mdassemble.c
@@ -0,0 +1,90 @@
+/*
+ * mdassemble - assemble Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2003 Luca Berra <bluca@vodka.it>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include "md_p.h"
+
+#ifndef MDASSEMBLE_AUTO
+/* from mdopen.c */
+/* Cut-down open_mddev(): open 'dev' read-write and check it with
+ * md_get_version().
+ * Returns the open descriptor on success.  If open() fails, or
+ * md_get_version() reports a value <= 0 (in which case the descriptor is
+ * closed again), an error is printed and a negative value is returned.
+ * 'report_errors' is ignored: errors are always printed.
+ */
+int open_mddev(char *dev, int report_errors/*unused*/)
+{
+ int mdfd = open(dev, O_RDWR);
+ if (mdfd < 0)
+ pr_err("error opening %s: %s\n",
+ dev, strerror(errno));
+ else if (md_get_version(mdfd) <= 0) {
+ pr_err("%s does not appear to be an md device\n",
+ dev);
+ close(mdfd);
+ mdfd = -1;
+ }
+ return mdfd;
+}
+/* Cut-down create_mddev(): every argument except 'dev' is ignored and the
+ * result is simply that of open_mddev(dev, 0).
+ */
+int create_mddev(char *dev, char *name, int autof/*unused*/, int trustworthy,
+ char *chosen)
+{
+ return open_mddev(dev, 0);
+}
+#endif
+/* Stub implementations of the map_* functions: each ignores its arguments,
+ * does nothing, and returns 0 (or NULL for the lookup functions).
+ * NOTE(review): presumably these exist so mdassemble links without the
+ * real map-file code - confirm against the Makefile.
+ */
+int map_update(struct map_ent **mpp, char *devnm, char *metadata,
+ int *uuid, char *path)
+{
+ return 0;
+}
+struct map_ent *map_by_name(struct map_ent **mpp, char *name)
+{
+ return NULL;
+}
+int map_lock(struct map_ent **melp){return 0;}
+void map_unlock(struct map_ent **melp){}
+struct map_ent *map_by_uuid(struct map_ent **map, int uuid[4]){return NULL;}
+
+int rv;
+int mdfd = -1;
+
+/* Entry point.  Walks every array identity returned by conf_get_ident(NULL):
+ *  - entries whose devname is "<ignore>" are skipped;
+ *  - a device that can be opened and answers GET_ARRAY_INFO is passed to
+ *    Manage_ro(..., -1) to make it read-write;
+ *  - anything else is handed to Assemble().
+ * The individual results are OR-ed into the global 'rv', which becomes the
+ * exit status; 'rv' is set to 1 when the config file yields no arrays.
+ * argc and argv are not used.
+ */
+int main(int argc, char *argv[])
+{
+	struct mddev_ident *array_list = conf_get_ident(NULL);
+	struct context c = { .freeze_reshape = 1 };
+
+	if (!array_list) {
+		pr_err("No arrays found in config file\n");
+		rv = 1;
+	} else
+		for (; array_list; array_list = array_list->next) {
+			mdu_array_info_t array;
+
+			if (strcasecmp(array_list->devname, "<ignore>") == 0)
+				continue;
+			mdfd = open_mddev(array_list->devname, 0);
+			if (mdfd >= 0 && ioctl(mdfd, GET_ARRAY_INFO, &array) == 0) {
+				rv |= Manage_ro(array_list->devname, mdfd, -1); /* make it readwrite */
+				/* Release the descriptor on this path too, so one
+				 * fd is not leaked per already-active array.
+				 * NOTE(review): assumes Manage_ro() neither closes
+				 * nor keeps mdfd - confirm.
+				 */
+				close(mdfd);
+				continue;
+			}
+			if (mdfd >= 0)
+				close(mdfd);
+			rv |= Assemble(array_list->st, array_list->devname,
+				       array_list, NULL, &c);
+		}
+	return rv;
+}
diff --git a/mdmon-design.txt b/mdmon-design.txt
new file mode 100644
index 00000000..f09184a9
--- /dev/null
+++ b/mdmon-design.txt
@@ -0,0 +1,146 @@
+
+When managing a RAID1 array which uses metadata other than the
+"native" metadata understood by the kernel, mdadm makes use of a
+partner program named 'mdmon' to manage some aspects of updating
+that metadata and synchronising the metadata with the array state.
+
+This document provides some details on how mdmon works.
+
+Containers
+----------
+
+As background: mdadm makes a distinction between an 'array' and a
+'container'. Other sources sometimes use the term 'volume' or
+'device' for an 'array', and may use the term 'array' for a
+'container'.
+
+For our purposes:
+ - a 'container' is a collection of devices which are described by a
+ single set of metadata. The metadata may be stored equally
+ on all devices, or different devices may have quite different
+ subsets of the total metadata. But there is conceptually one set
+ of metadata that unifies the devices.
+
+ - an 'array' is a set of data blocks from various devices which
+ together are used to present the abstraction of a single linear
+ sequence of blocks, which may provide data redundancy or enhanced
+ performance.
+
+So a container has some metadata and provides a number of arrays which
+are described by that metadata.
+
+Sometimes this model doesn't work perfectly. For example, global
+spares may have their own metadata which is quite different from the
+metadata from any device that participates in one or more arrays.
+Such a global spare might still need to belong to some container so
+that it is available to be used should a failure arise. In that case
+we consider the 'metadata' to be the union of the metadata on the
+active devices which describes the arrays, and the metadata on the
+global spares which only describes the spares. In this case different
+devices in the one container will have quite different metadata.
+
+
+Purpose
+-------
+
+The main purpose of mdmon is to update the metadata in response to
+changes to the array which need to be reflected in the metadata before
+future writes to the array can safely be performed.
+These include:
+ - transitions from 'clean' to 'dirty'.
+ - recording that devices have failed.
+ - recording the progress of a 'reshape'
+
+This requires mdmon to be running at any time that the array is
+writable (a read-only array does not require mdmon to be running).
+
+Because mdmon must be able to process these metadata updates at any
+time, it must (when running) have exclusive write access to the
+metadata. Any other changes (e.g. reconfiguration of the array) must
+go through mdmon.
+
+A secondary role for mdmon is to activate spares when a device fails.
+This role is much less time-critical than the other metadata updates,
+so it could be performed by a separate process, possibly
+"mdadm --monitor" which has a related role of moving devices between
+arrays. A main reason for including this functionality in mdmon is
+that in the native-metadata case this function is handled in the
+kernel, and mdmon's reason for existence is to provide functionality
+which is otherwise handled by the kernel.
+
+
+Design overview
+---------------
+
+mdmon is structured as two threads with a common address space and
+common data structures. These threads are known as the 'monitor' and
+the 'manager'.
+
+The 'monitor' has the primary role of monitoring the array for
+important state changes and updating the metadata accordingly. As
+writes to the array can be blocked until 'monitor' completes and
+acknowledges the update, it must be very careful not to block itself.
+In particular it must not block waiting for any write to complete else
+it could deadlock. This means that it must not allocate memory as
+doing this can require dirty memory to be written out and if the
+system chooses to write to the array that mdmon is monitoring, the
+memory allocation could deadlock.
+
+So 'monitor' must never allocate memory and must limit the number of
+other system calls it performs. It may:
+ - use select (or poll) to wait for activity on a file descriptor
+ - read from a sysfs file descriptor
+ - write to a sysfs file descriptor
+ - write the metadata out to the block devices using O_DIRECT
+ - send a signal (kill) to the manager thread
+
+It must not e.g. open files or do anything similar that might allocate
+resources.
+
+The 'manager' thread does everything else that is needed. If any
+files are to be opened (e.g. because a device has been added to the
+array), the manager does that. If any memory needs to be allocated
+(e.g. to hold data about a new array as can happen when one set of
+metadata describes several arrays), the manager performs that
+allocation.
+
+The 'manager' is also responsible for communicating with mdadm and
+assigning spares to replace failed devices.
+
+
+Handling metadata updates
+-------------------------
+
+There are a number of cases in which mdadm needs to update the
+metadata which mdmon is managing. These include:
+ - creating a new array in an active container
+ - adding a device to a container
+ - reconfiguring an array
+etc.
+
+To complete these updates, mdadm must send a message to mdmon which
+will merge the update into the metadata as it is at that moment.
+
+To achieve this, mdmon creates a Unix Domain Socket which the manager
+thread listens on. mdadm sends a message over this socket. The
+manager thread examines the message to see if it will require
+allocating any memory and allocates it. This is done in the
+'prepare_update' metadata method.
+
+The update message is then queued for handling by the monitor thread
+which it will do when convenient. The monitor thread calls
+->process_update which should atomically make the required changes to
+the metadata, making use of the pre-allocated memory as required. Any
+memory that is no longer needed can be placed back in the request and
+the manager thread will free it.
+
+The exact format of a metadata update is up to the implementer of the
+metadata handlers. It will simply describe a change that needs to be
+made. It will sometimes contain fragments of the metadata to be
+copied in to place. However the ->process_update routine must make
+sure not to over-write any field that the monitor thread might have
+updated, such as a 'device failed' or 'array is dirty' state.
+
+When the monitor thread has completed the update and written it to the
+devices, an acknowledgement message is sent back over the socket so
+that mdadm knows it is complete.
diff --git a/mdmon.8 b/mdmon.8
new file mode 100644
index 00000000..4f9a439a
--- /dev/null
+++ b/mdmon.8
@@ -0,0 +1,257 @@
+.\" See file COPYING in distribution for details.
+.TH MDMON 8 "" v3.3.2
+.SH NAME
+mdmon \- monitor MD external metadata arrays
+
+.SH SYNOPSIS
+
+.BI mdmon " [--all] [--takeover] [--foreground] CONTAINER"
+
+.SH OVERVIEW
+The 2.6.27 kernel brings the ability to support external metadata arrays.
+External metadata implies that user space handles all updates to the metadata.
+The kernel's responsibility is to notify user space when a "metadata event"
+occurs, like disk failures and clean-to-dirty transitions. The kernel, in
+important cases, waits for user space to take action on these notifications.
+
+.SH DESCRIPTION
+.SS Metadata updates:
+To service metadata update requests a daemon,
+.IR mdmon ,
+is introduced.
+.I Mdmon
+is tasked with polling the sysfs namespace looking for changes in
+.BR array_state ,
+.BR sync_action ,
+and per disk
+.BR state
+attributes. When a change is detected it calls a per metadata type
+handler to make modifications to the metadata. The following actions
+are taken:
+.RS
+.TP
+.B array_state \- inactive
+Clear the dirty bit for the volume and let the array be stopped
+.TP
+.B array_state \- write pending
+Set the dirty bit for the array and then set
+.B array_state
+to
+.BR active .
+Writes
+are blocked until userspace writes
+.BR active.
+.TP
+.B array_state \- active-idle
+The safe mode timer has expired so set array state to clean to block writes to the array
+.TP
+.B array_state \- clean
+Clear the dirty bit for the volume
+.TP
+.B array_state \- read-only
+This is the initial state that all arrays start at.
+.I mdmon
+takes one of the three actions:
+.RS
+.TP
+1/
+Transition the array to read-auto keeping the dirty bit clear if the metadata
+handler determines that the array does not need resyncing or other modification
+.TP
+2/
+Transition the array to active if the metadata handler determines a resync or
+some other manipulation is necessary
+.TP
+3/
+Leave the array read\-only if the volume is marked to not be monitored; for
+example, the metadata version has been set to "external:\-dev/md127" instead of
+"external:/dev/md127"
+.RE
+.TP
+.B sync_action \- resync\-to\-idle
+Notify the metadata handler that a resync may have completed. If a resync
+process is idled before it completes this event allows the metadata handler to
+checkpoint resync.
+.TP
+.B sync_action \- recover\-to\-idle
+A spare may have completed rebuilding so tell the metadata handler about the
+state of each disk. This is the metadata handler's opportunity to clear
+any "out-of-sync" bits and clear the volume's degraded status. If a recovery
+process is idled before it completes this event allows the metadata handler to
+checkpoint recovery.
+.TP
+.B <disk>/state \- faulty
+A disk failure kicks off a series of events. First, notify the metadata
+handler that a disk has failed, and then notify the kernel that it can unblock
+writes that were dependent on this disk. After unblocking the kernel this disk
+is set to be removed+ from the member array. Finally the disk is marked failed
+in all other member arrays in the container.
+.IP
++ Note This behavior differs slightly from native MD arrays where
+removal is reserved for a
+.B mdadm --remove
+event. In the external metadata case the container holds the final
+reference on a block device and a
+.B mdadm --remove <container> <victim>
+call is still required.
+.RE
+
+.SS Containers:
+.P
+External metadata formats, like DDF, differ from the native MD metadata
+formats in that they define a set of disks and a series of sub-arrays
+within those disks. MD metadata in comparison defines a 1:1
+relationship between a set of block devices and a RAID array. For
+example to create 2 arrays at different RAID levels on a single
+set of disks, MD metadata requires the disks be partitioned and then
+each array can be created with a subset of those partitions. The
+supported external formats perform this disk carving internally.
+.P
+Container devices simply hold references to all member disks and allow
+tools like
+.I mdmon
+to determine which active arrays belong to which
+container. Some array management commands like disk removal and disk
+add are now only valid at the container level. Attempts to perform
+these actions on member arrays are blocked with error messages like:
+.IP
+"mdadm: Cannot remove disks from a \'member\' array, perform this
+operation on the parent container"
+.P
+Containers are identified in /proc/mdstat with a metadata version string
+"external:<metadata name>". Member devices are identified by
+"external:/<container device>/<member index>", or "external:-<container
+device>/<member index>" if the array is to remain readonly.
+
+.SH OPTIONS
+.TP
+CONTAINER
+The
+.B container
+device to monitor. It can be a full path like /dev/md/container, or a
+simple md device name like md127.
+.TP
+.B \-\-foreground
+Normally,
+.I mdmon
+will fork and continue in the background. Adding this option will
+skip that step and run
+.I mdmon
+in the foreground.
+.TP
+.B \-\-takeover
+This instructs
+.I mdmon
+to replace any active
+.I mdmon
+which is currently monitoring the array. This is primarily used late
+in the boot process to replace any
+.I mdmon
+which was started from an
+.B initramfs
+before the root filesystem was mounted. This avoids holding a
+reference on that
+.B initramfs
+indefinitely and ensures that the
+.I pid
+and
+.I sock
+files used to communicate with
+.I mdmon
+are in a standard place.
+.TP
+.B \-\-all
+This tells mdmon to find any active containers and start monitoring
+each of them if appropriate. This is normally used with
+.B \-\-takeover
+late in the boot sequence.
+A separate
+.I mdmon
+process is started for each container as the
+.B \-\-all
+argument is over-written with the name of the container. To allow for
+containers with names longer than 5 characters, this argument can be
+arbitrarily extended, e.g. to
+.BR \-\-all-active-arrays .
+.TP
+
+.PP
+Note that
+.I mdmon
+is automatically started by
+.I mdadm
+when needed and so does not need to be considered when working with
+RAID arrays. The only times it is run other than by
+.I mdadm
+is when the boot scripts need to restart it after mounting the new
+root filesystem.
+
+.SH START UP AND SHUTDOWN
+
+As
+.I mdmon
+needs to be running whenever any filesystem on the monitored device is
+mounted there are special considerations when the root filesystem is
+mounted from an
+.I mdmon
+monitored device.
+Note that in general
+.I mdmon
+is needed even if the filesystem is mounted read-only as some
+filesystems can still write to the device in those circumstances, for
+example to replay a journal after an unclean shutdown.
+
+When the array is assembled by the
+.B initramfs
+code, mdadm will automatically start
+.I mdmon
+as required. This means that
+.I mdmon
+must be installed on the
+.B initramfs
+and there must be a writable filesystem (typically tmpfs) in which
+.B mdmon
+can create a
+.B .pid
+and
+.B .sock
+file. The particular filesystem to use is given to mdmon at compile
+time and defaults to
+.BR /run/mdadm .
+
+This filesystem must persist through to shutdown time.
+
+After the final root filesystem has been instantiated (usually with
+.BR pivot_root )
+.I mdmon
+should be run with
+.I "\-\-all \-\-takeover"
+so that the
+.I mdmon
+running from the
+.B initramfs
+can be replaced with one running in the main root, and so the
+memory used by the initramfs can be released.
+
+At shutdown time,
+.I mdmon
+should not be killed along with other processes. Also as it holds a
+file (socket actually) open in
+.B /dev
+(by default) it will not be possible to unmount
+.B /dev
+if it is a separate filesystem.
+
+.SH EXAMPLES
+
+.B " mdmon \-\-all-active-arrays \-\-takeover"
+.br
+Any
+.I mdmon
+which is currently running is killed and a new instance is started.
+This should be run during the boot sequence if an initramfs was
+used, so that any mdmon running from the initramfs will not hold
+the initramfs active.
+.SH SEE ALSO
+.IR mdadm (8),
+.IR md (4).
diff --git a/mdmon.c b/mdmon.c
new file mode 100644
index 00000000..27045a12
--- /dev/null
+++ b/mdmon.c
@@ -0,0 +1,600 @@
+/*
+ * mdmon - monitor external metadata arrays
+ *
+ * Copyright (C) 2007-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2007-2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/*
+ * md array manager.
+ * When md arrays have user-space managed metadata, this is the program
+ * that does the managing.
+ *
+ * Given one argument: the name of the array (e.g. /dev/md0) that is
+ * the container.
+ * We fork off a helper that runs high priority and mlocked. It responds to
+ * device failures and other events that might stop writeout, or that are
+ * trivial to deal with.
+ * The main thread then watches for new arrays being created in the container
+ * and starts monitoring them too ... along with a few other tasks.
+ *
+ * The main thread communicates with the priority thread by writing over
+ * a pipe.
+ * Separate programs can communicate with the main thread via Unix-domain
+ * socket.
+ * The two threads share address space and open file table.
+ *
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <dirent.h>
+#ifdef USE_PTHREADS
+#include <pthread.h>
+#else
+#include <sched.h>
+#endif
+
+#include "mdadm.h"
+#include "mdmon.h"
+
+/* NOTE(review): neither pointer is used elsewhere in this file; judging by
+ * the names they hold active_array entries queued for disposal - confirm
+ * against the monitor/manager code.
+ */
+struct active_array *discard_this;
+struct active_array *pending_discard;
+
+/* Thread ids of the monitor thread and of the manager (calling) thread,
+ * recorded when clone_monitor() starts the monitor.
+ */
+int mon_tid, mgr_tid;
+
+/* Set to 1 by the SIGTERM handler term(); make_control_sock() refuses to
+ * create a socket once it is set.
+ */
+int sigterm;
+
+#ifdef USE_PTHREADS
+/* Thread entry point for the pthread build: publish this thread's kernel
+ * tid in mon_tid (clone_monitor() polls for it), then run the monitor for
+ * the container passed in 'v'.
+ */
+static void *run_child(void *v)
+{
+	struct supertype *container = v;
+
+	mon_tid = syscall(SYS_gettid);
+	do_monitor(container);
+	return NULL;
+}
+
+/* Start the monitor as a detached pthread (USE_PTHREADS build).
+ *
+ * Waits until the new thread has published its tid in mon_tid, records the
+ * caller's tid in mgr_tid and returns mon_tid.
+ * On failure returns -1 with errno set to the pthread_create() error, so
+ * that the caller's "< 0" test and strerror(errno) report work.
+ */
+static int clone_monitor(struct supertype *container)
+{
+	pthread_attr_t attr;
+	pthread_t thread;
+	int rc;
+
+	mon_tid = -1;
+	pthread_attr_init(&attr);
+	/* NOTE(review): the result is not checked; if 4096 is below the
+	 * platform minimum the request is rejected and the default stack
+	 * size stays in effect.
+	 */
+	pthread_attr_setstacksize(&attr, 4096);
+	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+	rc = pthread_create(&thread, &attr, run_child, container);
+	/* the attribute object is not needed once pthread_create() has
+	 * returned, whether or not it succeeded
+	 */
+	pthread_attr_destroy(&attr);
+	if (rc) {
+		/* pthread_create() hands back a positive error number
+		 * rather than setting errno
+		 */
+		errno = rc;
+		return -1;
+	}
+	/* run_child() stores its tid in mon_tid before doing anything else */
+	while (mon_tid == -1)
+		usleep(10);
+
+	mgr_tid = syscall(SYS_gettid);
+
+	return mon_tid;
+}
+#else /* USE_PTHREADS */
+/* Entry point handed to clone(): run the monitor for the container passed
+ * in 'v'.  Always reports 0.
+ */
+static int run_child(void *v)
+{
+	struct supertype *container = v;
+
+	do_monitor(container);
+
+	return 0;
+}
+
+#ifdef __ia64__
+/* On ia64 __clone2() is used in place of clone(); it is given the base of
+ * the stack area together with its size.
+ */
+int __clone2(int (*fn)(void *),
+	     void *child_stack_base, size_t stack_size,
+	     int flags, void *arg, ...
+	     /* pid_t *pid, struct user_desc *tls, pid_t *ctid */ );
+#endif
+/* Start the monitor with clone() (or __clone2() on ia64), sharing address
+ * space, file table, filesystem context and signal handlers with the
+ * caller.  Records the new thread's id in mon_tid and the caller's in
+ * mgr_tid.  Returns mon_tid, i.e. whatever clone() returned.
+ */
+static int clone_monitor(struct supertype *container)
+{
+	static char stack[4096];
+	const int flags =
+		CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD;
+
+#ifdef __ia64__
+	mon_tid = __clone2(run_child, stack, sizeof(stack), flags, container);
+#else
+	/* hand clone() an address 64 bytes short of the end of the buffer */
+	mon_tid = clone(run_child, stack + sizeof(stack) - 64, flags,
+			container);
+#endif
+
+	mgr_tid = syscall(SYS_gettid);
+
+	return mon_tid;
+}
+#endif /* USE_PTHREADS */
+
+/* Create MDMON_DIR/<devname>.pid holding our pid followed by a newline.
+ *
+ * MDMON_DIR is created if it does not exist yet.  The file is opened with
+ * O_EXCL, so an already existing pid file is an error.
+ * Returns 0 on success or a negative errno value on failure
+ * (-ENAMETOOLONG if the path does not fit the local buffer).
+ */
+static int make_pidfile(char *devname)
+{
+	char path[100];
+	char pid[16];
+	int fd;
+	int n;
+	int len;
+	int err = 0;
+
+	if (mkdir(MDMON_DIR, 0755) < 0 &&
+	    errno != EEXIST)
+		return -errno;
+	len = snprintf(path, sizeof(path), "%s/%s.pid", MDMON_DIR, devname);
+	if (len < 0 || (size_t)len >= sizeof(path))
+		return -ENAMETOOLONG;
+
+	fd = open(path, O_RDWR|O_CREAT|O_EXCL, 0600);
+	if (fd < 0)
+		return -errno;
+	snprintf(pid, sizeof(pid), "%d\n", getpid());
+	n = write(fd, pid, strlen(pid));
+	if (n < 0)
+		/* capture errno before close() gets a chance to change it */
+		err = -errno;
+	close(fd);
+	return err;
+}
+
+/* Send SIGTERM to process 'pid' if its command line identifies it as mdmon,
+ * then (when 'sock' is a valid descriptor) block on the socket until the
+ * old monitor goes away.
+ *
+ * pid     - candidate process; never ourselves
+ * devname - unused here
+ * sock    - connection to the old monitor, or negative for "do not wait"
+ */
+static void try_kill_monitor(pid_t pid, char *devname, int sock)
+{
+	char buf[100];
+	int fd;
+	int n;
+	long fl;
+
+	/* first rule of survival... don't off yourself */
+	if (pid == getpid())
+		return;
+
+	/* kill this process if it is mdmon */
+	snprintf(buf, sizeof(buf), "/proc/%lu/cmdline", (unsigned long) pid);
+	fd = open(buf, O_RDONLY);
+	if (fd < 0)
+		return;
+
+	n = read(fd, buf, sizeof(buf)-1);
+	close(fd);
+	if (n < 0)
+		return;
+	/* terminate exactly what was read so that the search below never
+	 * looks at bytes left over from the path formatted above
+	 */
+	buf[n] = 0;
+
+	/* "@dmon" matches an mdmon whose argv[0][0] was replaced by '@'
+	 * (see main())
+	 */
+	if (!(strstr(buf, "mdmon") ||
+	      strstr(buf, "@dmon")))
+		return;
+
+	kill(pid, SIGTERM);
+
+	if (sock < 0)
+		return;
+
+	/* Wait for monitor to exit by reading from the socket, after
+	 * clearing the non-blocking flag */
+	fl = fcntl(sock, F_GETFL, 0);
+	if (fl >= 0)
+		fcntl(sock, F_SETFL, fl & ~O_NONBLOCK);
+	n = read(sock, buf, sizeof(buf));
+	/* Ignore result, it is just the wait that
+	 * matters
+	 */
+}
+
+/* Remove MDMON_DIR/<devname>.pid and MDMON_DIR/<devname>.sock.
+ * Failures of unlink() are ignored.  A name too long for the local buffer
+ * is skipped rather than unlinked in truncated form.
+ */
+void remove_pidfile(char *devname)
+{
+	char buf[100];
+	int len;
+
+	len = snprintf(buf, sizeof(buf), "%s/%s.pid", MDMON_DIR, devname);
+	if (len >= 0 && (size_t)len < sizeof(buf))
+		unlink(buf);
+	len = snprintf(buf, sizeof(buf), "%s/%s.sock", MDMON_DIR, devname);
+	if (len >= 0 && (size_t)len < sizeof(buf))
+		unlink(buf);
+}
+
+/* Create the non-blocking, listening Unix-domain control socket
+ * MDMON_DIR/<devname>.sock, replacing any stale socket file.
+ *
+ * Returns the socket descriptor, or -1 if SIGTERM has already been seen
+ * (sigterm set), the path is too long, or any socket call fails.
+ * Side effect: the process umask is set to 077.
+ */
+static int make_control_sock(char *devname)
+{
+	char path[100];
+	int sfd;
+	int len;
+	long fl;
+	struct sockaddr_un addr;
+
+	if (sigterm)
+		return -1;
+
+	len = snprintf(path, sizeof(path), "%s/%s.sock", MDMON_DIR, devname);
+	if (len < 0 || (size_t)len >= sizeof(path) ||
+	    (size_t)len >= sizeof(addr.sun_path))
+		return -1;
+	unlink(path);
+	sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
+	if (sfd < 0)
+		return -1;
+
+	/* bind() is given the whole structure, so leave no stray bytes */
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = PF_LOCAL;
+	strcpy(addr.sun_path, path);	/* length checked above */
+	umask(077); /* ensure no world write access */
+	if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+		close(sfd);
+		return -1;
+	}
+	if (listen(sfd, 10) < 0) {
+		close(sfd);
+		return -1;
+	}
+	fl = fcntl(sfd, F_GETFL, 0);
+	fl |= O_NONBLOCK;
+	fcntl(sfd, F_SETFL, fl);
+	return sfd;
+}
+
+/* SIGTERM handler: only records the request in 'sigterm'. */
+static void term(int sig)
+{
+	(void)sig;
+	sigterm = 1;
+}
+
+/* SIGUSR1 handler.  Deliberately does nothing; presumably it exists only
+ * so that delivery of the signal interrupts a blocking call instead of
+ * taking the default action.
+ */
+static void wake_me(int sig)
+{
+	(void)sig;
+}
+
+/* Tell the caller whether mdmon should fork.
+ * Normally 1.  Only a DEBUG build can answer 0, and only when
+ * check_env("MDADM_NO_MDMON") reports true, so that an mdmon started by
+ * hand while debugging does not fork.
+ */
+static int do_fork(void)
+{
+	int should_fork = 1;
+
+#ifdef DEBUG
+	if (check_env("MDADM_NO_MDMON"))
+		should_fork = 0;
+#endif
+
+	return should_fork;
+}
+
+/* Print the command-line synopsis on stderr and exit with status 2. */
+void usage(void)
+{
+	static const char text[] =
+"Usage: mdmon [options] CONTAINER\n"
+"\n"
+"Options are:\n"
+" --help -h : This message\n"
+" --all -a : All devices\n"
+" --foreground -F : Run in foreground (do not fork)\n"
+" --takeover -t : Takeover container\n"
+	;
+
+	fputs(text, stderr);
+	exit(2);
+}
+
+static int mdmon(char *devnm, int must_fork, int takeover);
+
+/* Entry point.
+ *
+ * Parses the options, then either
+ *  - with --all (or when the container argument is "/proc/mdstat") calls
+ *    mdmon() for every entry returned by mdstat_read() that has external
+ *    metadata and is not a subarray, returning the OR of the results, or
+ *  - resolves the single CONTAINER argument to an md device name and
+ *    returns the result of mdmon() for it.
+ * Calls usage() (which exits with status 2) on bad arguments and exits
+ * with status 1 if CONTAINER cannot be resolved.
+ */
+int main(int argc, char *argv[])
+{
+ char *container_name = NULL;
+ char *devnm = NULL;
+ int status = 0;
+ int opt;
+ int all = 0;
+ int takeover = 0;
+ int dofork = 1;
+ static struct option options[] = {
+ {"all", 0, NULL, 'a'},
+ {"takeover", 0, NULL, 't'},
+ {"help", 0, NULL, 'h'},
+ {"offroot", 0, NULL, OffRootOpt},
+ {"foreground", 0, NULL, 'F'},
+ {NULL, 0, NULL, 0}
+ };
+
+ if (in_initrd()) {
+ /*
+ * set first char of argv[0] to @. This is used by
+ * systemd to signal that the task was launched from
+ * initrd/initramfs and should be preserved during shutdown
+ */
+ argv[0][0] = '@';
+ }
+
+ while ((opt = getopt_long(argc, argv, "thaF", options, NULL)) != -1) {
+ switch (opt) {
+ case 'a':
+ /* remember the argv slot holding this option: in --all
+ * mode its storage is overwritten below with the name of
+ * each container
+ */
+ container_name = argv[optind-1];
+ all = 1;
+ break;
+ case 't':
+ takeover = 1;
+ break;
+ case 'F':
+ dofork = 0;
+ break;
+ case OffRootOpt:
+ /* same marking as the in_initrd() case above */
+ argv[0][0] = '@';
+ break;
+ case 'h':
+ default:
+ usage();
+ break;
+ }
+ }
+
+ /* without --all the first non-option argument names the container */
+ if (all == 0 && container_name == NULL) {
+ if (argv[optind])
+ container_name = argv[optind];
+ }
+
+ if (container_name == NULL)
+ usage();
+
+ /* at most one non-option argument is accepted */
+ if (argc - optind > 1)
+ usage();
+
+ if (strcmp(container_name, "/proc/mdstat") == 0)
+ all = 1;
+
+ if (all) {
+ struct mdstat_ent *mdstat, *e;
+ int container_len = strlen(container_name);
+
+ /* launch an mdmon instance for each container found */
+ mdstat = mdstat_read(0, 0);
+ for (e = mdstat; e; e = e->next) {
+ if (e->metadata_version &&
+ strncmp(e->metadata_version, "external:", 9) == 0 &&
+ !is_subarray(&e->metadata_version[9])) {
+ /* update cmdline so this mdmon instance can be
+ * distinguished from others in a call to ps(1)
+ */
+ /* only when the name fits in the argument's storage */
+ if (strlen(e->devnm) <= (unsigned)container_len) {
+ memset(container_name, 0, container_len);
+ sprintf(container_name, "%s", e->devnm);
+ }
+ /* NOTE: always asks mdmon() to fork here, whatever
+ * --foreground said
+ */
+ status |= mdmon(e->devnm, 1, takeover);
+ }
+ }
+ free_mdstat(mdstat);
+
+ return status;
+ } else if (strncmp(container_name, "md", 2) == 0) {
+ /* a bare "mdX" style name: accept it if it maps to a device id */
+ int id = devnm2devid(container_name);
+ if (id)
+ devnm = container_name;
+ } else {
+ /* anything else is treated as a path to stat() */
+ struct stat st;
+
+ if (stat(container_name, &st) == 0)
+ devnm = xstrdup(stat2devnm(&st));
+ }
+
+ if (!devnm) {
+ pr_err("%s is not a valid md device name\n",
+ container_name);
+ exit(1);
+ }
+ return mdmon(devnm, dofork && do_fork(), takeover);
+}
+
+/* Become the monitor/manager for the container 'devnm'.
+ *
+ * devnm     - md device name of the container
+ * must_fork - if non-zero, fork; the parent waits for the child to report
+ *             a status over a pipe (falling back to the child's exit
+ *             status) and returns it, while the child carries on
+ * takeover  - if zero and a responding mdmon already manages the
+ *             container, give up with "already managed"; otherwise any
+ *             previous mdmon is killed once the new monitor is running
+ *
+ * Returns 1 if the device cannot be opened, is not an md device, or the
+ * pipe/fork fails; in the forking parent returns the child's status.
+ * The process that does the real work never returns: it exits with 3 or 2
+ * on set-up errors, or with 0 after do_manager() returns.
+ */
+static int mdmon(char *devnm, int must_fork, int takeover)
+{
+	int mdfd;
+	struct mdinfo *mdi, *di;
+	struct supertype *container;
+	sigset_t set;
+	struct sigaction act;
+	int pfd[2];
+	int status;
+	int ignore;
+	pid_t victim = -1;
+	int victim_sock = -1;
+
+	dprintf("starting mdmon for %s\n", devnm);
+
+	mdfd = open_dev(devnm);
+	if (mdfd < 0) {
+		pr_err("%s: %s\n", devnm, strerror(errno));
+		return 1;
+	}
+	if (md_get_version(mdfd) < 0) {
+		pr_err("%s: Not an md device\n", devnm);
+		close(mdfd);
+		return 1;
+	}
+
+	/* Fork, and have the child tell us when they are ready */
+	if (must_fork) {
+		if (pipe(pfd) != 0) {
+			pr_err("failed to create pipe\n");
+			close(mdfd);
+			return 1;
+		}
+		switch(fork()) {
+		case -1:
+			pr_err("failed to fork: %s\n", strerror(errno));
+			close(pfd[0]);
+			close(pfd[1]);
+			close(mdfd);
+			return 1;
+		case 0: /* child */
+			close(pfd[0]);
+			break;
+		default: /* parent */
+			close(pfd[1]);
+			/* the child has its own copy; do not accumulate one
+			 * open descriptor per container when called in a loop
+			 */
+			close(mdfd);
+			if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) {
+				/* child went away without reporting */
+				wait(&status);
+				status = WEXITSTATUS(status);
+			}
+			close(pfd[0]);
+			return status;
+		}
+	} else
+		pfd[0] = pfd[1] = -1;
+
+	container = xcalloc(1, sizeof(*container));
+	strcpy(container->devnm, devnm);
+	container->arrays = NULL;
+	container->sock = -1;
+
+	mdi = sysfs_read(mdfd, container->devnm, GET_VERSION|GET_LEVEL|GET_DEVS);
+
+	if (!mdi) {
+		pr_err("failed to load sysfs info for %s\n", container->devnm);
+		exit(3);
+	}
+	if (mdi->array.level != UnSet) {
+		pr_err("%s is not a container - cannot monitor\n", devnm);
+		exit(3);
+	}
+	if (mdi->array.major_version != -1 ||
+	    mdi->array.minor_version != -2) {
+		pr_err("%s does not use external metadata - cannot monitor\n",
+			devnm);
+		exit(3);
+	}
+
+	container->ss = version_to_superswitch(mdi->text_version);
+	if (container->ss == NULL) {
+		pr_err("%s uses unsupported metadata: %s\n",
+			devnm, mdi->text_version);
+		exit(3);
+	}
+
+	/* keep a private copy of the device list (order is reversed) */
+	container->devs = NULL;
+	for (di = mdi->devs; di; di = di->next) {
+		struct mdinfo *cd = xmalloc(sizeof(*cd));
+		*cd = *di;
+		cd->next = container->devs;
+		container->devs = cd;
+	}
+	sysfs_free(mdi);
+
+	/* SIGUSR is sent between parent and child.  So both block it
+	 * and enable it only with pselect.
+	 */
+	sigemptyset(&set);
+	sigaddset(&set, SIGUSR1);
+	sigaddset(&set, SIGTERM);
+	sigprocmask(SIG_BLOCK, &set, NULL);
+	/* sigaction() reads sa_mask too, so it must not be left
+	 * uninitialised
+	 */
+	sigemptyset(&act.sa_mask);
+	act.sa_handler = wake_me;
+	act.sa_flags = 0;
+	sigaction(SIGUSR1, &act, NULL);
+	act.sa_handler = term;
+	sigaction(SIGTERM, &act, NULL);
+	act.sa_handler = SIG_IGN;
+	sigaction(SIGPIPE, &act, NULL);
+
+	/* look for an mdmon that is already handling this container */
+	victim = mdmon_pid(container->devnm);
+	if (victim >= 0)
+		victim_sock = connect_monitor(container->devnm);
+
+	ignore = chdir("/");
+	if (!takeover && victim > 0 && victim_sock >= 0) {
+		if (fping_monitor(victim_sock) == 0) {
+			pr_err("%s already managed\n", container->devnm);
+			exit(3);
+		}
+		close(victim_sock);
+		victim_sock = -1;
+	}
+	if (container->ss->load_container(container, mdfd, devnm)) {
+		pr_err("Cannot load metadata for %s\n", devnm);
+		exit(3);
+	}
+	close(mdfd);
+
+	/* Ok, this is close enough.  We can say goodbye to our parent now.
+	 */
+	if (victim > 0)
+		remove_pidfile(devnm);
+	if (make_pidfile(devnm) < 0) {
+		exit(3);
+	}
+	container->sock = make_control_sock(devnm);
+
+	status = 0;
+	if (pfd[1] >= 0) {
+		if (write(pfd[1], &status, sizeof(status)) < 0)
+			pr_err("failed to notify our parent: %d\n",
+			       getppid());
+		close(pfd[1]);
+	}
+
+	mlockall(MCL_CURRENT | MCL_FUTURE);
+
+	if (clone_monitor(container) < 0) {
+		pr_err("failed to start monitor process: %s\n",
+			strerror(errno));
+		exit(2);
+	}
+
+	/* the new monitor is running; now retire the old one, if any */
+	if (victim > 0) {
+		try_kill_monitor(victim, container->devnm, victim_sock);
+		if (victim_sock >= 0)
+			close(victim_sock);
+	}
+
+	/* detach: new session, stdin/stdout (and stderr unless DEBUG) on
+	 * /dev/null
+	 */
+	setsid();
+	close(0);
+	open("/dev/null", O_RDWR);
+	close(1);
+	ignore = dup(0);
+#ifndef DEBUG
+	close(2);
+	ignore = dup(0);
+#endif
+
+	/* This silliness is to stop the compiler complaining
+	 * that we ignore 'ignore'
+	 */
+	if (ignore)
+		ignore++;
+
+	do_manager(container);
+
+	exit(0);
+}
+
+/* Some stub functions so super-* can link with us */
+
+/* Stub: does nothing and returns 0. */
+int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
+ struct supertype *st, unsigned long blocks,
+ int *fds, unsigned long long *offsets,
+ int dests, int *destfd, unsigned long long *destoffsets)
+{
+ return 0;
+}
+
+/* Stub: does nothing and returns 1. */
+int restore_stripes(int *dest, unsigned long long *offsets,
+ int raid_disks, int chunk_size, int level, int layout,
+ int source, unsigned long long read_offset,
+ unsigned long long start, unsigned long long length,
+ char *src_buf)
+{
+ return 1;
+}
+
+/* Stub: does nothing. */
+void abort_reshape(struct mdinfo *sra)
+{
+ return;
+}
+
+/* Stub: does nothing and returns 0. */
+int save_stripes(int *source, unsigned long long *offsets,
+ int raid_disks, int chunk_size, int level, int layout,
+ int nwrites, int *dest,
+ unsigned long long start, unsigned long long length,
+ char *buf)
+{
+ return 0;
+}
+
+/* Name-only superswitch entries for the "0.90" and "1.x" formats; every
+ * other member is left zero.
+ * NOTE(review): presumably present only so that references to super0 and
+ * super1 resolve at link time - confirm.
+ */
+struct superswitch super0 = {
+ .name = "0.90",
+};
+struct superswitch super1 = {
+ .name = "1.x",
+};
diff --git a/mdmon.h b/mdmon.h
new file mode 100644
index 00000000..5a8e1209
--- /dev/null
+++ b/mdmon.h
@@ -0,0 +1,111 @@
+/*
+ * mdmon - monitor external metadata arrays
+ *
+ * Copyright (C) 2007-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2007-2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/* Replace any earlier pr_err with one that writes to stderr and prefixes
+ * every message with "mdmon: ".  'fmt' must start with a string literal.
+ */
+#undef pr_err
+#define pr_err(fmt ...) fprintf(stderr, "mdmon: " fmt)
+
+/* NOTE(review): the names look like the values of the md 'array_state'
+ * attribute, with bad_word as the "unrecognised" value - confirm against
+ * the code that parses it.
+ */
+enum array_state { clear, inactive, suspended, readonly, read_auto,
+ clean, active, write_pending, active_idle, bad_word};
+
+/* NOTE(review): likewise presumably the values of 'sync_action', with
+ * bad_action as the "unrecognised" value - confirm.
+ */
+enum sync_action { idle, reshape, resync, recover, check, repair, bad_action };
+
+/* State kept for one array.
+ * NOTE(review): the role of members without an original comment is
+ * inferred from their names only and should be confirmed against the
+ * monitor/manager code.
+ */
+struct active_array {
+ struct mdinfo info;
+ struct supertype *container;
+ struct active_array *next, *replaces;
+ int to_remove;
+
+ /* file descriptors; presumably open sysfs attributes of the array */
+ int action_fd;
+ int resync_start_fd;
+ int metadata_fd; /* for monitoring rw/ro status */
+ int sync_completed_fd; /* for checkpoint notification events */
+ unsigned long long last_checkpoint; /* sync_completed fires for many
+ * reasons this field makes sure the
+ * kernel has made progress before
+ * moving the checkpoint. It is
+ * cleared by the metadata handler
+ * when it determines recovery is
+ * terminated.
+ */
+
+ enum array_state prev_state, curr_state, next_state;
+ enum sync_action prev_action, curr_action, next_action;
+
+ int check_degraded; /* flag set by mon, read by manage */
+ int check_reshape; /* flag set by mon, read by manage */
+};
+
+/*
+ * Metadata updates are handled by the monitor thread,
+ * as it has exclusive access to the metadata.
+ * When the manager wants to update metadata, either
+ * for its own reason (e.g. committing a spare) or
+ * on behalf of mdadm, it creates a metadata_update
+ * structure and queues it to the monitor.
+ * Updates are created and processed by code under the
+ * superswitch. All common code sees them as opaque
+ * blobs.
+ */
+extern struct metadata_update *update_queue, *update_queue_handled;
+
+#define MD_MAJOR 9
+
+extern struct active_array *container;
+extern struct active_array *discard_this;
+extern struct active_array *pending_discard;
+extern struct md_generic_cmd *active_cmd;
+
+void remove_pidfile(char *devname);
+void do_monitor(struct supertype *container);
+void do_manager(struct supertype *container);
+extern int sigterm;
+
+int read_dev_state(int fd);
+int is_container_member(struct mdstat_ent *mdstat, char *container);
+
+struct mdstat_ent *mdstat_read(int hold, int start);
+
+extern int exit_now, manager_ready;
+extern int mon_tid, mgr_tid;
+extern int monitor_loop_cnt;
+
+/* helper routine to determine resync completion since MaxSector is a
+ * moving target
+ *
+ * Returns non-zero when array->resync_start has reached the amount that
+ * has to be synced:
+ *   levels 1, 4, 5, 6: component_size
+ *   level 10:          component_size * raid_disks / ncopies, where
+ *                      ncopies is the product of the two low bytes of the
+ *                      layout (presumably the near and far copy counts)
+ *   any other level:   0, i.e. always complete
+ */
+static inline int is_resync_complete(struct mdinfo *array)
+{
+	unsigned long long sync_size = 0;
+	int ncopies, l;
+	switch(array->array.level) {
+	case 1:
+	case 4:
+	case 5:
+	case 6:
+		sync_size = array->component_size;
+		break;
+	case 10:
+		l = array->array.layout;
+		/* both factors are byte fields: mask with bitwise '&'.  A
+		 * logical '&&' would reduce the second one to 0 or 1.
+		 */
+		ncopies = (l & 0xff) * ((l >> 8) & 0xff);
+		sync_size = array->component_size * array->array.raid_disks;
+		/* a layout with a zero copy count must not divide by zero */
+		if (ncopies > 0)
+			sync_size /= ncopies;
+		break;
+	}
+	return array->resync_start >= sync_size;
+}
diff --git a/mdopen.c b/mdopen.c
new file mode 100644
index 00000000..2c9d7455
--- /dev/null
+++ b/mdopen.c
@@ -0,0 +1,473 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include "md_p.h"
+#include <ctype.h>
+
+/* Create partition device nodes (or symlinks) for 'dev'.
+ *
+ * dev - path of a block device node or of a symlink; anything else, or a
+ *       path that cannot be lstat()ed, makes this a no-op
+ * cnt - number of partitions to create; 0 means 4
+ *
+ * Errors from mknod/chown/chmod are reported with perror() and otherwise
+ * ignored.  Each name that ends up existing is passed to add_dev().
+ */
+void make_parts(char *dev, int cnt)
+{
+	/* make 'cnt' partition devices for 'dev'
+	 * If dev is a device name we use the
+	 *  major/minor from dev and add 1..cnt
+	 * If it is a symlink, we make similar symlinks.
+	 * If dev ends with a digit, we add "p%d" else "%d"
+	 * If the name exists, we use its owner/mode,
+	 * else that of dev
+	 */
+	struct stat stb;
+	int major_num;
+	int minor_num;
+	int odig;
+	int i;
+	size_t devlen = strlen(dev);
+	int nlen = devlen + 20;
+	char *name;
+	int dig;
+	char orig[1024];
+	char sym[1024];
+	int err;
+
+	/* an empty name has no last character to inspect (and could not be
+	 * lstat()ed anyway)
+	 */
+	if (devlen == 0)
+		return;
+	/* <ctype.h> functions need an unsigned char value */
+	dig = isdigit((unsigned char)dev[devlen-1]);
+
+	if (cnt == 0)
+		cnt = 4;
+	if (lstat(dev, &stb)!= 0)
+		return;
+
+	if (S_ISBLK(stb.st_mode)) {
+		major_num = major(stb.st_rdev);
+		minor_num = minor(stb.st_rdev);
+		odig = -1;
+	} else if (S_ISLNK(stb.st_mode)) {
+		int len = readlink(dev, orig, sizeof(orig));
+		/* len == 0 would make orig[len-1] read before the buffer */
+		if (len <= 0 || len > 1000)
+			return;
+		orig[len] = 0;
+		odig = isdigit((unsigned char)orig[len-1]);
+		major_num = -1;
+		minor_num = -1;
+	} else
+		return;
+	name = xmalloc(nlen);
+	for (i = 1; i <= cnt ; i++) {
+		struct stat stb2;
+		snprintf(name, nlen, "%s%s%d", dev, dig?"p":"", i);
+		if (stat(name, &stb2) == 0) {
+			/* leave alone anything that is not a stale block
+			 * node for a block-device 'dev'
+			 */
+			if (!S_ISBLK(stb2.st_mode) || !S_ISBLK(stb.st_mode))
+				continue;
+			if (stb2.st_rdev == makedev(major_num, minor_num+i))
+				continue;
+			unlink(name);
+		} else {
+			stb2 = stb;
+		}
+		if (S_ISBLK(stb.st_mode)) {
+			if (mknod(name, S_IFBLK | 0600,
+				  makedev(major_num, minor_num+i)))
+				perror("mknod");
+			if (chown(name, stb2.st_uid, stb2.st_gid))
+				perror("chown");
+			if (chmod(name, stb2.st_mode & 07777))
+				perror("chmod");
+			err = 0;
+		} else {
+			snprintf(sym, sizeof(sym), "%s%s%d", orig, odig?"p":"", i);
+			err = symlink(sym, name);
+		}
+
+		if (err == 0 && stat(name, &stb2) == 0)
+			add_dev(name, &stb2, 0, NULL);
+	}
+	free(name);
+}
+
+/*
+ * We need a new md device to assemble/build/create an array.
+ * 'dev' is a name given us by the user (command line or mdadm.conf)
+ * It might start with /dev or /dev/md and might end with a digit
+ * string.
+ * If it starts with just /dev, it must be /dev/mdX or /dev/md_dX
+ * If it ends with a digit string, then it must be as above, or
+ * 'trustworthy' must be 'METADATA' and the 'dev' must be
+ * /dev/md/'name'NN or 'name'NN
+ * If it doesn't end with a digit string, it must be /dev/md/'name'
+ * or 'name' or must be NULL.
+ * If the digit string is present, it gives the minor number to use
+ * If not, we choose a high, unused minor number.
+ * If the 'dev' is a standard name, it decides whether 'md' or 'mdp'.
+ * else if the name is 'd[0-9]+' then we use mdp
+ * else if trustworthy is 'METADATA' we use md
+ * else the choice depends on 'autof'.
+ * If name is NULL it is assumed to match whatever dev provides.
+ * If both name and dev are NULL, we choose a name 'mdXX' or 'mdpXX'
+ *
+ * If 'name' is given, and 'trustworthy' is 'foreign' and name is not
+ * supported by 'dev', we add a "_%d" suffix based on the minor number
+ * and use that.
+ *
+ * If udev is configured, we create a temporary device, open it, and
+ * unlink it.
+ * If not, we create the /dev/mdXX device, and if name is usable,
+ * /dev/md/name
+ * In any case we return /dev/md/name or (if that isn't available)
+ * /dev/mdXX in 'chosen'.
+ *
+ * When we create devices, we use uid/gid/umask from config file.
+ */
+
+/* Pick, create if necessary, and open a new md device.
+ *
+ * dev         - name requested by the user, or NULL
+ * name        - array name to derive a device name from, or NULL/empty
+ * autof       - auto setting; 0 means "take it from the config file".  The
+ *               low 3 bits select the mode, the remaining bits give the
+ *               partition count.
+ * trustworthy - LOCAL, FOREIGN or METADATA
+ * chosen      - receives the path chosen for the array; may be NULL, in
+ *               which case an internal scratch buffer is used
+ *
+ * Returns the descriptor from open_dev_excl() (negative if that failed),
+ * or -1 after printing a message for any earlier error.
+ */
+int create_mddev(char *dev, char *name, int autof, int trustworthy,
+		 char *chosen)
+{
+	int mdfd;
+	struct stat stb;
+	int num = -1;
+	int use_mdp = -1;
+	struct createinfo *ci = conf_get_create_info();
+	int parts;
+	char *cname;
+	char devnm[32];
+	char devname[5 + sizeof(devnm)];	/* "/dev/" + devnm */
+	char cbuf[400];
+	if (chosen == NULL)
+		chosen = cbuf;
+
+	if (autof == 0)
+		autof = ci->autof;
+
+	parts = autof >> 3;
+	autof &= 7;
+
+	strcpy(chosen, "/dev/md/");
+	cname = chosen + strlen(chosen);
+
+	if (dev) {
+		if (strncmp(dev, "/dev/md/", 8) == 0) {
+			strcpy(cname, dev+8);
+		} else if (strncmp(dev, "/dev/", 5) == 0) {
+			char *e = dev + strlen(dev);
+			while (e > dev && isdigit((unsigned char)e[-1]))
+				e--;
+			if (e[0])
+				num = strtoul(e, NULL, 10);
+			strcpy(cname, dev+5);
+			cname[e-(dev+5)] = 0;
+			/* name *must* be mdXX or md_dXX in this context */
+			if (num < 0 ||
+			    (strcmp(cname, "md") != 0 && strcmp(cname, "md_d") != 0)) {
+				pr_err("%s is an invalid name "
+					"for an md device.  Try /dev/md/%s\n",
+					dev, dev+5);
+				return -1;
+			}
+			if (strcmp(cname, "md") == 0)
+				use_mdp = 0;
+			else
+				use_mdp = 1;
+			/* recreate name: /dev/md/0 or /dev/md/d0 */
+			sprintf(cname, "%s%d", use_mdp?"d":"", num);
+		} else
+			strcpy(cname, dev);
+
+		/* 'cname' must not contain a slash, and may not be
+		 * empty.
+		 */
+		if (strchr(cname, '/') != NULL) {
+			pr_err("%s is an invalid name "
+				"for an md device.\n", dev);
+			return -1;
+		}
+		if (cname[0] == 0) {
+			pr_err("%s is an invalid name "
+				"for an md device (empty!).\n", dev);
+			return -1;
+		}
+		if (num < 0) {
+			/* If cname is 'N' or 'dN', we get dev number
+			 * from there.
+			 */
+			char *sp = cname;
+			char *ep;
+			if (cname[0] == 'd')
+				sp++;
+			if (isdigit((unsigned char)sp[0]))
+				num = strtoul(sp, &ep, 10);
+			else
+				ep = sp;
+			if (ep == sp || *ep || num < 0)
+				num = -1;
+			else if (cname[0] == 'd')
+				use_mdp = 1;
+			else
+				use_mdp = 0;
+		}
+	}
+
+	/* Now determine device number */
+	/* named 'METADATA' cannot use 'mdp'. */
+	if (name && name[0] == 0)
+		name = NULL;
+	if (name && trustworthy == METADATA && use_mdp == 1) {
+		pr_err("%s is not allowed for a %s container. "
+			"Consider /dev/md%d.\n", dev, name, num);
+		return -1;
+	}
+	if (name && trustworthy == METADATA)
+		use_mdp = 0;
+	if (use_mdp == -1) {
+		if (autof == 4 || autof == 6)
+			use_mdp = 1;
+		else
+			use_mdp = 0;
+	}
+	if (num < 0 && trustworthy == LOCAL && name) {
+		/* if name is numeric, possibly prefixed by
+		 * 'md' or '/dev/md', use that for num
+		 * if it is not already in use */
+		char *ep;
+		char *n2 = name;
+		if (strncmp(n2, "/dev/", 5) == 0)
+			n2 += 5;
+		if (strncmp(n2, "md", 2) == 0)
+			n2 += 2;
+		if (*n2 == '/')
+			n2++;
+		num = strtoul(n2, &ep, 10);
+		if (ep == n2 || *ep)
+			num = -1;
+		else {
+			sprintf(devnm, "md%s%d", use_mdp ? "_d":"", num);
+			if (mddev_busy(devnm))
+				num = -1;
+		}
+	}
+
+	if (cname[0] == 0 && name) {
+		/* Need to find a name if we can
+		 * We don't completely trust 'name'.  Truncate to
+		 * reasonable length and remove '/'
+		 */
+		char *cp;
+		struct map_ent *map = NULL;
+		int conflict = 1;
+		int unum = 0;
+		int cnlen;
+		strncpy(cname, name, 200);
+		cname[200] = 0;
+		for (cp = cname; *cp ; cp++)
+			switch (*cp) {
+			case '/':
+				*cp = '-';
+				break;
+			case ' ':
+			case '\t':
+				*cp = '_';
+				break;
+			}
+
+		if (trustworthy == LOCAL ||
+		    (trustworthy == FOREIGN && strchr(cname, ':') != NULL)) {
+			/* Only need suffix if there is a conflict */
+			if (map_by_name(&map, cname) == NULL)
+				conflict = 0;
+		}
+		cnlen = strlen(cname);
+		while (conflict) {
+			if (trustworthy == METADATA &&
+			    !isdigit((unsigned char)cname[cnlen-1]))
+				sprintf(cname+cnlen, "%d", unum);
+			else
+				/* add _%d to FOREIGN array that don't
+				 * a 'host:' prefix
+				 */
+				sprintf(cname+cnlen, "_%d", unum);
+			unum++;
+			if (map_by_name(&map, cname) == NULL)
+				conflict = 0;
+		}
+	}
+
+	devnm[0] = 0;
+	if (num < 0 && cname && ci->names) {
+		int fd;
+		int n = -1;
+		/* cname may be far longer than devnm can hold; only ask
+		 * the kernel for a named array when "md_<cname>" fits,
+		 * otherwise fall through to a numbered device below
+		 */
+		int len = snprintf(devnm, sizeof(devnm), "md_%s", cname);
+		if (len > 0 && (size_t)len < sizeof(devnm)) {
+			fd = open("/sys/module/md_mod/parameters/new_array",
+				  O_WRONLY);
+			if (fd >= 0) {
+				n = write(fd, devnm, strlen(devnm));
+				close(fd);
+			}
+		}
+		if (n < 0)
+			devnm[0] = 0;
+	}
+	if (devnm[0])
+		;
+	else if (num < 0) {
+		/* need to choose a free number. */
+		char *free_nm = find_free_devnm(use_mdp);
+		/* test the value returned, not the local array (which can
+		 * never be NULL)
+		 */
+		if (free_nm == NULL) {
+			pr_err("No avail md devices - aborting\n");
+			return -1;
+		}
+		strcpy(devnm, free_nm);
+	} else {
+		sprintf(devnm, "%s%d", use_mdp?"md_d":"md", num);
+		if (mddev_busy(devnm)) {
+			pr_err("%s is already in use.\n",
+				dev);
+			return -1;
+		}
+	}
+
+	snprintf(devname, sizeof(devname), "/dev/%s", devnm);
+
+	if (dev && dev[0] == '/')
+		strcpy(chosen, dev);
+	else if (cname[0] == 0)
+		strcpy(chosen, devname);
+
+	/* We have a device number and name.
+	 * If we cannot detect udev, we need to make
+	 * devices and links ourselves.
+	 */
+	if (!use_udev()) {
+		/* Make sure 'devname' exists and 'chosen' is a symlink to it */
+		if (lstat(devname, &stb) == 0) {
+			/* Must be the correct device, else error */
+			if ((stb.st_mode&S_IFMT) != S_IFBLK ||
+			    stb.st_rdev != (dev_t)devnm2devid(devnm)) {
+				pr_err("%s exists but looks wrong, please fix\n",
+					devname);
+				return -1;
+			}
+		} else {
+			if (mknod(devname, S_IFBLK|0600,
+				  devnm2devid(devnm)) != 0) {
+				pr_err("failed to create %s\n",
+					devname);
+				return -1;
+			}
+			if (chown(devname, ci->uid, ci->gid))
+				perror("chown");
+			if (chmod(devname, ci->mode))
+				perror("chmod");
+			stat(devname, &stb);
+			add_dev(devname, &stb, 0, NULL);
+		}
+		if (use_mdp == 1)
+			make_parts(devname, parts);
+
+		if (strcmp(chosen, devname) != 0) {
+			if (mkdir("/dev/md",0700) == 0) {
+				if (chown("/dev/md", ci->uid, ci->gid))
+					perror("chown /dev/md");
+				if (chmod("/dev/md", ci->mode| ((ci->mode>>2) & 0111)))
+					perror("chmod /dev/md");
+			}
+
+			if (dev && strcmp(chosen, dev) == 0)
+				/* We know we are allowed to use this name */
+				unlink(chosen);
+
+			if (lstat(chosen, &stb) == 0) {
+				char buf[300];
+				ssize_t link_len = readlink(chosen, buf, sizeof(buf)-1);
+				if (link_len >= 0)
+					buf[link_len] = '\0';
+
+				if ((stb.st_mode & S_IFMT) != S_IFLNK ||
+				    link_len < 0 ||
+				    strcmp(buf, devname) != 0) {
+					pr_err("%s exists - ignoring\n",
+						chosen);
+					strcpy(chosen, devname);
+				}
+			} else if (symlink(devname, chosen) != 0)
+				pr_err("failed to create %s: %s\n",
+					chosen, strerror(errno));
+			if (use_mdp && strcmp(chosen, devname) != 0)
+				make_parts(chosen, parts);
+		}
+	}
+	mdfd = open_dev_excl(devnm);
+	if (mdfd < 0)
+		pr_err("unexpected failure opening %s\n",
+			devname);
+	return mdfd;
+}
+
+/* Open 'dev' and verify that it is an md device.
+ * A read-write open is tried first; if that is refused with EACCES a
+ * read-only open is tried instead.
+ * Returns the file descriptor on success,
+ * -1 if the device could not be opened,
+ * -2 if it opened but md_get_version() did not report an md device.
+ * Messages are printed only when 'report_errors' is non-zero.
+ */
+int open_mddev(char *dev, int report_errors)
+{
+	int fd;
+
+	fd = open(dev, O_RDWR);
+	if (fd < 0 && errno == EACCES)
+		fd = open(dev, O_RDONLY);
+
+	if (fd < 0) {
+		if (report_errors)
+			pr_err("error opening %s: %s\n",
+				dev, strerror(errno));
+		return -1;
+	}
+
+	if (md_get_version(fd) > 0)
+		return fd;
+
+	/* opened fine, but it is not md */
+	close(fd);
+	if (report_errors)
+		pr_err("%s does not appear to be "
+			"an md device\n", dev);
+	return -2;
+}
+
+/* Find an md device name that is currently unused.
+ *
+ * Candidates are tried from 127 down to 0, then from (1<<20)-1 downwards;
+ * reaching 128 means nothing was free.  A candidate is rejected if
+ * mddev_busy() reports it busy, if conf_name_is_free() reports it taken,
+ * or, when udev is not in use, if its device id already maps to a
+ * non-standard name.
+ *
+ * use_partitions - non-zero selects "md_dN" names, zero selects "mdN"
+ *
+ * Returns a pointer to a static buffer holding the name (overwritten by
+ * the next call), or NULL if no name is free.
+ */
+char *find_free_devnm(int use_partitions)
+{
+	static char devnm[32];
+	int candidate = 127;
+
+	while (candidate != 128) {
+		int usable;
+
+		if (use_partitions)
+			sprintf(devnm, "md_d%d", candidate);
+		else
+			sprintf(devnm, "md%d", candidate);
+
+		usable = !mddev_busy(devnm) && conf_name_is_free(devnm);
+		if (usable && !use_udev()) {
+			/* make sure it is new to /dev too, at least as a
+			 * non-standard */
+			int devid = devnm2devid(devnm);
+			if (devid) {
+				char *dn = map_dev(major(devid),
+						   minor(devid), 0);
+				if (dn && ! is_standard(dn, NULL))
+					usable = 0;
+			}
+		}
+		if (usable)
+			return devnm;
+
+		/* 127 .. 0, then wrap to the top of the range */
+		candidate = candidate ? candidate-1 : (1<<20)-1;
+	}
+	return NULL;
+}
diff --git a/mdstat.c b/mdstat.c
new file mode 100644
index 00000000..8b16d3e8
--- /dev/null
+++ b/mdstat.c
@@ -0,0 +1,416 @@
+/*
+ * mdstat - parse /proc/mdstat file. Part of:
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2002-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+/*
+ * The /proc/mdstat file comes in at least 3 flavours:
+ * In an unpatched 2.2 kernel (md 0.36.6):
+ * Personalities : [n raidx] ...
+ * read_ahead {not set|%d sectors}
+ * md0 : {in}active{ raidX /dev/hda... %d blocks{ maxfault=%d}}
+ * md1 : .....
+ *
+ * Normally only 4 md lines, but all are listed.
+ *
+ * In a patched 2.2 kernel (md 0.90.0)
+ * Personalities : [raidx] ...
+ * read_ahead {not set|%d sectors}
+ * mdN : {in}active {(readonly)} raidX dev[%d]{(F)} ... %d blocks STATUS RESYNC
+ * ... Only initialised arrays listed
+ * unused devices: {dev dev ... | <none>}
+ *
+ * STATUS is personality dependent:
+ * linear: %dk rounding
+ * raid0: %dk chunks
+ * raid1: [%d/%d] [U_U] ( raid/working. operational or not)
+ * raid5: level 4/5, %dk chunk, algorithm %d [%d/%d] [U_U]
+ *
+ * RESYNC is empty or:
+ * {resync|recovery}=%u%% finish=%u.%umin
+ * or
+ * resync=DELAYED
+ *
+ * In a 2.4 kernel (md 0.90.0/2.4)
+ * Personalities : [raidX] ...
+ * read_ahead {not set|%d sectors}
+ * mdN : {in}active {(read-only)} raidX dev[%d]{(F)} ...
+ * %d blocks STATUS
+ * RESYNC
+ * unused devices: {dev dev .. | <none>}
+ *
+ * STATUS matches 0.90.0/2.2
+ * RESYNC includes [===>....],
+ * adds a space after {resync|recovery} and before and after '='
+ * adds a decimal to the recovery percent.
+ * adds (%d/%d) resync amount and max_blocks, before finish.
+ * adds speed=%dK/sec after finish
+ *
+ *
+ *
+ * Out of this we want to extract:
+ * list of devices, active or not
+ * pattern of failed drives (so need number of drives)
+ * percent resync complete
+ *
+ * As continuation is indicated by leading space, we use
+ * conf_line from config.c to read logical lines
+ *
+ */
+
+#include "mdadm.h"
+#include "dlink.h"
+#include <sys/select.h>
+#include <ctype.h>
+
+/* Release a whole list of member-device records, including the name
+ * string owned by each record.  A NULL list is accepted.
+ */
+static void free_member_devnames(struct dev_member *m)
+{
+	struct dev_member *next;
+
+	for (; m; m = next) {
+		next = m->next;
+		free(m->name);
+		free(m);
+	}
+}
+
+/* If 'name' looks like a component entry from /proc/mdstat ("sda1[0]"),
+ * push a new record holding the part before the '[' onto the front of
+ * list '*m'.
+ * Returns 1 when a record was added, 0 when 'name' contains no '[' and is
+ * therefore not a device.
+ */
+static int add_member_devname(struct dev_member **m, char *name)
+{
+	struct dev_member *new;
+	char *bracket = strchr(name, '[');
+	size_t len;
+
+	if (bracket == NULL)
+		/* not a device */
+		return 0;
+
+	len = bracket - name;
+	new = xmalloc(sizeof(*new));
+	/* Copy through xmalloc() rather than strndup(): a strndup() failure
+	 * would leave a NULL name that later strcmp() calls dereference.
+	 * NOTE(review): this relies on xmalloc() never returning NULL, as the
+	 * other callers in this file already do.
+	 */
+	new->name = xmalloc(len + 1);
+	memcpy(new->name, name, len);
+	new->name[len] = '\0';
+	new->next = *m;
+	*m = new;
+	return 1;
+}
+
+/* Free a list of mdstat entries as returned by mdstat_read(), together
+ * with every string and the member list owned by each entry.
+ * A NULL list is accepted.
+ */
+void free_mdstat(struct mdstat_ent *ms)
+{
+	struct mdstat_ent *next;
+
+	for (; ms; ms = next) {
+		next = ms->next;
+		free(ms->dev);
+		free(ms->level);
+		free(ms->pattern);
+		free(ms->metadata_version);
+		free_member_devnames(ms->members);
+		free(ms);
+	}
+}
+
+static int mdstat_fd = -1;
+/* Parse /proc/mdstat into a linked list with one entry per md array.
+ *
+ * hold:  non-zero keeps a private descriptor on /proc/mdstat (mdstat_fd)
+ *        open between calls; later calls with 'hold' set re-read through
+ *        that descriptor.
+ * start: non-zero returns the list in reverse order, so that component
+ *        arrays come before the arrays built on top of them.
+ *
+ * Returns the list (to be released with free_mdstat()), or NULL if the
+ * file could not be opened.  NULL is also returned when no array line
+ * was found.
+ */
+struct mdstat_ent *mdstat_read(int hold, int start)
+{
+	FILE *f;
+	struct mdstat_ent *all, *rv, **end, **insert_here;
+	char *line;
+	int fd;
+
+	if (hold && mdstat_fd != -1) {
+		lseek(mdstat_fd, 0L, SEEK_SET);
+		fd = dup(mdstat_fd);
+		if (fd < 0)
+			return NULL;
+		f = fdopen(fd, "r");
+		if (f == NULL) {
+			/* fdopen() does not close fd on failure */
+			close(fd);
+			return NULL;
+		}
+	} else {
+		f = fopen("/proc/mdstat", "r");
+		if (f == NULL)
+			return NULL;
+	}
+	fcntl(fileno(f), F_SETFD, FD_CLOEXEC);
+
+	all = NULL;
+	end = &all;
+	for (; (line = conf_line(f)) ; free_line(line)) {
+		struct mdstat_ent *ent;
+		char *w;
+		char devnm[32];
+		int in_devs = 0;
+
+		if (strcmp(line, "Personalities")==0)
+			continue;
+		if (strcmp(line, "read_ahead")==0)
+			continue;
+		if (strcmp(line, "unused")==0)
+			continue;
+		insert_here = NULL;
+		/* Better be an md line.. */
+		if (strncmp(line, "md", 2)!= 0 || strlen(line) >= 32
+		    || (line[2] != '_' && !isdigit((unsigned char)line[2])))
+			continue;
+		strcpy(devnm, line);
+
+		ent = xmalloc(sizeof(*ent));
+		ent->dev = ent->level = ent->pattern= NULL;
+		ent->next = NULL;
+		ent->percent = RESYNC_NONE;
+		ent->active = -1;
+		ent->resync = 0;
+		ent->metadata_version = NULL;
+		ent->raid_disks = 0;
+		ent->devcnt = 0;
+		ent->members = NULL;
+
+		ent->dev = xstrdup(line);
+		strcpy(ent->devnm, devnm);
+
+		/* Walk the remaining words of this logical line */
+		for (w=dl_next(line); w!= line ; w=dl_next(w)) {
+			int l = strlen(w);
+			char *eq;
+			if (strcmp(w, "active")==0)
+				ent->active = 1;
+			else if (strcmp(w, "inactive")==0) {
+				ent->active = 0;
+				in_devs = 1;
+			} else if (ent->active > 0 &&
+				   ent->level == NULL &&
+				   w[0] != '(' /*readonly*/) {
+				ent->level = xstrdup(w);
+				in_devs = 1;
+			} else if (in_devs && strcmp(w, "blocks")==0)
+				in_devs = 0;
+			else if (in_devs) {
+				char *ep = strchr(w, '[');
+				ent->devcnt +=
+					add_member_devname(&ent->members, w);
+				if (ep && strncmp(w, "md", 2)==0) {
+					/* This has an md device as a component.
+					 * If that device is already in the
+					 * list, make sure we insert before
+					 * there.
+					 */
+					struct mdstat_ent **ih;
+					ih = &all;
+					while (ih != insert_here && *ih &&
+					       ((int)strlen((*ih)->devnm) != ep-w
+						|| strncmp((*ih)->devnm, w, ep-w) != 0))
+						ih = & (*ih)->next;
+					insert_here = ih;
+				}
+			} else if (strcmp(w, "super") == 0 &&
+				   dl_next(w) != line) {
+				w = dl_next(w);
+				ent->metadata_version = xstrdup(w);
+			} else if (w[0] == '[' && isdigit((unsigned char)w[1])) {
+				ent->raid_disks = atoi(w+1);
+			} else if (!ent->pattern &&
+				   w[0] == '[' &&
+				   (w[1] == 'U' || w[1] == '_')) {
+				/* "[UU_]" -> "UU_" */
+				ent->pattern = xstrdup(w+1);
+				if (ent->pattern[l-2]==']')
+					ent->pattern[l-2] = '\0';
+			} else if (ent->percent == RESYNC_NONE &&
+				   strncmp(w, "re", 2)== 0 &&
+				   w[l-1] == '%' &&
+				   (eq=strchr(w, '=')) != NULL ) {
+				ent->percent = atoi(eq+1);
+				if (strncmp(w,"resync", 6)==0)
+					ent->resync = 1;
+				else if (strncmp(w, "reshape", 7)==0)
+					ent->resync = 2;
+				else
+					ent->resync = 0;
+			} else if (ent->percent == RESYNC_NONE &&
+				   (w[0] == 'r' || w[0] == 'c')) {
+				/* NOTE(review): only the first 4 characters of
+				 * "resync" are compared here, unlike the branch
+				 * above - kept as found.
+				 */
+				if (strncmp(w, "resync", 4)==0)
+					ent->resync = 1;
+				if (strncmp(w, "reshape", 7)==0)
+					ent->resync = 2;
+				if (strncmp(w, "recovery", 8)==0)
+					ent->resync = 0;
+				if (strncmp(w, "check", 5)==0)
+					ent->resync = 3;
+
+				if (l > 8 && strcmp(w+l-8, "=DELAYED") == 0)
+					ent->percent = RESYNC_DELAYED;
+				if (l > 8 && strcmp(w+l-8, "=PENDING") == 0)
+					ent->percent = RESYNC_PENDING;
+			} else if (ent->percent == RESYNC_NONE &&
+				   w[0] >= '0' &&
+				   w[0] <= '9' &&
+				   w[l-1] == '%') {
+				ent->percent = atoi(w);
+			}
+		}
+		if (insert_here && (*insert_here)) {
+			ent->next = *insert_here;
+			*insert_here = ent;
+		} else {
+			*end = ent;
+			end = &ent->next;
+		}
+	}
+	if (hold && mdstat_fd == -1) {
+		mdstat_fd = dup(fileno(f));
+		/* a failed dup() leaves mdstat_fd at -1; nothing to flag then */
+		if (mdstat_fd >= 0)
+			fcntl(mdstat_fd, F_SETFD, FD_CLOEXEC);
+	}
+	fclose(f);
+
+	/* If we might want to start array,
+	 * reverse the order, so that components comes before composites
+	 */
+	if (start) {
+		rv = NULL;
+		while (all) {
+			struct mdstat_ent *e = all;
+			all = all->next;
+			e->next = rv;
+			rv = e;
+		}
+	} else
+		rv = all;
+	return rv;
+}
+
+/* Drop the descriptor kept open by mdstat_read(hold=1), if there is one,
+ * and mark it as closed.
+ */
+void mdstat_close(void)
+{
+	if (!(mdstat_fd < 0))
+		close(mdstat_fd);
+
+	mdstat_fd = -1;
+}
+
+/* Sleep for up to 'seconds' seconds, returning early if select() reports
+ * an exceptional condition on the held /proc/mdstat descriptor.
+ * When no descriptor is held this is a plain timed sleep.
+ */
+void mdstat_wait(int seconds)
+{
+	struct timeval tm;
+	fd_set fds;
+	int maxfd;
+
+	FD_ZERO(&fds);
+	if (mdstat_fd < 0) {
+		maxfd = 0;
+	} else {
+		FD_SET(mdstat_fd, &fds);
+		maxfd = mdstat_fd;
+	}
+
+	tm.tv_usec = 0;
+	tm.tv_sec = seconds;
+	select(maxfd + 1, NULL, NULL, &fds, &tm);
+}
+
+/* Block in pselect() (no timeout, signal mask 'sigmask') until something
+ * happens on the held /proc/mdstat descriptor or on 'fd'.
+ * 'fd' may be negative, in which case only /proc/mdstat is watched.
+ */
+void mdstat_wait_fd(int fd, const sigset_t *sigmask)
+{
+	fd_set fds, rfds;
+	int maxfd = 0;
+
+	FD_ZERO(&fds);
+	FD_ZERO(&rfds);
+	if (mdstat_fd >= 0)
+		FD_SET(mdstat_fd, &fds);
+
+	if (fd >= 0) {
+		struct stat stb;
+
+		/* Only trust st_mode when fstat() succeeded; otherwise treat
+		 * fd as an ordinary descriptor and let pselect() report the
+		 * problem.
+		 */
+		if (fstat(fd, &stb) == 0 &&
+		    (stb.st_mode & S_IFMT) == S_IFREG)
+			/* Must be a /proc or /sys fd, so expect
+			 * POLLPRI
+			 * i.e. an 'exceptional' event.
+			 */
+			FD_SET(fd, &fds);
+		else
+			FD_SET(fd, &rfds);
+
+		if (fd > maxfd)
+			maxfd = fd;
+	}
+	if (mdstat_fd > maxfd)
+		maxfd = mdstat_fd;
+
+	pselect(maxfd + 1, &rfds, NULL, &fds,
+		NULL, sigmask);
+}
+
+/* Report whether an array called 'devnm' is currently listed in
+ * /proc/mdstat.  Returns 1 if it is, 0 otherwise (including when the
+ * file cannot be read).
+ */
+int mddev_busy(char *devnm)
+{
+	struct mdstat_ent *mdstat = mdstat_read(0, 0);
+	struct mdstat_ent *me;
+	int busy = 0;
+
+	for (me = mdstat ; me ; me = me->next)
+		if (strcmp(me->devnm, devnm) == 0) {
+			busy = 1;
+			break;
+		}
+	/* record the answer before freeing: 'me' dangles after this */
+	free_mdstat(mdstat);
+	return busy;
+}
+
+/* Find the array that has a member device called 'name'.
+ * Entries whose metadata version is "external:" followed by something
+ * is_subarray() accepts are never returned.
+ * Returns a single detached entry (next == NULL) that the caller releases
+ * with free_mdstat(), or NULL when no array lists 'name'.  Every other
+ * entry read from /proc/mdstat is freed here.
+ */
+struct mdstat_ent *mdstat_by_component(char *name)
+{
+	struct mdstat_ent *cur = mdstat_read(0, 0);
+
+	while (cur) {
+		struct mdstat_ent *next;
+		struct dev_member *m;
+		/* don't return subarrays, only containers */
+		int is_sub = cur->metadata_version &&
+			strncmp(cur->metadata_version, "external:", 9) == 0 &&
+			is_subarray(cur->metadata_version+9);
+
+		if (!is_sub) {
+			for (m = cur->members; m; m = m->next) {
+				if (strcmp(m->name, name) != 0)
+					continue;
+				/* found it: drop the unvisited tail */
+				free_mdstat(cur->next);
+				cur->next = NULL;
+				return cur;
+			}
+		}
+
+		/* no match: unlink and free this entry, then move on */
+		next = cur->next;
+		cur->next = NULL;
+		free_mdstat(cur);
+		cur = next;
+	}
+	return NULL;
+}
+
+/* Find the array whose metadata version is
+ *    external:[/-]%s/%s
+ * where the first %s is 'container' and the second %s is 'subdev'.
+ * Returns a single detached entry (next == NULL) that the caller releases
+ * with free_mdstat(), or NULL when there is no such array.  Every other
+ * entry read from /proc/mdstat is freed here, including the last one
+ * examined when nothing matches.
+ */
+struct mdstat_ent *mdstat_by_subdev(char *subdev, char *container)
+{
+	struct mdstat_ent *mdstat = mdstat_read(0, 0);
+
+	while (mdstat) {
+		/* detach the head so it can be returned or freed alone */
+		struct mdstat_ent *ent = mdstat;
+
+		mdstat = mdstat->next;
+		ent->next = NULL;
+
+		if (ent->metadata_version != NULL &&
+		    strncmp(ent->metadata_version, "external:", 9) == 0 &&
+		    metadata_container_matches(ent->metadata_version+9,
+					       container) &&
+		    metadata_subdev_matches(ent->metadata_version+9,
+					    subdev)) {
+			free_mdstat(mdstat);
+			return ent;
+		}
+		free_mdstat(ent);
+	}
+	return NULL;
+}
diff --git a/misc/mdcheck b/misc/mdcheck
new file mode 100644
index 00000000..33570b97
--- /dev/null
+++ b/misc/mdcheck
@@ -0,0 +1,158 @@
+#!/bin/bash
+
+# Copyright (C) 2014 Neil Brown <neilb@suse.de>
+#
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Author: Neil Brown
+# Email: <neilb@suse.de>
+
+# This script should be run periodically to automatically
+# perform a 'check' on any md arrays.
+#
+# It supports a 'time budget' such that any incomplete 'check'
+# will be checkpointed when that time has expired.
+# A subsequent invocation can allow the 'check' to continue.
+#
+# Options are:
+# --continue Don't start new checks, only continue old ones.
+# --duration This is passed to "date --date=$duration" to find out
+# when to finish
+#
+# To support '--continue', arrays are identified by UUID and the 'sync_completed'
+# value is stored in /var/lib/mdcheck/$UUID
+
+# convert a /dev/md name into /sys/.../md equivalent
+# The major and minor numbers are taken from fields 5 and 6 of "ls -lLd"
+# and looked up under /sys/dev/block.
+sysname() {
+	# "set --" keeps a leading "-" in the ls output from being parsed as
+	# shell options, and clears the positional parameters when ls prints
+	# nothing (a bare "set" would list every shell variable instead).
+	set -- `ls -lLd "$1"`
+	maj=${5%,}
+	min=$6
+	readlink -f "/sys/dev/block/$maj:$min"
+}
+
+# Parse the command line.  getopt normalises it and rejects unknown
+# options; its exit status is passed on if it fails.
+args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@")
+rv=$?
+if [ $rv -ne 0 ]; then exit $rv; fi
+
+# getopt output is shell-quoted, so it must be re-evaluated as one string
+eval set -- "$args"
+
+cont=
+endtime=
+while [ " $1" != " --" ]
+do
+	# The short options declared to getopt above are handled together
+	# with their long forms, so "-c" and "-d" are no longer ignored.
+	case $1 in
+	-h | --help )
+		echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]'
+		echo >&2 ' time-offset must be understood by "date --date"'
+		exit 0
+		;;
+	-c | --continue ) cont=yes ;;
+	-d | --duration ) shift; dur=$1
+		endtime=$(date --date "$dur" "+%s")
+		;;
+	esac
+	shift
+done
+shift
+
+# We need a temp file occasionally...
+tmp=/var/lib/mdcheck/.md-check-$$
+trap 'rm -f "$tmp"' 0
+
+
+# firstly, clean out really old state files
+mkdir -p /var/lib/mdcheck
+find /var/lib/mdcheck -name "MD_UUID*" -type f -mtime +180 -exec rm {} \;
+
+# Now look at each md device.
+cnt=0
+for dev in /dev/md?*
+do
+ sys=`sysname $dev`
+ if [ ! -f "$sys/md/sync_action" ]
+ then # cannot check this array
+ continue
+ fi
+ if [ "`cat $sys/md/sync_action`" != 'idle' ]
+ then # This array is busy
+ continue
+ fi
+
+ mdadm --detail --export "$dev" > $tmp || continue
+ source $tmp
+ fl="/var/lib/mdcheck/MD_UUID_$MD_UUID"
+ if [ -z "$cont" ]
+ then
+ start=0
+ elif [ -z "$MD_UUID" -o ! -f "$fl" ]
+ then
+ # Nothing to continue here
+ continue
+ else
+ start=`cat "$fl"`
+ fi
+
+ cnt=$[cnt+1]
+ eval MD_${cnt}_fl=\$fl
+ eval MD_${cnt}_sys=\$sys
+ echo $start > $fl
+ echo $start > $sys/md/sync_min
+ echo check > $sys/md/sync_action
+done
+
+# Without --duration there is no time budget: leave the checks running
+# and finish here.
+if [ -z "$endtime" ]
+then
+ exit 0
+fi
+
+# Until the end time is reached, poll every 120 seconds: drop arrays that
+# are no longer checking and save the progress of the others.
+while [ `date +%s` -lt $endtime ]
+do
+ any=
+ for i in `eval echo {1..$cnt}`
+ do
+ eval fl=\$MD_${i}_fl
+ eval sys=\$MD_${i}_sys
+
+ # an empty slot: never used, or already dealt with
+ if [ -z "$fl" ]; then continue; fi
+
+ if [ "`cat $sys/md/sync_action`" != 'check' ]
+ then
+ # the array is no longer checking: forget it and remove its state file
+ eval MD_${i}_fl=
+ rm -f $fl
+ continue;
+ fi
+ # the first word of sync_completed is the current position
+ read a rest < $sys/md/sync_completed
+ echo $a > $fl
+ any=yes
+ done
+ # nothing left running: we are done early
+ if [ -z "$any" ]; then exit 0; fi
+ sleep 120
+done
+
+# We've waited, and there are still checks running.
+# Time to stop them.
+for i in `eval echo {1..$cnt}`
+do
+ eval fl=\$MD_${i}_fl
+ eval sys=\$MD_${i}_sys
+
+ if [ -z "$fl" ]; then continue; fi
+
+ if [ "`cat $sys/md/sync_action`" != 'check' ]
+ then
+ eval MD_${i}_fl=
+ rm -f $fl
+ continue;
+ fi
+ # interrupt the check, then save the value of sync_min so that a later
+ # "--continue" run can pick it up from the state file
+ echo idle > $sys/md/sync_action
+ cat $sys/md/sync_min > $fl
+done
diff --git a/misc/syslog-events b/misc/syslog-events
new file mode 100644
index 00000000..fe8c14e4
--- /dev/null
+++ b/misc/syslog-events
@@ -0,0 +1,27 @@
+#!/bin/sh
+#
+# sample event handling script for mdadm
+# e.g. mdadm --follow --program=/sbin/syslog-events --scan
+#
+# License: GPL ver.2
+# Copyright (C) 2004 SEKINE Tatsuo <tsekine@sdri.co.jp>
+
+event="$1"
+dev="$2"
+disc="$3"
+
+facility="kern"
+tag="mdmonitor"
+
+case x"${event}" in
+ xFail*) priority="error" ;;
+ xTest*) priority="debug" ;;
+ x*) priority="info" ;;
+esac
+
+msg="${event} event on ${dev}"
+if [ x"${disc}" != x ]; then
+ msg="${msg}, related to disc ${disc}"
+fi
+
+exec logger -t "${tag}" -p "${facility}.${priority}" -- "${msg}"
diff --git a/mkinitramfs b/mkinitramfs
new file mode 100644
index 00000000..c6275ddb
--- /dev/null
+++ b/mkinitramfs
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+# make sure we are being run in the right directory...
+if [ ! -f mkinitramfs ]
+then
+	echo >&2 mkinitramfs must be run from the mdadm source directory.
+	exit 1
+fi
+
+# The image needs a statically linked busybox to provide /bin/sh.
+if [ -f /bin/busybox ]
+then
+	case `file /bin/busybox` in
+	*statically* ) : good ;;
+	* ) echo >&2 mkinitramfs: /bin/busybox is not statically linked: cannot proceed.
+		exit 1
+		;;
+	esac
+else
+	echo >&2 "mkinitramfs: /bin/busybox doesn't exist - please install it statically linked."
+	exit 1
+fi
+
+# Populate a scratch tree; stop if mdadm cannot be built or copied, rather
+# than packing an image without it.
+rm -rf initramfs
+mkdir initramfs
+mkdir initramfs/bin
+make mdadm.static || exit 1
+cp mdadm.static initramfs/bin/mdadm || exit 1
+cp /bin/busybox initramfs/bin/busybox || exit 1
+ln initramfs/bin/busybox initramfs/bin/sh
+
+# The delimiter is quoted so that $rootuuid, $mdminor and $arg are written
+# literally into the init script and expanded when it runs, not while this
+# script is building the image.  "<<-" strips the leading tabs.
+cat <<- 'END' > initramfs/init
+	#!/bin/sh
+
+	echo 'Auto-assembling boot md array'
+	mkdir /proc
+	mount -t proc proc /proc
+	if [ -n "$rootuuid" ]
+	then arg=--uuid=$rootuuid
+	elif [ -n "$mdminor" ]
+	then arg=--super-minor=$mdminor
+	else arg=--super-minor=0
+	fi
+	echo "Using $arg"
+	mdadm -Acpartitions $arg --auto=part /dev/mda
+	cd /
+	mount /dev/mda1 /root || mount /dev/mda /root
+	umount /proc
+	cd /root
+	exec chroot . /sbin/init < /dev/console > /dev/console 2>&1
+END
+chmod +x initramfs/init
+
+# Pack the tree as a gzip-compressed newc cpio archive and clean up.
+(cd initramfs
+ find init bin | cpio -o -H newc | gzip --best
+) > init.cpio.gz
+rm -rf initramfs
+ls -l init.cpio.gz
diff --git a/monitor.c b/monitor.c
new file mode 100644
index 00000000..f81e7075
--- /dev/null
+++ b/monitor.c
@@ -0,0 +1,713 @@
+/*
+ * mdmon - monitor external metadata arrays
+ *
+ * Copyright (C) 2007-2009 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2007-2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "mdadm.h"
+#include "mdmon.h"
+#include <sys/syscall.h>
+#include <sys/select.h>
+#include <signal.h>
+
+/* Words that the array_state and sync_action sysfs attributes can hold.
+ * read_state() and read_action() cast the index found by
+ * sysfs_match_word() to enum array_state / enum sync_action, and the
+ * tables are indexed by those enums when writing back, so the order of
+ * the entries has to match the enum declarations.
+ */
+static char *array_states[] = {
+ "clear", "inactive", "suspended", "readonly", "read-auto",
+ "clean", "active", "write-pending", "active-idle", NULL };
+static char *sync_actions[] = {
+ "idle", "reshape", "resync", "recover", "check", "repair", NULL
+};
+
+/* Write the string 'attr' (without its terminating NUL) to 'fd'.
+ * Returns whatever write() returned.
+ */
+static int write_attr(char *attr, int fd)
+{
+	size_t len = strlen(attr);
+
+	return write(fd, attr, len);
+}
+
+/* Add 'fd' to the set 'fds' and raise '*maxfd' if needed.
+ * Negative descriptors are ignored silently; descriptors that fail
+ * fstat() or whose file has a link count of zero are skipped with a
+ * debug message.
+ */
+static void add_fd(fd_set *fds, int *maxfd, int fd)
+{
+	struct stat st;
+
+	if (fd < 0)
+		return;
+
+	if (fstat(fd, &st) == -1) {
+		dprintf("%s: Invalid fd %d\n", __func__, fd);
+		return;
+	}
+	if (st.st_nlink == 0) {
+		dprintf("%s: fd %d was deleted\n", __func__, fd);
+		return;
+	}
+
+	FD_SET(fd, fds);
+	if (*maxfd < fd)
+		*maxfd = fd;
+}
+
+/* Read an attribute from the start of 'fd' into 'buf' (capacity 'len').
+ * The result is always NUL terminated and one trailing newline is
+ * removed.
+ * Returns the number of bytes read (counting a stripped newline), or 0
+ * with an empty string when 'fd' is negative or nothing could be read.
+ */
+static int read_attr(char *buf, int len, int fd)
+{
+	int n = 0;
+
+	if (fd >= 0) {
+		lseek(fd, 0, 0);
+		n = read(fd, buf, len - 1);
+	}
+
+	if (n <= 0) {
+		buf[0] = 0;
+		return 0;
+	}
+
+	buf[n] = 0;
+	if (buf[n-1] == '\n')
+		buf[n-1] = 0;
+	return n;
+}
+
+/* Read a sector position from 'fd' into '*v'.
+ * A value starting with "none" is stored as MaxSector; anything else is
+ * parsed as a decimal number.  '*v' is left untouched when nothing could
+ * be read.
+ */
+static void read_resync_start(int fd, unsigned long long *v)
+{
+	char buf[30];
+
+	if (read_attr(buf, 30, fd) <= 0) {
+		dprintf("%s: Failed to read resync_start (%d)\n",
+			__func__, fd);
+		return;
+	}
+
+	if (strncmp(buf, "none", 4) != 0)
+		*v = strtoull(buf, NULL, 10);
+	else
+		*v = MaxSector;
+}
+
+/* Read the leading number from 'fd' (the sync_completed attribute).
+ * The number must be followed by end of string, a newline or a space.
+ * Returns the value, or 0 when nothing could be read or the text does
+ * not have that form.
+ */
+static unsigned long long read_sync_completed(int fd)
+{
+	char buf[50];
+	char *end;
+	unsigned long long val;
+	int n = read_attr(buf, 50, fd);
+
+	if (n <= 0)
+		return 0;
+	buf[n] = 0;
+
+	val = strtoull(buf, &end, 0);
+	if (end == buf)
+		return 0;
+
+	switch (*end) {
+	case 0:
+	case '\n':
+	case ' ':
+		return val;
+	default:
+		return 0;
+	}
+}
+
+/* Read the array_state attribute from 'fd' and translate it through the
+ * array_states table.  Returns bad_word when nothing could be read.
+ */
+static enum array_state read_state(int fd)
+{
+	char word[20];
+
+	if (read_attr(word, 20, fd) <= 0)
+		return bad_word;
+
+	return (enum array_state) sysfs_match_word(word, array_states);
+}
+
+/* Read the sync_action attribute from 'fd' and translate it through the
+ * sync_actions table.  Returns bad_action when nothing could be read.
+ */
+static enum sync_action read_action(int fd)
+{
+	char word[20];
+
+	if (read_attr(word, 20, fd) <= 0)
+		return bad_action;
+
+	return (enum sync_action) sysfs_match_word(word, sync_actions);
+}
+
+/* Read a device's comma separated state attribute from 'fd' and return
+ * it as a mask of DS_FAULTY, DS_INSYNC, DS_WRITE_MOSTLY, DS_SPARE and
+ * DS_BLOCKED.  Returns 0 when nothing could be read.
+ */
+int read_dev_state(int fd)
+{
+	char buf[60];
+	char *cp;
+	int flags = 0;
+
+	if (read_attr(buf, 60, fd) <= 0)
+		return 0;
+
+	/* examine each comma separated word in turn */
+	for (cp = buf; cp != NULL; ) {
+		char *comma;
+
+		if (sysfs_attr_match(cp, "faulty"))
+			flags |= DS_FAULTY;
+		if (sysfs_attr_match(cp, "in_sync"))
+			flags |= DS_INSYNC;
+		if (sysfs_attr_match(cp, "write_mostly"))
+			flags |= DS_WRITE_MOSTLY;
+		if (sysfs_attr_match(cp, "spare"))
+			flags |= DS_SPARE;
+		if (sysfs_attr_match(cp, "blocked"))
+			flags |= DS_BLOCKED;
+
+		comma = strchr(cp, ',');
+		cp = comma ? comma + 1 : NULL;
+	}
+	return flags;
+}
+
+/* Send SIGUSR1 to the thread whose id is mgr_tid, inside this process.
+ * The raw system call is used, i.e. tgkill(getpid(), mgr_tid, SIGUSR1).
+ */
+static void signal_manager(void)
+{
+	syscall(SYS_tgkill, getpid(), mgr_tid, SIGUSR1);
+}
+
+/* Monitor a set of active md arrays - all of which share the
+ * same metadata - and respond to events that require
+ * metadata update.
+ *
+ * New arrays are detected by another thread which allocates
+ * required memory and attaches the data structure to our list.
+ *
+ * Events:
+ * Array stops.
+ * This is detected by array_state going to 'clear' or 'inactive'.
+ * while we thought it was active.
+ * Response is to mark metadata as clean and 'clear' the array(??)
+ * write-pending
+ * array_state is 'write-pending'
+ * We mark metadata as 'dirty' then set array to 'active'.
+ * active_idle
+ * Either ignore, or mark clean, then mark metadata as clean.
+ *
+ * device fails
+ * detected by rd-N/state reporting "faulty"
+ * mark device as 'failed' in metadata, let the kernel release the
+ * device by writing '-blocked' to rd/state, and finally write 'remove' to
+ * rd/state. Before a disk can be replaced it must be failed and removed
+ * from all container members, this will be preemptive for the other
+ * arrays... safe?
+ *
+ * sync completes
+ * sync_action was 'resync' and becomes 'idle' and resync_start becomes
+ * MaxSector
+ * Notify metadata that sync is complete.
+ *
+ * recovery completes
+ * sync_action changes from 'recover' to 'idle'
+ * Check each device state and mark metadata if 'faulty' or 'in_sync'.
+ *
+ * deal with resync
+ * This only happens on finding a new array... mdadm will have set
+ * 'resync_start' to the correct value. If 'resync_start' indicates that a
+ * resync needs to occur, set the array to the 'active' state rather than the
+ * initial read-auto state.
+ *
+ *
+ *
+ * We wait for a change (poll/select) on array_state, sync_action, and
+ * each rd-X/state file.
+ * When we get any change, we check everything. So read each state file,
+ * then decide what to do.
+ *
+ * The core action is to write new metadata to all devices in the array.
+ * This is done at most once on any wakeup.
+ * After that we might:
+ * - update the array_state
+ * - set the role of some devices.
+ * - request a sync_action
+ *
+ */
+
+/* Bits returned by read_and_act(), OR-ed together:
+ * ARRAY_DIRTY - the array was seen write_pending, active_idle, active or
+ *               suspended, or a readonly array was switched to active
+ * ARRAY_BUSY  - writing "remove" to a device's state file did not succeed
+ */
+#define ARRAY_DIRTY 1
+#define ARRAY_BUSY 2
+/* Sample the sysfs attributes of array 'a' and of its member devices, let
+ * the metadata handler (a->container->ss) record what changed, sync the
+ * metadata, then write the chosen next state/action and the per-device
+ * requests back to sysfs.
+ * The manager is signalled when a degraded or reshape check is wanted.
+ * If the array is found to have stopped, a->container is cleared before
+ * returning.
+ * Returns a mask of ARRAY_DIRTY and ARRAY_BUSY.
+ */
+static int read_and_act(struct active_array *a)
+{
+ unsigned long long sync_completed;
+ int check_degraded = 0;
+ int check_reshape = 0;
+ int deactivate = 0;
+ struct mdinfo *mdi;
+ int ret = 0;
+ int count = 0;
+ struct timeval tv;
+
+ a->next_state = bad_word;
+ a->next_action = bad_action;
+
+ /* Step 1: read the current state of the array and every device */
+ a->curr_state = read_state(a->info.state_fd);
+ a->curr_action = read_action(a->action_fd);
+ if (a->curr_state != clear)
+ /*
+ * In "clear" state, resync_start may wrongly be set to "0"
+ * when the kernel called md_clean but didn't remove the
+ * sysfs attributes yet
+ */
+ read_resync_start(a->resync_start_fd, &a->info.resync_start);
+ sync_completed = read_sync_completed(a->sync_completed_fd);
+ for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
+ mdi->next_state = 0;
+ mdi->curr_state = 0;
+ if (mdi->state_fd >= 0) {
+ read_resync_start(mdi->recovery_fd,
+ &mdi->recovery_start);
+ mdi->curr_state = read_dev_state(mdi->state_fd);
+ }
+ }
+
+ gettimeofday(&tv, NULL);
+ dprintf("%s(%d): %ld.%06ld state:%s prev:%s action:%s prev: %s start:%llu\n",
+ __func__, a->info.container_member,
+ tv.tv_sec, tv.tv_usec,
+ array_states[a->curr_state],
+ array_states[a->prev_state],
+ sync_actions[a->curr_action],
+ sync_actions[a->prev_action],
+ a->info.resync_start
+ );
+
+ /* Step 2: react to the array state */
+ if ((a->curr_state == bad_word || a->curr_state <= inactive) &&
+ a->prev_state > inactive) {
+ /* array has been stopped */
+ a->container->ss->set_array_state(a, 1);
+ a->next_state = clear;
+ deactivate = 1;
+ }
+ if (a->curr_state == write_pending) {
+ a->container->ss->set_array_state(a, 0);
+ a->next_state = active;
+ ret |= ARRAY_DIRTY;
+ }
+ if (a->curr_state == active_idle) {
+ /* Set array to 'clean' FIRST, then mark clean
+ * in the metadata
+ */
+ a->next_state = clean;
+ ret |= ARRAY_DIRTY;
+ }
+ if (a->curr_state == clean) {
+ a->container->ss->set_array_state(a, 1);
+ }
+ if (a->curr_state == active ||
+ a->curr_state == suspended)
+ ret |= ARRAY_DIRTY;
+ if (a->curr_state == readonly) {
+ /* Well, I'm ready to handle things. If readonly
+ * wasn't requested, transition to read-auto.
+ */
+ char buf[64];
+ read_attr(buf, sizeof(buf), a->metadata_fd);
+ if (strncmp(buf, "external:-", 10) == 0) {
+ /* explicit request for readonly array. Leave it alone */
+ ;
+ } else {
+ if (a->container->ss->set_array_state(a, 2))
+ a->next_state = read_auto; /* array is clean */
+ else {
+ a->next_state = active; /* Now active for recovery etc */
+ ret |= ARRAY_DIRTY;
+ }
+ }
+ }
+
+ /* Step 3: react to sync_action transitions */
+ if (!deactivate &&
+ a->curr_action == idle &&
+ a->prev_action == resync) {
+ /* A resync has finished. The endpoint is recorded in
+ * 'sync_start'. We don't update the metadata
+ * until the array goes inactive or readonly though.
+ * Just check if we need to fiddle spares.
+ */
+ a->container->ss->set_array_state(a, a->curr_state <= clean);
+ check_degraded = 1;
+ }
+
+ if (!deactivate &&
+ a->curr_action == idle &&
+ a->prev_action == recover) {
+ /* A recovery has finished. Some disks may be in sync now,
+ * and the array may no longer be degraded
+ */
+ for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
+ a->container->ss->set_disk(a, mdi->disk.raid_disk,
+ mdi->curr_state);
+ if (! (mdi->curr_state & DS_INSYNC))
+ check_degraded = 1;
+ count++;
+ }
+ /* fewer (or more) devices than raid_disks also needs a check */
+ if (count != a->info.array.raid_disks)
+ check_degraded = 1;
+ }
+
+ if (!deactivate &&
+ a->curr_action == reshape &&
+ a->prev_action != reshape)
+ /* reshape was requested by mdadm. Need to see if
+ * new devices have been added. Manager does that
+ * when it sees check_reshape
+ */
+ check_reshape = 1;
+
+ /* Check for failures and if found:
+ * 1/ Record the failure in the metadata and unblock the device.
+ * FIXME update the kernel to stop notifying on failed drives when
+ * the array is readonly and we have cleared 'blocked'
+ * 2/ Try to remove the device if the array is writable, or can be
+ * made writable.
+ */
+ for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
+ if (mdi->curr_state & DS_FAULTY) {
+ a->container->ss->set_disk(a, mdi->disk.raid_disk,
+ mdi->curr_state);
+ check_degraded = 1;
+ if (mdi->curr_state & DS_BLOCKED)
+ mdi->next_state |= DS_UNBLOCK;
+ if (a->curr_state == read_auto) {
+ a->container->ss->set_array_state(a, 0);
+ a->next_state = active;
+ }
+ if (a->curr_state > readonly)
+ mdi->next_state |= DS_REMOVE;
+ }
+ }
+
+ /* Check for recovery checkpoint notifications. We need to be a
+ * minimum distance away from the last checkpoint to prevent
+ * over checkpointing. Note reshape checkpointing is handled
+ * in the second branch.
+ */
+ if (sync_completed > a->last_checkpoint &&
+ sync_completed - a->last_checkpoint > a->info.component_size >> 4 &&
+ a->curr_action > reshape) {
+ /* A (non-reshape) sync_action has reached a checkpoint.
+ * Record the updated position in the metadata
+ */
+ a->last_checkpoint = sync_completed;
+ a->container->ss->set_array_state(a, a->curr_state <= clean);
+ } else if ((a->curr_action == idle && a->prev_action == reshape) ||
+ (a->curr_action == reshape
+ && sync_completed > a->last_checkpoint) ) {
+ /* Reshape has progressed or completed so we need to
+ * update the array state - and possibly the array size
+ */
+ if (sync_completed != 0)
+ a->last_checkpoint = sync_completed;
+ /* We might need to update last_checkpoint depending on
+ * the reason that reshape finished.
+ * if array reshape is really finished:
+ * set check point to the end, this allows
+ * set_array_state() to finalize reshape in metadata
+ * if reshape is broken: do not set checkpoint to the end
+ * this allows for reshape restart from checkpoint
+ */
+ if ((a->curr_action != reshape) &&
+ (a->prev_action == reshape)) {
+ char buf[40];
+ if ((sysfs_get_str(&a->info, NULL,
+ "reshape_position",
+ buf,
+ sizeof(buf)) >= 0) &&
+ strncmp(buf, "none", 4) == 0)
+ a->last_checkpoint = a->info.component_size;
+ }
+ a->container->ss->set_array_state(a, a->curr_state <= clean);
+ a->last_checkpoint = sync_completed;
+ }
+
+ if (sync_completed > a->last_checkpoint)
+ a->last_checkpoint = sync_completed;
+
+ /* Step 4: sync the metadata before any state is written to sysfs */
+ a->container->ss->sync_metadata(a->container);
+ dprintf("%s(%d): state:%s action:%s next(", __func__, a->info.container_member,
+ array_states[a->curr_state], sync_actions[a->curr_action]);
+
+ /* Effect state changes in the array */
+ if (a->next_state != bad_word) {
+ dprintf(" state:%s", array_states[a->next_state]);
+ write_attr(array_states[a->next_state], a->info.state_fd);
+ }
+ if (a->next_action != bad_action) {
+ write_attr(sync_actions[a->next_action], a->action_fd);
+ dprintf(" action:%s", sync_actions[a->next_action]);
+ }
+ for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
+ if (mdi->next_state & DS_UNBLOCK) {
+ dprintf(" %d:-blocked", mdi->disk.raid_disk);
+ write_attr("-blocked", mdi->state_fd);
+ }
+
+ if ((mdi->next_state & DS_REMOVE) && mdi->state_fd >= 0) {
+ int remove_result;
+
+ /* The kernel may not be able to immediately remove the
+ * disk. In that case we wait a little while and
+ * try again.
+ */
+ remove_result = write_attr("remove", mdi->state_fd);
+ if (remove_result > 0) {
+ dprintf(" %d:removed", mdi->disk.raid_disk);
+ close(mdi->state_fd);
+ close(mdi->recovery_fd);
+ mdi->state_fd = -1;
+ } else
+ ret |= ARRAY_BUSY;
+ }
+ if (mdi->next_state & DS_INSYNC) {
+ write_attr("+in_sync", mdi->state_fd);
+ dprintf(" %d:+in_sync", mdi->disk.raid_disk);
+ }
+ }
+ dprintf(" )\n");
+
+ /* move curr_ to prev_ */
+ a->prev_state = a->curr_state;
+
+ a->prev_action = a->curr_action;
+
+ for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
+ mdi->prev_state = mdi->curr_state;
+ mdi->next_state = 0;
+ }
+
+ if (check_degraded || check_reshape) {
+ /* manager will do the actual check */
+ if (check_degraded)
+ a->check_degraded = 1;
+ if (check_reshape)
+ a->check_reshape = 1;
+ signal_manager();
+ }
+
+ /* a NULL container is how the other loops recognise a stopped array */
+ if (deactivate)
+ a->container = NULL;
+
+ return ret;
+}
+
+/* Look up the member of array 'a' with the given major/minor numbers.
+ * Returns the matching device record, or NULL if the array has none.
+ */
+static struct mdinfo *
+find_device(struct active_array *a, int major, int minor)
+{
+	struct mdinfo *mdi = a->info.devs;
+
+	while (mdi &&
+	       !(mdi->disk.major == major && mdi->disk.minor == minor))
+		mdi = mdi->next;
+
+	return mdi;
+}
+
+/* Propagate a device failure: for every live array in list 'aa' that
+ * contains the same device as 'failed' (matched by major/minor) and has
+ * not yet seen it as faulty, write "faulty" to that device's state file.
+ * Arrays without a container or flagged to_remove are skipped.
+ */
+static void reconcile_failed(struct active_array *aa, struct mdinfo *failed)
+{
+	struct active_array *a;
+
+	for (a = aa; a; a = a->next) {
+		struct mdinfo *victim;
+
+		if (!a->container || a->to_remove)
+			continue;
+
+		victim = find_device(a, failed->disk.major,
+				     failed->disk.minor);
+		if (victim && !(victim->curr_state & DS_FAULTY))
+			write_attr("faulty", victim->state_fd);
+	}
+}
+
+#ifdef DEBUG
+/* Debug aid: print to stderr every descriptor set in 'fds', labelled with
+ * the last path component of its /proc/<pid>/fd/<n> link target, or
+ * "unknown" when the link cannot be read.
+ */
+static void dprint_wake_reasons(fd_set *fds)
+{
+	char proc_path[256];
+	char link[256];
+	int fd;
+
+	fprintf(stderr, "monitor: wake ( ");
+	for (fd = 0; fd < FD_SETSIZE; fd++) {
+		char *slash;
+		int len;
+
+		if (!FD_ISSET(fd, fds))
+			continue;
+
+		sprintf(proc_path, "/proc/%d/fd/%d",
+			(int) getpid(), fd);
+
+		len = readlink(proc_path, link, sizeof(link) - 1);
+		if (len < 0) {
+			fprintf(stderr, "%d:unknown ", fd);
+			continue;
+		}
+		link[len] = '\0';
+
+		slash = strrchr(link, '/');
+		fprintf(stderr, "%d:%s ",
+			fd, slash ? slash + 1 : link);
+	}
+	fprintf(stderr, ")\n");
+}
+#endif
+
+int monitor_loop_cnt;
+
+/* One pass of the monitor loop for 'container':
+ *  - unlink arrays that are deactivated or flagged to_remove and hand them
+ *    to the manager through discard_this (one at a time)
+ *  - exit the process when the manager is ready and either no arrays are
+ *    left, or sigterm is set and no array was dirty on the previous pass,
+ *    provided the container device can be opened exclusively (or the open
+ *    fails for a reason other than EBUSY)
+ *  - unless 'nowait' is set, sleep in pselect() on the sysfs descriptors
+ *    of every array, with SIGUSR1 unblocked
+ *  - apply queued metadata updates
+ *  - run read_and_act() on every live array and propagate device failures
+ *    across the container members
+ * Returns 1 if read_and_act() was run for at least one array, otherwise 0.
+ */
+static int wait_and_act(struct supertype *container, int nowait)
+{
+ fd_set rfds;
+ int maxfd = 0;
+ struct active_array **aap = &container->arrays;
+ struct active_array *a, **ap;
+ int rv;
+ struct mdinfo *mdi;
+ static unsigned int dirty_arrays = ~0; /* start at some non-zero value */
+
+ FD_ZERO(&rfds);
+
+ for (ap = aap ; *ap ;) {
+ a = *ap;
+ /* once an array has been deactivated we want to
+ * ask the manager to discard it.
+ */
+ if (!a->container || a->to_remove) {
+ /* discard_this is still occupied: leave this array in the
+ * list for a later pass
+ */
+ if (discard_this) {
+ ap = &(*ap)->next;
+ continue;
+ }
+ *ap = a->next;
+ a->next = NULL;
+ discard_this = a;
+ signal_manager();
+ continue;
+ }
+
+ add_fd(&rfds, &maxfd, a->info.state_fd);
+ add_fd(&rfds, &maxfd, a->action_fd);
+ add_fd(&rfds, &maxfd, a->sync_completed_fd);
+ for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
+ add_fd(&rfds, &maxfd, mdi->state_fd);
+
+ ap = &(*ap)->next;
+ }
+
+ if (manager_ready && (*aap == NULL || (sigterm && !dirty_arrays))) {
+ /* No interesting arrays, or we have been told to
+ * terminate and everything is clean. Lets see about
+ * exiting. Note that blocking at this point is not a
+ * problem as there are no active arrays, there is
+ * nothing that we need to be ready to do.
+ */
+ int fd;
+ if (sigterm)
+ fd = open_dev_excl(container->devnm);
+ else
+ fd = open_dev_flags(container->devnm, O_RDONLY|O_EXCL);
+ if (fd >= 0 || errno != EBUSY) {
+ /* OK, we are safe to leave */
+ if (sigterm && !dirty_arrays)
+ dprintf("caught sigterm, all clean... exiting\n");
+ else
+ dprintf("no arrays to monitor... exiting\n");
+ if (!sigterm)
+ /* On SIGTERM, someone (the take-over mdmon) will
+ * clean up
+ */
+ remove_pidfile(container->devnm);
+ exit_now = 1;
+ signal_manager();
+ /* NOTE(review): fd can be negative here when the open
+ * failed with an error other than EBUSY
+ */
+ close(fd);
+ exit(0);
+ }
+ }
+
+ if (!nowait) {
+ sigset_t set;
+ struct timespec ts;
+ ts.tv_sec = 24*3600;
+ ts.tv_nsec = 0;
+ if (*aap == NULL || container->retry_soon) {
+ /* just waiting to get O_EXCL access */
+ ts.tv_sec = 0;
+ ts.tv_nsec = 20000000ULL;
+ }
+ /* fetch the current signal mask and let SIGUSR1 through while
+ * waiting
+ */
+ sigprocmask(SIG_UNBLOCK, NULL, &set);
+ sigdelset(&set, SIGUSR1);
+ /* monitor_loop_cnt is made odd for the duration of the wait and
+ * even again afterwards
+ */
+ monitor_loop_cnt |= 1;
+ rv = pselect(maxfd+1, NULL, NULL, &rfds, &ts, &set);
+ monitor_loop_cnt += 1;
+ if (rv == -1) {
+ if (errno == EINTR) {
+ rv = 0;
+ dprintf("monitor: caught signal\n");
+ } else
+ dprintf("monitor: error %d in pselect\n",
+ errno);
+ }
+ #ifdef DEBUG
+ else
+ dprint_wake_reasons(&rfds);
+ #endif
+ container->retry_soon = 0;
+ }
+
+ if (update_queue) {
+ struct metadata_update *this;
+
+ for (this = update_queue; this ; this = this->next)
+ container->ss->process_update(container, this);
+
+ /* pass the processed list on as update_queue_handled and let
+ * the manager know
+ */
+ update_queue_handled = update_queue;
+ update_queue = NULL;
+ signal_manager();
+ container->ss->sync_metadata(container);
+ }
+
+ rv = 0;
+ dirty_arrays = 0;
+ for (a = *aap; a ; a = a->next) {
+
+ if (a->replaces && !discard_this) {
+ /* unlink the array that 'a' replaces from the rest of the
+ * list and hand it to the manager for disposal
+ */
+ struct active_array **ap;
+ for (ap = &a->next; *ap && *ap != a->replaces;
+ ap = & (*ap)->next)
+ ;
+ if (*ap)
+ *ap = (*ap)->next;
+ discard_this = a->replaces;
+ a->replaces = NULL;
+ /* FIXME check if device->state_fd need to be cleared?*/
+ signal_manager();
+ }
+ if (a->container && !a->to_remove) {
+ int ret = read_and_act(a);
+ rv |= 1;
+ dirty_arrays += !!(ret & ARRAY_DIRTY);
+ /* when terminating stop manipulating the array after it
+ * is clean, but make sure read_and_act() is given a
+ * chance to handle 'active_idle'
+ */
+ if (sigterm && !(ret & ARRAY_DIRTY))
+ a->container = NULL; /* stop touching this array */
+ if (ret & ARRAY_BUSY)
+ container->retry_soon = 1;
+ }
+ }
+
+ /* propagate failures across container members */
+ for (a = *aap; a ; a = a->next) {
+ if (!a->container || a->to_remove)
+ continue;
+ for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
+ if (mdi->curr_state & DS_FAULTY)
+ reconcile_failed(*aap, mdi);
+ }
+
+ return rv;
+}
+
+/* Monitor thread main loop: call wait_and_act() repeatedly for
+ * 'container'.  Only the first call is made with 'nowait' set, so the
+ * arrays are examined once before the first wait.  The loop ends if
+ * wait_and_act() ever returns a negative value.
+ */
+void do_monitor(struct supertype *container)
+{
+	int nowait = 1;
+
+	for (;;) {
+		int rv = wait_and_act(container, nowait);
+
+		nowait = 0;
+		if (rv < 0)
+			break;
+	}
+}
diff --git a/msg.c b/msg.c
new file mode 100644
index 00000000..2e64589b
--- /dev/null
+++ b/msg.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright (C) 2008 Intel Corporation
+ *
+ * mdmon socket / message handling
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include "mdadm.h"
+#include "mdmon.h"
+
+static const __u32 start_magic = 0x5a5aa5a5;
+static const __u32 end_magic = 0xa5a55a5a;
+
+/*
+ * Write exactly 'len' bytes from 'buf' to 'fd'.
+ *
+ * Before every write() the descriptor is polled for writability with
+ * select(); a 'tmo' of zero means wait without a timeout, otherwise
+ * 'tmo' is the timeout in seconds handed to select().
+ *
+ * Returns 0 once everything has been written, -1 if select() or write()
+ * fails, times out, or makes no progress.
+ */
+static int send_buf(int fd, const void* buf, int len, int tmo)
+{
+	const char *cursor = buf;
+	int remaining = len;
+	struct timeval timeout = {tmo, 0};
+	struct timeval *ptmo = tmo ? &timeout : NULL;
+
+	while (remaining) {
+		fd_set wfds;
+		int n;
+
+		FD_ZERO(&wfds);
+		FD_SET(fd, &wfds);
+		if (select(fd+1, NULL, &wfds, NULL, ptmo) <= 0)
+			return -1;
+
+		n = write(fd, cursor, remaining);
+		if (n <= 0)
+			return -1;
+
+		remaining -= n;
+		cursor += n;
+	}
+	return 0;
+}
+
+/*
+ * Read exactly 'len' bytes from 'fd' into 'buf'.
+ *
+ * Before every read() the descriptor is polled for readability with
+ * select(); a 'tmo' of zero means wait without a timeout, otherwise
+ * 'tmo' is the timeout in seconds handed to select().
+ *
+ * Returns 0 once the buffer is full, -1 if select() or read() fails,
+ * times out, or returns end-of-file.
+ */
+static int recv_buf(int fd, void* buf, int len, int tmo)
+{
+	char *cursor = buf;
+	int remaining = len;
+	struct timeval timeout = {tmo, 0};
+	struct timeval *ptmo = tmo ? &timeout : NULL;
+
+	while (remaining) {
+		fd_set rfds;
+		int n;
+
+		FD_ZERO(&rfds);
+		FD_SET(fd, &rfds);
+		if (select(fd+1, &rfds, NULL, NULL, ptmo) <= 0)
+			return -1;
+
+		n = read(fd, cursor, remaining);
+		if (n <= 0)
+			return -1;
+
+		remaining -= n;
+		cursor += n;
+	}
+	return 0;
+}
+
+/*
+ * Send one framed message on 'fd': the start magic, the 32-bit length,
+ * the payload (only when msg->len is positive) and the end magic.
+ * Every piece is written with send_buf() using timeout 'tmo'.
+ *
+ * Returns 0 on success and -1 as soon as any piece fails; once a piece
+ * has failed nothing further is written.
+ */
+int send_message(int fd, struct metadata_update *msg, int tmo)
+{
+	__s32 len = msg->len;
+	int rv;
+
+	rv = send_buf(fd, &start_magic, 4, tmo);
+	rv = rv ?: send_buf(fd, &len, 4, tmo);
+	if (len > 0)
+		rv = rv ?: send_buf(fd, msg->buf, msg->len, tmo);
+	/* chain on rv so that an earlier failure is not masked by a
+	 * successful write of the trailer
+	 */
+	rv = rv ?: send_buf(fd, &end_magic, 4, tmo);
+
+	return rv;
+}
+
+/*
+ * Receive one framed message from 'fd' into 'msg'.
+ *
+ * The frame is: start magic, 32-bit length, payload (present only when
+ * the length is positive), end magic.  A length above MSG_MAX_LEN is
+ * rejected.  For a positive length the payload is stored in a buffer
+ * obtained from xmalloc() and handed to the caller in msg->buf;
+ * otherwise msg->buf is set to NULL.  msg->len is written only on
+ * success.
+ *
+ * Returns 0 on success, -1 on any receive error or framing mismatch.
+ * When the failure happens after the payload buffer was allocated the
+ * buffer is freed, but msg->buf is not reset.
+ */
+int receive_message(int fd, struct metadata_update *msg, int tmo)
+{
+	__u32 magic;
+	__s32 len;
+
+	if (recv_buf(fd, &magic, 4, tmo) < 0)
+		return -1;
+	if (magic != start_magic)
+		return -1;
+
+	if (recv_buf(fd, &len, 4, tmo) < 0)
+		return -1;
+	if (len > MSG_MAX_LEN)
+		return -1;
+
+	msg->buf = NULL;
+	if (len > 0) {
+		msg->buf = xmalloc(len);
+		if (recv_buf(fd, msg->buf, len, tmo) < 0) {
+			free(msg->buf);
+			return -1;
+		}
+	}
+
+	if (recv_buf(fd, &magic, 4, tmo) < 0 || magic != end_magic) {
+		free(msg->buf);
+		return -1;
+	}
+
+	msg->len = len;
+	return 0;
+}
+
+/* Send a zero-length message on 'fd'; returns what send_message() returns. */
+int ack(int fd, int tmo)
+{
+	struct metadata_update empty = { .len = 0 };
+	struct metadata_update *reply = &empty;
+
+	return send_message(fd, reply, tmo);
+}
+
+/*
+ * Wait for one message on 'fd' and throw its contents away.
+ * Returns the result of receive_message(): 0 when a well-formed reply
+ * arrived, non-zero otherwise.
+ */
+int wait_reply(int fd, int tmo)
+{
+	struct metadata_update reply;
+	int status = receive_message(fd, &reply, tmo);
+
+	if (status != 0)
+		return status;
+
+	/* the caller only wants to know that a reply arrived, so any
+	 * payload that came with it is released here
+	 */
+	if (reply.len > 0)
+		free(reply.buf);
+
+	return 0;
+}
+
+/*
+ * Open a non-blocking stream connection to the mdmon socket that
+ * belongs to 'devname'.
+ *
+ * The socket path is "MDMON_DIR/<name>.sock".  When is_subarray(devname)
+ * is true the first character of 'devname' is skipped and <name> is the
+ * text up to (not including) the next '/'; otherwise <name> is
+ * 'devname' itself.
+ *
+ * Returns the connected descriptor, or -1 if the name is malformed, the
+ * resulting path does not fit in the local buffers, or socket() or
+ * connect() fails.
+ */
+int connect_monitor(char *devname)
+{
+	char path[100];
+	struct sockaddr_un addr;
+	size_t room;
+	long fl;
+	int sfd;
+	int pos;
+	int n;
+	char *c;
+
+	pos = snprintf(path, sizeof(path), "%s/", MDMON_DIR);
+	if (pos < 0 || (size_t)pos >= sizeof(path))
+		return -1;
+	room = sizeof(path) - pos;
+
+	if (is_subarray(devname)) {
+		devname++;
+		c = strchr(devname, '/');
+		if (!c)
+			return -1;
+		n = snprintf(&path[pos], room, "%.*s.sock",
+			     (int)(c - devname), devname);
+	} else
+		n = snprintf(&path[pos], room, "%s.sock", devname);
+
+	/* refuse over-long names instead of overrunning path[] */
+	if (n < 0 || (size_t)n >= room)
+		return -1;
+	if (strlen(path) >= sizeof(addr.sun_path))
+		return -1;
+
+	sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
+	if (sfd < 0)
+		return -1;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = PF_LOCAL;
+	strcpy(addr.sun_path, path); /* length checked above */
+	if (connect(sfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+		close(sfd);
+		return -1;
+	}
+
+	fl = fcntl(sfd, F_GETFL, 0);
+	fl |= O_NONBLOCK;
+	fcntl(sfd, F_SETFL, fl);
+
+	return sfd;
+}
+
+/*
+ * Ping mdmon over an already connected socket: send an empty message
+ * and wait for the reply, each with a timeout of 20.
+ *
+ * Returns 'sfd' unchanged when it is negative, 0 when the exchange
+ * succeeded and -1 otherwise.
+ */
+int fping_monitor(int sfd)
+{
+	if (sfd < 0)
+		return sfd;
+
+	/* try to ping existing socket */
+	if (ack(sfd, 20) != 0)
+		return -1;
+
+	/* check the reply */
+	if (wait_reply(sfd, 20) != 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Give the monitor a chance to update the metadata: connect to the
+ * mdmon socket for 'devname', exchange a ping and close the socket.
+ * Returns the result of fping_monitor(), or -1 if the connection could
+ * not be made.
+ */
+int ping_monitor(char *devname)
+{
+	int sock = connect_monitor(devname);
+	int result;
+
+	if (sock < 0)
+		return -1;
+
+	result = fping_monitor(sock);
+	close(sock);
+
+	return result;
+}
+
+/*
+ * Ping mdmon for 'devname' and hand back the payload of its reply.
+ *
+ * Returns the reply buffer (allocated by receive_message(), to be freed
+ * by the caller), or NULL when the connection or the exchange fails or
+ * the reply carries no payload.
+ */
+static char *ping_monitor_version(char *devname)
+{
+	struct metadata_update reply;
+	int failed;
+	int sock = connect_monitor(devname);
+
+	if (sock < 0)
+		return NULL;
+
+	/* the reply is only awaited when the ping itself went out */
+	failed = ack(sock, 20) != 0 ||
+		 receive_message(sock, &reply, 20) != 0;
+
+	close(sock);
+
+	if (failed)
+		return NULL;
+	if (!reply.len || !reply.buf)
+		return NULL;
+	return reply.buf;
+}
+
+/*
+ * Clear the "blocked" marker of a subarray by rewriting its
+ * metadata_version attribute as "external:/..." (character 9 is forced
+ * to '/').  When 'unfreeze' is set and the sync_action attribute is
+ * available, "idle" is additionally written to sync_action.
+ *
+ * Returns 0 on success, -1 when 'sra' is NULL or a sysfs write fails.
+ */
+int unblock_subarray(struct mdinfo *sra, const int unfreeze)
+{
+	char version[64];
+
+	if (!sra)
+		return -1;
+
+	sprintf(version, "external:%s\n", sra->text_version);
+	version[9] = '/';
+
+	if (sysfs_set_str(sra, NULL, "metadata_version", version))
+		return -1;
+
+	if (!unfreeze)
+		return 0;
+	if (!sysfs_attribute_available(sra, NULL, "sync_action"))
+		return 0;
+	if (sysfs_set_str(sra, NULL, "sync_action", "idle"))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Mark a subarray as blocked by rewriting its metadata_version
+ * attribute as "external:-..." (character 9 is forced to '-').
+ * Returns 0 on success, -1 if the sysfs write fails.
+ */
+int block_subarray(struct mdinfo *sra)
+{
+	char version[64];
+
+	sprintf(version, "external:%s\n", sra->text_version);
+	version[9] = '-';
+
+	return sysfs_set_str(sra, NULL, "metadata_version", version) ? -1 : 0;
+}
+
+/*
+ * Check whether the mdmon instance serving 'container' supports the
+ * array blocking mechanism.
+ *
+ * Returns 0 when no mdmon is running for the container or the running
+ * one reports a version of at least 3002000; otherwise prints an error
+ * and returns -1.
+ */
+int check_mdmon_version(char *container)
+{
+	char *version;
+	int ver;
+
+	/* if mdmon is not active we assume that any instance that is
+	 * later started will match the current mdadm version, if this
+	 * assumption is violated we may inadvertently rebuild an array
+	 * that was meant for reshape, or start rebuild on a spare that
+	 * was to be moved to another container
+	 */
+	if (!mdmon_running(container))
+		return 0;
+
+	version = ping_monitor_version(container);
+	ver = version ? mdadm_version(version) : -1;
+	free(version);
+
+	if (ver < 3002000) {
+		pr_err("mdmon instance for %s cannot be disabled\n",
+		       container);
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * block_monitor - prevent mdmon spare assignment
+ * @container - container to block
+ * @freeze - flag to additionally freeze sync_action
+ *
+ * This is used by the reshape code to freeze the container, and the
+ * auto-rebuild implementation to atomically move spares.
+ * In both cases we need to stop mdmon from assigning spares to replace
+ * failed devices as we might have other plans for the spare.
+ * For the reshape case we also need to 'freeze' sync_action so that
+ * no recovery happens until we have fully prepared for the reshape.
+ *
+ * We tell mdmon that the array is frozen by marking the 'metadata' name
+ * with a leading '-'. This previously told mdmon "Don't make this array
+ * read/write, leave it readonly". Now it means a more general "Don't
+ * reconfigure this array at all".
+ * As older versions of mdmon (which might run from initrd) don't understand
+ * this, we first check that the running mdmon is new enough.
+ *
+ * Returns 0 when every member of the container was blocked, -1 otherwise.
+ * On failure the members that had already been blocked are unblocked
+ * again before returning.
+ */
+int block_monitor(char *container, const int freeze)
+{
+ struct mdstat_ent *ent, *e, *e2;
+ struct mdinfo *sra = NULL;
+ char buf[64];
+ int rv = 0;
+
+ if (check_mdmon_version(container))
+ return -1;
+
+ ent = mdstat_read(0, 0);
+ if (!ent) {
+ pr_err("failed to read /proc/mdstat while disabling mdmon\n");
+ return -1;
+ }
+
+ /* freeze container contents */
+ /* every 'break' below leaves 'e' pointing at the member that could
+ * not be blocked; 'e' is NULL only when the loop ran to completion
+ */
+ for (e = ent; e; e = e->next) {
+ if (!is_container_member(e, container))
+ continue;
+ sysfs_free(sra);
+ sra = sysfs_read(-1, e->devnm, GET_VERSION);
+ if (!sra) {
+ pr_err("failed to read sysfs for subarray%s\n",
+ to_subarray(e, container));
+ break;
+ }
+ /* can't reshape an array that we can't monitor */
+ if (sra->text_version[0] == '-')
+ break;
+
+ if (freeze && sysfs_freeze_array(sra) < 1)
+ break;
+ /* flag this array to not be modified by mdmon (close race with
+ * takeover in reshape case and spare reassignment in the
+ * auto-rebuild case)
+ */
+ if (block_subarray(sra))
+ break;
+ ping_monitor(container);
+
+ /* check that we did not race with recovery */
+ /* NOTE(review): both alternatives below start with 'freeze &&',
+ * so when freeze is 0 the else branch is always taken and the
+ * member is unblocked again - confirm that this is intended
+ */
+ if ((freeze &&
+ !sysfs_attribute_available(sra, NULL, "sync_action")) ||
+ (freeze &&
+ sysfs_attribute_available(sra, NULL, "sync_action") &&
+ sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0 &&
+ strcmp(buf, "frozen\n") == 0))
+ /* pass */;
+ else {
+ unblock_subarray(sra, 0);
+ break;
+ }
+ /* Double check against races - there should be no spares
+ * or part-spares
+ */
+ sysfs_free(sra);
+ sra = sysfs_read(-1, e->devnm, GET_DEVS | GET_STATE);
+ if (sra && sra->array.spare_disks > 0) {
+ unblock_subarray(sra, freeze);
+ break;
+ }
+ }
+
+ /* e != NULL: the loop above stopped early on member 'e' */
+ if (e) {
+ pr_err("failed to freeze subarray%s\n",
+ to_subarray(e, container));
+
+ /* thaw the partially frozen container */
+ /* only the members visited before 'e' are walked here */
+ for (e2 = ent; e2 && e2 != e; e2 = e2->next) {
+ if (!is_container_member(e2, container))
+ continue;
+ sysfs_free(sra);
+ sra = sysfs_read(-1, e2->devnm, GET_VERSION);
+ /* a NULL sra makes unblock_subarray() return -1 */
+ if (unblock_subarray(sra, freeze))
+ pr_err("Failed to unfreeze %s\n", e2->dev);
+ }
+
+ ping_monitor(container); /* cleared frozen */
+ rv = -1;
+ }
+
+ sysfs_free(sra);
+ free_mdstat(ent);
+
+ return rv;
+}
+
+/*
+ * Undo block_monitor(): unblock every subarray that is a member of
+ * 'container', passing 'unfreeze' through to unblock_subarray().
+ * mdmon is pinged afterwards if at least one of the subarrays that
+ * could be read has a level above 0.
+ */
+void unblock_monitor(char *container, const int unfreeze)
+{
+	struct mdstat_ent *entries, *cur;
+	struct mdinfo *info = NULL;
+	int need_ping = 0;
+
+	entries = mdstat_read(0, 0);
+	if (!entries) {
+		pr_err("failed to read /proc/mdstat while unblocking container\n");
+		return;
+	}
+
+	/* unfreeze container contents */
+	for (cur = entries; cur; cur = cur->next) {
+		if (is_container_member(cur, container)) {
+			sysfs_free(info);
+			info = sysfs_read(-1, cur->devnm, GET_VERSION|GET_LEVEL);
+			if (info) {
+				if (info->array.level > 0)
+					need_ping++;
+				if (unblock_subarray(info, unfreeze))
+					pr_err("Failed to unfreeze %s\n", cur->dev);
+			}
+		}
+	}
+
+	if (need_ping)
+		ping_monitor(container);
+
+	sysfs_free(info);
+	free_mdstat(entries);
+}
+
+/*
+ * Give the manager a chance to view the updated container state.  This
+ * would naturally happen due to the manager noticing a change in
+ * /proc/mdstat; however, pinging encourages this detection to happen
+ * while an exclusive open() on the container is active.
+ *
+ * A message with length -1 is sent and the reply awaited.  Returns the
+ * negative value from connect_monitor() if no connection could be made,
+ * 0 on success and a non-zero value if the exchange fails.
+ */
+int ping_manager(char *devname)
+{
+	struct metadata_update msg = { .len = -1 };
+	int sock = connect_monitor(devname);
+	int status;
+
+	if (sock < 0)
+		return sock;
+
+	status = send_message(sock, &msg, 20);
+
+	/* check the reply */
+	if (status == 0 && wait_reply(sock, 20) != 0)
+		status = -1;
+
+	close(sock);
+	return status;
+}
+
+/*
+ * When takeover is used for grow purposes, mdadm has to be sure that
+ * mdmon has processed all updates (and, if necessary, that it closes
+ * at takeover to raid0).  Ping the manager first, then the monitor;
+ * the results of both pings are ignored.
+ */
+void flush_mdmon(char *container)
+{
+	(void) ping_manager(container);
+	(void) ping_monitor(container);
+}
diff --git a/msg.h b/msg.h
new file mode 100644
index 00000000..016612cd
--- /dev/null
+++ b/msg.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2008 Intel Corporation
+ *
+ * mdmon socket / message handling
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+struct mdinfo;
+struct metadata_update;
+
+extern int receive_message(int fd, struct metadata_update *msg, int tmo);
+extern int send_message(int fd, struct metadata_update *msg, int tmo);
+extern int ack(int fd, int tmo);
+extern int wait_reply(int fd, int tmo);
+extern int connect_monitor(char *devname);
+extern int ping_monitor(char *devname);
+extern int block_subarray(struct mdinfo *sra);
+extern int unblock_subarray(struct mdinfo *sra, const int unfreeze);
+extern int block_monitor(char *container, const int freeze);
+extern void unblock_monitor(char *container, const int unfreeze);
+extern int fping_monitor(int sock);
+extern int ping_manager(char *devname);
+extern void flush_mdmon(char *container);
+
+#define MSG_MAX_LEN (4*1024*1024)
diff --git a/part.h b/part.h
new file mode 100644
index 00000000..862a14c3
--- /dev/null
+++ b/part.h
@@ -0,0 +1,79 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2010 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neil@brown.name>
+ *
+ */
+
+/* Structure definitions for MBR and GPT partition tables
+ */
+
+#define MBR_SIGNATURE_MAGIC __cpu_to_le16(0xAA55)
+#define MBR_PARTITIONS 4
+
+/* One partition record; struct MBR holds MBR_PARTITIONS of these.
+ * NOTE(review): unlike struct MBR this struct carries no packed
+ * attribute - confirm that the in-memory layout matches the on-disk one.
+ */
+struct MBR_part_record {
+ __u8 bootable;
+ __u8 first_head;
+ __u8 first_sector;
+ __u8 first_cyl;
+ __u8 part_type;
+ __u8 last_head;
+ __u8 last_sector;
+ __u8 last_cyl;
+ __u32 first_sect_lba;
+ __u32 blocks_num;
+};
+
+/* MBR layout: 446 bytes of padding, MBR_PARTITIONS partition records,
+ * then a 16-bit magic (compare with MBR_SIGNATURE_MAGIC).
+ */
+struct MBR {
+ __u8 pad[446];
+ struct MBR_part_record parts[MBR_PARTITIONS];
+ __u16 magic;
+} __attribute__((packed));
+
+#define GPT_SIGNATURE_MAGIC __cpu_to_le64(0x5452415020494645ULL)
+#define MBR_GPT_PARTITION_TYPE 0xEE
+
+/* One GPT partition entry (packed): two 16-byte GUIDs, the first and
+ * last LBA, 8 bytes of attribute bits and a 72-byte name field.
+ */
+struct GPT_part_entry {
+ unsigned char type_guid[16];
+ unsigned char partition_guid[16];
+ __u64 starting_lba;
+ __u64 ending_lba;
+ unsigned char attr_bits[8];
+ unsigned char name[72];
+} __attribute__((packed));
+
+/* GPT header (packed).  'magic' is compared with GPT_SIGNATURE_MAGIC.
+ * The declared fields, including the trailing pad2[420], add up to
+ * 512 bytes.
+ */
+struct GPT {
+ __u64 magic;
+ __u32 revision;
+ __u32 header_size;
+ __u32 crc;
+ __u32 pad1;
+ __u64 current_lba;
+ __u64 backup_lba;
+ __u64 first_lba;
+ __u64 last_lba;
+ __u8 guid[16];
+ __u64 part_start;
+ __u32 part_cnt;
+ __u32 part_size;
+ __u32 part_crc;
+ __u8 pad2[420];
+} __attribute__((packed));
diff --git a/platform-intel.c b/platform-intel.c
new file mode 100644
index 00000000..f347382a
--- /dev/null
+++ b/platform-intel.c
@@ -0,0 +1,513 @@
+/*
+ * Intel(R) Matrix Storage Manager hardware and firmware support routines
+ *
+ * Copyright (C) 2008 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include "mdadm.h"
+#include "platform-intel.h"
+#include "probe_roms.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <limits.h>
+
+static int devpath_to_ll(const char *dev_path, const char *entry,
+ unsigned long long *val);
+
+static __u16 devpath_to_vendor(const char *dev_path);
+
+/*
+ * Release every element of the sys_dev list at *list, together with
+ * each element's 'path' string, and leave *list set to NULL.
+ */
+static void free_sys_dev(struct sys_dev **list)
+{
+	struct sys_dev *node = *list;
+
+	while (node) {
+		struct sys_dev *following = node->next;
+
+		free(node->path);	/* free(NULL) is a no-op */
+		free(node);
+		node = following;
+	}
+	*list = NULL;
+}
+
+/*
+ * Search sysfs for devices driven by 'driver' on bus 'bus'.
+ *
+ * Every entry of /sys/bus/<bus>/drivers/<driver> is accepted when its
+ * 'subsystem' link points at 'bus', its vendor id is 0x8086 (Intel),
+ * its 'device' attribute can be read and its path can be resolved with
+ * realpath().  The device type is SYS_DEV_SAS for the "isci" driver,
+ * SYS_DEV_SATA for "ahci" and SYS_DEV_UNKNOWN for anything else.
+ *
+ * Returns a newly allocated list (in directory order) that the caller
+ * owns, or NULL when the driver directory cannot be opened or no device
+ * qualifies.
+ */
+struct sys_dev *find_driver_devices(const char *bus, const char *driver)
+{
+	char path[292];
+	char link[256];
+	char *c;
+	DIR *driver_dir;
+	struct dirent *de;
+	struct sys_dev *head = NULL;
+	struct sys_dev *list = NULL;	/* current tail of the list */
+	enum sys_dev_type type;
+	unsigned long long dev_id;
+
+	if (strcmp(driver, "isci") == 0)
+		type = SYS_DEV_SAS;
+	else if (strcmp(driver, "ahci") == 0)
+		type = SYS_DEV_SATA;
+	else
+		type = SYS_DEV_UNKNOWN;
+
+	sprintf(path, "/sys/bus/%s/drivers/%s", bus, driver);
+	driver_dir = opendir(path);
+	if (!driver_dir)
+		return NULL;
+	for (de = readdir(driver_dir); de; de = readdir(driver_dir)) {
+		struct sys_dev *node;
+		char *resolved;
+		int n;
+
+		/* is 'de' a device? check that the 'subsystem' link exists and
+		 * that its target matches 'bus'
+		 */
+		sprintf(path, "/sys/bus/%s/drivers/%s/%s/subsystem",
+			bus, driver, de->d_name);
+		n = readlink(path, link, sizeof(link));
+		if (n < 0 || n >= (int)sizeof(link))
+			continue;
+		link[n] = '\0';
+		c = strrchr(link, '/');
+		if (!c)
+			continue;
+		if (strncmp(bus, c+1, strlen(bus)) != 0)
+			continue;
+
+		sprintf(path, "/sys/bus/%s/drivers/%s/%s",
+			bus, driver, de->d_name);
+
+		/* if it's not Intel device skip it. */
+		if (devpath_to_vendor(path) != 0x8086)
+			continue;
+
+		if (devpath_to_ll(path, "device", &dev_id) != 0)
+			continue;
+
+		/* resolve the path before the entry is linked in: realpath()
+		 * can fail and its result is searched with strrchr() below
+		 */
+		resolved = realpath(path, NULL);
+		if (!resolved)
+			continue;
+
+		node = xmalloc(sizeof(*node));
+		if (!node) {
+			free(resolved);
+			free_sys_dev(&head);
+			break;
+		}
+
+		node->dev_id = (__u16) dev_id;
+		node->type = type;
+		node->path = resolved;
+		node->next = NULL;
+		/* pci_id points at the last component of the resolved path */
+		if ((node->pci_id = strrchr(node->path, '/')) != NULL)
+			node->pci_id++;
+
+		/* start / add list entry */
+		if (!head)
+			head = node;
+		else
+			list->next = node;
+		list = node;
+	}
+	closedir(driver_dir);
+	return head;
+}
+
+static struct sys_dev *intel_devices=NULL;
+static time_t valid_time = 0;
+
+/*
+ * Look up 'device_id' in the cached intel_devices list and return the
+ * type of the first match, or SYS_DEV_UNKNOWN if there is none.
+ */
+static enum sys_dev_type device_type_by_id(__u16 device_id)
+{
+	struct sys_dev *cur = intel_devices;
+
+	while (cur != NULL) {
+		if (cur->dev_id == device_id)
+			return cur->type;
+		cur = cur->next;
+	}
+	return SYS_DEV_UNKNOWN;
+}
+
+/*
+ * Read the attribute file "<dev_path>/<entry>" and parse it into *val
+ * with sysfs_fd_get_ll().
+ * Returns -1 if the file cannot be opened, otherwise the result of
+ * sysfs_fd_get_ll().
+ */
+static int devpath_to_ll(const char *dev_path, const char *entry, unsigned long long *val)
+{
+	/* room for both strings, the '/' between them and the NUL */
+	char attr_path[strlen(dev_path) + strlen(entry) + 2];
+	int attr_fd;
+	int result;
+
+	sprintf(attr_path, "%s/%s", dev_path, entry);
+
+	attr_fd = open(attr_path, O_RDONLY);
+	if (attr_fd < 0)
+		return -1;
+
+	result = sysfs_fd_get_ll(attr_fd, val);
+	close(attr_fd);
+
+	return result;
+}
+
+/*
+ * Read "<dev_path>/vendor" and return the value parsed as hexadecimal.
+ * Returns 0xffff when the file cannot be opened or when the read does
+ * not deliver exactly sizeof(vendor) bytes.
+ */
+static __u16 devpath_to_vendor(const char *dev_path)
+{
+	char attr_path[strlen(dev_path) + strlen("/vendor") + 1];
+	char vendor[7];
+	__u16 vendor_id = 0xffff;
+	int attr_fd;
+
+	sprintf(attr_path, "%s/vendor", dev_path);
+
+	attr_fd = open(attr_path, O_RDONLY);
+	if (attr_fd < 0)
+		return 0xffff;
+
+	if (read(attr_fd, vendor, sizeof(vendor)) == sizeof(vendor)) {
+		/* the last byte read is replaced by the terminator */
+		vendor[sizeof(vendor) - 1] = '\0';
+		vendor_id = strtoul(vendor, NULL, 16);
+	}
+	close(attr_fd);
+
+	return vendor_id;
+}
+
+/*
+ * Return the list of Intel devices driven by "ahci" and "isci".
+ *
+ * The list is cached in intel_devices: while valid_time is less than
+ * 10 seconds old the cached list is returned unchanged, otherwise it
+ * is freed and rebuilt with the ahci devices first and the isci devices
+ * appended.  The list stays owned by this file.
+ */
+struct sys_dev *find_intel_devices(void)
+{
+	struct sys_dev *sas_devs, *sata_devs, *tail;
+
+	if (valid_time > time(0) - 10)
+		return intel_devices;
+
+	if (intel_devices)
+		free_sys_dev(&intel_devices);
+
+	sas_devs = find_driver_devices("pci", "isci");
+	sata_devs = find_driver_devices("pci", "ahci");
+
+	if (sata_devs) {
+		/* hang the isci list off the end of the ahci list */
+		for (tail = sata_devs; tail->next; tail = tail->next)
+			;
+		tail->next = sas_devs;
+		intel_devices = sata_devs;
+	} else
+		intel_devices = sas_devs;
+
+	valid_time = time(0);
+	return intel_devices;
+}
+
+/*
+ * PCI Expansion ROM Data Structure Format
+ *
+ * Only the leading fields are declared; scan() reads vendorID and
+ * deviceID through __le16_to_cpu().
+ */
+struct pciExpDataStructFormat {
+ __u8 ver[4];
+ __u16 vendorID;
+ __u16 deviceID;
+} __attribute__ ((packed));
+
+static struct imsm_orom imsm_orom[SYS_DEV_MAX];
+static int populated_orom[SYS_DEV_MAX];
+
+/*
+ * Callback handed to scan_adapter_roms(): inspect one ROM image
+ * [start, end) whose PCI data structure is at 'data'.
+ *
+ * If the ROM belongs to an Intel device (vendor 0x8086) whose device id
+ * is found in the intel_devices list, the image is searched in 4-byte
+ * steps for the "$VER" signature; the table found there is copied into
+ * imsm_orom[] for that device type and marked populated.
+ *
+ * Returns non-zero only once both the SATA and the SAS table have been
+ * populated, 0 otherwise.
+ */
+static int scan(const void *start, const void *end, const void *data)
+{
+ int offset;
+ const struct imsm_orom *imsm_mem;
+ int dev;
+ int len = (end - start);
+ struct pciExpDataStructFormat *ptr= (struct pciExpDataStructFormat *)data;
+
+ /* the data structure must lie within the ROM image */
+ if (data + 0x18 > end) {
+ dprintf("cannot find pciExpDataStruct \n");
+ return 0;
+ }
+
+ dprintf("ptr->vendorID: %lx __le16_to_cpu(ptr->deviceID): %lx \n",
+ (ulong) __le16_to_cpu(ptr->vendorID),
+ (ulong) __le16_to_cpu(ptr->deviceID));
+
+ if (__le16_to_cpu(ptr->vendorID) == 0x8086) {
+ /* search attached intel devices by device id from OROM */
+ dev = device_type_by_id(__le16_to_cpu(ptr->deviceID));
+ if (dev == SYS_DEV_UNKNOWN)
+ return 0;
+ }
+ else
+ return 0;
+
+ /* NOTE(review): only the 4-byte signature is known to lie inside
+ * [start, end); the struct copy below may read past 'end' when the
+ * signature sits near the end of the image - confirm
+ */
+ for (offset = 0; offset < len; offset += 4) {
+ imsm_mem = start + offset;
+ if ((memcmp(imsm_mem->signature, "$VER", 4) == 0)) {
+ imsm_orom[dev] = *imsm_mem;
+ populated_orom[dev] = 1;
+ return populated_orom[SYS_DEV_SATA] && populated_orom[SYS_DEV_SAS];
+ }
+ }
+ return 0;
+}
+
+/*
+ * Fill '*imsm_orom' with a synthetic capability table for test runs and
+ * set '*populated' to 1.
+ *
+ * The table is zeroed and then given RAID 0/1/10/5 support, the strip
+ * sizes 4kB to 2MB and the IMSM_OROM_* default limits.  RAID5 is removed
+ * from 'rlc' again when one of the *_NORAID5 environment variables
+ * checked below applies.
+ *
+ * Returns 'imsm_orom'.
+ */
+const struct imsm_orom *imsm_platform_test(enum sys_dev_type hba_id, int *populated,
+ struct imsm_orom *imsm_orom)
+{
+ memset(imsm_orom, 0, sizeof(*imsm_orom));
+ imsm_orom->rlc = IMSM_OROM_RLC_RAID0 | IMSM_OROM_RLC_RAID1 |
+ IMSM_OROM_RLC_RAID10 | IMSM_OROM_RLC_RAID5;
+ imsm_orom->sss = IMSM_OROM_SSS_4kB | IMSM_OROM_SSS_8kB |
+ IMSM_OROM_SSS_16kB | IMSM_OROM_SSS_32kB |
+ IMSM_OROM_SSS_64kB | IMSM_OROM_SSS_128kB |
+ IMSM_OROM_SSS_256kB | IMSM_OROM_SSS_512kB |
+ IMSM_OROM_SSS_1MB | IMSM_OROM_SSS_2MB;
+ imsm_orom->dpa = IMSM_OROM_DISKS_PER_ARRAY;
+ imsm_orom->tds = IMSM_OROM_TOTAL_DISKS;
+ imsm_orom->vpa = IMSM_OROM_VOLUMES_PER_ARRAY;
+ imsm_orom->vphba = IMSM_OROM_VOLUMES_PER_HBA;
+ /* 'attr' is taken before any NORAID5 override, so it keeps the
+ * RAID5 bit even when 'rlc' loses it below
+ */
+ imsm_orom->attr = imsm_orom->rlc | IMSM_OROM_ATTR_ChecksumVerify;
+ *populated = 1;
+
+ if (check_env("IMSM_TEST_OROM_NORAID5")) {
+ imsm_orom->rlc = IMSM_OROM_RLC_RAID0 | IMSM_OROM_RLC_RAID1 |
+ IMSM_OROM_RLC_RAID10;
+ }
+ /* NOTE(review): the AHCI variable is paired with SYS_DEV_SAS and the
+ * SCU variable with SYS_DEV_SATA, while the rest of this file maps
+ * ahci to SATA and isci/SCU to SAS - confirm the pairing is intended
+ */
+ if (check_env("IMSM_TEST_AHCI_EFI_NORAID5") && (hba_id == SYS_DEV_SAS)) {
+ imsm_orom->rlc = IMSM_OROM_RLC_RAID0 | IMSM_OROM_RLC_RAID1 |
+ IMSM_OROM_RLC_RAID10;
+ }
+ if (check_env("IMSM_TEST_SCU_EFI_NORAID5") && (hba_id == SYS_DEV_SATA)) {
+ imsm_orom->rlc = IMSM_OROM_RLC_RAID0 | IMSM_OROM_RLC_RAID1 |
+ IMSM_OROM_RLC_RAID10;
+ }
+
+ return imsm_orom;
+}
+
+/*
+ * Return the option-ROM capability table for 'hba_id', or NULL if none
+ * can be found.
+ *
+ * The result is cached in imsm_orom[] / populated_orom[].  With
+ * IMSM_TEST_OROM set in the environment a synthetic table from
+ * imsm_platform_test() is returned; with one of the EFI test variables
+ * set NULL is returned.  Otherwise the adapter ROMs are scanned for the
+ * table.
+ */
+static const struct imsm_orom *find_imsm_hba_orom(enum sys_dev_type hba_id)
+{
+ unsigned long align;
+
+ if (hba_id >= SYS_DEV_MAX)
+ return NULL;
+
+ /* it's static data so we only need to read it once */
+ if (populated_orom[hba_id]) {
+ dprintf("OROM CAP: %p, pid: %d pop: %d\n",
+ &imsm_orom[hba_id], (int) getpid(), populated_orom[hba_id]);
+ return &imsm_orom[hba_id];
+ }
+ if (check_env("IMSM_TEST_OROM")) {
+ dprintf("OROM CAP: %p, pid: %d pop: %d\n",
+ &imsm_orom[hba_id], (int) getpid(), populated_orom[hba_id]);
+ return imsm_platform_test(hba_id, &populated_orom[hba_id], &imsm_orom[hba_id]);
+ }
+ /* return empty OROM capabilities in EFI test mode */
+ if (check_env("IMSM_TEST_AHCI_EFI") ||
+ check_env("IMSM_TEST_SCU_EFI"))
+ return NULL;
+
+ /* refresh intel_devices, which scan() consults via device_type_by_id() */
+ find_intel_devices();
+
+ if (intel_devices == NULL)
+ return NULL;
+
+ /* scan option-rom memory looking for an imsm signature */
+ if (check_env("IMSM_SAFE_OROM_SCAN"))
+ align = 2048;
+ else
+ align = 512;
+ if (probe_roms_init(align) != 0)
+ return NULL;
+ probe_roms();
+ /* ignore return value - True is returned if both adapter roms are found */
+ scan_adapter_roms(scan);
+ probe_roms_exit();
+
+ /* scan() may have filled in either table; report only the one asked for */
+ if (populated_orom[hba_id])
+ return &imsm_orom[hba_id];
+ return NULL;
+}
+
+#define GUID_STR_MAX 37 /* according to GUID format:
+ * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" */
+
+#define EFI_GUID(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+((struct efi_guid) \
+{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
+ (b) & 0xff, ((b) >> 8) & 0xff, \
+ (c) & 0xff, ((c) >> 8) & 0xff, \
+ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
+
+#define SYS_EFI_VAR_PATH "/sys/firmware/efi/vars"
+#define SCU_PROP "RstScuV"
+#define AHCI_PROP "RstSataV"
+
+#define VENDOR_GUID \
+ EFI_GUID(0x193dfefa, 0xa445, 0x4302, 0x99, 0xd8, 0xef, 0x3a, 0xad, 0x1a, 0x04, 0xc6)
+
+int populated_efi[SYS_DEV_MAX] = { 0, 0 };
+
+static struct imsm_orom imsm_efi[SYS_DEV_MAX];
+
+/*
+ * Read the EFI variable '<variable_name>-<guid>' from SYS_EFI_VAR_PATH
+ * into 'buffer'.
+ *
+ * The variable's 'size' file is read first and parsed as a hexadecimal
+ * number, then up to 'buf_size' bytes are read from its 'data' file.
+ *
+ * Returns 0 when the number of bytes read equals the advertised size
+ * and is at least 'buf_size'; returns 1 on any open/read/parse error or
+ * size mismatch.
+ */
+int read_efi_variable(void *buffer, ssize_t buf_size, char *variable_name, struct efi_guid guid)
+{
+	char path[PATH_MAX];
+	char buf[GUID_STR_MAX];
+	int dfd;
+	ssize_t n, var_data_len;
+	unsigned long parsed;
+
+	snprintf(path, PATH_MAX, "%s/%s-%s/size", SYS_EFI_VAR_PATH, variable_name, guid_str(buf, guid));
+
+	dprintf("EFI VAR: path=%s\n", path);
+	/* get size of variable data */
+	dfd = open(path, O_RDONLY);
+	if (dfd < 0)
+		return 1;
+
+	/* keep one byte for the terminator written below; reading the
+	 * full sizeof(buf) would let buf[n] land one past the array
+	 */
+	n = read(dfd, buf, sizeof(buf) - 1);
+	close(dfd);
+	if (n < 0)
+		return 1;
+	buf[n] = '\0';
+
+	errno = 0;
+	parsed = strtoul(buf, NULL, 16);
+	/* strtoul() reports overflow as ULONG_MAX with errno == ERANGE */
+	if ((errno == ERANGE && parsed == ULONG_MAX)
+	    || (errno != 0 && parsed == 0))
+		return 1;
+	var_data_len = parsed;
+
+	/* get data */
+	snprintf(path, PATH_MAX, "%s/%s-%s/data", SYS_EFI_VAR_PATH, variable_name, guid_str(buf, guid));
+
+	dprintf("EFI VAR: path=%s\n", path);
+	dfd = open(path, O_RDONLY);
+	if (dfd < 0)
+		return 1;
+
+	n = read(dfd, buffer, buf_size);
+	close(dfd);
+	if (n != var_data_len || n < buf_size) {
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Return the EFI capability table for 'hba_id', or NULL if none is
+ * available.
+ *
+ * The result is cached in imsm_efi[] / populated_efi[].  With one of
+ * the EFI test variables set in the environment a synthetic table from
+ * imsm_platform_test() is returned; with IMSM_TEST_OROM set NULL is
+ * returned.  Otherwise the table is read from the EFI variable
+ * SCU_PROP (for SYS_DEV_SAS) or AHCI_PROP (for any other type).
+ */
+const struct imsm_orom *find_imsm_efi(enum sys_dev_type hba_id)
+{
+ if (hba_id >= SYS_DEV_MAX)
+ return NULL;
+
+ dprintf("EFI CAP: %p, pid: %d pop: %d\n",
+ &imsm_efi[hba_id], (int) getpid(), populated_efi[hba_id]);
+
+ /* it's static data so we only need to read it once */
+ if (populated_efi[hba_id]) {
+ dprintf("EFI CAP: %p, pid: %d pop: %d\n",
+ &imsm_efi[hba_id], (int) getpid(), populated_efi[hba_id]);
+ return &imsm_efi[hba_id];
+ }
+ if (check_env("IMSM_TEST_AHCI_EFI") ||
+ check_env("IMSM_TEST_SCU_EFI")) {
+ dprintf("OROM CAP: %p, pid: %d pop: %d\n",
+ &imsm_efi[hba_id], (int) getpid(), populated_efi[hba_id]);
+ return imsm_platform_test(hba_id, &populated_efi[hba_id], &imsm_efi[hba_id]);
+ }
+ /* OROM test is set, return that there is no EFI capabilities */
+ if (check_env("IMSM_TEST_OROM"))
+ return NULL;
+
+ /* read_efi_variable() returns non-zero on failure */
+ if (read_efi_variable(&imsm_efi[hba_id], sizeof(imsm_efi[0]), hba_id == SYS_DEV_SAS ? SCU_PROP : AHCI_PROP, VENDOR_GUID)) {
+ populated_efi[hba_id] = 0;
+ return NULL;
+ }
+
+ populated_efi[hba_id] = 1;
+ return &imsm_efi[hba_id];
+}
+
+/*
+ * Backward-compatible entry point: return the option-ROM capability
+ * table of the SATA controller, or NULL if there is none.
+ */
+const struct imsm_orom *find_imsm_orom(void)
+{
+	const struct imsm_orom *sata_orom = find_imsm_hba_orom(SYS_DEV_SATA);
+
+	return sata_orom;
+}
+
+/*
+ * Return the capability table for 'hba_id': the EFI variable is tried
+ * first and the option ROM only when no EFI table is found.  Returns
+ * NULL when neither source provides a table.
+ */
+const struct imsm_orom *find_imsm_capability(enum sys_dev_type hba_id)
+{
+	const struct imsm_orom *cap = find_imsm_efi(hba_id);
+
+	if (cap == NULL)
+		cap = find_imsm_hba_orom(hba_id);
+
+	return cap;
+}
+
+/*
+ * Resolve /sys/dev/block/<major>:<minor>/device for 'dev' with
+ * realpath().  Returns the malloc'ed canonical path, which the caller
+ * must free, or NULL if realpath() fails.
+ */
+char *devt_to_devpath(dev_t dev)
+{
+	char sysfs_link[46];
+
+	sprintf(sysfs_link, "/sys/dev/block/%d:%d/device", major(dev), minor(dev));
+
+	return realpath(sysfs_link, NULL);
+}
+
+/*
+ * Return the device path for the disk open on 'fd' (see
+ * devt_to_devpath()); return NULL on error or when fd refers to a
+ * partition.  fstat() failure and a non-block-device fd are both
+ * rejected here.
+ */
+char *diskfd_to_devpath(int fd)
+{
+	struct stat info;
+
+	if (fstat(fd, &info) != 0 || !S_ISBLK(info.st_mode))
+		return NULL;
+
+	return devt_to_devpath(info.st_rdev);
+}
+
+/*
+ * Report whether 'disk_path' lies below 'hba_path', i.e. whether
+ * 'hba_path' is a prefix of 'disk_path'.
+ *
+ * Returns 1 unconditionally when IMSM_TEST_AHCI_DEV or
+ * IMSM_TEST_SCU_DEV is set in the environment, 0 when either path is
+ * NULL, otherwise 1 for a prefix match and 0 for a mismatch.
+ */
+int path_attached_to_hba(const char *disk_path, const char *hba_path)
+{
+	if (check_env("IMSM_TEST_AHCI_DEV") ||
+	    check_env("IMSM_TEST_SCU_DEV"))
+		return 1;
+
+	if (!disk_path || !hba_path)
+		return 0;
+
+	dprintf("hba: %s - disk: %s\n", hba_path, disk_path);
+
+	return strncmp(disk_path, hba_path, strlen(hba_path)) == 0;
+}
+
+/*
+ * Report whether the block device 'dev' is attached to the HBA at
+ * 'hba_path'; see path_attached_to_hba() for the return value.
+ */
+int devt_attached_to_hba(dev_t dev, const char *hba_path)
+{
+	char *resolved = devt_to_devpath(dev);
+	int attached = path_attached_to_hba(resolved, hba_path);
+
+	free(resolved);		/* free(NULL) is a no-op */
+
+	return attached;
+}
+
+/*
+ * Report whether the disk open on 'fd' is attached to the HBA at
+ * 'hba_path'; see path_attached_to_hba() for the return value.
+ */
+int disk_attached_to_hba(int fd, const char *hba_path)
+{
+	char *resolved = diskfd_to_devpath(fd);
+	int attached = path_attached_to_hba(resolved, hba_path);
+
+	free(resolved);		/* free(NULL) is a no-op */
+
+	return attached;
+}
diff --git a/platform-intel.h b/platform-intel.h
new file mode 100644
index 00000000..8226be35
--- /dev/null
+++ b/platform-intel.h
@@ -0,0 +1,210 @@
+/*
+ * Intel(R) Matrix Storage Manager hardware and firmware support routines
+ *
+ * Copyright (C) 2008 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <asm/types.h>
+#include <strings.h>
+
+/* The IMSM Capability (IMSM AHCI and ISCU OROM/EFI variable) Version Table definition.
+ *
+ * The structure is packed (38 bytes) and field order is significant: do not
+ * reorder or pad it.  The #defines interleaved with the fields name the bits
+ * or typical values of the field that precedes them.
+ */
+struct imsm_orom {
+	__u8 signature[4];
+	__u8 table_ver_major; /* Currently 2 (can change with future revs) */
+	__u8 table_ver_minor; /* Currently 2 (can change with future revs) */
+	__u16 major_ver; /* Example: 8 as in 8.6.0.1020 */
+	__u16 minor_ver; /* Example: 6 as in 8.6.0.1020 */
+	__u16 hotfix_ver; /* Example: 0 as in 8.6.0.1020 */
+	__u16 build; /* Example: 1020 as in 8.6.0.1020 */
+	__u8 len; /* number of bytes in this entire table */
+	__u8 checksum; /* checksum of all the bytes in this table */
+	__u16 rlc; /* RAID Level Capability */
+	/* we assume the cpu is x86 as the orom should not be found
+	 * anywhere else
+	 */
+	#define IMSM_OROM_RLC_RAID0 (1 << 0)
+	#define IMSM_OROM_RLC_RAID1 (1 << 1)
+	#define IMSM_OROM_RLC_RAID10 (1 << 2)
+	#define IMSM_OROM_RLC_RAID1E (1 << 3)
+	#define IMSM_OROM_RLC_RAID5 (1 << 4)
+	#define IMSM_OROM_RLC_RAID_CNG (1 << 5)
+	__u16 sss; /* Strip Size Supported */
+	#define IMSM_OROM_SSS_2kB (1 << 0)
+	#define IMSM_OROM_SSS_4kB (1 << 1)
+	#define IMSM_OROM_SSS_8kB (1 << 2)
+	#define IMSM_OROM_SSS_16kB (1 << 3)
+	#define IMSM_OROM_SSS_32kB (1 << 4)
+	#define IMSM_OROM_SSS_64kB (1 << 5)
+	#define IMSM_OROM_SSS_128kB (1 << 6)
+	#define IMSM_OROM_SSS_256kB (1 << 7)
+	#define IMSM_OROM_SSS_512kB (1 << 8)
+	#define IMSM_OROM_SSS_1MB (1 << 9)
+	#define IMSM_OROM_SSS_2MB (1 << 10)
+	#define IMSM_OROM_SSS_4MB (1 << 11)
+	#define IMSM_OROM_SSS_8MB (1 << 12)
+	#define IMSM_OROM_SSS_16MB (1 << 13)
+	#define IMSM_OROM_SSS_32MB (1 << 14)
+	#define IMSM_OROM_SSS_64MB (1 << 15)
+	__u16 dpa; /* Disks Per Array supported */
+	#define IMSM_OROM_DISKS_PER_ARRAY 6
+	__u16 tds; /* Total Disks Supported */
+	#define IMSM_OROM_TOTAL_DISKS 6
+	__u8 vpa; /* # Volumes Per Array supported */
+	#define IMSM_OROM_VOLUMES_PER_ARRAY 2
+	__u8 vphba; /* # Volumes Per Host Bus Adapter supported */
+	#define IMSM_OROM_VOLUMES_PER_HBA 4
+	/* Attributes supported. This should map to the
+	 * attributes in the MPB. Also, lower 16 bits
+	 * should match/duplicate RLC bits above.
+	 */
+	__u32 attr;
+	#define IMSM_OROM_ATTR_RAID0 IMSM_OROM_RLC_RAID0
+	#define IMSM_OROM_ATTR_RAID1 IMSM_OROM_RLC_RAID1
+	#define IMSM_OROM_ATTR_RAID10 IMSM_OROM_RLC_RAID10
+	#define IMSM_OROM_ATTR_RAID1E IMSM_OROM_RLC_RAID1E
+	#define IMSM_OROM_ATTR_RAID5 IMSM_OROM_RLC_RAID5
+	#define IMSM_OROM_ATTR_RAID_CNG IMSM_OROM_RLC_RAID_CNG
+	#define IMSM_OROM_ATTR_2TB_DISK (1 << 26)
+	#define IMSM_OROM_ATTR_2TB (1 << 29)
+	#define IMSM_OROM_ATTR_PM (1 << 30)
+	/* bit 31 does not fit a signed int: shifting 1 there is undefined,
+	 * so this one constant is built from an unsigned literal
+	 */
+	#define IMSM_OROM_ATTR_ChecksumVerify (1U << 31)
+	__u32 capabilities;
+	#define IMSM_OROM_CAPABILITIES_Ext_SATA (1 << 0)
+	#define IMSM_OROM_CAPABILITIES_TurboMemory (1 << 1)
+	#define IMSM_OROM_CAPABILITIES_HddPassword (1 << 2)
+	#define IMSM_OROM_CAPABILITIES_DiskCoercion (1 << 3)
+	/* NOTE(review): the names below carry the CAPABILITIES prefix but
+	 * follow the driver_features field - confirm which field they test
+	 */
+	__u32 driver_features;
+	#define IMSM_OROM_CAPABILITIES_HDDUnlock (1 << 0)
+	#define IMSM_OROM_CAPABILITIES_LEDLoc (1 << 1)
+	#define IMSM_OROM_CAPABILITIES_EnterpriseSystem (1 << 2)
+	#define IMSM_OROM_CAPABILITIES_Zpodd (1 << 3)
+	#define IMSM_OROM_CAPABILITIES_LargeDramCache (1 << 4)
+	#define IMSM_OROM_CAPABILITIES_Rohi (1 << 5)
+	#define IMSM_OROM_CAPABILITIES_ReadPatrol (1 << 6)
+	#define IMSM_OROM_CAPABILITIES_XorHw (1 << 7)
+} __attribute__((packed));
+
+/* 1 when the RAID Level Capability mask has the RAID0 bit set, else 0. */
+static inline int imsm_orom_has_raid0(const struct imsm_orom *orom)
+{
+	return (orom->rlc & IMSM_OROM_RLC_RAID0) != 0;
+}
+/* 1 when the RAID Level Capability mask has the RAID1 bit set, else 0. */
+static inline int imsm_orom_has_raid1(const struct imsm_orom *orom)
+{
+	return (orom->rlc & IMSM_OROM_RLC_RAID1) != 0;
+}
+/* 1 when the RAID Level Capability mask has the RAID1E bit set, else 0. */
+static inline int imsm_orom_has_raid1e(const struct imsm_orom *orom)
+{
+	return (orom->rlc & IMSM_OROM_RLC_RAID1E) != 0;
+}
+/* 1 when the RAID Level Capability mask has the RAID10 bit set, else 0. */
+static inline int imsm_orom_has_raid10(const struct imsm_orom *orom)
+{
+	return (orom->rlc & IMSM_OROM_RLC_RAID10) != 0;
+}
+/* 1 when the RAID Level Capability mask has the RAID5 bit set, else 0. */
+static inline int imsm_orom_has_raid5(const struct imsm_orom *orom)
+{
+	return (orom->rlc & IMSM_OROM_RLC_RAID5) != 0;
+}
+
+/**
+ * imsm_orom_has_chunk - check if the orom supports the given chunk size
+ * @orom: orom pointer from find_imsm_orom
+ * @chunk: chunk size in kibibytes
+ *
+ * Bit 0 of the Strip Size Supported mask stands for 2kB and every further
+ * bit doubles the size, so only powers of two from 2kB upwards can match.
+ * Returns 1 when the size is advertised, 0 otherwise.
+ */
+static inline int imsm_orom_has_chunk(const struct imsm_orom *orom, int chunk)
+{
+	int bit;
+
+	/* Sizes below 2kB have no bit in the mask.  Rejecting them up front
+	 * also keeps a 1kB request from becoming a shift by -1 and keeps
+	 * 'chunk - 1' from overflowing for INT_MIN.
+	 */
+	if (chunk < 2)
+		return 0;
+	if (chunk & (chunk - 1))
+		return 0; /* not a power of 2 */
+
+	bit = ffs(chunk) - 2; /* 2kB (ffs == 2) is bit 0 */
+	if (bit > 15)
+		return 0; /* larger than anything the 16-bit mask can hold */
+
+	return !!(orom->sss & (1 << bit));
+}
+
+/**
+ * fls - find last (most-significant) bit set
+ * @x: the word to search
+ *
+ * Returns the 1-based position of the highest set bit, or 0 when @x is 0
+ * (so fls(1) == 1 and fls(0x80000000) == 32).
+ * The function is borrowed from Linux kernel code
+ * include/asm-generic/bitops/fls.h
+ */
+static inline int fls(int x)
+{
+	/* work on an unsigned copy: left-shifting a signed int into or past
+	 * the sign bit is undefined behaviour
+	 */
+	unsigned int v = (unsigned int)x;
+	int r = 32;
+
+	if (!v)
+		return 0;
+	if (!(v & 0xffff0000u)) {
+		v <<= 16;
+		r -= 16;
+	}
+	if (!(v & 0xff000000u)) {
+		v <<= 8;
+		r -= 8;
+	}
+	if (!(v & 0xf0000000u)) {
+		v <<= 4;
+		r -= 4;
+	}
+	if (!(v & 0xc0000000u)) {
+		v <<= 2;
+		r -= 2;
+	}
+	if (!(v & 0x80000000u)) {
+		v <<= 1;
+		r -= 1;
+	}
+	return r;
+}
+
+/* Kind of host bus adapter a system device belongs to. */
+enum sys_dev_type {
+	SYS_DEV_UNKNOWN = 0,
+	SYS_DEV_SAS = 1,
+	SYS_DEV_SATA = 2,
+	SYS_DEV_MAX = 3		/* number of values above, not a real type */
+};
+
+/* One node of a singly linked list of system devices (chained via 'next').
+ * NOTE(review): the per-field notes below are inferred from the field
+ * names only - confirm against the code that fills the list.
+ */
+struct sys_dev {
+ enum sys_dev_type type;
+ char *path; /* presumably the sysfs path of the device */
+ char *pci_id; /* presumably its PCI identifier string */
+ __u16 dev_id;
+ struct sys_dev *next; /* next list node */
+};
+
+/* Raw 16-byte GUID; guid_str() renders it as text. */
+struct efi_guid {
+ __u8 b[16];
+};
+
+/*
+ * Format guid into buf as xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx and
+ * return buf.  The first three groups are printed with their bytes
+ * swapped (b[3]..b[0], b[5] b[4], b[7] b[6]); the rest in storage order.
+ * buf must have room for 36 characters plus the terminating NUL.
+ */
+static inline char *guid_str(char *buf, struct efi_guid guid)
+{
+	const __u8 *b = guid.b;
+
+	sprintf(buf,
+		"%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+		b[3], b[2], b[1], b[0],
+		b[5], b[4],
+		b[7], b[6],
+		b[8], b[9],
+		b[10], b[11], b[12], b[13], b[14], b[15]);
+
+	return buf;
+}
+
+/* Device path of the block device open on fd, or NULL. */
+char *diskfd_to_devpath(int fd);
+struct sys_dev *find_driver_devices(const char *bus, const char *driver);
+struct sys_dev *find_intel_devices(void);
+/* Capability table for an HBA type: EFI variable first, then option ROM. */
+const struct imsm_orom *find_imsm_capability(enum sys_dev_type hba_id);
+/* Backward-compatible lookup, fixed to SYS_DEV_SATA. */
+const struct imsm_orom *find_imsm_orom(void);
+/* The three checks below return 1 when the disk's device path starts with
+ * hba_path, 0 otherwise.
+ */
+int disk_attached_to_hba(int fd, const char *hba_path);
+int devt_attached_to_hba(dev_t dev, const char *hba_path);
+/* realpath() of /sys/dev/block/<major>:<minor>/device; caller frees. */
+char *devt_to_devpath(dev_t dev);
+int path_attached_to_hba(const char *disk_path, const char *hba_path);
+const char *get_sys_dev_type(enum sys_dev_type);
diff --git a/policy.c b/policy.c
new file mode 100644
index 00000000..ef83621a
--- /dev/null
+++ b/policy.c
@@ -0,0 +1,914 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include <dirent.h>
+#include <fnmatch.h>
+#include <ctype.h>
+#include "dlink.h"
+/*
+ * Policy module for mdadm.
+ * A policy statement about a device lists a set of values for each
+ * of a set of names. Each value can have a metadata type as context.
+ *
+ * names include:
+ * action - the actions that can be taken on hot-plug
+ * domain - the domain(s) that the device is part of
+ *
+ * Policy information is extracted from various sources, but
+ * particularly from a set of policy rules in mdadm.conf
+ */
+
+/*
+ * Allocate a policy entry for (name, val) and push it on the front of
+ * *pol.  'name' and 'val' are stored by reference, not copied.
+ *
+ * A non-NULL 'metadata' is normalised to the name string owned by the
+ * matching superlist[] handler so that later code can compare metadata
+ * by pointer; "1", "1.0", "1.1" and "1.2" all map to super1.name.
+ * Anything else is reported once per distinct pointer and recorded as
+ * "unknown".
+ */
+static void pol_new(struct dev_policy **pol, char *name, const char *val,
+		    const char *metadata)
+{
+	struct dev_policy *entry = xmalloc(sizeof(*entry));
+	const char *canonical = NULL;
+
+	entry->name = name;
+	entry->value = val;
+
+	if (metadata) {
+		int idx;
+
+		/* exact match against a registered handler name */
+		for (idx = 0; !canonical && superlist[idx]; idx++)
+			if (strcmp(metadata, superlist[idx]->name) == 0)
+				canonical = superlist[idx]->name;
+
+		/* the 1.x spellings all denote the super1 handler */
+		if (!canonical &&
+		    (strcmp(metadata, "1") == 0 ||
+		     strcmp(metadata, "1.0") == 0 ||
+		     strcmp(metadata, "1.1") == 0 ||
+		     strcmp(metadata, "1.2") == 0))
+			canonical = super1.name;
+
+		if (!canonical) {
+			/* remember the last offender to avoid repeating
+			 * the warning for the same string
+			 */
+			static const char *last_warned = NULL;
+
+			if (last_warned != metadata) {
+				pr_err("metadata=%s unrecognised - ignoring rule\n",
+				       metadata);
+				last_warned = metadata;
+			}
+			canonical = "unknown";
+		}
+	}
+
+	entry->metadata = canonical;
+	entry->next = *pol;
+	*pol = entry;
+}
+
+/*
+ * Ordering predicate for policy entries: returns 1 when 'a' sorts before
+ * or equal to 'b', 0 otherwise.
+ * Keys, in order: name (by address), value (by strcmp), metadata (by
+ * address).
+ */
+static int pol_lesseq(struct dev_policy *a, struct dev_policy *b)
+{
+	int order;
+
+	if (a->name != b->name)
+		return a->name < b->name;
+
+	order = strcmp(a->value, b->value);
+	if (order != 0)
+		return order < 0;
+
+	return a->metadata <= b->metadata;
+}
+
+/*
+ * Sort the list at *pol in place into the order defined by pol_lesseq().
+ * Each pass of the outer loop merges the two lists pl[0] and pl[1] and
+ * redistributes the result over the same two lists; passes repeat until
+ * one of them ends up empty, and the other is then stored in *pol.
+ */
+static void pol_sort(struct dev_policy **pol)
+{
+ /* sort policy list in *pol by name/value/metadata
+ * (the key order of pol_lesseq) using merge sort
+ */
+
+ struct dev_policy *pl[2];
+ pl[0] = *pol;
+ pl[1] = NULL;
+
+ do {
+ struct dev_policy **plp[2], *p[2];
+ int curr = 0;
+ /* all-NULL sentinel used as the initial 'prev' of each pass */
+ struct dev_policy nul = { NULL, NULL, NULL, NULL };
+ struct dev_policy *prev = &nul;
+ int next = 0;
+
+ /* p[] are the two lists that we are merging.
+ * plp[] are the ends of the two lists we create
+ * from the merge.
+ * 'curr' is which of plp[] that we are currently
+ * adding items to.
+ * 'next' is which of p[] we will take the next
+ * item from.
+ * 'prev' is the last value, which was placed in
+ * plp[curr].
+ */
+ plp[0] = &pl[0];
+ plp[1] = &pl[1];
+ p[0] = pl[0];
+ p[1] = pl[1];
+
+ /* take least of p[0] and p[1]
+ * if it is larger than prev, add to
+ * plp[curr], else swap curr then add
+ */
+ while (p[0] || p[1]) {
+ /* switch source list when the current one is exhausted,
+ * or when the three-way comparison of prev and the two
+ * heads selects the other head
+ */
+ if (p[next] == NULL ||
+ (p[1-next] != NULL &&
+ !(pol_lesseq(prev, p[1-next])
+ ^pol_lesseq(prev, p[next])
+ ^pol_lesseq(p[next], p[1-next])))
+ )
+ next = 1 - next;
+
+ /* chosen item sorts before prev: continue on the other output list */
+ if (!pol_lesseq(prev, p[next]))
+ curr = 1 - curr;
+
+ /* append the item to the current output list and advance */
+ *plp[curr] = prev = p[next];
+ plp[curr] = &p[next]->next;
+ p[next] = p[next]->next;
+ }
+ /* terminate both output lists */
+ *plp[0] = NULL;
+ *plp[1] = NULL;
+ } while (pl[0] && pl[1]);
+ if (pl[0])
+ *pol = pl[0];
+ else
+ *pol = pl[1];
+}
+
+/*
+ * Drop duplicate entries from a list already sorted with pol_sort().
+ * In sorted order each entry is <= its successor, so a successor that is
+ * also <= the current entry is a duplicate; it is unlinked and freed.
+ */
+static void pol_dedup(struct dev_policy *pol)
+{
+	struct dev_policy *cur = pol;
+
+	while (cur != NULL && cur->next != NULL) {
+		struct dev_policy *succ = cur->next;
+
+		if (!pol_lesseq(succ, cur)) {
+			cur = succ;
+			continue;
+		}
+		cur->next = succ->next;
+		free(succ);
+	}
+}
+
+/*
+ * pol_find returns the first entry of the (sorted) policy list whose
+ * name is 'name', or NULL when there is none.  Names are compared by
+ * address.  A non-NULL result only proves that at least one value
+ * exists; the caller has to walk on to see how many there are.
+ */
+struct dev_policy *pol_find(struct dev_policy *pol, char *name)
+{
+	struct dev_policy *p;
+
+	for (p = pol; p != NULL; p = p->next) {
+		if (p->name < name)
+			continue;
+		/* first entry not sorting before 'name' decides */
+		return p->name == name ? p : NULL;
+	}
+	return NULL;
+}
+
+/*
+ * Return a freshly allocated name describing where 'disk' is attached.
+ *
+ * Preference order:
+ *  1. the name of the /dev/disk/by-path/ symlink that resolves to the
+ *     block device disk->disk.major:minor;
+ *  2. the last component of the /sys/dev/block/<major>:<minor> link;
+ *  3. the literal string "unknown".
+ * The result comes from xstrdup() and must be freed by the caller.
+ */
+static char *disk_path(struct mdinfo *disk)
+{
+	struct stat stb;
+	int prefix_len;
+	DIR *by_path;
+	char symlink[PATH_MAX] = "/dev/disk/by-path/";
+	char nm[PATH_MAX];
+	struct dirent *ent;
+	int rv;
+
+	by_path = opendir(symlink);
+	if (by_path) {
+		prefix_len = strlen(symlink);
+		while ((ent = readdir(by_path)) != NULL) {
+			char *found;
+
+			if (ent->d_type != DT_LNK)
+				continue;
+			/* snprintf always terminates, unlike strncpy */
+			snprintf(symlink + prefix_len,
+				 sizeof(symlink) - prefix_len,
+				 "%s", ent->d_name);
+			if (stat(symlink, &stb) < 0)
+				continue;
+			if ((stb.st_mode & S_IFMT) != S_IFBLK)
+				continue;
+			if (stb.st_rdev != makedev(disk->disk.major, disk->disk.minor))
+				continue;
+			/* 'ent' belongs to the directory stream, so copy the
+			 * name before closedir() releases it
+			 */
+			found = xstrdup(ent->d_name);
+			closedir(by_path);
+			return found;
+		}
+		closedir(by_path);
+	}
+	/* A NULL path isn't really acceptable - use the devname.. */
+	snprintf(symlink, sizeof(symlink), "/sys/dev/block/%d:%d",
+		 disk->disk.major, disk->disk.minor);
+	rv = readlink(symlink, nm, sizeof(nm)-1);
+	if (rv > 0) {
+		char *dname;
+		nm[rv] = 0;
+		dname = strrchr(nm, '/');
+		if (dname)
+			return xstrdup(dname + 1);
+	}
+	return xstrdup("unknown");
+}
+
+/* The two device type strings handed out by disk_type(). */
+char type_part[] = "part";
+char type_disk[] = "disk";
+
+/*
+ * Classify 'disk' as a partition or a whole disk: it is a partition when
+ * /sys/dev/block/<major>:<minor>/partition can be stat()ed.
+ * Returns type_part or type_disk; the result must not be freed.
+ */
+static char *disk_type(struct mdinfo *disk)
+{
+	char sysfs_attr[30+20+20];
+	struct stat sb;
+
+	sprintf(sysfs_attr, "/sys/dev/block/%d:%d/partition",
+		disk->disk.major, disk->disk.minor);
+
+	return stat(sysfs_attr, &sb) == 0 ? type_part : type_disk;
+}
+
+/*
+ * Decide whether a rule applies to a device with the given path and type.
+ *
+ * A rule may carry several path= and type= terms.  For each of the two
+ * kinds the rule matches when it has no term of that kind, or when at
+ * least one term matches (path= via fnmatch(), type= via strcmp()).
+ * A NULL path or type never matches a term.
+ * Returns 1 when both kinds are satisfied, 0 otherwise.
+ */
+static int pol_match(struct rule *rule, char *path, char *type)
+{
+	/* 0 == no such term seen, 1 == a term matched, -1 == none matched yet */
+	int path_state = 0;
+	int type_state = 0;
+	struct rule *r;
+
+	for (r = rule; r; r = r->next) {
+		if (r->name == rule_path) {
+			if (path && fnmatch(r->value, path, 0) == 0)
+				path_state = 1;
+			else if (path_state == 0)
+				path_state = -1;
+		}
+		if (r->name == rule_type) {
+			if (type && strcmp(r->value, type) == 0)
+				type_state = 1;
+			else if (type_state == 0)
+				type_state = -1;
+		}
+	}
+	return path_state >= 0 && type_state >= 0;
+}
+
+/*
+ * Copy the assignments of a rule (action=, domain=, auto=) into *pol.
+ * Every copied entry is tagged with the rule's metadata= value; when the
+ * rule has several, the one found last while walking the list is used.
+ */
+static void pol_merge(struct dev_policy **pol, struct rule *rule)
+{
+	struct rule *r;
+	char *metadata = NULL;
+
+	for (r = rule; r; r = r->next) {
+		if (r->name == pol_metadata)
+			metadata = r->value;
+	}
+
+	for (r = rule; r; r = r->next) {
+		int is_assignment = r->name == pol_act ||
+				    r->name == pol_domain ||
+				    r->name == pol_auto;
+
+		if (is_assignment)
+			pol_new(pol, r->name, r->value, metadata);
+	}
+}
+
+/*
+ * Check whether 'path' ends with "-part" followed by (possibly zero)
+ * decimal digits.
+ *
+ * On a match 1 is returned and *part is pointed at the '-' that starts
+ * the "-partNN" suffix inside 'path', so a caller can cut the suffix off
+ * by storing a NUL there and restore it by storing '-' again.
+ * Otherwise (including path == NULL) 0 is returned and *part is left
+ * untouched.
+ */
+static int path_has_part(char *path, char **part)
+{
+	int l;
+
+	if (!path)
+		return 0;
+	l = strlen(path);
+	/* step back over the trailing partition number */
+	while (l > 1 && isdigit((unsigned char)path[l-1]))
+		l--;
+	if (l < 5 || strncmp(path+l-5, "-part", 5) != 0)
+		return 0;
+	/* l-5 is the '-' itself; l-4 would already be the 'p' */
+	*part = path+l-5;
+	return 1;
+}
+
+/*
+ * Variant of pol_merge() used for partitions: action= values are copied
+ * as they are, while every domain= value gets "-<part>" appended.
+ *
+ * The extended domain strings are kept on r->dups so that they live as
+ * long as the rule does; an existing string for the same 'part' is
+ * reused instead of building a second copy.
+ */
+static void pol_merge_part(struct dev_policy **pol, struct rule *rule, char *part)
+{
+	struct rule *r;
+	char *metadata = NULL;
+
+	for (r = rule; r ; r = r->next) {
+		if (r->name == pol_metadata)
+			metadata = r->value;
+	}
+
+	for (r = rule; r ; r = r->next) {
+		char *dom;
+		int len;
+
+		if (r->name == pol_act) {
+			pol_new(pol, r->name, r->value, metadata);
+			continue;
+		}
+		if (r->name != pol_domain)
+			continue;
+
+		if (r->dups == NULL)
+			r->dups = dl_head();
+		len = strlen(r->value);
+
+		/* search the cache for "<domain>-<part>" */
+		dom = dl_next(r->dups);
+		while (dom != r->dups && strcmp(dom+len+1, part) != 0)
+			dom = dl_next(dom);
+
+		if (dom == r->dups) {
+			/* not cached yet: build it and remember it */
+			dom = dl_strndup(r->value, len + 1 + strlen(part));
+			strcat(dom, "-");
+			strcat(dom, part);
+			dl_add(r->dups, dom);
+		}
+		pol_new(pol, r->name, dom, metadata);
+	}
+}
+
+/* Head of the list of policy rules; policyline() and policy_add() push
+ * new entries at the front, policy_free() empties the list.
+ */
+static struct pol_rule *config_rules = NULL;
+/* Pointed at config_rules by policyline(), reset by policy_free(). */
+static struct pol_rule **config_rules_end = NULL;
+/* Set once any rule line carries a path= term; disk_policy() only looks
+ * up a device path when this is non-zero.
+ */
+static int config_rules_has_path = 0;
+
+/*
+ * Most policy comes from the set of policy rules that are read from the
+ * config file.  path_policy() gathers the policy information for the
+ * disk described by the given 'path' and 'type'; either may be NULL.
+ *
+ * "policy" rules are applied when they match path and type.
+ * "part-policy" rules are applied to partitions only: the "-partNN"
+ * suffix is cut off temporarily and the rule is matched against the
+ * remaining whole-disk path.
+ * Finally every metadata handler that provides
+ * get_disk_controller_domain() may contribute a domain for the path.
+ *
+ * Returns a sorted, duplicate-free list that the caller releases with
+ * dev_policy_free().
+ */
+struct dev_policy *path_policy(char *path, char *type)
+{
+	struct pol_rule *rules;
+	struct dev_policy *pol = NULL;
+	/* pol_match() accepts a NULL type, so never hand one to strcmp() */
+	int is_part = type && strcmp(type, type_part) == 0;
+	int i;
+
+	for (rules = config_rules; rules; rules = rules->next) {
+		char *part;
+
+		if (rules->type == rule_policy &&
+		    pol_match(rules->rule, path, type))
+			pol_merge(&pol, rules->rule);
+
+		if (rules->type == rule_part && is_part &&
+		    path_has_part(path, &part)) {
+			/* hide the partition suffix while matching, then
+			 * put the separator back
+			 */
+			*part = 0;
+			if (pol_match(rules->rule, path, type_disk))
+				pol_merge_part(&pol, rules->rule, part+1);
+			*part = '-';
+		}
+	}
+
+	/* Now add any metadata-specific internal knowledge
+	 * about this path
+	 */
+	for (i=0; path && superlist[i]; i++)
+		if (superlist[i]->get_disk_controller_domain) {
+			const char *d =
+				superlist[i]->get_disk_controller_domain(path);
+			if (d)
+				pol_new(&pol, pol_domain, d, superlist[i]->name);
+		}
+
+	pol_sort(&pol);
+	pol_dedup(pol);
+	return pol;
+}
+
+/*
+ * Add one (name, val, metadata) entry to *pol and bring the list back
+ * into sorted, duplicate-free form.
+ */
+void pol_add(struct dev_policy **pol, char *name, char *val, char *metadata)
+{
+	/* push first, then restore the list invariants */
+	pol_new(pol, name, val, metadata);
+
+	pol_sort(pol);
+	pol_dedup(*pol);
+}
+
+/*
+ * disk_policy() gathers the policy for the disk identified by
+ * disk->disk.{major,minor}.  The device path is only looked up when some
+ * configured rule actually tests a path.
+ */
+struct dev_policy *disk_policy(struct mdinfo *disk)
+{
+	char *type = disk_type(disk);
+	char *path = config_rules_has_path ? disk_path(disk) : NULL;
+	struct dev_policy *result = path_policy(path, type);
+
+	free(path);
+	return result;
+}
+
+/*
+ * Convenience wrapper around disk_policy() for callers that only have a
+ * device number.  Only disk.major and disk.minor of the temporary mdinfo
+ * are filled in.
+ */
+struct dev_policy *devid_policy(int dev)
+{
+	struct mdinfo probe;
+
+	probe.disk.major = major(dev);
+	probe.disk.minor = minor(dev);
+
+	return disk_policy(&probe);
+}
+
+/*
+ * process policy rules read from config file.
+ *
+ * The arrays below serve as keyword text and as identity tokens:
+ * try_rule() stores the array address in rule->name, and code such as
+ * pol_match() and pol_merge() recognises a term by comparing that
+ * address (rule->name == rule_path), not by comparing strings.
+ */
+
+/* match terms of a rule */
+char rule_path[] = "path";
+char rule_type[] = "type";
+
+/* rule kinds, stored in pol_rule->type */
+char rule_policy[] = "policy";
+char rule_part[] = "part-policy";
+
+/* assignment terms of a rule */
+char pol_metadata[] = "metadata";
+char pol_act[] = "action";
+char pol_domain[] = "domain";
+char pol_auto[] = "auto";
+
+/*
+ * If word 'w' has the form "<name>=<value>", push a new rule term with a
+ * private copy of <value> on the front of *rp and return 1.
+ * Otherwise leave *rp alone and return 0.
+ */
+static int try_rule(char *w, char *name, struct rule **rp)
+{
+	int len = strlen(name);
+	struct rule *term;
+
+	if (strncmp(w, name, len) == 0 && w[len] == '=') {
+		term = xmalloc(sizeof(*term));
+		term->name = name;
+		term->value = xstrdup(w+len+1);
+		term->dups = NULL;
+		term->next = *rp;
+		*rp = term;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Turn one parsed config line into a policy rule of kind 'type' and push
+ * it on the front of config_rules.  'line' is a dl list of words; each
+ * word must be one of the known "<key>=<value>" terms, anything else is
+ * reported and skipped.  Seeing a path= term sets config_rules_has_path.
+ */
+void policyline(char *line, char *type)
+{
+	char *other_keys[] = {
+		rule_type, pol_metadata, pol_act, pol_domain, pol_auto
+	};
+	struct pol_rule *pr;
+	char *w;
+
+	if (config_rules_end == NULL)
+		config_rules_end = &config_rules;
+
+	pr = xmalloc(sizeof(*pr));
+	pr->type = type;
+	pr->rule = NULL;
+
+	for (w = dl_next(line); w != line ; w = dl_next(w)) {
+		unsigned int k;
+		int matched = 0;
+
+		if (try_rule(w, rule_path, &pr->rule)) {
+			config_rules_has_path = 1;
+			continue;
+		}
+		/* stop at the first key that accepts the word */
+		for (k = 0;
+		     !matched && k < sizeof(other_keys) / sizeof(other_keys[0]);
+		     k++)
+			matched = try_rule(w, other_keys[k], &pr->rule);
+
+		if (!matched)
+			pr_err("policy rule %s unrecognised and ignored\n",
+			       w);
+	}
+
+	pr->next = config_rules;
+	config_rules = pr;
+}
+
+/*
+ * Build a policy rule of kind 'type' from a NULL-terminated sequence of
+ * (char *name, char *value) argument pairs and push it on the front of
+ * config_rules.  Names are stored by reference, values are copied.
+ */
+void policy_add(char *type, ...)
+{
+	struct pol_rule *pr = xmalloc(sizeof(*pr));
+	va_list ap;
+
+	pr->type = type;
+	pr->rule = NULL;
+
+	va_start(ap, type);
+	for (;;) {
+		char *name = va_arg(ap, char*);
+		char *val;
+		struct rule *term;
+
+		if (name == NULL)
+			break;
+		val = va_arg(ap, char*);
+
+		term = xmalloc(sizeof(*term));
+		term->next = pr->rule;
+		term->name = name;
+		term->value = xstrdup(val);
+		term->dups = NULL;
+		pr->rule = term;
+	}
+	va_end(ap);
+
+	pr->next = config_rules;
+	config_rules = pr;
+}
+
+/*
+ * Release every configured policy rule together with its terms, their
+ * copied values and any cached domain strings, and reset the module
+ * state to "no rules".
+ */
+void policy_free(void)
+{
+	struct pol_rule *pr;
+
+	while ((pr = config_rules) != NULL) {
+		struct rule *term = pr->rule;
+
+		config_rules = pr->next;
+
+		while (term) {
+			struct rule *following = term->next;
+
+			free(term->value);
+			if (term->dups)
+				free_line(term->dups);
+			free(term);
+			term = following;
+		}
+		free(pr);
+	}
+	config_rules_end = NULL;
+	config_rules_has_path = 0;
+}
+
+/*
+ * Free all nodes of a policy list.  Only the nodes are released; the
+ * name/value/metadata strings they point to are not touched.
+ */
+void dev_policy_free(struct dev_policy *p)
+{
+	struct dev_policy *following;
+
+	for (; p; p = following) {
+		following = p->next;
+		free(p);
+	}
+}
+
+/*
+ * Translate the text of an action= value into its enum policy_action.
+ * Unknown text yields act_err.
+ */
+static enum policy_action map_act(const char *act)
+{
+	static const struct {
+		const char *word;
+		enum policy_action action;
+	} known[] = {
+		{ "include",		act_include },
+		{ "re-add",		act_re_add },
+		{ "spare",		act_spare },
+		{ "spare-same-slot",	act_spare_same_slot },
+		{ "force-spare",	act_force_spare },
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++)
+		if (strcmp(act, known[i].word) == 0)
+			return known[i].action;
+
+	return act_err;
+}
+
+/*
+ * Work out the effective action for a policy list: the numerically
+ * largest action among the action= entries that pol_for_each() yields
+ * for 'metadata', or act_default when there is none.
+ */
+static enum policy_action policy_action(struct dev_policy *plist, const char *metadata)
+{
+	enum policy_action best = act_default;
+	struct dev_policy *p;
+
+	plist = pol_find(plist, pol_act);
+	pol_for_each(p, plist, metadata) {
+		enum policy_action candidate = map_act(p->value);
+
+		best = (candidate > best) ? candidate : best;
+	}
+	return best;
+}
+
+/*
+ * Return 1 when the effective action of 'plist' for 'metadata' is at
+ * least 'want', 0 otherwise.  An effective action of act_err never
+ * allows anything.
+ */
+int policy_action_allows(struct dev_policy *plist, const char *metadata, enum policy_action want)
+{
+	enum policy_action granted = policy_action(plist, metadata);
+
+	return granted != act_err && granted >= want;
+}
+
+/*
+ * policy_action_allows() for a disk: the disk's policy list is built,
+ * queried and released again.
+ */
+int disk_action_allows(struct mdinfo *disk, const char *metadata, enum policy_action want)
+{
+	struct dev_policy *disk_pol = disk_policy(disk);
+	int allowed = policy_action_allows(disk_pol, metadata, want);
+
+	dev_policy_free(disk_pol);
+	return allowed;
+}
+
+/* Domain policy:
+ * Any device can have a list of domains asserted by different policy
+ * statements.
+ * An array also has a list of domains comprising all the domains of
+ * all the devices in an array.
+ * Where an array has a spare-group, that becomes an additional domain for
+ * every device in the array and thus for the array.
+ *
+ * We keep the list of domains in a sorted linked list
+ * As dev policies are already sorted, this is fairly easy to manage.
+ */
+
+/*
+ * Merge one domain name into a list sorted by strcmp().
+ * When the name is missing a new node is inserted at its sorted position;
+ * 'domain' is stored by reference.  Returns the address of the link that
+ * points at the matching (or newly inserted) node.
+ */
+static struct domainlist **domain_merge_one(struct domainlist **domp,
+					     const char *domain)
+{
+	struct domainlist *node;
+
+	for (node = *domp; node; domp = &node->next, node = *domp) {
+		int order = strcmp(node->dom, domain);
+
+		if (order == 0)
+			return domp;	/* already present */
+		if (order > 0)
+			break;		/* insert in front of 'node' */
+	}
+
+	node = xmalloc(sizeof(*node));
+	node->next = *domp;
+	node->dom = domain;
+	*domp = node;
+	return domp;
+}
+
+#if (DEBUG)
+/* Debug aid: print every entry of a policy list via dprintf(). */
+void dump_policy(struct dev_policy *policy)
+{
+	struct dev_policy *p;
+
+	for (p = policy; p; p = p->next)
+		dprintf("policy: %p name: %s value: %s metadata: %s\n",
+			p, p->name, p->value, p->metadata);
+}
+#endif
+
+/*
+ * Add to *domp every domain of 'pollist' that pol_for_each() yields for
+ * 'metadata' and that is not in *domp yet.
+ */
+void domain_merge(struct domainlist **domp, struct dev_policy *pollist,
+		  const char *metadata)
+{
+	struct dev_policy *first_domain = pol_find(pollist, pol_domain);
+	struct dev_policy *p;
+
+	pol_for_each(p, first_domain, metadata)
+		domain_merge_one(domp, p->value);
+}
+
+/*
+ * Check that every domain of 'pol' (for 'metadata') is also in 'dom'.
+ * Both lists are sorted, so 'dom' is only ever walked forwards.
+ * When 'pol' has no domains we know nothing about the device and leave
+ * the decision to the caller:
+ *   -1: has no domains
+ *    0: has domains, not all match
+ *    1: has domains, all match
+ */
+int domain_test(struct domainlist *dom, struct dev_policy *pol,
+		const char *metadata)
+{
+	int verdict = -1;
+	struct dev_policy *p;
+
+	pol = pol_find(pol, pol_domain);
+	pol_for_each(p, pol, metadata) {
+		verdict = 1;
+
+		/* skip list entries that sort before the wanted domain */
+		for (; dom; dom = dom->next)
+			if (strcmp(dom->dom, p->value) >= 0)
+				break;
+
+		if (dom == NULL || strcmp(dom->dom, p->value) != 0)
+			return 0;
+	}
+	return verdict;
+}
+
+/*
+ * Merge the domains of device 'devid' (for 'metadata') into *dom.
+ * The device's policy list is built and released internally.
+ */
+void domainlist_add_dev(struct domainlist **dom, int devid, const char *metadata)
+{
+	struct dev_policy *dev_pol = devid_policy(devid);
+
+	domain_merge(dom, dev_pol, metadata);
+	dev_policy_free(dev_pol);
+}
+
+/*
+ * Build the domain list of an array: the union of the domains of all
+ * devices on mdi->devs.  Returns NULL for a NULL mdi (and for an array
+ * whose devices contribute no domain).  Release with domain_free().
+ */
+struct domainlist *domain_from_array(struct mdinfo *mdi, const char *metadata)
+{
+	struct domainlist *domlist = NULL;
+	struct mdinfo *member;
+
+	if (mdi == NULL)
+		return NULL;
+
+	for (member = mdi->devs; member; member = member->next) {
+		int devid = makedev(member->disk.major, member->disk.minor);
+
+		domainlist_add_dev(&domlist, devid, metadata);
+	}
+	return domlist;
+}
+
+/*
+ * Public wrapper around domain_merge_one(): make sure 'domain' is in
+ * the sorted list *domp.  The string is stored by reference.
+ */
+void domain_add(struct domainlist **domp, char *domain)
+{
+	(void)domain_merge_one(domp, domain);
+}
+
+/* Free all nodes of a domain list; the domain strings are not freed. */
+void domain_free(struct domainlist *dl)
+{
+	struct domainlist *following;
+
+	for (; dl; dl = following) {
+		following = dl->next;
+		free(dl);
+	}
+}
+
+/*
+ * same-path policy.
+ * Some policy decisions are guided by knowledge of which
+ * array previously owned the device at a given physical location (path).
+ * When removing a device from an array we might record the array against
+ * the path, and when finding a new device, we might look for which
+ * array previously used that path.
+ *
+ * The 'array' is described by a map_ent, and the path by the disk in an
+ * mdinfo, or a string.
+ */
+
+/*
+ * Record which array last used the slot 'id_path': a cookie file named
+ * after the path is written below FAILED_SLOTS_DIR (created on demand)
+ * and holds the array's metadata name and its four uuid words.
+ * Failures are reported with pr_err() and otherwise ignored.
+ */
+void policy_save_path(char *id_path, struct map_ent *array)
+{
+	char cookie[PATH_MAX];
+	FILE *f;
+	int written;
+
+	if (mkdir(FAILED_SLOTS_DIR, S_IRWXU) < 0 && errno != EEXIST) {
+		pr_err("can't create file to save path to old disk: %s\n",
+		       strerror(errno));
+		return;
+	}
+
+	snprintf(cookie, PATH_MAX, FAILED_SLOTS_DIR "/%s", id_path);
+	f = fopen(cookie, "w");
+	if (f == NULL) {
+		pr_err("can't create file to save path to old disk: %s\n",
+		       strerror(errno));
+		return;
+	}
+
+	written = fprintf(f, "%s %08x:%08x:%08x:%08x\n",
+			  array->metadata,
+			  array->uuid[0], array->uuid[1],
+			  array->uuid[2], array->uuid[3]);
+	if (written <= 0)
+		pr_err("Failed to write to <id_path> cookie\n");
+
+	fclose(f);
+}
+
+/*
+ * Look up which array last used the slot that 'disk' sits in.
+ * The cookie written by policy_save_path() for the disk's path is read
+ * back into array->metadata and array->uuid[0..3].
+ * Returns 1 when all five fields were parsed, 0 otherwise (including
+ * when no cookie exists).
+ *
+ * NOTE(review): the %s conversion carries no field width, so the cookie
+ * content must fit array->metadata - confirm against struct map_ent.
+ */
+int policy_check_path(struct mdinfo *disk, struct map_ent *array)
+{
+	char cookie[PATH_MAX];
+	char *id_path = disk_path(disk);
+	FILE *f;
+	int fields;
+
+	if (id_path == NULL)
+		return 0;
+
+	snprintf(cookie, PATH_MAX, FAILED_SLOTS_DIR "/%s", id_path);
+	free(id_path);	/* only needed to build the file name */
+
+	f = fopen(cookie, "r");
+	if (f == NULL)
+		return 0;
+
+	fields = fscanf(f, " %s %x:%x:%x:%x\n",
+			array->metadata,
+			array->uuid,
+			array->uuid+1,
+			array->uuid+2,
+			array->uuid+3);
+	fclose(f);
+
+	return fields == 5;
+}
+
+/* invocation of udev rule file: fixed header that Write_rules() emits
+ * before the generated entries
+ */
+char udev_template_start[] =
+"# do not edit this file, it is automatically generated by mdadm\n"
+"\n";
+
+/*
+ * Return the value of the first term in 'rule' whose name is 'rule_type'
+ * (compared by address), or NULL when the rule has no such term.
+ */
+char *find_rule(struct rule *rule, char *rule_type)
+{
+	struct rule *term;
+
+	for (term = rule; term; term = term->next)
+		if (term->name == rule_type)
+			return term->value;
+
+	return NULL;
+}
+
+#define UDEV_RULE_FORMAT \
+"ACTION==\"add\", SUBSYSTEM==\"block\", " \
+"ENV{DEVTYPE}==\"%s\", ENV{ID_PATH}==\"%s\", " \
+"RUN+=\"" BINDIR "/mdadm --incremental $env{DEVNAME}\"\n"
+
+#define UDEV_RULE_FORMAT_NOTYPE \
+"ACTION==\"add\", SUBSYSTEM==\"block\", " \
+"ENV{ID_PATH}==\"%s\", " \
+"RUN+=\"" BINDIR "/mdadm --incremental $env{DEVNAME}\"\n"
+
+/*
+ * Write one udev rule for 'rule' to fd.
+ * The rule's path= value fills ENV{ID_PATH}; its type= value (or "part"
+ * when force_part is set) fills ENV{DEVTYPE}, and UDEV_RULE_FORMAT_NOTYPE
+ * is used when there is no type at all.
+ * Returns -1 when the rule has no path, 1 when the whole line was
+ * written and 0 when write() came up short or failed.
+ */
+int write_rule(struct rule *rule, int fd, int force_part)
+{
+	char line[1024];
+	char *pth = find_rule(rule, rule_path);
+	char *typ = find_rule(rule, rule_type);
+	int len;
+
+	if (pth == NULL)
+		return -1;
+	if (force_part)
+		typ = type_part;
+
+	if (typ != NULL)
+		snprintf(line, sizeof(line) - 1, UDEV_RULE_FORMAT, typ, pth);
+	else
+		snprintf(line, sizeof(line) - 1, UDEV_RULE_FORMAT_NOTYPE, pth);
+
+	len = (int)strlen(line);
+	return write(fd, line, len) == len;
+}
+
+/*
+ * Return the path= value of 'pr' when the rule qualifies for a udev
+ * entry: it is a policy or part-policy rule, its action supports
+ * handling of spares (>= act_spare_same_slot) and it names a path.
+ * Returns NULL otherwise.
+ */
+static char *spare_rule_path(struct pol_rule *pr)
+{
+	char *act;
+
+	if (pr->type != rule_policy && pr->type != rule_part)
+		return NULL;
+	act = find_rule(pr->rule, pol_act);
+	if (!act || map_act(act) < act_spare_same_slot)
+		return NULL;
+	return find_rule(pr->rule, rule_path);
+}
+
+/* Generate the udev rule entries for the POLICY lines found in the config
+ * file.  Only qualifying rules are used (see spare_rule_path()), and
+ * when several of them name the same path only the first occurrence in
+ * config_rules is written.
+ *
+ * Returns 1 on success and 0 as soon as a rule cannot be written.
+ */
+int generate_entries(int fd)
+{
+	struct pol_rule *loop, *dup;
+
+	for (loop = config_rules; loop; loop = loop->next) {
+		char *loop_value = spare_rule_path(loop);
+		int duplicate = 0;
+
+		if (!loop_value)
+			continue;
+
+		/* was the same path already emitted for an earlier rule?
+		 * Earlier rules are filtered by their own type, exactly
+		 * like the rule under consideration.
+		 */
+		for (dup = config_rules; dup != loop; dup = dup->next) {
+			char *dup_value = spare_rule_path(dup);
+
+			if (dup_value && strcmp(loop_value, dup_value) == 0) {
+				duplicate = 1;
+				break;
+			}
+		}
+
+		/* not a dup or first occurrence */
+		if (!duplicate &&
+		    !write_rule(loop->rule, fd, loop->type == rule_part))
+			return 0;
+	}
+	return 1;
+}
+
+/* Write_rules routine creates dynamic udev rules used to handle
+ * hot-plug events for bare devices (and making them spares)
+ *
+ * With a rule_name the rules are written to "<rule_name>.temp", which is
+ * renamed to rule_name once complete and removed on any failure.
+ * With rule_name == NULL the rules go to file descriptor 1.
+ * Returns 0 on success, 1 on failure.
+ */
+int Write_rules(char *rule_name)
+{
+	int fd;
+	char udev_rule_file[PATH_MAX];
+
+	if (rule_name) {
+		int len = snprintf(udev_rule_file, sizeof(udev_rule_file),
+				   "%s.temp", rule_name);
+
+		/* refuse names that do not fit rather than truncating */
+		if (len < 0 || (size_t)len >= sizeof(udev_rule_file))
+			return 1;
+		fd = creat(udev_rule_file,
+			   S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+		if (fd == -1)
+			return 1;
+	} else
+		fd = 1;
+
+	/* write static invocation */
+	if (write(fd, udev_template_start,
+		  sizeof(udev_template_start) - 1)
+	    != (int)sizeof(udev_template_start)-1)
+		goto abort;
+
+	/* generate_entries() returns 0 when a rule could not be written;
+	 * in that case remove the file instead of installing it
+	 */
+	if (!generate_entries(fd))
+		goto abort;
+
+	fsync(fd);
+	if (rule_name) {
+		close(fd);
+		if (rename(udev_rule_file, rule_name) < 0) {
+			unlink(udev_rule_file);
+			return 1;
+		}
+	}
+	return 0;
+abort:
+	if (rule_name) {
+		close(fd);
+		unlink(udev_rule_file);
+	}
+	return 1;
+}
diff --git a/probe_roms.c b/probe_roms.c
new file mode 100644
index 00000000..b0b08833
--- /dev/null
+++ b/probe_roms.c
@@ -0,0 +1,317 @@
+/*
+ * probe_roms - scan for Adapter ROMS
+ *
+ * (based on linux-2.6:arch/x86/kernel/probe_roms_32.c)
+ *
+ * Copyright (C) 2008 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "probe_roms.h"
+#include "mdadm.h"
+#include <unistd.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <asm/types.h>
+
+static void *rom_mem = MAP_FAILED;
+static int rom_fd = -1;
+static const int rom_len = 0xf0000 - 0xc0000; /* option-rom memory region */
+/* written by the SIGBUS handler and polled by the probe helpers, so it
+ * has to be a volatile sig_atomic_t rather than a plain int */
+static volatile sig_atomic_t _sigbus;
+static unsigned long rom_align;
+
+/* SIGBUS handler: only records that a bus error was delivered. */
+static void sigbus(int sig)
+{
+	(void)sig;	/* the signal number itself is not needed */
+	_sigbus = 1;
+}
+
+/* Read one byte from @ptr into @val.
+ *
+ * Returns 0 on success and -1 when the SIGBUS handler flagged a bus
+ * error; the flag is cleared again before returning.
+ */
+static int probe_address8(const __u8 *ptr, __u8 *val)
+{
+	int rc = 0;
+
+	/* volatile access so the compiler cannot drop the probing load or
+	 * reorder it relative to other volatile accesses */
+	*val = *(const volatile __u8 *)ptr;
+	if (_sigbus)
+		rc = -1;
+	_sigbus = 0;
+
+	return rc;
+}
+
+/* Read one 16-bit word from @ptr into @val.
+ *
+ * Returns 0 on success and -1 when the SIGBUS handler flagged a bus
+ * error; the flag is cleared again before returning.
+ */
+static int probe_address16(const __u16 *ptr, __u16 *val)
+{
+	int rc = 0;
+
+	/* volatile access so the compiler cannot drop the probing load or
+	 * reorder it relative to other volatile accesses */
+	*val = *(const volatile __u16 *)ptr;
+	if (_sigbus)
+		rc = -1;
+	_sigbus = 0;
+
+	return rc;
+}
+
+/* Tear down what probe_roms_init() set up: restore the default SIGBUS
+ * disposition, then release the descriptor and the mapping if held.
+ */
+void probe_roms_exit(void)
+{
+	const int have_fd = rom_fd >= 0;
+	const int have_mapping = rom_mem != MAP_FAILED;
+
+	signal(SIGBUS, SIG_DFL);
+
+	if (have_fd) {
+		close(rom_fd);
+		rom_fd = -1;
+	}
+
+	if (have_mapping) {
+		munmap(rom_mem, rom_len);
+		rom_mem = MAP_FAILED;
+	}
+}
+
+/* Prepare for ROM probing: install the SIGBUS handler, open /dev/mem and
+ * map the option-ROM window that starts at 0xc0000.
+ *
+ * @align: scan granularity, must be 512 or 2048.
+ *
+ * Returns 0 on success, -1 on failure. After the alignment check has
+ * passed, every failure goes through probe_roms_exit().
+ */
+int probe_roms_init(unsigned long align)
+{
+	int fd;
+
+	/* valid values are 2048 and 512. 512 is for PCI-3.0 compliant
+	 * systems, or systems that do not have dangerous/legacy ISA
+	 * devices. 2048 should always be safe
+	 */
+	if (align != 512 && align != 2048)
+		return -1;
+	rom_align = align;
+
+	if (signal(SIGBUS, sigbus) == SIG_ERR)
+		goto fail;
+
+	fd = open("/dev/mem", O_RDONLY);
+	if (fd < 0)
+		goto fail;
+
+	rom_mem = mmap(NULL, rom_len, PROT_READ, MAP_PRIVATE, fd, 0xc0000);
+	if (rom_mem == MAP_FAILED) {
+		close(fd);
+		goto fail;
+	}
+
+	rom_fd = fd;
+	return 0;
+
+fail:
+	probe_roms_exit();
+	return -1;
+}
+
+/**
+ * isa_bus_to_virt - convert physical address to mmap'd region
+ * @addr - address to convert
+ *
+ * Only valid between a successful call to probe_roms_init and the
+ * corresponding probe_roms_exit
+ */
+static void *isa_bus_to_virt(unsigned long addr)
+{
+	/* byte offset into the mapping, which begins at 0xc0000; go through
+	 * char * because arithmetic on void * is only a GNU extension */
+	return (char *)rom_mem + (addr - 0xc0000);
+}
+
+/* One address range in the legacy ROM area. */
+struct resource {
+	unsigned long start;	/* first address of the range */
+	unsigned long end;	/* last address of the range */
+	unsigned long data;	/* filled in by probe_roms() for adapter ROMs */
+	const char *name;	/* label for the range */
+};
+
+static struct resource system_rom_resource = {
+	.start = 0xf0000,
+	.end = 0xfffff,
+	.data = 0,
+	.name = "System ROM",
+};
+
+static struct resource extension_rom_resource = {
+	.start = 0xe0000,
+	.end = 0xeffff,
+	.data = 0,
+	.name = "Extension ROM",
+};
+
+/* Six slots; only the first starts with a non-zero start address. */
+static struct resource adapter_rom_resources[] = {
+	{ .start = 0xc8000, .end = 0, .data = 0, .name = "Adapter ROM" },
+	{ .start = 0, .end = 0, .data = 0, .name = "Adapter ROM" },
+	{ .start = 0, .end = 0, .data = 0, .name = "Adapter ROM" },
+	{ .start = 0, .end = 0, .data = 0, .name = "Adapter ROM" },
+	{ .start = 0, .end = 0, .data = 0, .name = "Adapter ROM" },
+	{ .start = 0, .end = 0, .data = 0, .name = "Adapter ROM" },
+};
+
+static struct resource video_rom_resource = {
+	.start = 0xc0000,
+	.end = 0xc7fff,
+	.data = 0,
+	.name = "Video ROM",
+};
+
+#define ROMSIGNATURE 0xaa55
+
+/* Return non-zero when the 16-bit word at @rom can be read and equals
+ * ROMSIGNATURE, zero otherwise.
+ */
+static int romsignature(const unsigned char *rom)
+{
+	unsigned short word = 0;
+
+	if (probe_address16((const unsigned short *)rom, &word) != 0)
+		return 0;
+
+	return word == ROMSIGNATURE;
+}
+
+/* Return non-zero when all @length bytes at @rom could be read and their
+ * 8-bit sum is zero.
+ */
+static int romchecksum(const unsigned char *rom, unsigned long length)
+{
+	unsigned char total = 0;
+	unsigned char byte;
+
+	while (length) {
+		/* an unreadable byte ends the scan with length still non-zero */
+		if (probe_address8(rom++, &byte) != 0)
+			break;
+		total += byte;
+		length--;
+	}
+
+	return length == 0 && total == 0;
+}
+
+/* Let @fn examine each adapter ROM recorded by probe_roms(), in table
+ * order, until @fn returns non-zero or an unused slot (start == 0) is hit.
+ *
+ * Returns the last value returned by @fn, or 0 when probing has not been
+ * initialised or no ROM was recorded.
+ */
+int scan_adapter_roms(scan_fn fn)
+{
+	unsigned int slot;
+	int found = 0;
+
+	if (rom_fd < 0)
+		return 0;
+
+	for (slot = 0; slot < ARRAY_SIZE(adapter_rom_resources); slot++) {
+		struct resource *res = &adapter_rom_resources[slot];
+
+		if (!res->start)
+			break;
+
+		found = fn(isa_bus_to_virt(res->start),
+			   isa_bus_to_virt(res->end),
+			   isa_bus_to_virt(res->data));
+		if (found)
+			break;
+	}
+
+	return found;
+}
+
+/* Round @addr up to the next multiple of @alignment (a power of two). */
+static unsigned long align(unsigned long addr, unsigned long alignment)
+{
+	const unsigned long mask = alignment - 1;
+
+	return (addr + mask) & ~mask;
+}
+
+/* Scan the mapped ROM window and record the ROMs that are found.
+ *
+ * Does nothing unless probe_roms_init() succeeded (rom_fd >= 0).
+ * In order:
+ *  1. look for a video ROM signature between video_rom_resource.start
+ *     and adapter_rom_resources[0].start, stepping by rom_align;
+ *  2. pick the upper bound of the adapter area: the extension ROM start
+ *     if that region has a valid signature and checksum, otherwise the
+ *     system ROM start;
+ *  3. walk the area in between and store start, end and PCI data
+ *     address of each accepted ROM in adapter_rom_resources[].
+ */
+void probe_roms(void)
+{
+ const void *rom;
+ unsigned long start, length, upper;
+ unsigned char c;
+ unsigned int i;
+ __u16 val=0;
+
+ if (rom_fd < 0)
+ return;
+
+ /* video rom */
+ upper = adapter_rom_resources[0].start;
+ for (start = video_rom_resource.start; start < upper; start += rom_align) {
+ rom = isa_bus_to_virt(start);
+ if (!romsignature(rom))
+ continue;
+
+ /* recorded as soon as the signature matches, before the length
+ * byte has been read */
+ video_rom_resource.start = start;
+
+ if (probe_address8(rom + 2, &c) != 0)
+ continue;
+
+ /* 0 < length <= 0x7f * 512, historically */
+ length = c * 512;
+
+ /* if checksum okay, trust length byte */
+ if (length && romchecksum(rom, length))
+ video_rom_resource.end = start + length - 1;
+ break;
+ }
+
+ /* adapter scan begins after the video ROM, but never below the
+ * default adapter start held in 'upper' */
+ start = align(video_rom_resource.end + 1, rom_align);
+ if (start < upper)
+ start = upper;
+
+ /* system rom */
+ upper = system_rom_resource.start;
+
+ /* check for extension rom (ignore length byte!) */
+ rom = isa_bus_to_virt(extension_rom_resource.start);
+ if (romsignature(rom)) {
+ length = extension_rom_resource.end - extension_rom_resource.start + 1;
+ if (romchecksum(rom, length))
+ upper = extension_rom_resource.start;
+ }
+
+ /* check for adapter roms on rom_align boundaries (512 or 2048 bytes,
+ * as accepted by probe_roms_init) */
+ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += rom_align) {
+ rom = isa_bus_to_virt(start);
+ if (!romsignature(rom))
+ continue;
+
+ if (probe_address8(rom + 2, &c) != 0)
+ continue;
+
+ /* 0 < length <= 0x7f * 512, historically */
+ length = c * 512;
+
+ /* Retrieve 16-bit pointer to PCI Data Structure (offset 18h-19h)
+ * The data can be within 64KB forward of the first location
+ * of this code image. The pointer is in little-endian order
+ */
+
+ if (probe_address16(rom + 0x18, &val) != 0)
+ continue;
+ val = __le16_to_cpu(val);
+
+ /* but accept any length that fits if checksum okay */
+ if (!length || start + length > upper || !romchecksum(rom, length))
+ continue;
+
+ adapter_rom_resources[i].start = start;
+ adapter_rom_resources[i].data = start + (unsigned long) val;
+ adapter_rom_resources[i].end = start + length - 1;
+
+ /* continue from the aligned block holding this ROM's last byte;
+ * the loop increment then steps past it */
+ start = adapter_rom_resources[i++].end & ~(rom_align - 1);
+ }
+}
diff --git a/probe_roms.h b/probe_roms.h
new file mode 100644
index 00000000..6d70411a
--- /dev/null
+++ b/probe_roms.h
@@ -0,0 +1,24 @@
+/*
+ * probe_roms - scan for Adapter ROMS
+ *
+ * Copyright (C) 2008 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef PROBE_ROMS_H
+#define PROBE_ROMS_H
+
+/* Release what probe_roms_init() acquired and restore the default
+ * SIGBUS disposition. */
+void probe_roms_exit(void);
+
+/* Prepare for probing; @align must be 512 or 2048.
+ * Returns 0 on success, -1 on failure. */
+int probe_roms_init(unsigned long align);
+
+/* Callback for scan_adapter_roms(): receives the mapped addresses of a
+ * ROM's first byte, last byte and PCI data structure; a non-zero return
+ * stops the scan. */
+typedef int (*scan_fn)(const void *start, const void *end, const void *data);
+
+/* Call @fn for each adapter ROM recorded by probe_roms(); returns the
+ * last value returned by @fn, or 0 if nothing was scanned. */
+int scan_adapter_roms(scan_fn fn);
+
+/* Scan the ROM window and record the adapter ROMs that are found. */
+void probe_roms(void);
+
+#endif /* PROBE_ROMS_H */
diff --git a/pwgr.c b/pwgr.c
new file mode 100644
index 00000000..a07de336
--- /dev/null
+++ b/pwgr.c
@@ -0,0 +1,17 @@
+
+/*
+ * We cannot link a static binary with passwd/group support, so
+ * just do without
+ */
+#include <stdlib.h>
+#include <pwd.h>
+#include <grp.h>
+
+/* Stub replacement: no user database is consulted, every lookup reports
+ * "not found". */
+struct passwd *getpwnam(const char *name)
+{
+	(void)name;
+	return NULL;
+}
+/* Stub replacement: no group database is consulted, every lookup reports
+ * "not found". */
+struct group *getgrnam(const char *name)
+{
+	(void)name;
+	return NULL;
+}
diff --git a/raid5extend.c b/raid5extend.c
new file mode 100644
index 00000000..d8e62c2c
--- /dev/null
+++ b/raid5extend.c
@@ -0,0 +1,80 @@
+
+int phys2log(int phys, int stripe, int n, int layout)
+{
+	/* In an 'n' disk array using 'layout',
+	 * in stripe 'stripe', the physical disc 'phys'
+	 * stores what logical chunk?
+	 * -1 mean parity.
+	 * -2 is returned for a layout this function does not know.
+	 */
+	int pd;	/* physical disc holding parity in this stripe */
+
+	switch(layout) {
+	case ALGORITHM_LEFT_ASYMMETRIC:
+		pd = (n-1) - (stripe % n);
+		if (phys < pd)
+			return phys;
+		else if (phys == pd)
+			return -1;
+		else return phys-1;
+
+	case ALGORITHM_RIGHT_ASYMMETRIC:
+		pd = stripe % n;
+		if (phys < pd)
+			return phys;
+		else if (phys == pd)
+			return -1;
+		else return phys-1;
+
+	case ALGORITHM_LEFT_SYMMETRIC:
+		pd = (n-1) - (stripe %n);
+		if (phys < pd)
+			return phys+ n-1-pd;
+		else if (phys == pd)
+			return -1;
+		else return phys-pd-1;
+
+	case ALGORITHM_RIGHT_SYMMETRIC:
+		pd = stripe % n;
+		if (phys < pd)
+			return phys+ n-1-pd;
+		else if (phys == pd)
+			return -1;
+		else return phys-pd-1;
+	}
+	return -2;
+}
+
+/* Copy the data of an 'n' device array (read through rfds[]) into the
+ * geometry of an 'm' device array (written through wfds[]), 4096 bytes
+ * at a time; parity blocks of the source are read but not copied.
+ *
+ * Returns 1 when all blocks were copied, 0 after a failed read or write.
+ *
+ * NOTE(review): relies on error(), log2phys() and llseek(), none of which
+ * is declared in this file - confirm where they are meant to come from.
+ */
+int raid5_extend(unsigned long len, int chunksize, int layout, int n, int m, int rfds[], int wfds[])
+{
+
+	static char buf[4096];
+
+	unsigned long blocks = len/4;
+	unsigned int blocksperchunk= chunksize/4096;
+
+	unsigned long b;
+
+	for (b=0; b<blocks; b++) {
+		unsigned long stripe = b / blocksperchunk;
+		unsigned int offset = b - (stripe*blocksperchunk);
+		int src;
+		for (src=0; src<n; src++) {
+			unsigned long chunk, dstripe;
+			int dnum, snum;
+			if (read(rfds[src], buf, sizeof(buf)) != sizeof(buf)) {
+				error();
+				return 0;
+			}
+
+			snum = phys2log(src, stripe, n, layout);
+
+			if (snum == -1)
+				continue;
+			chunk = stripe*(n-1)+snum;
+
+			dstripe = chunk/(m-1);
+			/* index inside the destination stripe: it must be taken
+			 * relative to dstripe, not to the source stripe */
+			dnum = log2phys(chunk-(dstripe*(m-1)), dstripe, m, layout);
+			llseek(wfds[dnum], dstripe*chunksize+(offset*4096), 0);
+			if (write(wfds[dnum], buf, sizeof(buf)) != sizeof(buf)) {
+				error();
+				return 0;
+			}
+		}
+	}
+	return 1;
+}
diff --git a/raid6check.8 b/raid6check.8
new file mode 100644
index 00000000..50033430
--- /dev/null
+++ b/raid6check.8
@@ -0,0 +1,96 @@
+.\" -*- nroff -*-
+.\" Copyright Piergiorgio Sartor and others.
+.\" This program is free software; you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation; either version 2 of the License, or
+.\" (at your option) any later version.
+.\" See file COPYING in distribution for details.
+.TH RAID6CHECK 8 "" v1.0.0
+.SH NAME
+raid6check \- check MD RAID6 device for errors
+.I aka
+Linux Software RAID
+
+.SH SYNOPSIS
+
+.BI raid6check " <raid6 device> <start stripe> <number of stripes>"
+
+.SH DESCRIPTION
+RAID6 devices in which one single component drive has errors can use
+the double parity in order to find out which component drive is affected.
+The "raid6check" tool checks, for each stripe, the double parity
+consistency, reports mismatches and, if possible, which
+component drive has the mismatch.
+Since it works at stripe level, it can report different drives with
+mismatches at different stripes.
+
+"raid6check" requires a non-degraded RAID6 MD device as first
+parameter, a starting stripe (usually 0) and the number of stripes
+to be checked.
+If this third parameter is also 0, it will check the array up to
+the end.
+
+"raid6check" will start printing information about the RAID6, then
+for each stripe, it will report the parity rotation status.
+In case of parity mismatches, "raid6check" reports, if possible,
+which component drive could be responsible. Otherwise it reports
+that it is not possible to find the component drive.
+
+If the given MD device is not a RAID6, "raid6check" will, of
+course, not continue.
+
+If the RAID6 MD device is degraded, "raid6check" will report
+an error and it will not proceed further.
+
+No write operations are performed on the array or the components.
+Furthermore, the checked array can be online and in use during
+the operation of "raid6check".
+
+.SH EXAMPLES
+
+.B " raid6check /dev/md0 0 0"
+.br
+This will check /dev/md0 from start to end.
+
+.B " raid6check /dev/md3 0 1"
+.br
+This will check the first stripe of /dev/md3.
+
+.B " raid6check /dev/md1 1000 0"
+.br
+This will check /dev/md1 from stripe 1000 up to the end.
+
+.B " raid6check /dev/md127 128 256"
+.br
+This will check 256 stripes of /dev/md127 starting from stripe 128.
+
+.B " raid6check /dev/md0 0 0 | grep -i error > md0_err.log"
+.br
+This will check /dev/md0 completely and create a log file only
+with errors, if any.
+
+.SH FILES
+
+"raid6check" uses directly the component drives as found in /dev.
+Furthermore, the sysfs interface is needed in order to find out
+the RAID6 parameters.
+
+.SH BUGS
+Negative parameters can lead to unexpected results.
+
+It is not clear what will happen if the RAID6 MD device gets
+degraded during the check.
+
+.PP
+The latest version of
+.I raid6check
+should always be available from
+.IP
+.B http://www.kernel.org/pub/linux/utils/raid/mdadm/
+.PP
+Related man pages:
+.PP
+.IR mdadm (8),
+.IR mdmon (8),
+.IR mdadm.conf (5),
+.IR md (4).
diff --git a/raid6check.c b/raid6check.c
new file mode 100644
index 00000000..1d8ac40d
--- /dev/null
+++ b/raid6check.c
@@ -0,0 +1,663 @@
+/*
+ * raid6check - extended consistency check for RAID-6
+ *
+ * Copyright (C) 2011 Piergiorgio Sartor
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Piergiorgio Sartor
+ * Based on "restripe.c" from "mdadm" codebase
+ */
+
+#include "mdadm.h"
+#include <stdint.h>
+#include <signal.h>
+#include <sys/mman.h>
+
+#define CHECK_PAGE_BITS (12)
+#define CHECK_PAGE_SIZE (1 << CHECK_PAGE_BITS)
+
+/* What to do with a stripe whose parity does not match. */
+enum repair {
+	NO_REPAIR     = 0,	/* only report */
+	MANUAL_REPAIR = 1,	/* rebuild the two slots named by the caller */
+	AUTO_REPAIR   = 2	/* rebuild the slot identified by the check */
+};
+
+/* Helpers used below but defined outside this file. geo_map() is
+ * implemented in restripe.c; the others presumably live there too
+ * (this tool is "based on restripe.c") - TODO confirm.
+ */
+int geo_map(int block, unsigned long long stripe, int raid_disks,
+ int level, int layout);
+void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size);
+void make_tables(void);
+void ensure_zero_has_size(int chunk_size);
+void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs);
+void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+ uint8_t **ptrs);
+void xor_blocks(char *target, char **sources, int disks, int size);
+
+/* Collect per stripe consistency information.
+ *
+ * For every byte position, compare the computed parities (p, q) with the
+ * ones read from disk (chunkP, chunkQ) and store in results[]:
+ *   -255  both match
+ *   -1    only P differs
+ *   -2    only Q differs
+ *   >= 0  both differ: difference of the GF logarithms of the two
+ *         deltas, brought into the range 0..254
+ */
+void raid6_collect(int chunk_size, uint8_t *p, uint8_t *q,
+		   char *chunkP, char *chunkQ, int *results)
+{
+	extern uint8_t raid6_gflog[];
+	int pos;
+
+	for (pos = 0; pos < chunk_size; pos++) {
+		const uint8_t delta_p = (uint8_t)chunkP[pos] ^ (uint8_t)p[pos];
+		const uint8_t delta_q = (uint8_t)chunkQ[pos] ^ (uint8_t)q[pos];
+
+		if (delta_p == 0 && delta_q == 0) {
+			results[pos] = -255;
+		} else if (delta_q == 0) {
+			results[pos] = -1;
+		} else if (delta_p == 0) {
+			results[pos] = -2;
+		} else {
+			int log_diff = raid6_gflog[delta_q] - raid6_gflog[delta_p];
+
+			if (log_diff < 0)
+				log_diff += 255;
+			results[pos] = log_diff;
+		}
+	}
+}
+
+/* Try to find out if a specific disk has problems in a CHECK_PAGE_SIZE page size
+ *
+ * @results:    CHECK_PAGE_SIZE per-byte verdicts as produced by
+ *              raid6_collect() (-255 means "byte is consistent").
+ * @raid_disks: number of devices; a verdict >= raid_disks is treated as
+ *              inconsistent.
+ *
+ * Returns -255 when the whole page is consistent, the common verdict when
+ * every inconsistent byte agrees on it, and -65535 when the verdicts
+ * disagree or are out of range.
+ */
+int raid6_stats_blk(int *results, int raid_disks)
+{
+	int i;
+	int curr_broken_disk = -255;
+	int prev_broken_disk = -255;
+	int broken_status = 0;	/* 0: clean, 1: one suspect, 2: conflict */
+
+	for(i = 0; i < CHECK_PAGE_SIZE; i++) {
+
+		if(results[i] != -255)
+			curr_broken_disk = results[i];
+
+		if(curr_broken_disk >= raid_disks)
+			broken_status = 2;
+
+		switch(broken_status) {
+		case 0:
+			if(curr_broken_disk != -255) {
+				prev_broken_disk = curr_broken_disk;
+				broken_status = 1;
+			}
+			break;
+
+		case 1:
+			if(curr_broken_disk != prev_broken_disk)
+				broken_status = 2;
+			break;
+
+		case 2:
+		default:
+			curr_broken_disk = prev_broken_disk = -65535;
+			break;
+		}
+	}
+
+	/* A conflict first seen on the very last byte has not been turned
+	 * into -65535 by the loop yet; without this the second suspect
+	 * would be returned as if it were the only one. */
+	if(broken_status == 2)
+		return -65535;
+
+	return curr_broken_disk;
+}
+
+/* Collect disks status for a stripe in CHECK_PAGE_SIZE page size blocks:
+ * disk[k] receives the verdict of raid6_stats_blk() for the k-th page of
+ * results[].
+ */
+void raid6_stats(int *disk, int *results, int raid_disks, int chunk_size)
+{
+	int page = 0;
+	int offset;
+
+	for (offset = 0; offset < chunk_size; offset += CHECK_PAGE_SIZE) {
+		disk[page] = raid6_stats_blk(results + offset, raid_disks);
+		page++;
+	}
+}
+
+/* Lock the process memory, ignore SIGTERM/SIGINT/SIGQUIT (the previous
+ * handlers are saved in sig[0..2]) and write the byte range of stripe
+ * 'start' to the "suspend_lo"/"suspend_hi" sysfs attributes.
+ *
+ * Returns 2 if mlockall() fails (nothing else has been changed then),
+ * otherwise the OR of both sysfs_set_num() results multiplied by 256.
+ */
+int lock_stripe(struct mdinfo *info, unsigned long long start,
+		int chunk_size, int data_disks, sighandler_t *sig) {
+	unsigned long long lo, hi;
+	int rv_lo, rv_hi;
+
+	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
+		return 2;
+
+	sig[0] = signal(SIGTERM, SIG_IGN);
+	sig[1] = signal(SIGINT, SIG_IGN);
+	sig[2] = signal(SIGQUIT, SIG_IGN);
+
+	lo = start * chunk_size * data_disks;
+	hi = (start + 1) * chunk_size * data_disks;
+
+	rv_lo = sysfs_set_num(info, NULL, "suspend_lo", lo);
+	rv_hi = sysfs_set_num(info, NULL, "suspend_hi", hi);
+
+	return (rv_lo | rv_hi) * 256;
+}
+
+/* Counterpart of lock_stripe(): reset the suspend range through sysfs,
+ * reinstall the signal handlers saved in sig[0..2] and unlock memory.
+ *
+ * Returns 3 if munlockall() fails, otherwise the OR of the three
+ * sysfs_set_num() results multiplied by 256.
+ */
+int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig) {
+	int rv = 0;
+
+	rv |= sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
+	rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
+	rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);
+
+	/* restore in reverse order of installation */
+	signal(SIGQUIT, sig[2]);
+	signal(SIGINT, sig[1]);
+	signal(SIGTERM, sig[0]);
+
+	return munlockall() != 0 ? 3 : rv * 256;
+}
+
+/* Autorepair
+ *
+ * For every CHECK_PAGE_SIZE page of the stripe whose verdict in disk[] is
+ * a slot number (>= 0), recompute that slot's page in stripes[] - Q via
+ * qsyndrome(), anything else by XOR of the remaining data/P pages - and
+ * write the rebuilt pages back to the component devices.
+ *
+ * Returns 0 on success (including "nothing to repair"), -1 if seeking or
+ * writing on a component device fails.
+ */
+int autorepair(int *disk, int diskP, int diskQ, unsigned long long start, int chunk_size,
+		char *name[], int raid_disks, int data_disks, char **blocks_page,
+		char **blocks, uint8_t *p, char **stripes, int *block_index_for_slot,
+		int *source, unsigned long long *offsets)
+{
+	int i, j;
+	int pages_to_write_count = 0;
+	int page_to_write[chunk_size >> CHECK_PAGE_BITS];
+	for(j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
+		if (disk[j] >= 0) {
+			printf("Auto-repairing slot %d (%s)\n", disk[j], name[disk[j]]);
+			pages_to_write_count++;
+			page_to_write[j] = 1;
+			for(i = 0; i < raid_disks; i++) {
+				blocks_page[i] = blocks[i] + j * CHECK_PAGE_SIZE;
+			}
+			if (disk[j] == diskQ) {
+				qsyndrome(p, (uint8_t*)stripes[diskQ] + j * CHECK_PAGE_SIZE, (uint8_t**)blocks_page, data_disks, CHECK_PAGE_SIZE);
+			}
+			else {
+				char *all_but_failed_blocks[data_disks];
+				int failed_block_index = block_index_for_slot[disk[j]];
+				/* substitute P for the failed data block; when P itself
+				 * failed no index matches and P is rebuilt from data */
+				for(i = 0; i < data_disks; i++) {
+					if (failed_block_index == i) {
+						all_but_failed_blocks[i] = stripes[diskP] + j * CHECK_PAGE_SIZE;
+					}
+					else {
+						all_but_failed_blocks[i] = blocks_page[i];
+					}
+				}
+				xor_blocks(stripes[disk[j]] + j * CHECK_PAGE_SIZE,
+				all_but_failed_blocks, data_disks, CHECK_PAGE_SIZE);
+			}
+		}
+		else {
+			page_to_write[j] = 0;
+		}
+	}
+
+	if(pages_to_write_count > 0) {
+		for(j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
+			int slot;
+			off64_t seek_res;
+
+			if(page_to_write[j] != 1)
+				continue;
+
+			slot = disk[j];
+			/* never write repaired data at an unknown file position */
+			seek_res = lseek64(source[slot], offsets[slot] + start * chunk_size + j * CHECK_PAGE_SIZE, SEEK_SET);
+			if (seek_res < 0) {
+				fprintf(stderr, "lseek failed for slot %d\n", slot);
+				return -1;
+			}
+			if (write(source[slot], stripes[slot] + j * CHECK_PAGE_SIZE, CHECK_PAGE_SIZE) != CHECK_PAGE_SIZE) {
+				fprintf(stderr, "Failed to write a full chunk.\n");
+				return -1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* Manual repair
+ *
+ * Rebuild the chunks of slots failed_disk1 and failed_disk2 of stripe
+ * 'start' from the remaining chunks held in stripes[]/blocks[], then
+ * suspend the stripe, write both rebuilt chunks and release it again.
+ *
+ * Returns 0 on success;
+ *        -1 if suspending the stripe through sysfs or an lseek fails
+ *           (the stripe may still be suspended);
+ *        -2 if mlockall() fails in lock_stripe(), if releasing the stripe
+ *           fails, or if a chunk was not written completely.
+ */
+int manual_repair(int diskP, int diskQ, int chunk_size, int raid_disks, int data_disks,
+		int failed_disk1, int failed_disk2, unsigned long long start, int *block_index_for_slot,
+		char *name[], char **stripes, char **blocks, uint8_t *p, struct mdinfo *info, sighandler_t *sig,
+		int *source, unsigned long long *offsets)
+{
+	int err = 0;
+	int i;
+	int write_res1, write_res2;
+	off64_t seek_res;
+
+	printf("Repairing stripe %llu\n", start);
+	printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
+	       failed_disk1, name[failed_disk1],
+	       failed_disk2, name[failed_disk2]);
+
+	if (failed_disk1 == diskQ || failed_disk2 == diskQ) {
+		char *all_but_failed_blocks[data_disks];
+		int failed_data_or_p;
+		int failed_block_index;
+
+		if (failed_disk1 == diskQ) {
+			failed_data_or_p = failed_disk2;
+		}
+		else {
+			failed_data_or_p = failed_disk1;
+		}
+		printf("Repairing D/P(%d) and Q\n", failed_data_or_p);
+		failed_block_index = block_index_for_slot[failed_data_or_p];
+		for (i = 0; i < data_disks; i++) {
+			if (failed_block_index == i) {
+				all_but_failed_blocks[i] = stripes[diskP];
+			}
+			else {
+				all_but_failed_blocks[i] = blocks[i];
+			}
+		}
+		xor_blocks(stripes[failed_data_or_p],
+			   all_but_failed_blocks, data_disks, chunk_size);
+		qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
+	}
+	else {
+		ensure_zero_has_size(chunk_size);
+		if (failed_disk1 == diskP || failed_disk2 == diskP) {
+			int failed_data, failed_block_index;
+			if (failed_disk1 == diskP) {
+				failed_data = failed_disk2;
+			}
+			else {
+				failed_data = failed_disk1;
+			}
+			failed_block_index = block_index_for_slot[failed_data];
+			printf("Repairing D(%d) and P\n", failed_data);
+			raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks);
+		}
+		else {
+			printf("Repairing D and D\n");
+			int failed_block_index1 = block_index_for_slot[failed_disk1];
+			int failed_block_index2 = block_index_for_slot[failed_disk2];
+			/* the lower block index is passed first */
+			if (failed_block_index1 > failed_block_index2) {
+				int t = failed_block_index1;
+				failed_block_index1 = failed_block_index2;
+				failed_block_index2 = t;
+			}
+			raid6_2data_recov(raid_disks, chunk_size, failed_block_index1, failed_block_index2, (uint8_t**)blocks);
+		}
+	}
+
+	err = lock_stripe(info, start, chunk_size, data_disks, sig);
+	if(err != 0) {
+		/* 2 means mlockall() failed, i.e. nothing was suspended */
+		if (err != 2) {
+			return -1;
+		}
+		return -2;
+	}
+
+	seek_res = lseek64(source[failed_disk1],
+			   offsets[failed_disk1] + start * chunk_size, SEEK_SET);
+	if (seek_res < 0) {
+		fprintf(stderr, "lseek failed for failed_disk1\n");
+		return -1;
+	}
+	write_res1 = write(source[failed_disk1], stripes[failed_disk1], chunk_size);
+
+	seek_res = lseek64(source[failed_disk2],
+			   offsets[failed_disk2] + start * chunk_size, SEEK_SET);
+	if (seek_res < 0) {
+		fprintf(stderr, "lseek failed for failed_disk2\n");
+		return -1;
+	}
+	write_res2 = write(source[failed_disk2], stripes[failed_disk2], chunk_size);
+
+	err = unlock_all_stripes(info, sig);
+	if(err != 0) {
+		return -2;
+	}
+
+	if (write_res1 != chunk_size || write_res2 != chunk_size) {
+		fprintf(stderr, "Failed to write a complete chunk.\n");
+		return -2;
+	}
+
+	return 0;
+}
+
+/* Check, and optionally repair, 'length' stripes beginning at 'start'.
+ *
+ * For each stripe: suspend it (lock_stripe), read one chunk from every
+ * device through source[]/offsets[], recompute P and Q from the data
+ * chunks, compare them with the on-disk P and Q and print a message for
+ * every CHECK_PAGE_SIZE page with a mismatch.
+ * With repair == AUTO_REPAIR, autorepair() runs while the stripe is
+ * still suspended. With repair == MANUAL_REPAIR, manual_repair() runs
+ * after the stripe has been released; it suspends the stripe itself.
+ *
+ * Returns 0 when every stripe was processed, otherwise the non-zero
+ * result of the step that failed. All buffers allocated here are freed
+ * before returning.
+ */
+int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
+ int raid_disks, int chunk_size, int level, int layout,
+ unsigned long long start, unsigned long long length, char *name[],
+ enum repair repair, int failed_disk1, int failed_disk2)
+{
+ /* read the data and p and q blocks, and check we got them right */
+ char *stripe_buf = xmalloc(raid_disks * chunk_size);
+ char **stripes = xmalloc(raid_disks * sizeof(char*));
+ char **blocks = xmalloc(raid_disks * sizeof(char*));
+ char **blocks_page = xmalloc(raid_disks * sizeof(char*));
+ int *block_index_for_slot = xmalloc(raid_disks * sizeof(int));
+ uint8_t *p = xmalloc(chunk_size);
+ uint8_t *q = xmalloc(chunk_size);
+ int *results = xmalloc(chunk_size * sizeof(int));
+ sighandler_t *sig = xmalloc(3 * sizeof(sighandler_t));
+
+ int i, j;
+ int diskP, diskQ;
+ int data_disks = raid_disks - 2;
+ int err = 0;
+
+ extern int tables_ready;
+
+ if (!tables_ready)
+ make_tables();
+
+ /* stripes[i] is the chunk-sized window of stripe_buf for slot i */
+ for ( i = 0 ; i < raid_disks ; i++)
+ stripes[i] = stripe_buf + i * chunk_size;
+
+ while (length > 0) {
+ /* per-page verdict for this stripe, filled by raid6_stats() */
+ int disk[chunk_size >> CHECK_PAGE_BITS];
+
+ err = lock_stripe(info, start, chunk_size, data_disks, sig);
+ if(err != 0) {
+ /* 2 means mlockall() failed, so there is nothing to undo */
+ if (err != 2)
+ unlock_all_stripes(info, sig);
+ goto exitCheck;
+ }
+ for (i = 0 ; i < raid_disks ; i++) {
+ off64_t seek_res = lseek64(source[i], offsets[i] + start * chunk_size,
+ SEEK_SET);
+ if (seek_res < 0) {
+ fprintf(stderr, "lseek to source %d failed\n", i);
+ unlock_all_stripes(info, sig);
+ err = -1;
+ goto exitCheck;
+ }
+ int read_res = read(source[i], stripes[i], chunk_size);
+ if (read_res < chunk_size) {
+ fprintf(stderr, "Failed to read complete chunk disk %d, aborting\n", i);
+ unlock_all_stripes(info, sig);
+ err = -1;
+ goto exitCheck;
+ }
+ }
+
+ /* blocks[] lists the chunks in logical order (data first, then P,
+ * then Q); block_index_for_slot[] is the reverse mapping.
+ * Note: the local 'disk' below shadows the disk[] array above. */
+ for (i = 0 ; i < data_disks ; i++) {
+ int disk = geo_map(i, start, raid_disks, level, layout);
+ blocks[i] = stripes[disk];
+ block_index_for_slot[disk] = i;
+ }
+
+ qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
+ diskP = geo_map(-1, start, raid_disks, level, layout);
+ diskQ = geo_map(-2, start, raid_disks, level, layout);
+ blocks[data_disks] = stripes[diskP];
+ block_index_for_slot[diskP] = data_disks;
+ blocks[data_disks+1] = stripes[diskQ];
+ block_index_for_slot[diskQ] = data_disks+1;
+
+ raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results);
+ raid6_stats(disk, results, raid_disks, chunk_size);
+
+ for(j = 0; j < (chunk_size >> CHECK_PAGE_BITS); j++) {
+ /* verdicts >= -2 are block numbers (-1 = P, -2 = Q): turn them
+ * into slot numbers; -255 and -65535 are left untouched */
+ if(disk[j] >= -2) {
+ disk[j] = geo_map(disk[j], start, raid_disks, level, layout);
+ }
+ if(disk[j] >= 0) {
+ printf("Error detected at stripe %llu, page %d: possible failed disk slot: %d --> %s\n",
+ start, j, disk[j], name[disk[j]]);
+ }
+ if(disk[j] == -65535) {
+ printf("Error detected at stripe %llu, page %d: disk slot unknown\n", start, j);
+ }
+ }
+
+ if(repair == AUTO_REPAIR) {
+ err = autorepair(disk, diskP, diskQ, start, chunk_size,
+ name, raid_disks, data_disks, blocks_page,
+ blocks, p, stripes, block_index_for_slot,
+ source, offsets);
+ if(err != 0) {
+ unlock_all_stripes(info, sig);
+ goto exitCheck;
+ }
+ }
+
+ err = unlock_all_stripes(info, sig);
+ if(err != 0) {
+ goto exitCheck;
+ }
+
+ if(repair == MANUAL_REPAIR) {
+ err = manual_repair(diskP, diskQ, chunk_size, raid_disks, data_disks,
+ failed_disk1, failed_disk2, start, block_index_for_slot,
+ name, stripes, blocks, p, info, sig,
+ source, offsets);
+ /* only -1 is followed by an unlock here; any other value stays
+ * in err and is returned once the loop ends */
+ if(err == -1) {
+ unlock_all_stripes(info, sig);
+ goto exitCheck;
+ }
+ }
+
+ length--;
+ start++;
+ }
+
+exitCheck:
+
+ free(stripe_buf);
+ free(stripes);
+ free(blocks);
+ free(blocks_page);
+ free(block_index_for_slot);
+ free(p);
+ free(q);
+ free(results);
+ free(sig);
+
+ return err;
+}
+
+/* Parse 'str' as an unsigned decimal number.
+ *
+ * Returns the value on success and leaves *err untouched. If 'str' is
+ * empty, carries a minus sign or contains anything that is not part of
+ * the number, *err is set to 'str' and 0 is returned.
+ */
+unsigned long long getnum(char *str, char **err)
+{
+	char *e;
+	const char *s = str;
+	unsigned long long rv;
+
+	/* strtoull() accepts "-N" and returns the negated value as a huge
+	 * unsigned number, so look for the sign ourselves (after the same
+	 * leading white space strtoull() would skip) */
+	while (*s == ' ' || (*s >= '\t' && *s <= '\r'))
+		s++;
+	if (*s == '-') {
+		*err = str;
+		return 0;
+	}
+
+	rv = strtoull(str, &e, 10);
+	if (e==str || *e) {
+		*err = str;
+		return 0;
+	}
+	return rv;
+}
+
+/* raid6check entry point.
+ *
+ *   raid6check md_device start_stripe length_stripes [autorepair]
+ *   raid6check md_device repair stripe failed_slot_1 failed_slot_2
+ *
+ * Reads the array geometry from sysfs, opens every component device and
+ * hands over to check_stripes().
+ *
+ * Exit status: 0 success, 1 usage, 2 md device cannot be opened,
+ * 3 not a RAID-6, 4 bad number or slot, 6 component device missing or
+ * cannot be opened, 7 check_stripes() failed, 8 degraded array,
+ * 9 sysfs information unavailable.
+ */
+int main(int argc, char *argv[])
+{
+	/* md_device start length */
+	int *fds = NULL;
+	char **disk_name = NULL;
+	unsigned long long *offsets = NULL;
+	int raid_disks = 0;
+	int active_disks;
+	int chunk_size = 0;
+	int layout = -1;
+	int level = 6;
+	enum repair repair = NO_REPAIR;
+	int failed_disk1 = -1;
+	int failed_disk2 = -1;
+	unsigned long long start, length;
+	int i;
+	int mdfd;
+	struct mdinfo *info = NULL, *comp = NULL;
+	char *err = NULL;
+	int exit_err = 0;
+	int close_flag = 0;
+	char *prg = strrchr(argv[0], '/');
+
+	if (prg == NULL)
+		prg = argv[0];
+	else
+		prg++;
+
+	if (argc < 4) {
+		fprintf(stderr, "Usage: %s md_device start_stripe length_stripes [autorepair]\n", prg);
+		fprintf(stderr, "   or: %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
+		exit_err = 1;
+		goto exitHere;
+	}
+
+	mdfd = open(argv[1], O_RDONLY);
+	if(mdfd < 0) {
+		perror(argv[1]);
+		fprintf(stderr, "%s: cannot open %s\n", prg, argv[1]);
+		exit_err = 2;
+		goto exitHere;
+	}
+
+	info = sysfs_read(mdfd, NULL,
+			  GET_LEVEL|
+			  GET_LAYOUT|
+			  GET_DISKS|
+			  GET_DEGRADED |
+			  GET_COMPONENT|
+			  GET_CHUNK|
+			  GET_DEVS|
+			  GET_OFFSET|
+			  GET_SIZE);
+
+	if(info == NULL) {
+		fprintf(stderr, "%s: Error reading sysfs information of %s\n", prg, argv[1]);
+		exit_err = 9;
+		goto exitHere;
+	}
+
+	if(info->array.level != level) {
+		fprintf(stderr, "%s: %s not a RAID-6\n", prg, argv[1]);
+		exit_err = 3;
+		goto exitHere;
+	}
+
+	if(info->array.failed_disks > 0) {
+		fprintf(stderr, "%s: %s degraded array\n", prg, argv[1]);
+		exit_err = 8;
+		goto exitHere;
+	}
+
+	printf("layout: %d\n", info->array.layout);
+	printf("disks: %d\n", info->array.raid_disks);
+	printf("component size: %llu\n", info->component_size * 512);
+	printf("total stripes: %llu\n", (info->component_size * 512) / info->array.chunk_size);
+	printf("chunk size: %d\n", info->array.chunk_size);
+	printf("\n");
+
+	/* stop at the end of the device list even if fewer active devices
+	 * than raid_disks are listed */
+	comp = info->devs;
+	for(i = 0, active_disks = 0; comp != NULL && active_disks < info->array.raid_disks; i++) {
+		printf("disk: %d - offset: %llu - size: %llu - name: %s - slot: %d\n",
+			i, comp->data_offset * 512, comp->component_size * 512,
+			map_dev(comp->disk.major, comp->disk.minor, 0),
+			comp->disk.raid_disk);
+		if(comp->disk.raid_disk >= 0)
+			active_disks++;
+		comp = comp->next;
+	}
+	printf("\n");
+
+	close(mdfd);
+
+	raid_disks = info->array.raid_disks;
+	chunk_size = info->array.chunk_size;
+	layout = info->array.layout;
+	if (strcmp(argv[2], "repair")==0) {
+		if (argc < 6) {
+			fprintf(stderr, "For repair mode, call %s md_device repair stripe failed_slot_1 failed_slot_2\n", prg);
+			exit_err = 1;
+			goto exitHere;
+		}
+		repair = MANUAL_REPAIR;
+		start = getnum(argv[3], &err);
+		length = 1;
+		failed_disk1 = getnum(argv[4], &err);
+		failed_disk2 = getnum(argv[5], &err);
+
+		/* a huge number can become negative when stored in an int;
+		 * such a slot must not be used as an array index either */
+		if(failed_disk1 < 0 || failed_disk1 >= info->array.raid_disks) {
+			fprintf(stderr, "%s: failed_slot_1 index is higher than number of devices in raid\n", prg);
+			exit_err = 4;
+			goto exitHere;
+		}
+		if(failed_disk2 < 0 || failed_disk2 >= info->array.raid_disks) {
+			fprintf(stderr, "%s: failed_slot_2 index is higher than number of devices in raid\n", prg);
+			exit_err = 4;
+			goto exitHere;
+		}
+		if(failed_disk1 == failed_disk2) {
+			fprintf(stderr, "%s: failed_slot_1 and failed_slot_2 are the same\n", prg);
+			exit_err = 4;
+			goto exitHere;
+		}
+	}
+	else {
+		start = getnum(argv[2], &err);
+		length = getnum(argv[3], &err);
+		if (argc >= 5 && strcmp(argv[4], "autorepair")==0)
+			repair = AUTO_REPAIR;
+	}
+
+	if (err) {
+		fprintf(stderr, "%s: Bad number: %s\n", prg, err);
+		exit_err = 4;
+		goto exitHere;
+	}
+
+	if(start > ((info->component_size * 512) / chunk_size)) {
+		start = (info->component_size * 512) / chunk_size;
+		fprintf(stderr, "%s: start beyond disks size\n", prg);
+	}
+
+	/* length 0 means "up to the end"; also clamp a range that would
+	 * run past the last stripe */
+	if((length == 0) ||
+	   ((length + start) > ((info->component_size * 512) / chunk_size))) {
+		length = (info->component_size * 512) / chunk_size - start;
+	}
+
+	disk_name = xmalloc(raid_disks * sizeof(*disk_name));
+	fds = xmalloc(raid_disks * sizeof(*fds));
+	offsets = xcalloc(raid_disks, sizeof(*offsets));
+
+	for(i=0; i<raid_disks; i++) {
+		fds[i] = -1;
+	}
+	close_flag = 1;
+
+	comp = info->devs;
+	for (i=0, active_disks=0; comp != NULL && active_disks<raid_disks; i++) {
+		int disk_slot = comp->disk.raid_disk;
+		if(disk_slot >= 0) {
+			disk_name[disk_slot] = map_dev(comp->disk.major, comp->disk.minor, 0);
+			offsets[disk_slot] = comp->data_offset * 512;
+			fds[disk_slot] = open(disk_name[disk_slot], O_RDWR | O_SYNC);
+			if (fds[disk_slot] < 0) {
+				perror(disk_name[disk_slot]);
+				fprintf(stderr,"%s: cannot open %s\n", prg, disk_name[disk_slot]);
+				exit_err = 6;
+				goto exitHere;
+			}
+			active_disks++;
+		}
+		comp = comp->next;
+	}
+
+	/* check_stripes() needs an open descriptor for every slot */
+	if (active_disks < raid_disks) {
+		fprintf(stderr, "%s: only %d of %d component devices found\n",
+			prg, active_disks, raid_disks);
+		exit_err = 6;
+		goto exitHere;
+	}
+
+	int rv = check_stripes(info, fds, offsets,
+			       raid_disks, chunk_size, level, layout,
+			       start, length, disk_name, repair, failed_disk1, failed_disk2);
+	if (rv != 0) {
+		fprintf(stderr,
+			"%s: check_stripes returned %d\n", prg, rv);
+		exit_err = 7;
+		goto exitHere;
+	}
+
+exitHere:
+
+	if (close_flag)
+		for(i = 0; i < raid_disks; i++)
+			if (fds[i] >= 0)
+				close(fds[i]);
+
+	free(disk_name);
+	free(fds);
+	free(offsets);
+
+	exit(exit_err);
+}
diff --git a/restripe.c b/restripe.c
new file mode 100644
index 00000000..93ef056e
--- /dev/null
+++ b/restripe.c
@@ -0,0 +1,994 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include <stdint.h>
+
+/* To restripe, we read from old geometry to a buffer, and
+ * write from buffer to new geometry.
+ * When reading, we might have missing devices and so could need
+ * to reconstruct.
+ * When writing, we need to create correct parity and Q.
+ *
+ */
+
+/* Map a block of a stripe to the index of the device that holds it.
+ * 'block' >= 0 selects a data block, -1 the parity (P) block and -2 the
+ * Q syndrome block.  'stripe' is the stripe number, 'raid_disks' the
+ * number of devices and 'level'/'layout' select the placement rule.
+ * Returns the device index, or -1 when the level/layout combination is
+ * not handled by the switch below.
+ */
+int geo_map(int block, unsigned long long stripe, int raid_disks,
+ int level, int layout)
+{
+ /* On the given stripe, find which disk in the array will have
+ * block numbered 'block'.
+ * '-1' means the parity block.
+ * '-2' means the Q syndrome.
+ */
+ int pd;
+
+ /* layout is not relevant for raid0 and raid4 */
+ if ((level == 0) ||
+ (level == 4))
+ layout = 0;
+
+ /* Each case label encodes the level in the hundreds and the layout
+ * in the remainder.
+ */
+ switch(level*100 + layout) {
+ case 000:
+ case 400:
+ case 500 + ALGORITHM_PARITY_N:
+ /* raid 4 isn't messed around by parity blocks */
+ if (block == -1)
+ return raid_disks-1; /* parity block */
+ return block;
+ case 500 + ALGORITHM_LEFT_ASYMMETRIC:
+ pd = (raid_disks-1) - stripe % raid_disks;
+ if (block == -1) return pd;
+ if (block >= pd)
+ block++;
+ return block;
+
+ case 500 + ALGORITHM_RIGHT_ASYMMETRIC:
+ pd = stripe % raid_disks;
+ if (block == -1) return pd;
+ if (block >= pd)
+ block++;
+ return block;
+
+ case 500 + ALGORITHM_LEFT_SYMMETRIC:
+ pd = (raid_disks - 1) - stripe % raid_disks;
+ if (block == -1) return pd;
+ return (pd + 1 + block) % raid_disks;
+
+ case 500 + ALGORITHM_RIGHT_SYMMETRIC:
+ pd = stripe % raid_disks;
+ if (block == -1) return pd;
+ return (pd + 1 + block) % raid_disks;
+
+ case 500 + ALGORITHM_PARITY_0:
+ /* parity is on device 0, so block -1 maps to 0 as well */
+ return block + 1;
+
+ case 600 + ALGORITHM_PARITY_N_6:
+ if (block == -2)
+ return raid_disks - 1;
+ if (block == -1)
+ return raid_disks - 2; /* parity block */
+ return block;
+ case 600 + ALGORITHM_LEFT_ASYMMETRIC_6:
+ if (block == -2)
+ return raid_disks - 1;
+ /* Q is fixed on the last device; the remaining devices are
+ * mapped exactly like the matching 5xx layout above.
+ */
+ raid_disks--;
+ pd = (raid_disks-1) - stripe % raid_disks;
+ if (block == -1) return pd;
+ if (block >= pd)
+ block++;
+ return block;
+
+ case 600 + ALGORITHM_RIGHT_ASYMMETRIC_6:
+ if (block == -2)
+ return raid_disks - 1;
+ raid_disks--;
+ pd = stripe % raid_disks;
+ if (block == -1) return pd;
+ if (block >= pd)
+ block++;
+ return block;
+
+ case 600 + ALGORITHM_LEFT_SYMMETRIC_6:
+ if (block == -2)
+ return raid_disks - 1;
+ raid_disks--;
+ pd = (raid_disks - 1) - stripe % raid_disks;
+ if (block == -1) return pd;
+ return (pd + 1 + block) % raid_disks;
+
+ case 600 + ALGORITHM_RIGHT_SYMMETRIC_6:
+ if (block == -2)
+ return raid_disks - 1;
+ raid_disks--;
+ pd = stripe % raid_disks;
+ if (block == -1) return pd;
+ return (pd + 1 + block) % raid_disks;
+
+ case 600 + ALGORITHM_PARITY_0_6:
+ if (block == -2)
+ return raid_disks - 1;
+ return block + 1;
+
+ case 600 + ALGORITHM_PARITY_0:
+ if (block == -1)
+ return 0;
+ if (block == -2)
+ return 1;
+ return block + 2;
+
+ case 600 + ALGORITHM_LEFT_ASYMMETRIC:
+ pd = raid_disks - 1 - (stripe % raid_disks);
+ if (block == -1) return pd;
+ if (block == -2) return (pd+1) % raid_disks;
+ /* P on the last device means Q wrapped round to device 0 */
+ if (pd == raid_disks - 1)
+ return block+1;
+ if (block >= pd)
+ return block+2;
+ return block;
+
+ case 600 + ALGORITHM_ROTATING_ZERO_RESTART:
+ /* Different order for calculating Q, otherwise same as ... */
+ case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
+ pd = stripe % raid_disks;
+ if (block == -1) return pd;
+ if (block == -2) return (pd+1) % raid_disks;
+ if (pd == raid_disks - 1)
+ return block+1;
+ if (block >= pd)
+ return block+2;
+ return block;
+
+ case 600 + ALGORITHM_LEFT_SYMMETRIC:
+ pd = raid_disks - 1 - (stripe % raid_disks);
+ if (block == -1) return pd;
+ if (block == -2) return (pd+1) % raid_disks;
+ return (pd + 2 + block) % raid_disks;
+
+ case 600 + ALGORITHM_RIGHT_SYMMETRIC:
+ pd = stripe % raid_disks;
+ if (block == -1) return pd;
+ if (block == -2) return (pd+1) % raid_disks;
+ return (pd + 2 + block) % raid_disks;
+
+ case 600 + ALGORITHM_ROTATING_N_RESTART:
+ /* Same as left_asymmetric, but first stripe is
+ * D D D P Q rather than
+ * Q D D D P
+ */
+ pd = raid_disks - 1 - ((stripe + 1) % raid_disks);
+ if (block == -1) return pd;
+ if (block == -2) return (pd+1) % raid_disks;
+ if (pd == raid_disks - 1)
+ return block+1;
+ if (block >= pd)
+ return block+2;
+ return block;
+
+ case 600 + ALGORITHM_ROTATING_N_CONTINUE:
+ /* Same as left_symmetric but Q is before P */
+ pd = raid_disks - 1 - (stripe % raid_disks);
+ if (block == -1) return pd;
+ if (block == -2) return (pd+raid_disks-1) % raid_disks;
+ return (pd + 1 + block) % raid_disks;
+ }
+ /* unknown level/layout combination */
+ return -1;
+}
+/* Return 1 when 'layout' is one of the three ALGORITHM_ROTATING_*
+ * layouts (N_CONTINUE, N_RESTART, ZERO_RESTART), 0 for any other value.
+ */
+static int is_ddf(int layout)
+{
+    if (layout == ALGORITHM_ROTATING_N_CONTINUE ||
+        layout == ALGORITHM_ROTATING_N_RESTART ||
+        layout == ALGORITHM_ROTATING_ZERO_RESTART)
+        return 1;
+
+    return 0;
+}
+
+/* XOR 'disks' source buffers together, byte by byte, into 'target'.
+ * Every buffer holds 'size' bytes.  With no sources at all the target
+ * is filled with zeros.
+ */
+void xor_blocks(char *target, char **sources, int disks, int size)
+{
+    int pos;
+
+    for (pos = 0; pos < size; pos++) {
+        char acc = 0;
+        int src = 0;
+
+        while (src < disks) {
+            acc ^= sources[src][pos];
+            src++;
+        }
+        target[pos] = acc;
+    }
+}
+
+/* Compute the RAID6 P and Q blocks for 'size' bytes over 'disks' sources.
+ * P is the XOR of all sources.  Q is accumulated from the last source
+ * down to the first: the running value is doubled in GF(2^8) (shift
+ * left, XOR with 0x1d when the top bit was set) and the next source is
+ * then XORed in.
+ */
+void qsyndrome(uint8_t *p, uint8_t *q, uint8_t **sources, int disks, int size)
+{
+    int pos;
+
+    for (pos = 0; pos < size; pos++) {
+        uint8_t par = sources[disks-1][pos];
+        uint8_t syn = par;
+        int src;
+
+        for (src = disks - 2; src >= 0; src--) {
+            uint8_t data = sources[src][pos];
+            uint8_t doubled = (syn << 1) & 0xff;
+
+            if (syn & 0x80)
+                doubled ^= 0x1d;
+            par ^= data;
+            syn = doubled ^ data;
+        }
+        p[pos] = par;
+        q[pos] = syn;
+    }
+}
+
+/*
+ * The following was taken from linux/drivers/md/mktables.c, and modified
+ * to create in-memory tables rather than C code
+ */
+
+/* Multiply 'a' by 'b' in GF(2^8) by shift-and-add: for every set bit of
+ * 'b' the current multiple of 'a' is XORed into the result, and 'a' is
+ * doubled with reduction by 0x1d whenever its top bit is shifted out.
+ */
+static uint8_t gfmul(uint8_t a, uint8_t b)
+{
+    uint8_t product = 0;
+
+    for (; b != 0; b >>= 1) {
+        if (b & 1)
+            product ^= a;
+
+        if (a & 0x80)
+            a = (a << 1) ^ 0x1d;
+        else
+            a = a << 1;
+    }
+
+    return product;
+}
+
+/* Raise 'a' to the power 'b' in GF(2^8) by square-and-multiply.
+ * The exponent is first reduced modulo 255 into the range 0..254, so
+ * negative exponents are accepted.
+ */
+static uint8_t gfpow(uint8_t a, int b)
+{
+    uint8_t result = 1;
+    int exponent = b % 255;
+
+    if (exponent < 0)
+        exponent += 255;
+
+    for (; exponent != 0; exponent >>= 1) {
+        if (exponent & 1)
+            result = gfmul(result, a);
+        a = gfmul(a, a);
+    }
+
+    return result;
+}
+
+/* Set to 1 once make_tables() has filled in the lookup tables below. */
+int tables_ready = 0;
+uint8_t raid6_gfmul[256][256];
+uint8_t raid6_gfexp[256];
+uint8_t raid6_gfinv[256];
+uint8_t raid6_gfexi[256];
+uint8_t raid6_gflog[256];
+uint8_t raid6_gfilog[256];
+
+/* Fill in the GF(2^8) lookup tables used by the RAID6 recovery code and
+ * mark them ready:
+ *   raid6_gfmul[x][y] : product of x and y
+ *   raid6_gfexp[x]    : 2^x (entry 255 is forced to 0)
+ *   raid6_gfinv[x]    : x^254, i.e. the inverse of x
+ *   raid6_gfexi[x]    : inverse of (2^x ^ 1)
+ *   raid6_gflog[]     : discrete logarithm, raid6_gfilog[] its inverse
+ */
+void make_tables(void)
+{
+    unsigned int x, y;
+    uint8_t power;
+    uint32_t value, exponent;
+
+    /* Multiplication table */
+    for (x = 0; x < 256; x++)
+        for (y = 0; y < 256; y++)
+            raid6_gfmul[x][y] = gfmul(x, y);
+
+    /* Powers of two; once the sequence returns to 1 the remaining
+     * entry (255) is not a real one and is stored as 0.
+     */
+    power = 1;
+    for (x = 0; x < 256; x++) {
+        raid6_gfexp[x] = power;
+        power = gfmul(power, 2);
+        if (power == 1)
+            power = 0;
+    }
+
+    /* Inverses: x^-1 == x^254 */
+    for (x = 0; x < 256; x++)
+        raid6_gfinv[x] = gfpow(x, 254);
+
+    /* inv(2^x + 1), the exponent-xor-inverse table */
+    for (x = 0; x < 256; x++)
+        raid6_gfexi[x] = raid6_gfinv[raid6_gfexp[x] ^ 1];
+
+    /* Log and inverse log.  Modified code from:
+     * http://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html
+     */
+    raid6_gflog[0] = 0;
+    raid6_gfilog[255] = 0;
+    value = 1;
+    for (exponent = 0; exponent < 255; exponent++) {
+        raid6_gflog[value] = (uint8_t) exponent;
+        raid6_gfilog[exponent] = (uint8_t) value;
+        value <<= 1;
+        if (value & 256)
+            value ^= 0x11d;    /* same value as octal 0435 */
+    }
+
+    tables_ready = 1;
+}
+
+/* Shared all-zero buffer and the number of bytes it currently holds. */
+uint8_t *zero;
+int zero_size;
+
+/* Make sure 'zero' points at a zero-filled buffer of at least
+ * 'chunk_size' bytes, replacing a smaller one when necessary.
+ */
+void ensure_zero_has_size(int chunk_size)
+{
+    if (zero != NULL && chunk_size <= zero_size)
+        return;
+
+    free(zero);    /* free(NULL) is a no-op */
+    zero = xcalloc(1, chunk_size);
+    zero_size = chunk_size;
+}
+
+/* Following was taken from linux/drivers/md/raid6recov.c */
+
+/* Recover two failed data blocks.
+ *
+ * 'ptrs' holds 'disks' block pointers: the syndrome source blocks
+ * followed by P at ptrs[disks-2] and Q at ptrs[disks-1].  'faila' and
+ * 'failb' index the two lost blocks, each 'bytes' long; their contents
+ * are rebuilt in place through ptrs[faila] and ptrs[failb].
+ * The raid6_gf* tables and the global 'zero' buffer are read here.
+ * NOTE(review): the table index 'failb-faila' looks like it expects
+ * faila < failb, and 'zero' to cover at least 'bytes' bytes - confirm
+ * against the callers.
+ */
+void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+ uint8_t **ptrs)
+{
+ uint8_t *p, *q, *dp, *dq;
+ uint8_t px, qx, db;
+ const uint8_t *pbmul; /* P multiplier table for B data */
+ const uint8_t *qmul; /* Q multiplier table (for both) */
+
+ p = ptrs[disks-2];
+ q = ptrs[disks-1];
+
+ /* Compute syndrome with zero for the missing data pages
+ Use the dead data pages as temporary storage for
+ delta p and delta q */
+ dp = ptrs[faila];
+ ptrs[faila] = zero;
+ dq = ptrs[failb];
+ ptrs[failb] = zero;
+
+ /* only the disks-2 source blocks take part; P and Q are excluded */
+ qsyndrome(dp, dq, ptrs, disks-2, bytes);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
+ qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
+
+ /* Now do it... */
+ while ( bytes-- ) {
+ /* px/qx: stored P/Q versus the partial values just computed */
+ px = *p ^ *dp;
+ qx = qmul[*q ^ *dq];
+ *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
+ *dp++ = db ^ px; /* Reconstructed A */
+ p++; q++;
+ }
+}
+
+/* Recover failure of one data block plus the P block
+ *
+ * 'ptrs' holds 'disks' block pointers with P at ptrs[disks-2] and Q at
+ * ptrs[disks-1]; 'faila' indexes the lost source block.  Both the lost
+ * block and P are rewritten in place, 'bytes' bytes each, using Q and
+ * the remaining source blocks.
+ * The raid6_gf* tables and the global 'zero' buffer are read here.
+ * NOTE(review): presumably 'zero' must cover at least 'bytes' bytes -
+ * confirm against the callers.
+ */
+void raid6_datap_recov(int disks, size_t bytes, int faila, uint8_t **ptrs)
+{
+ uint8_t *p, *q, *dq;
+ const uint8_t *qmul; /* Q multiplier table */
+
+ p = ptrs[disks-2];
+ q = ptrs[disks-1];
+
+ /* Compute syndrome with zero for the missing data page
+ Use the dead data page as temporary storage for delta q */
+ dq = ptrs[faila];
+ ptrs[faila] = zero;
+
+ /* this overwrites P with the parity of the surviving blocks */
+ qsyndrome(p, dq, ptrs, disks-2, bytes);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ /* Now do it... */
+ while ( bytes-- ) {
+ /* rebuild the data byte from Q, then fold it into P */
+ *p++ ^= *dq = qmul[*q ^ *dq];
+ q++; dq++;
+ }
+}
+
+/* Try to find out if a specific disk has a problem.
+ *
+ * 'p' and 'q' are the P and Q blocks computed from the data; they are
+ * compared byte by byte ('chunk_size' bytes) with the blocks that were
+ * read from the array, stripes[diskP] and stripes[diskQ]:
+ *   - only P differs : the P device is suspected
+ *   - only Q differs : the Q device is suspected
+ *   - both differ    : the suspected data block is
+ *                      log(Qx) - log(Px) (mod 255), mapped to a device
+ *                      by geo_map() for stripe 'start'/'chunk_size'
+ *
+ * Returns:
+ *   -1  : no suspect found
+ *   >=0 : the one device every mismatching byte points at
+ *   -2  : the mismatches point at different devices, or at a device
+ *         index beyond the 'data_disks' + 2 devices of the array
+ *
+ * The verdict is now -2 as soon as two bytes disagree, including when
+ * the disagreement shows up on the very last byte of the chunk (that
+ * case used to return the last suspect instead).
+ *
+ * NOTE(review): a data block number >= 'data_disks' is still handed to
+ * geo_map(); some layouts reduce it modulo the device count, so such a
+ * mismatch can be reported against a real device - confirm whether that
+ * should be treated as "unknown" too.
+ */
+int raid6_check_disks(int data_disks, int start, int chunk_size,
+                      int level, int layout, int diskP, int diskQ,
+                      char *p, char *q, char **stripes)
+{
+    int suspect = -1;    /* device blamed so far, -1 for none */
+    int i;
+
+    for (i = 0; i < chunk_size; i++) {
+        uint8_t Px = (uint8_t)stripes[diskP][i] ^ (uint8_t)p[i];
+        uint8_t Qx = (uint8_t)stripes[diskQ][i] ^ (uint8_t)q[i];
+        int culprit;
+
+        if (Px == 0 && Qx == 0)
+            continue;    /* this byte is consistent */
+
+        if (Qx == 0) {
+            culprit = diskP;
+        } else if (Px == 0) {
+            culprit = diskQ;
+        } else {
+            int data_id = raid6_gflog[Qx] - raid6_gflog[Px];
+
+            if (data_id < 0)
+                data_id += 255;
+            culprit = geo_map(data_id, start/chunk_size,
+                              data_disks + 2, level, layout);
+        }
+
+        /* a device index outside the array cannot be trusted */
+        if (culprit >= data_disks + 2)
+            return -2;
+
+        if (suspect == -1)
+            /* first suspect; geo_map() may have answered -1, in
+             * which case there is still no suspect
+             */
+            suspect = culprit;
+        else if (culprit != suspect)
+            /* two different answers: give up */
+            return -2;
+    }
+
+    return suspect;
+}
+
+/*******************************************************************************
+ * Function: save_stripes
+ * Description:
+ * Function reads data (only data without P and Q) from array and writes
+ * it to buf and optionally to backup files.  Blocks that cannot be read
+ * are rebuilt from parity (and Q for RAID6) when enough blocks survive.
+ * Parameters:
+ * source : A list of 'fds' of the active disks.
+ * Some may be absent (negative)
+ * offsets : A list of offsets on disk belonging
+ * to the array [bytes]
+ * raid_disks : geometry: number of disks in the array
+ * chunk_size : geometry: chunk size [bytes]
+ * level : geometry: RAID level
+ * layout : geometry: layout
+ * nwrites : number of backup files
+ * dest : A list of 'fds' for mirrored targets
+ * (e.g. backup files). They are already seeked to right
+ * (write) location. If NULL, data will be written
+ * to the buf only
+ * start : start address of data to read (must be stripe-aligned)
+ * [bytes]
+ * length : length of data to read (must be stripe-aligned)
+ * [bytes]
+ * buf : buffer for data. It is large enough to hold
+ * one stripe. It is stripe aligned
+ * Returns:
+ * 0 : success
+ * -1 : fail (too many failed blocks, or a short write to 'dest')
+ * The function calls abort() when 'length' is not a whole number of
+ * stripes or when geo_map() cannot place a block.
+ ******************************************************************************/
+int save_stripes(int *source, unsigned long long *offsets,
+ int raid_disks, int chunk_size, int level, int layout,
+ int nwrites, int *dest,
+ unsigned long long start, unsigned long long length,
+ char *buf)
+{
+ int len;
+ int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
+ int disk;
+ int i;
+ unsigned long long length_test;
+
+ if (!tables_ready)
+ make_tables();
+ ensure_zero_has_size(chunk_size);
+
+ /* len is the amount of data, without P and Q, in one stripe */
+ len = data_disks * chunk_size;
+ length_test = length / len;
+ length_test *= len;
+
+ if (length != length_test) {
+ dprintf("Error: save_stripes(): Data are not alligned. EXIT\n");
+ dprintf("\tArea for saving stripes (length) = %llu\n", length);
+ dprintf("\tWork step (len) = %i\n", len);
+ dprintf("\tExpected save area (length_test) = %llu\n",
+ length_test);
+ abort();
+ }
+
+ while (length > 0) {
+ int failed = 0;
+ int fdisk[3], fblock[3];
+ /* Read every block of the stripe into buf: indexes below
+ * data_disks are data, the next one is P (-1 for geo_map)
+ * and the one after that is Q (-2).
+ */
+ for (disk = 0; disk < raid_disks ; disk++) {
+ unsigned long long offset;
+ int dnum;
+
+ offset = (start/chunk_size/data_disks)*chunk_size;
+ dnum = geo_map(disk < data_disks ? disk : data_disks - disk - 1,
+ start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ if (dnum < 0) abort();
+ if (source[dnum] < 0 ||
+ lseek64(source[dnum], offsets[dnum]+offset, 0) < 0 ||
+ read(source[dnum], buf+disk * chunk_size, chunk_size)
+ != chunk_size)
+ /* record at most three failures: fdisk[] is the
+ * device, fblock[] the block index within buf
+ */
+ if (failed <= 2) {
+ fdisk[failed] = dnum;
+ fblock[failed] = disk;
+ failed++;
+ }
+ }
+ if (failed == 0 || fblock[0] >= data_disks)
+ /* all data disks are good */
+ ;
+ else if (failed == 1 || fblock[1] >= data_disks+1) {
+ /* one failed data disk and good parity */
+ char *bufs[data_disks];
+ /* the parity block stands in for the failed data block */
+ for (i=0; i < data_disks; i++)
+ if (fblock[0] == i)
+ bufs[i] = buf + data_disks*chunk_size;
+ else
+ bufs[i] = buf + i*chunk_size;
+
+ xor_blocks(buf + fblock[0]*chunk_size,
+ bufs, data_disks, chunk_size);
+ } else if (failed > 2 || level != 6)
+ /* too much failure */
+ return -1;
+ else {
+ /* RAID6 computations needed. */
+ uint8_t *bufs[data_disks+4];
+ int qdisk;
+ int syndrome_disks;
+ disk = geo_map(-1, start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ qdisk = geo_map(-2, start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ if (is_ddf(layout)) {
+ /* q over 'raid_disks' blocks, in device order.
+ * 'p' and 'q' get to be all zero
+ */
+ for (i = 0; i < raid_disks; i++)
+ bufs[i] = zero;
+ for (i = 0; i < data_disks; i++) {
+ int dnum = geo_map(i,
+ start/chunk_size/data_disks,
+ raid_disks, level, layout);
+ int snum;
+ /* i is the logical block number, so is index to 'buf'.
+ * dnum is physical disk number
+ * and thus the syndrome number.
+ */
+ snum = dnum;
+ bufs[snum] = (uint8_t*)buf + chunk_size * i;
+ }
+ syndrome_disks = raid_disks;
+ } else {
+ /* for md, q is over 'data_disks' blocks,
+ * starting immediately after 'q'
+ * Note that for the '_6' variety, the p block
+ * makes a hole that we need to be careful of.
+ */
+ int j;
+ int snum = 0;
+ for (j = 0; j < raid_disks; j++) {
+ int dnum = (qdisk + 1 + j) % raid_disks;
+ if (dnum == disk || dnum == qdisk)
+ continue;
+ for (i = 0; i < data_disks; i++)
+ if (geo_map(i,
+ start/chunk_size/data_disks,
+ raid_disks, level, layout) == dnum)
+ break;
+ /* i is the logical block number, so is index to 'buf'.
+ * dnum is physical disk number
+ * snum is syndrome disk for which 0 is immediately after Q
+ */
+ bufs[snum] = (uint8_t*)buf + chunk_size * i;
+
+ /* from here on fdisk[] holds syndrome numbers */
+ if (fblock[0] == i)
+ fdisk[0] = snum;
+ if (fblock[1] == i)
+ fdisk[1] = snum;
+ snum++;
+ }
+
+ syndrome_disks = data_disks;
+ }
+
+ /* Place P and Q blocks at end of bufs */
+ bufs[syndrome_disks] = (uint8_t*)buf + chunk_size * data_disks;
+ bufs[syndrome_disks+1] = (uint8_t*)buf + chunk_size * (data_disks+1);
+
+ if (fblock[1] == data_disks)
+ /* One data failed, and parity failed */
+ raid6_datap_recov(syndrome_disks+2, chunk_size,
+ fdisk[0], bufs);
+ else {
+ /* keep the two failed indexes in ascending order */
+ if (fdisk[0] > fdisk[1]) {
+ int t = fdisk[0];
+ fdisk[0] = fdisk[1];
+ fdisk[1] = t;
+ }
+ /* Two data blocks failed, P,Q OK */
+ raid6_2data_recov(syndrome_disks+2, chunk_size,
+ fdisk[0], fdisk[1], bufs);
+ }
+ }
+ if (dest) {
+ /* only the data part of the stripe goes to each target */
+ for (i = 0; i < nwrites; i++)
+ if (write(dest[i], buf, len) != len)
+ return -1;
+ } else {
+ /* build next stripe in buffer */
+ buf += len;
+ }
+ length -= len;
+ start += len;
+ }
+ return 0;
+}
+
+/* Restore data:
+ * We are given:
+ *  dest        : a list of 'fds' of the active disks.  Negative
+ *                entries are not written to.
+ *  offsets     : offset of the array data on each disk [bytes]
+ *  raid_disks, chunk_size, level, layout : the geometry
+ *  source      : an 'fd' to read from when src_buf is NULL
+ *  read_offset : where to start reading in 'source' (or in src_buf)
+ *  start       : start address of the data in the array [bytes]
+ *  length      : amount of data to restore [bytes]; it must be a
+ *                multiple of the stripe size.
+ *  src_buf     : when not NULL, the data is copied from this buffer
+ *                instead of being read from 'source'
+ *
+ * We build a full stripe in memory and then write it out.
+ * We assume that there are enough working devices.
+ *
+ * Returns 0 on success, -1 on a seek/read/write failure, -2 when the
+ * working buffers are not available and -3 when 'length' does not end
+ * on a stripe boundary.
+ */
+int restore_stripes(int *dest, unsigned long long *offsets,
+                    int raid_disks, int chunk_size, int level, int layout,
+                    int source, unsigned long long read_offset,
+                    unsigned long long start, unsigned long long length,
+                    char *src_buf)
+{
+    char *stripe_buf;
+    char **stripes = xmalloc(raid_disks * sizeof(char*));
+    char **blocks = xmalloc(raid_disks * sizeof(char*));
+    int i;
+    int rv;
+
+    int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
+
+    if (posix_memalign((void**)&stripe_buf, 4096, raid_disks * chunk_size))
+        stripe_buf = NULL;
+
+    /* same zero-buffer handling as save_stripes() */
+    ensure_zero_has_size(chunk_size);
+
+    if (stripe_buf == NULL || stripes == NULL || blocks == NULL
+        || zero == NULL) {
+        rv = -2;
+        goto abort;
+    }
+    for (i = 0; i < raid_disks; i++)
+        stripes[i] = stripe_buf + i * chunk_size;
+    while (length > 0) {
+        unsigned int len = data_disks * chunk_size;
+        unsigned long long offset;
+        int disk, qdisk;
+        int syndrome_disks;
+        if (length < len) {
+            rv = -3;
+            goto abort;
+        }
+        /* Collect the data blocks, each at the slot of the device
+         * it belongs to.
+         */
+        for (i = 0; i < data_disks; i++) {
+            int dnum = geo_map(i, start/chunk_size/data_disks,
+                               raid_disks, level, layout);
+            if (src_buf == NULL) {
+                /* read from file */
+                if (lseek64(source, read_offset, 0) !=
+                    (off64_t)read_offset) {
+                    rv = -1;
+                    goto abort;
+                }
+                if (read(source,
+                         stripes[dnum],
+                         chunk_size) != chunk_size) {
+                    rv = -1;
+                    goto abort;
+                }
+            } else {
+                /* read from input buffer */
+                memcpy(stripes[dnum],
+                       src_buf + read_offset,
+                       chunk_size);
+            }
+            read_offset += chunk_size;
+        }
+        /* We have the data, now do the parity */
+        offset = (start/chunk_size/data_disks) * chunk_size;
+        switch (level) {
+        case 4:
+        case 5:
+            disk = geo_map(-1, start/chunk_size/data_disks,
+                           raid_disks, level, layout);
+            for (i = 0; i < data_disks; i++)
+                blocks[i] = stripes[(disk+1+i) % raid_disks];
+            xor_blocks(stripes[disk], blocks, data_disks, chunk_size);
+            break;
+        case 6:
+            disk = geo_map(-1, start/chunk_size/data_disks,
+                           raid_disks, level, layout);
+            qdisk = geo_map(-2, start/chunk_size/data_disks,
+                            raid_disks, level, layout);
+            if (is_ddf(layout)) {
+                /* q over 'raid_disks' blocks, in device order.
+                 * 'p' and 'q' get to be all zero
+                 */
+                for (i = 0; i < raid_disks; i++)
+                    if (i == disk || i == qdisk)
+                        blocks[i] = (char*)zero;
+                    else
+                        blocks[i] = stripes[i];
+                syndrome_disks = raid_disks;
+            } else {
+                /* for md, q is over 'data_disks' blocks,
+                 * starting immediately after 'q'
+                 * Note that for the '_6' variety, the p block
+                 * makes a hole that we need to be careful of:
+                 * walk all devices after 'q' and skip both
+                 * 'p' and 'q', so that P never enters the
+                 * syndrome and no data block is left out.
+                 */
+                int j;
+                int snum = 0;
+                for (j = 0; j < raid_disks; j++) {
+                    int dnum = (qdisk + 1 + j) % raid_disks;
+                    if (dnum == disk || dnum == qdisk)
+                        continue;
+                    blocks[snum++] = stripes[dnum];
+                }
+
+                syndrome_disks = data_disks;
+            }
+            qsyndrome((uint8_t*)stripes[disk],
+                      (uint8_t*)stripes[qdisk],
+                      (uint8_t**)blocks,
+                      syndrome_disks, chunk_size);
+            break;
+        }
+        /* Write the whole stripe out to every available device */
+        for (i = 0; i < raid_disks; i++)
+            if (dest[i] >= 0) {
+                if (lseek64(dest[i],
+                            offsets[i]+offset, 0) < 0) {
+                    rv = -1;
+                    goto abort;
+                }
+                if (write(dest[i], stripes[i],
+                          chunk_size) != chunk_size) {
+                    rv = -1;
+                    goto abort;
+                }
+            }
+        length -= len;
+        start += len;
+    }
+    rv = 0;
+
+abort:
+    free(stripe_buf);
+    free(stripes);
+    free(blocks);
+    return rv;
+}
+
+#ifdef MAIN
+
+/* Read the data and p (and q) blocks, and check we got them right.
+ *
+ * For every chunk of the range starting at 'start' and 'length' bytes
+ * long (both relative to the per-device 'offsets'), one chunk is read
+ * from each of the 'raid_disks' devices in 'source'.  For level 6 the
+ * P and Q blocks are recomputed from the data and compared with what
+ * was read; differences and the device suspected by
+ * raid6_check_disks() are reported on stdout.  No comparison is made
+ * for other levels.
+ *
+ * Returns 0 when the whole range could be read, -1 when a seek or read
+ * fails or geo_map() cannot place a block.  All working buffers are
+ * released before returning.
+ */
+int test_stripes(int *source, unsigned long long *offsets,
+                 int raid_disks, int chunk_size, int level, int layout,
+                 unsigned long long start, unsigned long long length)
+{
+    char *stripe_buf = xmalloc(raid_disks * chunk_size);
+    char **stripes = xmalloc(raid_disks * sizeof(char*));
+    char **blocks = xmalloc(raid_disks * sizeof(char*));
+    char *p = xmalloc(chunk_size);
+    char *q = xmalloc(chunk_size);
+    int rv = 0;
+
+    int i;
+    int diskP, diskQ;
+    int data_disks = raid_disks - (level == 5 ? 1: 2);
+
+    if (!tables_ready)
+        make_tables();
+
+    for (i = 0; i < raid_disks; i++)
+        stripes[i] = stripe_buf + i * chunk_size;
+
+    while (length > 0) {
+        int disk;
+
+        for (i = 0; i < raid_disks; i++) {
+            if (lseek64(source[i], offsets[i]+start, 0) < 0 ||
+                read(source[i], stripes[i], chunk_size) != chunk_size) {
+                rv = -1;
+                goto out;
+            }
+        }
+        for (i = 0; i < data_disks; i++) {
+            int dnum = geo_map(i, start/chunk_size, raid_disks,
+                               level, layout);
+            if (dnum < 0) {
+                /* level/layout not known to geo_map() */
+                rv = -1;
+                goto out;
+            }
+            blocks[i] = stripes[dnum];
+            printf("%d->%d\n", i, dnum);
+        }
+        switch(level) {
+        case 6:
+            qsyndrome((uint8_t*)p, (uint8_t*)q,
+                      (uint8_t**)blocks, data_disks, chunk_size);
+            diskP = geo_map(-1, start/chunk_size, raid_disks,
+                            level, layout);
+            if (memcmp(p, stripes[diskP], chunk_size) != 0) {
+                printf("P(%d) wrong at %llu\n", diskP,
+                       start / chunk_size);
+            }
+            diskQ = geo_map(-2, start/chunk_size, raid_disks,
+                            level, layout);
+            if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
+                printf("Q(%d) wrong at %llu\n", diskQ,
+                       start / chunk_size);
+            }
+            disk = raid6_check_disks(data_disks, start, chunk_size,
+                                     level, layout, diskP, diskQ,
+                                     p, q, stripes);
+            if(disk >= 0) {
+                printf("Possible failed disk: %d\n", disk);
+            }
+            if(disk == -2) {
+                printf("Failure detected, but disk unknown\n");
+            }
+            break;
+        }
+        /* 'length' is unsigned: stop at the last chunk instead of
+         * letting a trailing partial chunk wrap it around.
+         */
+        if (length <= (unsigned long long)chunk_size)
+            break;
+        length -= chunk_size;
+        start += chunk_size;
+    }
+
+out:
+    free(stripe_buf);
+    free(stripes);
+    free(blocks);
+    free(p);
+    free(q);
+    return rv;
+}
+
+/* Parse 'str' as a decimal number.
+ * When the whole string is consumed the value is returned and '*err' is
+ * left alone; otherwise '*err' is pointed at 'str' and 0 is returned.
+ */
+unsigned long long getnum(char *str, char **err)
+{
+    char *end;
+    unsigned long long value = strtoull(str, &end, 10);
+
+    if (end != str && *end == '\0')
+        return value;
+
+    *err = str;
+    return 0;
+}
+
+/* Command-line driver, built only when MAIN is defined:
+ *   save/restore/test file raid_disks chunk_size level layout start length devices...
+ * Each device may be followed by ':' and an offset counted in 512-byte
+ * units.  'save' calls save_stripes() with 'file' as the only target,
+ * 'restore' calls restore_stripes() reading from 'file', and 'test'
+ * calls test_stripes() on the devices.
+ * Exit status: 0 on success, 1 for a usage error or a failed
+ * operation, 2 for a bad argument, 3 when a file cannot be opened.
+ */
+int main(int argc, char *argv[])
+{
+    int save;
+    int *fds;
+    char *file;
+    char *buf;
+    int storefd;
+    unsigned long long *offsets;
+    int raid_disks, chunk_size, level, layout;
+    unsigned long long start, length;
+    int i;
+
+    char *err = NULL;
+    if (argc < 10) {
+        fprintf(stderr, "Usage: test_stripe save/restore/test file raid_disks"
+            " chunk_size level layout start length devices...\n");
+        exit(1);
+    }
+    if (strcmp(argv[1], "save")==0)
+        save = 1;
+    else if (strcmp(argv[1], "restore") == 0)
+        save = 0;
+    else if (strcmp(argv[1], "test") == 0)
+        save = 2;
+    else {
+        fprintf(stderr, "test_stripe: must give 'save', 'restore' or 'test'.\n");
+        exit(2);
+    }
+
+    file = argv[2];
+    raid_disks = getnum(argv[3], &err);
+    chunk_size = getnum(argv[4], &err);
+    level = getnum(argv[5], &err);
+    layout = getnum(argv[6], &err);
+    start = getnum(argv[7], &err);
+    length = getnum(argv[8], &err);
+    if (err) {
+        fprintf(stderr, "test_stripe: Bad number: %s\n", err);
+        exit(2);
+    }
+    /* every mode divides by the chunk size */
+    if (chunk_size <= 0) {
+        fprintf(stderr, "test_stripe: Bad number: %s\n", argv[4]);
+        exit(2);
+    }
+    if (argc != raid_disks + 9) {
+        fprintf(stderr, "test_stripe: wrong number of devices: want %d found %d\n",
+            raid_disks, argc-9);
+        exit(2);
+    }
+    fds = xmalloc(raid_disks * sizeof(*fds));
+    offsets = xcalloc(raid_disks, sizeof(*offsets));
+
+    storefd = open(file, O_RDWR);
+    if (storefd < 0) {
+        perror(file);
+        fprintf(stderr, "test_stripe: could not open %s.\n", file);
+        exit(3);
+    }
+    for (i=0; i<raid_disks; i++) {
+        /* split "device:offset" in place */
+        char *p;
+        p = strchr(argv[9+i], ':');
+
+        if(p != NULL) {
+            *p++ = '\0';
+            offsets[i] = atoll(p) * 512;
+        }
+
+        fds[i] = open(argv[9+i], O_RDWR);
+        if (fds[i] < 0) {
+            perror(argv[9+i]);
+            fprintf(stderr,"test_stripe: cannot open %s.\n", argv[9+i]);
+            exit(3);
+        }
+    }
+
+    buf = xmalloc(raid_disks * chunk_size);
+
+    if (save == 1) {
+        int rv = save_stripes(fds, offsets,
+                      raid_disks, chunk_size, level, layout,
+                      1, &storefd,
+                      start, length, buf);
+        if (rv != 0) {
+            fprintf(stderr,
+                "test_stripe: save_stripes returned %d\n", rv);
+            exit(1);
+        }
+    } else if (save == 2) {
+        int rv = test_stripes(fds, offsets,
+                      raid_disks, chunk_size, level, layout,
+                      start, length);
+        if (rv != 0) {
+            fprintf(stderr,
+                "test_stripe: test_stripes returned %d\n", rv);
+            exit(1);
+        }
+    } else {
+        int rv = restore_stripes(fds, offsets,
+                     raid_disks, chunk_size, level, layout,
+                     storefd, 0ULL,
+                     start, length, NULL);
+        if (rv != 0) {
+            fprintf(stderr,
+                "test_stripe: restore_stripes returned %d\n",
+                rv);
+            exit(1);
+        }
+    }
+    exit(0);
+}
+
+#endif /* MAIN */
diff --git a/sg_io.c b/sg_io.c
new file mode 100644
index 00000000..50ad180d
--- /dev/null
+++ b/sg_io.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2007-2008 Intel Corporation
+ *
+ * Retrieve drive serial numbers for scsi disks
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <string.h>
+#include <scsi/scsi.h>
+#include <scsi/sg.h>
+#include <sys/ioctl.h>
+
+/* Send an INQUIRY for vital product data page 0x80 to 'fd' through the
+ * SG_IO ioctl and let the device place its reply in 'buf'.
+ *
+ * The command block built here carries the allocation length in a
+ * single byte, so at most 255 bytes are requested even when 'buf_len'
+ * is larger; the same clamped length is given to the transfer so that
+ * both always agree.
+ *
+ * Returns the result of the ioctl() call.
+ */
+int scsi_get_serial(int fd, void *buf, size_t buf_len)
+{
+    unsigned char alloc_len = buf_len > 255 ? 255 : (unsigned char)buf_len;
+    unsigned char inq_cmd[] = {INQUIRY, 1, 0x80, 0, alloc_len, 0};
+    unsigned char sense[32];
+    struct sg_io_hdr io_hdr;
+
+    memset(&io_hdr, 0, sizeof(io_hdr));
+    io_hdr.interface_id = 'S';
+    io_hdr.cmdp = inq_cmd;
+    io_hdr.cmd_len = sizeof(inq_cmd);
+    io_hdr.dxferp = buf;
+    io_hdr.dxfer_len = alloc_len;
+    io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+    io_hdr.sbp = sense;
+    io_hdr.mx_sb_len = sizeof(sense);
+    io_hdr.timeout = 5000;
+
+    return ioctl(fd, SG_IO, &io_hdr);
+}
diff --git a/sha1.c b/sha1.c
new file mode 100644
index 00000000..11be7045
--- /dev/null
+++ b/sha1.c
@@ -0,0 +1,415 @@
+/* sha1.c - Functions to compute SHA1 message digest of files or
+ memory blocks according to the NIST specification FIPS-180-1.
+
+ Copyright (C) 2000, 2001, 2003, 2004, 2005, 2006, 2008 Free Software
+ Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Scott G. Miller
+ Credits:
+ Robert Klep <robert@ilse.nl> -- Expansion function fix
+*/
+
+//#include <config.h>
+
+#include "sha1.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#if USE_UNLOCKED_IO
+# include "unlocked-io.h"
+#endif
+
+#ifdef WORDS_BIGENDIAN
+# define SWAP(n) (n)
+#else
+# define SWAP(n) \
+ (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
+#endif
+
+#define BLOCKSIZE 4096
+#if BLOCKSIZE % 64 != 0
+# error "invalid BLOCKSIZE"
+#endif
+
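+/* This array contains the bytes used to pad the buffer to the next
+ 64-byte boundary: a single 0x80 byte followed by zeros. (SHA-1 uses
+ the same padding scheme as MD5; see RFC 1321, 3.1: Step 1, or
+ FIPS 180-1, section 4.) */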
+static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ };
+
+/* Reset CTX so that a new SHA1 computation can start: load the five
+   32-bit chaining words A..E with the algorithm's start constants and
+   clear both the running byte total and the count of buffered bytes.
+   This must be done before CTX is handed to any other sha1_* routine.  */
+void
+sha1_init_ctx (struct sha1_ctx *ctx)
+{
+  /* Nothing has been hashed and nothing is buffered yet.  */
+  ctx->buflen = 0;
+  ctx->total[1] = 0;
+  ctx->total[0] = 0;
+
+  /* Start constants of the chaining variables.  */
+  ctx->E = 0xc3d2e1f0;
+  ctx->D = 0x10325476;
+  ctx->C = 0x98badcfe;
+  ctx->B = 0xefcdab89;
+  ctx->A = 0x67452301;
+}
+
+/* Copy the current chaining words A..E of CTX into the first 20 bytes
+   at RESBUF and return RESBUF.  Each word goes through SWAP, which
+   leaves it unchanged when WORDS_BIGENDIAN is defined and reverses its
+   bytes otherwise, so the stored bytes run from the most significant
+   byte of each word to the least significant one.
+
+   IMPORTANT: RESBUF is written through a sha1_uint32 pointer, so on
+   some systems it must be correctly aligned for a 32-bit value.  */
+void *
+sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf)
+{
+  sha1_uint32 *out = (sha1_uint32 *) resbuf;
+  const sha1_uint32 state[5] = { ctx->A, ctx->B, ctx->C, ctx->D, ctx->E };
+  size_t i;
+
+  for (i = 0; i < 5; i++)
+    out[i] = SWAP (state[i]);
+
+  return resbuf;
+}
+
+/* Finish the digest held in CTX: account for the bytes still waiting
+   in CTX->buffer, append the padding taken from fillbuf and the total
+   message length in bits, hash the final one or two 64-byte blocks and
+   hand the result to sha1_read_ctx, whose return value (RESBUF) is
+   returned.
+
+   IMPORTANT: On some systems it is required that RESBUF is correctly
+   aligned for a 32-bit value.  */
+void *
+sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf)
+{
+  sha1_uint32 pending = ctx->buflen;
+  size_t nwords;
+
+  /* The padding needs at least one byte and the length takes eight, so
+     one block (16 words) is enough only while fewer than 56 bytes are
+     pending; otherwise two blocks (32 words) are used.  */
+  if (pending < 56)
+    nwords = 64 / 4;
+  else
+    nwords = 64 * 2 / 4;
+
+  /* Add the pending bytes to the 64-bit total, carrying into the
+     high word when the low word wraps.  */
+  ctx->total[0] += pending;
+  if (ctx->total[0] < pending)
+    ++ctx->total[1];
+
+  /* The last two words hold the message length in *bits*.  */
+  ctx->buffer[nwords - 2] = SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29));
+  ctx->buffer[nwords - 1] = SWAP (ctx->total[0] << 3);
+
+  /* Fill the gap between the data and the length with padding.  */
+  memcpy ((char *) ctx->buffer + pending, fillbuf, (nwords - 2) * 4 - pending);
+
+  /* Hash the padded tail and export the digest.  */
+  sha1_process_block (ctx->buffer, nwords * 4, ctx);
+
+  return sha1_read_ctx (ctx, resbuf);
+}
+
+/* Compute SHA1 message digest for bytes read from STREAM. The
+ resulting message digest will be written into the 20 bytes
+ beginning at RESBLOCK. Returns 0 on success, or 1 when fread
+ delivered no data and the error indicator of STREAM is set. */
+int
+sha1_stream (FILE *stream, void *resblock)
+{
+ struct sha1_ctx ctx;
+ char buffer[BLOCKSIZE + 72];
+ size_t sum;
+
+ /* Initialize the computation context. */
+ sha1_init_ctx (&ctx);
+
+ /* Iterate over full file contents. */
+ while (1)
+ {
+ /* We read the file in blocks of BLOCKSIZE bytes. One call of the
+ computation function processes the whole buffer so that with the
+ next round of the loop another block can be read. SUM counts
+ the bytes collected for the current block. */
+ size_t n;
+ sum = 0;
+
+ /* Read block. Take care for partial reads: keep calling fread
+ until BLOCKSIZE bytes are available or the stream ends. */
+ while (1)
+ {
+ n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);
+
+ sum += n;
+
+ if (sum == BLOCKSIZE)
+ break;
+
+ if (n == 0)
+ {
+ /* Check for the error flag IFF N == 0, so that we don't
+ exit the loop after a partial read due to e.g., EAGAIN
+ or EWOULDBLOCK. */
+ if (ferror (stream))
+ return 1;
+ goto process_partial_block;
+ }
+
+ /* We've read at least one byte, so ignore errors. But always
+ check for EOF, since feof may be true even though N > 0.
+ Otherwise, we could end up calling fread after EOF. */
+ if (feof (stream))
+ goto process_partial_block;
+ }
+
+ /* Process buffer with BLOCKSIZE bytes. Note that
+ BLOCKSIZE % 64 == 0
+ */
+ sha1_process_block (buffer, BLOCKSIZE, &ctx);
+ }
+
+ process_partial_block:;
+
+ /* Process any remaining bytes (fewer than BLOCKSIZE). */
+ if (sum > 0)
+ sha1_process_bytes (buffer, sum, &ctx);
+
+ /* Construct result in desired memory. */
+ sha1_finish_ctx (&ctx, resblock);
+ return 0;
+}
+
+/* Hash the LEN bytes starting at BUFFER in a single call.  The 20-byte
+   digest is stored at RESBLOCK by sha1_finish_ctx, and RESBLOCK is
+   returned.  */
+void *
+sha1_buffer (const char *buffer, size_t len, void *resblock)
+{
+  struct sha1_ctx state;
+
+  /* Start from a fresh context and feed it the whole input; a tail that
+     does not fill a 64-byte block is kept inside the context by
+     sha1_process_bytes until the digest is finished below.  */
+  sha1_init_ctx (&state);
+  sha1_process_bytes (buffer, len, &state);
+
+  /* Pad, hash the tail and write the result.  */
+  return sha1_finish_ctx (&state, resblock);
+}
+
+/* Feed LEN bytes starting at BUFFER into the hash state in CTX.  LEN
+   may have any value: complete 64-byte blocks are hashed right away,
+   and whatever is left over stays in CTX->buffer (its size recorded
+   in CTX->buflen) until more data arrives or the context is
+   finished.  */
+void
+sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
+{
+  const char *src = (const char *) buffer;
+
+  /* Bytes are already waiting in the internal buffer: top it up from
+     the new input first (the buffer holds at most 128 bytes).  */
+  if (ctx->buflen != 0)
+    {
+      size_t pending = ctx->buflen;
+      size_t room = 128 - pending;
+      size_t take = (len < room) ? len : room;
+
+      memcpy ((char *) ctx->buffer + pending, src, take);
+      ctx->buflen += take;
+
+      if (ctx->buflen > 64)
+        {
+          /* Number of buffered bytes that form complete blocks.  */
+          size_t whole = (pending + take) & ~63;
+
+          sha1_process_block (ctx->buffer, whole, ctx);
+
+          ctx->buflen &= 63;
+          /* Source and destination of this copy cannot overlap.  */
+          memcpy (ctx->buffer, (char *) ctx->buffer + whole, ctx->buflen);
+        }
+
+      src += take;
+      len -= take;
+    }
+
+  /* Hash the complete blocks directly from the caller's memory.  */
+  if (len >= 64)
+    {
+#if !_STRING_ARCH_unaligned
+# define alignof(type) offsetof (struct { char c; type x; }, x)
+# define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
+      if (UNALIGNED_P (src))
+        {
+          /* Misaligned input: bounce each block through CTX->buffer.
+             The final 64 bytes, if any, are left for the code below.  */
+          for (; len > 64; len -= 64, src += 64)
+            sha1_process_block (memcpy (ctx->buffer, src, 64), 64, ctx);
+        }
+      else
+#endif
+        {
+          size_t whole = len & ~63;
+
+          sha1_process_block (src, whole, ctx);
+          src += whole;
+          len &= 63;
+        }
+    }
+
+  /* Keep whatever is left in the internal buffer.  */
+  if (len > 0)
+    {
+      size_t held = ctx->buflen;
+
+      memcpy ((char *) ctx->buffer + held, src, len);
+      held += len;
+      if (held >= 64)
+        {
+          sha1_process_block (ctx->buffer, 64, ctx);
+          held -= 64;
+          memcpy (ctx->buffer, &ctx->buffer[16], held);
+        }
+      ctx->buflen = held;
+    }
+}
+
+/* --- Code below is the primary difference between md5.c and sha1.c --- */
+
+/* SHA1 round constants */
+#define K1 0x5a827999
+#define K2 0x6ed9eba1
+#define K3 0x8f1bbcdc
+#define K4 0xca62c1d6
+
+/* Round functions. Note that F2 is the same as F4. */
+#define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) )
+#define F2(B,C,D) (B ^ C ^ D)
+#define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) )
+#define F4(B,C,D) (B ^ C ^ D)
+
+/* Process LEN bytes of BUFFER, accumulating context into CTX.
+   It is assumed that LEN % 64 == 0.
+   BUFFER is read as 32-bit words, each passed through SWAP; every
+   iteration of the main loop consumes sixteen of them (64 bytes) and
+   folds the result of its 80 rounds back into CTX->A .. CTX->E.
+   Most of this code comes from GnuPG's cipher/sha1.c.  */
+
+void
+sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
+{
+  const sha1_uint32 *words = (const sha1_uint32*) buffer;
+  size_t nwords = len / sizeof (sha1_uint32);
+  const sha1_uint32 *endp = words + nwords;
+  sha1_uint32 x[16];
+  sha1_uint32 a = ctx->A;
+  sha1_uint32 b = ctx->B;
+  sha1_uint32 c = ctx->C;
+  sha1_uint32 d = ctx->D;
+  sha1_uint32 e = ctx->E;
+  sha1_uint32 lolen = len;
+
+  /* First increment the byte count, which is kept as a 64-bit value in
+     two 32-bit halves (the message length is limited to 2^64 bits).
+     LEN is a size_t and may be wider than 32 bits, so add its low half
+     to total[0] and its high half, plus the carry out of total[0], to
+     total[1].  The shift is split in two so that it stays defined when
+     size_t itself is only 32 bits wide.  */
+  ctx->total[0] += lolen;
+  ctx->total[1] += (len >> 31 >> 1) + (ctx->total[0] < lolen);
+
+#define rol(x, n) (((x) << (n)) | ((sha1_uint32) (x) >> (32 - (n))))
+
+#define M(I) ( tm = x[I&0x0f] ^ x[(I-14)&0x0f] \
+                    ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \
+               , (x[I&0x0f] = rol(tm, 1)) )
+
+#define R(A,B,C,D,E,F,K,M) do { E += rol( A, 5 )     \
+                                      + F( B, C, D )  \
+                                      + K             \
+                                      + M;            \
+                                 B = rol( B, 30 );    \
+                               } while(0)
+
+  while (words < endp)
+    {
+      sha1_uint32 tm;
+      int t;
+      /* Load the next sixteen message words into the schedule.  */
+      for (t = 0; t < 16; t++)
+        {
+          x[t] = SWAP (*words);
+          words++;
+        }
+
+      R( a, b, c, d, e, F1, K1, x[ 0] );
+      R( e, a, b, c, d, F1, K1, x[ 1] );
+      R( d, e, a, b, c, F1, K1, x[ 2] );
+      R( c, d, e, a, b, F1, K1, x[ 3] );
+      R( b, c, d, e, a, F1, K1, x[ 4] );
+      R( a, b, c, d, e, F1, K1, x[ 5] );
+      R( e, a, b, c, d, F1, K1, x[ 6] );
+      R( d, e, a, b, c, F1, K1, x[ 7] );
+      R( c, d, e, a, b, F1, K1, x[ 8] );
+      R( b, c, d, e, a, F1, K1, x[ 9] );
+      R( a, b, c, d, e, F1, K1, x[10] );
+      R( e, a, b, c, d, F1, K1, x[11] );
+      R( d, e, a, b, c, F1, K1, x[12] );
+      R( c, d, e, a, b, F1, K1, x[13] );
+      R( b, c, d, e, a, F1, K1, x[14] );
+      R( a, b, c, d, e, F1, K1, x[15] );
+      R( e, a, b, c, d, F1, K1, M(16) );
+      R( d, e, a, b, c, F1, K1, M(17) );
+      R( c, d, e, a, b, F1, K1, M(18) );
+      R( b, c, d, e, a, F1, K1, M(19) );
+      R( a, b, c, d, e, F2, K2, M(20) );
+      R( e, a, b, c, d, F2, K2, M(21) );
+      R( d, e, a, b, c, F2, K2, M(22) );
+      R( c, d, e, a, b, F2, K2, M(23) );
+      R( b, c, d, e, a, F2, K2, M(24) );
+      R( a, b, c, d, e, F2, K2, M(25) );
+      R( e, a, b, c, d, F2, K2, M(26) );
+      R( d, e, a, b, c, F2, K2, M(27) );
+      R( c, d, e, a, b, F2, K2, M(28) );
+      R( b, c, d, e, a, F2, K2, M(29) );
+      R( a, b, c, d, e, F2, K2, M(30) );
+      R( e, a, b, c, d, F2, K2, M(31) );
+      R( d, e, a, b, c, F2, K2, M(32) );
+      R( c, d, e, a, b, F2, K2, M(33) );
+      R( b, c, d, e, a, F2, K2, M(34) );
+      R( a, b, c, d, e, F2, K2, M(35) );
+      R( e, a, b, c, d, F2, K2, M(36) );
+      R( d, e, a, b, c, F2, K2, M(37) );
+      R( c, d, e, a, b, F2, K2, M(38) );
+      R( b, c, d, e, a, F2, K2, M(39) );
+      R( a, b, c, d, e, F3, K3, M(40) );
+      R( e, a, b, c, d, F3, K3, M(41) );
+      R( d, e, a, b, c, F3, K3, M(42) );
+      R( c, d, e, a, b, F3, K3, M(43) );
+      R( b, c, d, e, a, F3, K3, M(44) );
+      R( a, b, c, d, e, F3, K3, M(45) );
+      R( e, a, b, c, d, F3, K3, M(46) );
+      R( d, e, a, b, c, F3, K3, M(47) );
+      R( c, d, e, a, b, F3, K3, M(48) );
+      R( b, c, d, e, a, F3, K3, M(49) );
+      R( a, b, c, d, e, F3, K3, M(50) );
+      R( e, a, b, c, d, F3, K3, M(51) );
+      R( d, e, a, b, c, F3, K3, M(52) );
+      R( c, d, e, a, b, F3, K3, M(53) );
+      R( b, c, d, e, a, F3, K3, M(54) );
+      R( a, b, c, d, e, F3, K3, M(55) );
+      R( e, a, b, c, d, F3, K3, M(56) );
+      R( d, e, a, b, c, F3, K3, M(57) );
+      R( c, d, e, a, b, F3, K3, M(58) );
+      R( b, c, d, e, a, F3, K3, M(59) );
+      R( a, b, c, d, e, F4, K4, M(60) );
+      R( e, a, b, c, d, F4, K4, M(61) );
+      R( d, e, a, b, c, F4, K4, M(62) );
+      R( c, d, e, a, b, F4, K4, M(63) );
+      R( b, c, d, e, a, F4, K4, M(64) );
+      R( a, b, c, d, e, F4, K4, M(65) );
+      R( e, a, b, c, d, F4, K4, M(66) );
+      R( d, e, a, b, c, F4, K4, M(67) );
+      R( c, d, e, a, b, F4, K4, M(68) );
+      R( b, c, d, e, a, F4, K4, M(69) );
+      R( a, b, c, d, e, F4, K4, M(70) );
+      R( e, a, b, c, d, F4, K4, M(71) );
+      R( d, e, a, b, c, F4, K4, M(72) );
+      R( c, d, e, a, b, F4, K4, M(73) );
+      R( b, c, d, e, a, F4, K4, M(74) );
+      R( a, b, c, d, e, F4, K4, M(75) );
+      R( e, a, b, c, d, F4, K4, M(76) );
+      R( d, e, a, b, c, F4, K4, M(77) );
+      R( c, d, e, a, b, F4, K4, M(78) );
+      R( b, c, d, e, a, F4, K4, M(79) );
+
+      /* Fold this block into the context and continue from the
+         updated chaining values.  */
+      a = ctx->A += a;
+      b = ctx->B += b;
+      c = ctx->C += c;
+      d = ctx->D += d;
+      e = ctx->E += e;
+    }
+}
diff --git a/sha1.h b/sha1.h
new file mode 100644
index 00000000..0f986585
--- /dev/null
+++ b/sha1.h
@@ -0,0 +1,136 @@
+/* Declarations of functions and data types used for SHA1 sum
+ library functions.
+ Copyright (C) 2000, 2001, 2003, 2005, 2006, 2008
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef SHA1_H
+# define SHA1_H 1
+
+#include <stdio.h>
+
+#if 1 /* defined HAVE_LIMITS_H || _LIBC */
+# include <limits.h>
+#endif
+
+/* The following contortions are an attempt to use the C preprocessor
+ to determine an unsigned integral type that is 32 bits wide. An
+ alternative approach is to use autoconf's AC_CHECK_SIZEOF macro, but
+ doing that would require that the configure script compile and *run*
+ the resulting executable. Locally running cross-compiled executables
+ is usually not possible. */
+
+#if 1 /* def _LIBC */
+# include <stdint.h>
+typedef uint32_t sha1_uint32;
+typedef uintptr_t sha1_uintptr;
+#else
+# define INT_MAX_32_BITS 2147483647
+
+/* If UINT_MAX isn't defined, assume it's a 32-bit type.
+ This should be valid for all systems GNU cares about because
+ that doesn't include 16-bit systems, and only modern systems
+ (that certainly have <limits.h>) have 64+-bit integral types. */
+
+# ifndef INT_MAX
+# define INT_MAX INT_MAX_32_BITS
+# endif
+
+# if INT_MAX == INT_MAX_32_BITS
+ typedef unsigned int sha1_uint32;
+# else
+# if SHRT_MAX == INT_MAX_32_BITS
+ typedef unsigned short sha1_uint32;
+# else
+# if LONG_MAX == INT_MAX_32_BITS
+ typedef unsigned long sha1_uint32;
+# else
+ /* The following line is intended to evoke an error.
+ Using #error is not portable enough. */
+ "Cannot determine unsigned 32-bit data type."
+# endif
+# endif
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Structure to save state of computation between the single steps.
+ A..E are the five 32-bit chaining words, total[] counts the bytes
+ processed so far and buffer[] holds input that does not yet make
+ up a complete 64-byte block. */
+struct sha1_ctx
+{
+ sha1_uint32 A;
+ sha1_uint32 B;
+ sha1_uint32 C;
+ sha1_uint32 D;
+ sha1_uint32 E;
+
+ sha1_uint32 total[2]; /* byte count: [0] low word, [1] high word */
+ sha1_uint32 buflen; /* number of bytes currently held in buffer */
+ sha1_uint32 buffer[32]; /* 128 bytes of space for pending input */
+};
+
+/* Initialize structure containing state of computation. */
+extern void sha1_init_ctx (struct sha1_ctx *ctx);
+
+/* Starting with the result of former calls of this function (or of the
+ initialization function), update the context for the next LEN bytes
+ starting at BUFFER.
+ It is necessary that LEN is a multiple of 64!!! */
+extern void sha1_process_block (const void *buffer, size_t len,
+ struct sha1_ctx *ctx);
+
+/* Starting with the result of former calls of this function (or of the
+ initialization function), update the context for the next LEN bytes
+ starting at BUFFER.
+ It is NOT required that LEN is a multiple of 64. */
+extern void sha1_process_bytes (const void *buffer, size_t len,
+ struct sha1_ctx *ctx);
+
+/* Process the remaining bytes in the buffer and put result from CTX
+ in first 20 bytes following RESBUF. The result is always in big
+ endian byte order (most significant byte of each word first), so that
+ a byte-wise output yields the wanted ASCII representation of the
+ message digest.
+
+ IMPORTANT: On some systems it is required that RESBUF be correctly
+ aligned for a 32-bit value. */
+extern void *sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf);
+
+/* Put result from CTX in first 20 bytes following RESBUF. The result is
+ always in big endian byte order (most significant byte of each word
+ first), so that a byte-wise output yields the wanted ASCII
+ representation of the message digest.
+
+ IMPORTANT: On some systems it is required that RESBUF is correctly
+ aligned for a 32-bit value. */
+extern void *sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf);
+
+/* Compute SHA1 message digest for bytes read from STREAM. The
+ resulting message digest number will be written into the 20 bytes
+ beginning at RESBLOCK. */
+extern int sha1_stream (FILE *stream, void *resblock);
+
+/* Compute SHA1 message digest for LEN bytes beginning at BUFFER. The
+ result is always in big endian byte order (most significant byte of
+ each word first), so that a byte-wise output yields the wanted ASCII
+ representation of the message digest. */
+extern void *sha1_buffer (const char *buffer, size_t len, void *resblock);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/super-ddf.c b/super-ddf.c
new file mode 100644
index 00000000..bc0ce2c0
--- /dev/null
+++ b/super-ddf.c
@@ -0,0 +1,5303 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2006-2014 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neil@brown.name>
+ *
+ * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
+ * (July 28 2006). Reused by permission of SNIA.
+ */
+
+#define HAVE_STDINT_H 1
+#include "mdadm.h"
+#include "mdmon.h"
+#include "sha1.h"
+#include <values.h>
+#include <stddef.h>
+
+/* a non-official T10 name for creation GUIDs */
+static char T10[] = "Linux-MD";
+
+/* DDF timestamps are 1980 based, so we need to add
+ * second-in-decade-of-seventies to convert to linux timestamps.
+ * 10 years with 2 leap years.
+ */
+#define DECADE (3600*24*(365*10+2))
+unsigned long crc32(
+ unsigned long crc,
+ const unsigned char *buf,
+ unsigned len);
+
+#define DDF_NOTFOUND (~0U)
+#define DDF_CONTAINER (DDF_NOTFOUND-1)
+
+/* Default for safe_mode_delay. Same value as for IMSM.
+ */
+static const int DDF_SAFE_MODE_DELAY = 4000;
+
+/* The DDF metadata handling.
+ * DDF metadata lives at the end of the device.
+ * The last 512 byte block provides an 'anchor' which is used to locate
+ * the rest of the metadata which usually lives immediately behind the anchor.
+ *
+ * Note:
+ * - all multibyte numeric fields are bigendian.
+ * - all strings are space padded.
+ *
+ */
+
+typedef struct __be16 {
+ __u16 _v16;
+} be16;
+#define be16_eq(x, y) ((x)._v16 == (y)._v16)
+#define be16_and(x, y) ((x)._v16 & (y)._v16)
+#define be16_or(x, y) ((x)._v16 | (y)._v16)
+#define be16_clear(x, y) ((x)._v16 &= ~(y)._v16)
+#define be16_set(x, y) ((x)._v16 |= (y)._v16)
+
+typedef struct __be32 {
+ __u32 _v32;
+} be32;
+#define be32_eq(x, y) ((x)._v32 == (y)._v32)
+
+typedef struct __be64 {
+ __u64 _v64;
+} be64;
+#define be64_eq(x, y) ((x)._v64 == (y)._v64)
+
+#define be16_to_cpu(be) __be16_to_cpu((be)._v16)
+/* Convert X with __cpu_to_be16() and wrap the result in the be16 struct
+ * type, so it cannot be mixed up with a plain __u16 by accident.
+ */
+static inline be16 cpu_to_be16(__u16 x)
+{
+	be16 wrapped;
+
+	wrapped._v16 = __cpu_to_be16(x);
+	return wrapped;
+}
+
+#define be32_to_cpu(be) __be32_to_cpu((be)._v32)
+/* Convert X with __cpu_to_be32() and wrap the result in the be32 struct
+ * type, so it cannot be mixed up with a plain __u32 by accident.
+ */
+static inline be32 cpu_to_be32(__u32 x)
+{
+	be32 wrapped;
+
+	wrapped._v32 = __cpu_to_be32(x);
+	return wrapped;
+}
+
+#define be64_to_cpu(be) __be64_to_cpu((be)._v64)
+/* Convert X with __cpu_to_be64() and wrap the result in the be64 struct
+ * type, so it cannot be mixed up with a plain __u64 by accident.
+ */
+static inline be64 cpu_to_be64(__u64 x)
+{
+	be64 wrapped;
+
+	wrapped._v64 = __cpu_to_be64(x);
+	return wrapped;
+}
+
+/* Primary Raid Level (PRL) */
+#define DDF_RAID0 0x00
+#define DDF_RAID1 0x01
+#define DDF_RAID3 0x03
+#define DDF_RAID4 0x04
+#define DDF_RAID5 0x05
+#define DDF_RAID1E 0x11
+#define DDF_JBOD 0x0f
+#define DDF_CONCAT 0x1f
+#define DDF_RAID5E 0x15
+#define DDF_RAID5EE 0x25
+#define DDF_RAID6 0x06
+
+/* Raid Level Qualifier (RLQ) */
+#define DDF_RAID0_SIMPLE 0x00
+#define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
+#define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
+#define DDF_RAID3_0 0x00 /* parity in first extent */
+#define DDF_RAID3_N 0x01 /* parity in last extent */
+#define DDF_RAID4_0 0x00 /* parity in first extent */
+#define DDF_RAID4_N 0x01 /* parity in last extent */
+/* these apply to raid5e and raid5ee as well */
+#define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
+#define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
+#define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
+#define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
+
+#define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
+#define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
+
+/* Secondary RAID Level (SRL) */
+#define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
+#define DDF_2MIRRORED 0x01
+#define DDF_2CONCAT 0x02
+#define DDF_2SPANNED 0x03 /* This is also weird - be careful */
+
+/* Magic numbers */
+#define DDF_HEADER_MAGIC cpu_to_be32(0xDE11DE11)
+#define DDF_CONTROLLER_MAGIC cpu_to_be32(0xAD111111)
+#define DDF_PHYS_RECORDS_MAGIC cpu_to_be32(0x22222222)
+#define DDF_PHYS_DATA_MAGIC cpu_to_be32(0x33333333)
+#define DDF_VIRT_RECORDS_MAGIC cpu_to_be32(0xDDDDDDDD)
+#define DDF_VD_CONF_MAGIC cpu_to_be32(0xEEEEEEEE)
+#define DDF_SPARE_ASSIGN_MAGIC cpu_to_be32(0x55555555)
+#define DDF_VU_CONF_MAGIC cpu_to_be32(0x88888888)
+#define DDF_VENDOR_LOG_MAGIC cpu_to_be32(0x01dBEEF0)
+#define DDF_BBM_LOG_MAGIC cpu_to_be32(0xABADB10C)
+
+#define DDF_GUID_LEN 24
+#define DDF_REVISION_0 "01.00.00"
+#define DDF_REVISION_2 "01.02.00"
+
+struct ddf_header {
+ be32 magic; /* DDF_HEADER_MAGIC */
+ be32 crc;
+ char guid[DDF_GUID_LEN];
+ char revision[8]; /* 01.02.00 */
+ be32 seq; /* starts at '1' */
+ be32 timestamp;
+ __u8 openflag;
+ __u8 foreignflag;
+ __u8 enforcegroups;
+ __u8 pad0; /* 0xff */
+ __u8 pad1[12]; /* 12 * 0xff */
+ /* 64 bytes so far */
+ __u8 header_ext[32]; /* reserved: fill with 0xff */
+ be64 primary_lba;
+ be64 secondary_lba;
+ __u8 type;
+ __u8 pad2[3]; /* 0xff */
+ be32 workspace_len; /* sectors for vendor space -
+ * at least 32768(sectors) */
+ be64 workspace_lba;
+ be16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
+ be16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
+ be16 max_partitions; /* i.e. max num of configuration
+ record entries per disk */
+ be16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
+ *12/512) */
+ be16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
+ __u8 pad3[54]; /* 0xff */
+ /* 192 bytes so far */
+ be32 controller_section_offset;
+ be32 controller_section_length;
+ be32 phys_section_offset;
+ be32 phys_section_length;
+ be32 virt_section_offset;
+ be32 virt_section_length;
+ be32 config_section_offset;
+ be32 config_section_length;
+ be32 data_section_offset;
+ be32 data_section_length;
+ be32 bbm_section_offset;
+ be32 bbm_section_length;
+ be32 diag_space_offset;
+ be32 diag_space_length;
+ be32 vendor_offset;
+ be32 vendor_length;
+ /* 256 bytes so far */
+ __u8 pad4[256]; /* 0xff */
+};
+
+/* type field */
+#define DDF_HEADER_ANCHOR 0x00
+#define DDF_HEADER_PRIMARY 0x01
+#define DDF_HEADER_SECONDARY 0x02
+
+/* The content of the 'controller section' - global scope */
+struct ddf_controller_data {
+ be32 magic; /* DDF_CONTROLLER_MAGIC */
+ be32 crc;
+ char guid[DDF_GUID_LEN];
+ struct controller_type {
+ be16 vendor_id;
+ be16 device_id;
+ be16 sub_vendor_id;
+ be16 sub_device_id;
+ } type;
+ char product_id[16];
+ __u8 pad[8]; /* 0xff */
+ __u8 vendor_data[448];
+};
+
+/* The content of phys_section - global scope */
+struct phys_disk {
+ be32 magic; /* DDF_PHYS_RECORDS_MAGIC */
+ be32 crc;
+ be16 used_pdes; /* This is a counter, not a max - the list
+ * of used entries may not be dense */
+ be16 max_pdes;
+ __u8 pad[52];
+ struct phys_disk_entry {
+ char guid[DDF_GUID_LEN];
+ be32 refnum;
+ be16 type;
+ be16 state;
+ be64 config_size; /* DDF structures must be after here */
+ char path[18]; /* Another horrible structure really
+ * but is "used for information
+ * purposes only" */
+ __u8 pad[6];
+ } entries[0];
+};
+
+/* phys_disk_entry.type is a bitmap - bigendian remember */
+#define DDF_Forced_PD_GUID 1
+#define DDF_Active_in_VD 2
+#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
+#define DDF_Spare 8 /* overrides Global_spare */
+#define DDF_Foreign 16
+#define DDF_Legacy 32 /* no DDF on this device */
+
+#define DDF_Interface_mask 0xf00
+#define DDF_Interface_SCSI 0x100
+#define DDF_Interface_SAS 0x200
+#define DDF_Interface_SATA 0x300
+#define DDF_Interface_FC 0x400
+
+/* phys_disk_entry.state is a bigendian bitmap */
+#define DDF_Online 1
+#define DDF_Failed 2 /* overrides 1,4,8 */
+#define DDF_Rebuilding 4
+#define DDF_Transition 8
+#define DDF_SMART 16
+#define DDF_ReadErrors 32
+#define DDF_Missing 64
+
+/* The content of the virt_section global scope */
+struct virtual_disk {
+ be32 magic; /* DDF_VIRT_RECORDS_MAGIC */
+ be32 crc;
+ be16 populated_vdes;
+ be16 max_vdes;
+ __u8 pad[52];
+ struct virtual_entry {
+ char guid[DDF_GUID_LEN];
+ be16 unit;
+ __u16 pad0; /* 0xffff */
+ be16 guid_crc;
+ be16 type;
+ __u8 state;
+ __u8 init_state;
+ __u8 pad1[14];
+ char name[16];
+ } entries[0];
+};
+
+/* virtual_entry.type is a bitmap - bigendian */
+#define DDF_Shared 1
+#define DDF_Enforce_Groups 2
+#define DDF_Unicode 4
+#define DDF_Owner_Valid 8
+
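+/* virtual_entry.state is a single byte: the low three bits
+ * (DDF_state_mask) hold one of the DDF_state_* values below,
+ * the higher bits are flags.
+ */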
+#define DDF_state_mask 0x7
+#define DDF_state_optimal 0x0
+#define DDF_state_degraded 0x1
+#define DDF_state_deleted 0x2
+#define DDF_state_missing 0x3
+#define DDF_state_failed 0x4
+#define DDF_state_part_optimal 0x5
+
+#define DDF_state_morphing 0x8
+#define DDF_state_inconsistent 0x10
+
+/* virtual_entry.init_state is a single byte; DDF_initstate_mask selects
+ * the initialisation state held in its low two bits.
+ */
+#define DDF_initstate_mask 0x03
+#define DDF_init_not 0x00
+#define DDF_init_quick 0x01 /* initialisation in progress.
+ * i.e. 'state_inconsistent' */
+#define DDF_init_full 0x02
+
+#define DDF_access_mask 0xc0
+#define DDF_access_rw 0x00
+#define DDF_access_ro 0x80
+#define DDF_access_blocked 0xc0
+
+/* The content of the config_section - local scope
+ * It has multiple records each config_record_len sectors
+ * They can be vd_config or spare_assign
+ */
+
+struct vd_config {
+ be32 magic; /* DDF_VD_CONF_MAGIC */
+ be32 crc;
+ char guid[DDF_GUID_LEN];
+ be32 timestamp;
+ be32 seqnum;
+ __u8 pad0[24];
+ be16 prim_elmnt_count;
+ __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
+ __u8 prl;
+ __u8 rlq;
+ __u8 sec_elmnt_count;
+ __u8 sec_elmnt_seq;
+ __u8 srl;
+ be64 blocks; /* blocks per component could be different
+ * on different component devices...(only
+ * for concat I hope) */
+ be64 array_blocks; /* blocks in array */
+ __u8 pad1[8];
+ be32 spare_refs[8]; /* This is used to detect missing spares.
+ * As we don't have an interface for that
+ * the values are ignored.
+ */
+ __u8 cache_pol[8];
+ __u8 bg_rate;
+ __u8 pad2[3];
+ __u8 pad3[52];
+ __u8 pad4[192];
+ __u8 v0[32]; /* reserved- 0xff */
+ __u8 v1[32]; /* reserved- 0xff */
+ __u8 v2[16]; /* reserved- 0xff */
+ __u8 v3[16]; /* reserved- 0xff */
+ __u8 vendor[32];
+ be32 phys_refnum[0]; /* refnum of each disk in sequence */
+ /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
+ bvd are always the same size */
+};
+#define LBA_OFFSET(ddf, vd) ((be64 *) &(vd)->phys_refnum[(ddf)->mppe])
+
+/* vd_config.cache_pol[7] is a bitmap */
+#define DDF_cache_writeback 1 /* else writethrough */
+#define DDF_cache_wadaptive 2 /* only applies if writeback */
+#define DDF_cache_readahead 4
+#define DDF_cache_radaptive 8 /* only if doing read-ahead */
+#define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
+#define DDF_cache_wallowed 32 /* enable write caching */
+#define DDF_cache_rallowed 64 /* enable read caching */
+
+struct spare_assign {
+ be32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
+ be32 crc;
+ be32 timestamp;
+ __u8 reserved[7];
+ __u8 type;
+ be16 populated; /* SAEs used */
+ be16 max; /* max SAEs */
+ __u8 pad[8];
+ struct spare_assign_entry {
+ char guid[DDF_GUID_LEN];
+ be16 secondary_element;
+ __u8 pad[6];
+ } spare_ents[0];
+};
+/* spare_assign.type is a bitmap */
+#define DDF_spare_dedicated 0x1 /* else global */
+#define DDF_spare_revertible 0x2 /* else committable */
+#define DDF_spare_active 0x4 /* else not active */
+#define DDF_spare_affinity 0x8 /* enclosure affinity */
+
+/* The data_section contents - local scope */
+struct disk_data {
+ be32 magic; /* DDF_PHYS_DATA_MAGIC */
+ be32 crc;
+ char guid[DDF_GUID_LEN];
+ be32 refnum; /* crc of some magic drive data ... */
+ __u8 forced_ref; /* set when above was not result of magic */
+ __u8 forced_guid; /* set if guid was forced rather than magic */
+ __u8 vendor[32];
+ __u8 pad[442];
+};
+
+/* bbm_section content */
+struct bad_block_log {
+ be32 magic;
+ be32 crc;
+ be16 entry_count;
+ be32 spare_count;
+ __u8 pad[10];
+ be64 first_spare;
+ struct mapped_block {
+ be64 defective_start;
+ be32 replacement_start;
+ be16 remap_count;
+ __u8 pad[2];
+ } entries[0];
+};
+
+/* Struct for internally holding ddf structures */
+/* The DDF structure stored on each device is potentially
+ * quite different, as some data is global and some is local.
+ * The global data is:
+ * - ddf header
+ * - controller_data
+ * - Physical disk records
+ * - Virtual disk records
+ * The local data is:
+ * - Configuration records
+ * - Physical Disk data section
+ * ( and Bad block and vendor which I don't care about yet).
+ *
+ * The local data is parsed into separate lists as it is read
+ * and reconstructed for writing. This means that we only need
+ * to make config changes once and they are automatically
+ * propagated to all devices.
+ * The global (config and disk data) records are each in a list
+ * of separate data structures. When writing we find the entry
+ * or entries applicable to the particular device.
+ */
+struct ddf_super {
+ struct ddf_header anchor, primary, secondary;
+ struct ddf_controller_data controller;
+ struct ddf_header *active;
+ struct phys_disk *phys;
+ struct virtual_disk *virt;
+ char *conf;
+ int pdsize, vdsize;
+ unsigned int max_part, mppe, conf_rec_len;
+ int currentdev;
+ int updates_pending;
+ struct vcl {
+ union {
+ char space[512];
+ struct {
+ struct vcl *next;
+ unsigned int vcnum; /* index into ->virt */
+ /* For an array with a secondary level there are
+ * multiple vd_config structures, all with the same
+ * guid but with different sec_elmnt_seq.
+ * One of these structures is in 'conf' below.
+ * The others are in other_bvds, not in any
+ * particular order.
+ */
+ struct vd_config **other_bvds;
+ __u64 *block_sizes; /* NULL if all the same */
+ };
+ };
+ struct vd_config conf;
+ } *conflist, *currentconf;
+ struct dl {
+ union {
+ char space[512];
+ struct {
+ struct dl *next;
+ int major, minor;
+ char *devname;
+ int fd;
+ unsigned long long size; /* sectors */
+ be64 primary_lba; /* sectors */
+ be64 secondary_lba; /* sectors */
+ be64 workspace_lba; /* sectors */
+ int pdnum; /* index in ->phys */
+ struct spare_assign *spare;
+ void *mdupdate; /* hold metadata update */
+
+ /* These fields used by auto-layout */
+ int raiddisk; /* slot to fill in autolayout */
+ __u64 esize;
+ int displayed;
+ };
+ };
+ struct disk_data disk;
+ struct vcl *vlist[0]; /* max_part in size */
+ } *dlist, *add_list;
+};
+
+#ifndef MDASSEMBLE
+static int load_super_ddf_all(struct supertype *st, int fd,
+ void **sbp, char *devname);
+static int get_svd_state(const struct ddf_super *, const struct vcl *);
+static int
+validate_geometry_ddf_container(struct supertype *st,
+ int level, int layout, int raiddisks,
+ int chunk, unsigned long long size,
+ unsigned long long data_offset,
+ char *dev, unsigned long long *freesize,
+ int verbose);
+
+static int validate_geometry_ddf_bvd(struct supertype *st,
+ int level, int layout, int raiddisks,
+ int *chunk, unsigned long long size,
+ unsigned long long data_offset,
+ char *dev, unsigned long long *freesize,
+ int verbose);
+#endif
+
+static void free_super_ddf(struct supertype *st);
+static int all_ff(const char *guid);
+static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
+ be32 refnum, unsigned int nmax,
+ const struct vd_config **bvd,
+ unsigned int *idx);
+static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
+static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
+static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
+static void _ddf_array_name(char *name, const struct ddf_super *ddf, int i);
+static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
+static int init_super_ddf_bvd(struct supertype *st,
+ mdu_array_info_t *info,
+ unsigned long long size,
+ char *name, char *homehost,
+ int *uuid, unsigned long long data_offset);
+
+#if DEBUG
+/*
+ * Debug helper: print the 'state' and 'init_state' bytes of every
+ * virtual disk entry whose GUID is not all 0xff, prefixed with this
+ * function's name and 'msg'.
+ * Takes a const pointer, like the non-debug stub below, so that both
+ * build variants accept the same callers.
+ */
+static void pr_state(const struct ddf_super *ddf, const char *msg)
+{
+	unsigned int i;
+	/* the limit does not change while printing; convert it once */
+	unsigned int max_vds = be16_to_cpu(ddf->active->max_vd_entries);
+
+	dprintf("%s/%s: ", __func__, msg);
+	for (i = 0; i < max_vds; i++) {
+		if (all_ff(ddf->virt->entries[i].guid))
+			continue;
+		dprintf("%u(s=%02x i=%02x) ", i,
+			ddf->virt->entries[i].state,
+			ddf->virt->entries[i].init_state);
+	}
+	dprintf("\n");
+}
+#else
+/* Non-debug builds: same signature, no output. */
+static void pr_state(const struct ddf_super *ddf, const char *msg) {}
+#endif
+
+/*
+ * Record that the metadata has changed and must be written out.
+ *
+ * If 'vc' is given, its timestamp is refreshed and its sequence number
+ * is advanced on every call.  The header sequence number, by contrast,
+ * is advanced only on the call that flips updates_pending from 0 to 1.
+ * 'func' is the caller's name, passed on to pr_state().
+ */
+static void _ddf_set_updates_pending(struct ddf_super *ddf, struct vd_config *vc,
+				     const char *func)
+{
+	if (vc != NULL) {
+		__u32 next_seqnum = be32_to_cpu(vc->seqnum) + 1;
+
+		vc->timestamp = cpu_to_be32(time(0)-DECADE);
+		vc->seqnum = cpu_to_be32(next_seqnum);
+	}
+
+	if (!ddf->updates_pending) {
+		__u32 next_seq = be32_to_cpu(ddf->active->seq) + 1;
+
+		ddf->updates_pending = 1;
+		ddf->active->seq = cpu_to_be32(next_seq);
+		pr_state(ddf, func);
+	}
+}
+
+/* Convenience wrapper that supplies the calling function's name. */
+#define ddf_set_updates_pending(x,v) _ddf_set_updates_pending((x), (v), __func__)
+
+/*
+ * Compute the CRC of the 'len' bytes at 'buf'.
+ *
+ * The crc field is accessed through a struct ddf_header pointer, i.e.
+ * every checksummed structure is expected to keep its crc at the same
+ * offset as the header does.  While the sum is computed the field is
+ * set to 0xffffffff; the stored value is put back afterwards.
+ * The result is returned big-endian, ready to compare or store.
+ */
+static be32 calc_crc(void *buf, int len)
+{
+	struct ddf_header *hdr = buf;
+	const be32 saved_crc = hdr->crc;
+	__u32 sum;
+
+	hdr->crc = cpu_to_be32(0xffffffff);
+	sum = crc32(0, buf, len);
+	hdr->crc = saved_crc;
+
+	return cpu_to_be32(sum);
+}
+
+#define DDF_INVALID_LEVEL 0xff
+#define DDF_NO_SECONDARY 0xff
+/*
+ * Report an md level/layout/disk-count combination that has no DDF
+ * equivalent.  Always returns -1 so callers can write
+ * "return err_bad_md_layout(array);".
+ */
+static int err_bad_md_layout(const mdu_array_info_t *array)
+{
+	pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
+	       array->level,
+	       array->layout,
+	       array->raid_disks);
+
+	return -1;
+}
+/*
+ * Translate an md array description (level, layout, raid_disks) into
+ * the DDF fields prl, rlq, srl, prim_elmnt_count and sec_elmnt_count.
+ * 'conf' is written only after the whole mapping has succeeded.
+ * Returns 0 on success.  Unsupported combinations are reported via
+ * err_bad_md_layout() and yield -1 with 'conf' untouched.
+ */
+static int layout_md2ddf(const mdu_array_info_t *array,
+ struct vd_config *conf)
+{
+ be16 prim_elmnt_count = cpu_to_be16(array->raid_disks);
+ __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
+ __u8 sec_elmnt_count = 1; /* default: no secondary level */
+ __u8 srl = DDF_NO_SECONDARY;
+
+ switch (array->level) {
+ case LEVEL_LINEAR:
+ prl = DDF_CONCAT;
+ break;
+ case 0:
+ rlq = DDF_RAID0_SIMPLE;
+ prl = DDF_RAID0;
+ break;
+ case 1: /* only 2-disk and 3-disk mirrors are accepted */
+ switch (array->raid_disks) {
+ case 2:
+ rlq = DDF_RAID1_SIMPLE;
+ break;
+ case 3:
+ rlq = DDF_RAID1_MULTI;
+ break;
+ default:
+ return err_bad_md_layout(array);
+ }
+ prl = DDF_RAID1;
+ break;
+ case 4: /* only layout 0 is accepted */
+ if (array->layout != 0)
+ return err_bad_md_layout(array);
+ rlq = DDF_RAID4_N;
+ prl = DDF_RAID4;
+ break;
+ case 5:
+ switch (array->layout) {
+ case ALGORITHM_LEFT_ASYMMETRIC:
+ rlq = DDF_RAID5_N_RESTART;
+ break;
+ case ALGORITHM_RIGHT_ASYMMETRIC:
+ rlq = DDF_RAID5_0_RESTART;
+ break;
+ case ALGORITHM_LEFT_SYMMETRIC:
+ rlq = DDF_RAID5_N_CONTINUE;
+ break;
+ case ALGORITHM_RIGHT_SYMMETRIC:
+ /* not mentioned in standard */
+ default:
+ return err_bad_md_layout(array);
+ }
+ prl = DDF_RAID5;
+ break;
+ case 6: /* two of the three qualifiers reuse the DDF_RAID5_* names */
+ switch (array->layout) {
+ case ALGORITHM_ROTATING_N_RESTART:
+ rlq = DDF_RAID5_N_RESTART;
+ break;
+ case ALGORITHM_ROTATING_ZERO_RESTART:
+ rlq = DDF_RAID6_0_RESTART;
+ break;
+ case ALGORITHM_ROTATING_N_CONTINUE:
+ rlq = DDF_RAID5_N_CONTINUE;
+ break;
+ default:
+ return err_bad_md_layout(array);
+ }
+ prl = DDF_RAID6;
+ break;
+ case 10: /* branch order matters: layout 0x102 is tested twice, first for an even disk count */
+ if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
+ rlq = DDF_RAID1_SIMPLE;
+ prim_elmnt_count = cpu_to_be16(2);
+ sec_elmnt_count = array->raid_disks / 2; /* raid_disks/2 two-disk RAID1 elements */
+ srl = DDF_2SPANNED;
+ prl = DDF_RAID1;
+ } else if (array->raid_disks % 3 == 0
+ && array->layout == 0x103) {
+ rlq = DDF_RAID1_MULTI;
+ prim_elmnt_count = cpu_to_be16(3);
+ sec_elmnt_count = array->raid_disks / 3; /* raid_disks/3 three-disk RAID1 elements */
+ srl = DDF_2SPANNED;
+ prl = DDF_RAID1;
+ } else if (array->layout == 0x201) {
+ prl = DDF_RAID1E;
+ rlq = DDF_RAID1E_OFFSET;
+ } else if (array->layout == 0x102) { /* reached only when raid_disks is odd */
+ prl = DDF_RAID1E;
+ rlq = DDF_RAID1E_ADJACENT;
+ } else
+ return err_bad_md_layout(array);
+ break;
+ default:
+ return err_bad_md_layout(array);
+ }
+ conf->prl = prl;
+ conf->prim_elmnt_count = prim_elmnt_count;
+ conf->rlq = rlq;
+ conf->srl = srl;
+ conf->sec_elmnt_count = sec_elmnt_count;
+ return 0;
+}
+
+/*
+ * Report a DDF primary level / qualifier / element count combination
+ * that cannot be expressed as an md array.  Always returns -1 so
+ * callers can write "return err_bad_ddf_layout(conf);".
+ */
+static int err_bad_ddf_layout(const struct vd_config *conf)
+{
+	pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
+	       conf->prl,
+	       conf->rlq,
+	       be16_to_cpu(conf->prim_elmnt_count));
+
+	return -1;
+}
+/*
+ * Inverse of layout_md2ddf(): derive md level, layout and raid_disks
+ * from the prl/rlq/srl/element counts of a DDF VD config record.
+ * 'array' is written only at the 'good' label, i.e. after the mapping
+ * has succeeded.  Returns 0 on success and -1 (after printing a
+ * message) for anything that cannot be represented.
+ */
+static int layout_ddf2md(const struct vd_config *conf,
+ mdu_array_info_t *array)
+{
+ int level = LEVEL_UNSUPPORTED;
+ int layout = 0;
+ int raiddisks = be16_to_cpu(conf->prim_elmnt_count);
+
+ if (conf->sec_elmnt_count > 1) { /* secondary level present: only RAID1 elements are handled, mapped to md raid10 */
+ /* see also check_secondary() */
+ if (conf->prl != DDF_RAID1 ||
+ (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
+ pr_err("Unsupported secondary RAID level %u/%u\n",
+ conf->prl, conf->srl);
+ return -1;
+ }
+ if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
+ layout = 0x102;
+ else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
+ layout = 0x103;
+ else
+ return err_bad_ddf_layout(conf);
+ raiddisks *= conf->sec_elmnt_count; /* total disks = per-element disks * number of elements */
+ level = 10;
+ goto good;
+ }
+
+ switch (conf->prl) {
+ case DDF_CONCAT:
+ level = LEVEL_LINEAR;
+ break;
+ case DDF_RAID0:
+ if (conf->rlq != DDF_RAID0_SIMPLE)
+ return err_bad_ddf_layout(conf);
+ level = 0;
+ break;
+ case DDF_RAID1: /* the qualifier must agree with the disk count */
+ if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
+ (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
+ return err_bad_ddf_layout(conf);
+ level = 1;
+ break;
+ case DDF_RAID1E:
+ if (conf->rlq == DDF_RAID1E_ADJACENT)
+ layout = 0x102;
+ else if (conf->rlq == DDF_RAID1E_OFFSET)
+ layout = 0x201;
+ else
+ return err_bad_ddf_layout(conf);
+ level = 10;
+ break;
+ case DDF_RAID4:
+ if (conf->rlq != DDF_RAID4_N)
+ return err_bad_ddf_layout(conf);
+ level = 4;
+ break;
+ case DDF_RAID5:
+ switch (conf->rlq) {
+ case DDF_RAID5_N_RESTART:
+ layout = ALGORITHM_LEFT_ASYMMETRIC;
+ break;
+ case DDF_RAID5_0_RESTART:
+ layout = ALGORITHM_RIGHT_ASYMMETRIC;
+ break;
+ case DDF_RAID5_N_CONTINUE:
+ layout = ALGORITHM_LEFT_SYMMETRIC;
+ break;
+ default:
+ return err_bad_ddf_layout(conf);
+ }
+ level = 5;
+ break;
+ case DDF_RAID6: /* mirrors layout_md2ddf(): two qualifiers use the DDF_RAID5_* names */
+ switch (conf->rlq) {
+ case DDF_RAID5_N_RESTART:
+ layout = ALGORITHM_ROTATING_N_RESTART;
+ break;
+ case DDF_RAID6_0_RESTART:
+ layout = ALGORITHM_ROTATING_ZERO_RESTART;
+ break;
+ case DDF_RAID5_N_CONTINUE:
+ layout = ALGORITHM_ROTATING_N_CONTINUE;
+ break;
+ default:
+ return err_bad_ddf_layout(conf);
+ }
+ level = 6;
+ break;
+ default:
+ return err_bad_ddf_layout(conf);
+ };
+
+good:
+ array->level = level;
+ array->layout = layout;
+ array->raid_disks = raiddisks;
+ return 0;
+}
+
+/*
+ * Read one 512-byte DDF header (primary or secondary) located at
+ * sector 'lba' of 'fd' into 'hdr' and verify it against 'anchor'.
+ *
+ * 'size' is the device size in sectors, 'type' the header_type value
+ * the block is required to carry.  The header is accepted when magic
+ * and CRC are good, guid, revision and both header LBAs agree with
+ * the anchor, the type matches, and everything from pad2 up to the
+ * end of the block is identical to the anchor.
+ *
+ * Returns 1 when the header is usable, 0 otherwise.
+ */
+static int load_ddf_header(int fd, unsigned long long lba,
+			   unsigned long long size,
+			   int type,
+			   struct ddf_header *hdr, struct ddf_header *anchor)
+{
+	const size_t tail_len = 512 - offsetof(struct ddf_header, pad2);
+	int consistent;
+
+	if (lba >= size-1)
+		return 0;
+
+	if (lseek64(fd, lba<<9, 0) < 0 ||
+	    read(fd, hdr, 512) != 512)
+		return 0;
+
+	if (!be32_eq(hdr->magic, DDF_HEADER_MAGIC)) {
+		pr_err("%s: bad header magic\n", __func__);
+		return 0;
+	}
+	if (!be32_eq(calc_crc(hdr, 512), hdr->crc)) {
+		pr_err("%s: bad CRC\n", __func__);
+		return 0;
+	}
+
+	consistent =
+		memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) == 0 &&
+		memcmp(anchor->revision, hdr->revision, 8) == 0 &&
+		be64_eq(anchor->primary_lba, hdr->primary_lba) &&
+		be64_eq(anchor->secondary_lba, hdr->secondary_lba) &&
+		hdr->type == type &&
+		memcmp(anchor->pad2, hdr->pad2, tail_len) == 0;
+	if (!consistent) {
+		pr_err("%s: header mismatch\n", __func__);
+		return 0;
+	}
+
+	/* Looks good enough to me... */
+	return 1;
+}
+
+/*
+ * Read one metadata section into memory.
+ *
+ * 'offset_be' and 'len_be' are the big-endian sector offset and sector
+ * count taken from the header; the offset is relative to the LBA of
+ * the active header (primary when its type is 1, secondary otherwise).
+ * With 'check' set, only lengths of 2, 8, 32, 128 or 512 sectors are
+ * accepted; lengths above 1024 sectors are always refused.
+ *
+ * If 'buf' is NULL a 512-byte aligned buffer is allocated, and it is
+ * released again if the read fails.  A caller-supplied buffer is
+ * never freed here, and its size is not known to this function.
+ *
+ * Returns the buffer holding the section, or NULL on any failure.
+ */
+static void *load_section(int fd, struct ddf_super *super, void *buf,
+			  be32 offset_be, be32 len_be, int check)
+{
+	unsigned long long start = be32_to_cpu(offset_be);
+	unsigned long long nsect = be32_to_cpu(len_be);
+	const int we_allocated = (buf == NULL);
+
+	if (check &&
+	    nsect != 2 && nsect != 8 && nsect != 32 &&
+	    nsect != 128 && nsect != 512)
+		return NULL;
+	if (nsect > 1024)
+		return NULL;
+
+	if (we_allocated && posix_memalign(&buf, 512, nsect<<9) != 0)
+		buf = NULL;
+	if (buf == NULL)
+		return NULL;
+
+	start += (super->active->type == 1)
+		? be64_to_cpu(super->active->primary_lba)
+		: be64_to_cpu(super->active->secondary_lba);
+
+	if ((unsigned long long)lseek64(fd, start<<9, 0) != (start<<9))
+		goto fail;
+	if ((unsigned long long)read(fd, buf, nsect<<9) != (nsect<<9))
+		goto fail;
+	return buf;
+
+fail:
+	if (we_allocated)
+		free(buf);
+	return NULL;
+}
+
+/*
+ * Locate and validate the DDF headers of the device open on 'fd'.
+ *
+ * The anchor is the last 512-byte block of the device.  Once its
+ * magic, CRC and revision have been checked, the primary and secondary
+ * headers it points at are loaded and one of them becomes
+ * super->active.  The secondary is preferred when the primary could
+ * not be loaded, when the secondary has a higher sequence number and
+ * is not open, or when the sequence numbers are equal and only the
+ * primary is marked open.
+ *
+ * 'devname' is used for messages only; when it is NULL this function
+ * prints none of its own.
+ *
+ * Returns 0 on success, 1 when the device size or anchor could not be
+ * read, 2 when no usable DDF metadata was found.
+ */
+static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
+{
+	unsigned long long dsize;
+
+	/*
+	 * Without a size there is no anchor position; do not go on with
+	 * an uninitialised 'dsize'.  Passing 'devname' lets get_dev_size()
+	 * name the device, as load_super_ddf() does.
+	 */
+	if (!get_dev_size(fd, devname, &dsize))
+		return 1;
+
+	if (lseek64(fd, dsize-512, 0) < 0) {
+		if (devname)
+			pr_err("Cannot seek to anchor block on %s: %s\n",
+			       devname, strerror(errno));
+		return 1;
+	}
+	if (read(fd, &super->anchor, 512) != 512) {
+		if (devname)
+			pr_err("Cannot read anchor block on %s: %s\n",
+			       devname, strerror(errno));
+		return 1;
+	}
+	if (!be32_eq(super->anchor.magic, DDF_HEADER_MAGIC)) {
+		if (devname)
+			pr_err("no DDF anchor found on %s\n",
+			       devname);
+		return 2;
+	}
+	if (!be32_eq(calc_crc(&super->anchor, 512), super->anchor.crc)) {
+		if (devname)
+			pr_err("bad CRC on anchor on %s\n",
+			       devname);
+		return 2;
+	}
+	if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
+	    memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
+		if (devname)
+			pr_err("can only support super revision"
+			       " %.8s and earlier, not %.8s on %s\n",
+			       DDF_REVISION_2, super->anchor.revision,devname);
+		return 2;
+	}
+	super->active = NULL;
+	if (load_ddf_header(fd, be64_to_cpu(super->anchor.primary_lba),
+			    dsize >> 9, 1,
+			    &super->primary, &super->anchor) == 0) {
+		if (devname)
+			pr_err("Failed to load primary DDF header "
+			       "on %s\n", devname);
+	} else
+		super->active = &super->primary;
+
+	if (load_ddf_header(fd, be64_to_cpu(super->anchor.secondary_lba),
+			    dsize >> 9, 2,
+			    &super->secondary, &super->anchor)) {
+		if (super->active == NULL
+		    || (be32_to_cpu(super->primary.seq)
+			< be32_to_cpu(super->secondary.seq) &&
+			!super->secondary.openflag)
+		    || (be32_to_cpu(super->primary.seq)
+			== be32_to_cpu(super->secondary.seq) &&
+			super->primary.openflag && !super->secondary.openflag)
+			)
+			super->active = &super->secondary;
+	} else if (devname &&
+		   be64_to_cpu(super->anchor.secondary_lba) != ~(__u64)0)
+		/* an all-ones secondary LBA is not reported as a failure */
+		pr_err("Failed to load secondary DDF header on %s\n",
+		       devname);
+	if (super->active == NULL)
+		return 2;
+	return 0;
+}
+
+/*
+ * Load the sections that are shared by all devices of a container:
+ * controller data, physical disk records and virtual disk records.
+ * Also caches max_part, mppe and conf_rec_len from the active header
+ * and resets conflist/dlist.  'devname' is not used here.
+ *
+ * Returns 0 on success.  On failure returns 2 with super->phys and
+ * super->virt freed and set to NULL.
+ */
+static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
+{
+	void *ok = NULL;
+	unsigned long long ctl_bytes =
+		(unsigned long long)be32_to_cpu(
+			super->active->controller_section_length) * 512;
+
+	/*
+	 * The controller section is read straight into a fixed-size
+	 * member, and load_section() cannot know how big the buffer it
+	 * was handed is.  Refuse an on-disk length that would write past
+	 * the member rather than corrupting the rest of *super.
+	 */
+	if (ctl_bytes <= sizeof(super->controller))
+		ok = load_section(fd, super, &super->controller,
+				  super->active->controller_section_offset,
+				  super->active->controller_section_length,
+				  0);
+	super->phys = load_section(fd, super, NULL,
+				   super->active->phys_section_offset,
+				   super->active->phys_section_length,
+				   1);
+	super->pdsize = be32_to_cpu(super->active->phys_section_length) * 512;
+
+	super->virt = load_section(fd, super, NULL,
+				   super->active->virt_section_offset,
+				   super->active->virt_section_length,
+				   1);
+	super->vdsize = be32_to_cpu(super->active->virt_section_length) * 512;
+	if (!ok ||
+	    !super->phys ||
+	    !super->virt) {
+		free(super->phys);
+		free(super->virt);
+		super->phys = NULL;
+		super->virt = NULL;
+		return 2;
+	}
+	super->conflist = NULL;
+	super->dlist = NULL;
+
+	super->max_part = be16_to_cpu(super->active->max_partitions);
+	super->mppe = be16_to_cpu(super->active->max_primary_element_entries);
+	super->conf_rec_len = be16_to_cpu(super->active->config_record_len);
+	return 0;
+}
+
+#define DDF_UNUSED_BVD 0xff
+/*
+ * Allocate room for the "other" BVD config records of a VD that has a
+ * secondary level: sec_elmnt_count - 1 records of conf_rec_len*512
+ * bytes each, followed in the same allocation by the pointer array
+ * that becomes vcl->other_bvds.  Every record is zeroed and marked
+ * DDF_UNUSED_BVD.  other_bvds[0] is the start of the allocation,
+ * which is what free_super_ddf() passes to free().
+ *
+ * With sec_elmnt_count == 1 nothing is allocated and other_bvds is
+ * set to NULL.
+ *
+ * Returns 0 on success and -1 on failure.  A count of 0 is treated as
+ * a failure: it cannot describe a valid record, and "0 - 1" would
+ * otherwise wrap into a request for about four billion records.
+ */
+static int alloc_other_bvds(const struct ddf_super *ddf, struct vcl *vcl)
+{
+	size_t n_vds, vdsize, per_vd, i;
+	void *mem;
+	char *base;
+
+	if (vcl->conf.sec_elmnt_count == 0)
+		return -1;
+	n_vds = vcl->conf.sec_elmnt_count - 1;
+	if (n_vds == 0) {
+		vcl->other_bvds = NULL;
+		return 0;
+	}
+
+	/* size_t arithmetic: the products below must not wrap at 32 bits */
+	vdsize = (size_t)ddf->conf_rec_len * 512;
+	per_vd = vdsize + sizeof(struct vd_config *);
+	if (per_vd > (size_t)-1 / n_vds)
+		return -1;
+	if (posix_memalign(&mem, 512, n_vds * per_vd) != 0)
+		return -1;
+
+	/* byte-wise pointer arithmetic needs a char pointer in ISO C */
+	base = mem;
+	vcl->other_bvds = (struct vd_config **) (base + n_vds * vdsize);
+	for (i = 0; i < n_vds; i++) {
+		vcl->other_bvds[i] = (struct vd_config *) (base + i * vdsize);
+		memset(vcl->other_bvds[i], 0, vdsize);
+		vcl->other_bvds[i]->sec_elmnt_seq = DDF_UNUSED_BVD;
+	}
+	return 0;
+}
+
+/*
+ * Store config record 'vd' ('len' bytes) in vcl->other_bvds.
+ *
+ * If a record with the same sec_elmnt_seq is already present it is
+ * replaced only when 'vd' carries a strictly higher seqnum.  Otherwise
+ * the first slot still marked DDF_UNUSED_BVD is used; when there is
+ * none, an error is printed and the record is dropped.
+ */
+static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
+			  unsigned int len)
+{
+	const int n_slots = vcl->conf.sec_elmnt_count-1;
+	int slot;
+
+	/* is this secondary element already known? */
+	for (slot = 0; slot < n_slots; slot++)
+		if (vcl->other_bvds[slot]->sec_elmnt_seq == vd->sec_elmnt_seq)
+			break;
+
+	if (slot < n_slots) {
+		/* known: keep whichever copy is newer */
+		if (be32_to_cpu(vd->seqnum) <=
+		    be32_to_cpu(vcl->other_bvds[slot]->seqnum))
+			return;
+	} else {
+		/* new: take the first free slot */
+		for (slot = 0; slot < n_slots; slot++)
+			if (vcl->other_bvds[slot]->sec_elmnt_seq == DDF_UNUSED_BVD)
+				break;
+		if (slot == n_slots) {
+			pr_err("no space for sec level config %u, count is %u\n",
+			       vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
+			return;
+		}
+	}
+
+	memcpy(vcl->other_bvds[slot], vd, len);
+}
+
+/*
+ * Load the per-device metadata of the device open on 'fd': its disk
+ * data section and its config section.
+ *
+ * A new struct dl describing the device is put at the head of
+ * super->dlist ('keep' decides whether 'fd' is remembered in it or
+ * recorded as -1).  The config section is then walked record by
+ * record:
+ *  - the first spare-assignment record is copied to dl->spare;
+ *  - VD config records are merged into super->conflist, one vcl per
+ *    GUID, keeping the copy with the highest seqnum, and the vcl is
+ *    appended to dl->vlist;
+ *  - records with any other magic are skipped.
+ *
+ * Returns 0 on success and 1 on failure.  Failure covers allocation
+ * errors, an unreadable config section, a zero config record length
+ * and more VD config records than dl->vlist has room for.
+ */
+static int load_ddf_local(int fd, struct ddf_super *super,
+			  char *devname, int keep)
+{
+	struct dl *dl;
+	struct stat stb;
+	char *conf;
+	unsigned int i;
+	unsigned int confsec;
+	unsigned int conf_len =
+		be32_to_cpu(super->active->config_section_length);
+	int vnum;
+	unsigned int max_virt_disks =
+		be16_to_cpu(super->active->max_vd_entries);
+	unsigned long long dsize;
+
+	/* The walk below advances by conf_rec_len; 0 would never end */
+	if (super->conf_rec_len == 0) {
+		pr_err("%s: config record length is zero\n", __func__);
+		return 1;
+	}
+
+	/* First the local disk info */
+	if (posix_memalign((void**)&dl, 512,
+			   sizeof(*dl) +
+			   (super->max_part) * sizeof(dl->vlist[0])) != 0) {
+		pr_err("%s could not allocate disk info buffer\n",
+		       __func__);
+		return 1;
+	}
+
+	load_section(fd, super, &dl->disk,
+		     super->active->data_section_offset,
+		     super->active->data_section_length,
+		     0);
+	dl->devname = devname ? xstrdup(devname) : NULL;
+
+	fstat(fd, &stb);
+	dl->major = major(stb.st_rdev);
+	dl->minor = minor(stb.st_rdev);
+	dl->next = super->dlist;
+	dl->fd = keep ? fd : -1;
+
+	dl->size = 0;
+	if (get_dev_size(fd, devname, &dsize))
+		dl->size = dsize >> 9;
+	/* If the disks have different sizes, the LBAs will differ
+	 * between phys disks.
+	 * At this point here, the values in super->active must be valid
+	 * for this phys disk. */
+	dl->primary_lba = super->active->primary_lba;
+	dl->secondary_lba = super->active->secondary_lba;
+	dl->workspace_lba = super->active->workspace_lba;
+	dl->spare = NULL;
+	for (i = 0 ; i < super->max_part ; i++)
+		dl->vlist[i] = NULL;
+	super->dlist = dl;
+	dl->pdnum = -1;
+	for (i = 0; i < be16_to_cpu(super->active->max_pd_entries); i++)
+		if (memcmp(super->phys->entries[i].guid,
+			   dl->disk.guid, DDF_GUID_LEN) == 0)
+			dl->pdnum = i;
+
+	/* Now the config list. */
+	/* 'conf' is an array of config entries, some of which are
+	 * probably invalid.  Those which are good need to be copied into
+	 * the conflist
+	 */
+
+	conf = load_section(fd, super, super->conf,
+			    super->active->config_section_offset,
+			    super->active->config_section_length,
+			    0);
+	if (conf == NULL) {
+		/* super->conf is left alone so that an existing buffer
+		 * stays owned by 'super' and is freed with it.
+		 */
+		pr_err("%s could not load config section\n", __func__);
+		return 1;
+	}
+	super->conf = conf;
+	vnum = 0;
+	/* Only whole records are looked at: a trailing fragment shorter
+	 * than conf_rec_len would reach past the end of 'conf'.
+	 */
+	for (confsec = 0;
+	     confsec < conf_len &&
+		     conf_len - confsec >= super->conf_rec_len;
+	     confsec += super->conf_rec_len) {
+		struct vd_config *vd =
+			(struct vd_config *)((char*)conf + confsec*512);
+		struct vcl *vcl;
+
+		if (be32_eq(vd->magic, DDF_SPARE_ASSIGN_MAGIC)) {
+			if (dl->spare)
+				continue;
+			if (posix_memalign((void**)&dl->spare, 512,
+					   super->conf_rec_len*512) != 0) {
+				pr_err("%s could not allocate spare info buf\n",
+				       __func__);
+				return 1;
+			}
+
+			memcpy(dl->spare, vd, super->conf_rec_len*512);
+			continue;
+		}
+		if (!be32_eq(vd->magic, DDF_VD_CONF_MAGIC))
+			/* Must be vendor-unique - I cannot handle those */
+			continue;
+
+		for (vcl = super->conflist; vcl; vcl = vcl->next) {
+			if (memcmp(vcl->conf.guid,
+				   vd->guid, DDF_GUID_LEN) == 0)
+				break;
+		}
+
+		/* dl->vlist was allocated with max_part slots; every VD
+		 * config record takes one, so stop before writing past it.
+		 */
+		if ((unsigned int)vnum >= super->max_part) {
+			pr_err("%s: more VD config records than max_partitions (%u)\n",
+			       __func__, super->max_part);
+			return 1;
+		}
+
+		if (vcl) {
+			dl->vlist[vnum++] = vcl;
+			if (vcl->other_bvds != NULL &&
+			    vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
+				add_other_bvd(vcl, vd, super->conf_rec_len*512);
+				continue;
+			}
+			if (be32_to_cpu(vd->seqnum) <=
+			    be32_to_cpu(vcl->conf.seqnum))
+				continue;
+		} else {
+			if (posix_memalign((void**)&vcl, 512,
+					   (super->conf_rec_len*512 +
+					    offsetof(struct vcl, conf))) != 0) {
+				pr_err("%s could not allocate vcl buf\n",
+				       __func__);
+				return 1;
+			}
+			vcl->next = super->conflist;
+			vcl->block_sizes = NULL; /* FIXME not for CONCAT */
+			/* alloc_other_bvds() reads the count from vcl->conf */
+			vcl->conf.sec_elmnt_count = vd->sec_elmnt_count;
+			if (alloc_other_bvds(super, vcl) != 0) {
+				pr_err("%s could not allocate other bvds\n",
+				       __func__);
+				free(vcl);
+				return 1;
+			};
+			super->conflist = vcl;
+			dl->vlist[vnum++] = vcl;
+		}
+		memcpy(&vcl->conf, vd, super->conf_rec_len*512);
+		for (i=0; i < max_virt_disks ; i++)
+			if (memcmp(super->virt->entries[i].guid,
+				   vcl->conf.guid, DDF_GUID_LEN)==0)
+				break;
+		if (i < max_virt_disks)
+			vcl->vcnum = i;
+	}
+
+	return 0;
+}
+
+/*
+ * Load the DDF metadata of a single device into st->sb.
+ *
+ * The device must not be a partition, must be larger than 32MiB and
+ * must have a size that is a multiple of 512 bytes.  Any superblock
+ * already attached to 'st' is released first.  'devname' is used for
+ * messages only and may be NULL.
+ *
+ * Returns 0 on success; 1 or 2 on failure (2 comes from the header
+ * and section loaders).  On failure st->sb is NULL.
+ */
+static int load_super_ddf(struct supertype *st, int fd,
+			  char *devname)
+{
+	unsigned long long dsize;
+	struct ddf_super *super;
+	int rv;
+
+	if (get_dev_size(fd, devname, &dsize) == 0)
+		return 1;
+
+	if (test_partition(fd))
+		/* DDF is not allowed on partitions */
+		return 1;
+
+	/* 32M is a lower bound */
+	if (dsize <= 32*1024*1024) {
+		if (devname)
+			pr_err("%s is too small for ddf: "
+			       "size is %llu sectors.\n",
+			       devname, dsize>>9);
+		return 1;
+	}
+	if (dsize & 511) {
+		if (devname)
+			pr_err("%s is an odd size for ddf: "
+			       "size is %llu bytes.\n",
+			       devname, dsize);
+		return 1;
+	}
+
+	free_super_ddf(st);
+
+	if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
+		pr_err("malloc of %zu failed.\n",
+		       sizeof(*super));
+		return 1;
+	}
+	memset(super, 0, sizeof(*super));
+
+	rv = load_ddf_headers(fd, super, devname);
+	if (rv) {
+		free(super);
+		return rv;
+	}
+
+	/* Have valid headers and have chosen the best. Let's read in the rest*/
+
+	rv = load_ddf_global(fd, super, devname);
+
+	if (rv) {
+		/* load_ddf_global() has already released phys and virt */
+		if (devname)
+			pr_err("Failed to load all information "
+			       "sections on %s\n", devname);
+		free(super);
+		return rv;
+	}
+
+	rv = load_ddf_local(fd, super, devname, 0);
+
+	if (rv) {
+		if (devname)
+			pr_err("Failed to load all information "
+			       "sections on %s\n", devname);
+		/*
+		 * By now phys, virt and possibly conf, dlist and conflist
+		 * hang off 'super'; a bare free(super) would leak them.
+		 * Hand it to free_super_ddf(), which releases everything
+		 * and leaves st->sb NULL.
+		 */
+		st->sb = super;
+		free_super_ddf(st);
+		return rv;
+	}
+
+	/* Should possibly check the sections .... */
+
+	st->sb = super;
+	if (st->ss == NULL) {
+		st->ss = &super_ddf;
+		st->minor_version = 0;
+		st->max_devs = 512;
+	}
+	return 0;
+
+}
+
+/*
+ * Release everything hanging off st->sb and set st->sb to NULL.
+ * Does nothing when no superblock is attached.
+ *
+ * Freed: the phys, virt and conf buffers, every vcl (with its
+ * block_sizes and other_bvds storage), and every dl on dlist and then
+ * add_list (closing fds that are >= 0 and freeing the spare record).
+ */
+static void free_super_ddf(struct supertype *st)
+{
+	struct ddf_super *ddf = st->sb;
+	struct vcl *vc;
+	int which;
+
+	if (!ddf)
+		return;
+
+	free(ddf->phys);
+	free(ddf->virt);
+	free(ddf->conf);
+
+	while ((vc = ddf->conflist) != NULL) {
+		ddf->conflist = vc->next;
+		free(vc->block_sizes);
+		if (vc->other_bvds)
+			/*
+			   vc->other_bvds[0] points to beginning of buffer,
+			   see alloc_other_bvds()
+			 */
+			free(vc->other_bvds[0]);
+		free(vc);
+	}
+
+	/* the two device lists are torn down the same way: dlist first */
+	for (which = 0; which < 2; which++) {
+		struct dl **head = which == 0 ? &ddf->dlist : &ddf->add_list;
+		struct dl *d;
+
+		while ((d = *head) != NULL) {
+			*head = d->next;
+			if (d->fd >= 0)
+				close(d->fd);
+			free(d->spare);
+			free(d);
+		}
+	}
+
+	free(ddf);
+	st->sb = NULL;
+}
+
+/*
+ * Return a fresh supertype for DDF if 'arg' names this metadata
+ * format ("ddf" or "default"), NULL otherwise.
+ * 'ddf' only supports containers.
+ */
+static struct supertype *match_metadata_desc_ddf(char *arg)
+{
+	struct supertype *st;
+	int wanted = strcmp(arg, "ddf") == 0 ||
+		     strcmp(arg, "default") == 0;
+
+	if (!wanted)
+		return NULL;
+
+	st = xcalloc(1, sizeof(*st));
+	st->sb = NULL;
+	st->ss = &super_ddf;
+	st->minor_version = 0;
+	st->max_devs = 512;
+	return st;
+}
+
+#ifndef MDASSEMBLE
+/*
+ * Number -> text tables used with map_num() by the examine code.
+ * Each table ends with a { NULL, 0 } entry.
+ */
+static mapping_t ddf_state[] = { /* looked up with (state & 7) of a virtual disk entry */
+ { "Optimal", 0},
+ { "Degraded", 1},
+ { "Deleted", 2},
+ { "Missing", 3},
+ { "Failed", 4},
+ { "Partially Optimal", 5},
+ { "-reserved-", 6},
+ { "-reserved-", 7},
+ { NULL, 0}
+};
+
+static mapping_t ddf_init_state[] = { /* looked up with (init_state & DDF_initstate_mask) */
+ { "Not Initialised", 0},
+ { "QuickInit in Progress", 1},
+ { "Fully Initialised", 2},
+ { "*UNKNOWN*", 3},
+ { NULL, 0}
+};
+static mapping_t ddf_access[] = { /* looked up with ((init_state & DDF_access_mask) >> 6) */
+ { "Read/Write", 0},
+ { "Reserved", 1},
+ { "Read Only", 2},
+ { "Blocked (no access)", 3},
+ { NULL ,0}
+};
+
+static mapping_t ddf_level[] = { /* looked up with the prl field of a VD config record */
+ { "RAID0", DDF_RAID0},
+ { "RAID1", DDF_RAID1},
+ { "RAID3", DDF_RAID3},
+ { "RAID4", DDF_RAID4},
+ { "RAID5", DDF_RAID5},
+ { "RAID1E",DDF_RAID1E},
+ { "JBOD", DDF_JBOD},
+ { "CONCAT",DDF_CONCAT},
+ { "RAID5E",DDF_RAID5E},
+ { "RAID5EE",DDF_RAID5EE},
+ { "RAID6", DDF_RAID6},
+ { NULL, 0}
+};
+static mapping_t ddf_sec_level[] = { /* looked up with the srl field of a VD config record */
+ { "Striped", DDF_2STRIPED},
+ { "Mirrored", DDF_2MIRRORED},
+ { "Concat", DDF_2CONCAT},
+ { "Spanned", DDF_2SPANNED},
+ { NULL, 0}
+};
+#endif
+
+/*
+ * Return 1 if every one of the DDF_GUID_LEN bytes at 'guid' is 0xff,
+ * 0 as soon as a different byte is seen.
+ */
+static int all_ff(const char *guid)
+{
+	const char *p = guid;
+	const char *const end = guid + DDF_GUID_LEN;
+
+	while (p < end) {
+		if (*p != (char)0xff)
+			return 0;
+		p++;
+	}
+	return 1;
+}
+
+/*
+ * Render a GUID for messages: bytes in the range 32..126 are copied as
+ * characters, every other byte is written as two lower-case hex
+ * digits.
+ * The result lives in a static buffer that the next call overwrites.
+ */
+static const char *guid_str(const char *guid)
+{
+	static char buf[DDF_GUID_LEN*2+1];
+	char *out = buf;
+	int n;
+
+	for (n = 0; n < DDF_GUID_LEN; n++) {
+		unsigned char byte = guid[n];
+
+		if (byte >= 32 && byte < 127)
+			*out++ = byte;
+		else
+			out += sprintf(out, "%02x", byte);
+	}
+	*out = '\0';
+	return buf;
+}
+
+#ifndef MDASSEMBLE
+/*
+ * Print a GUID to stdout.
+ *
+ * GUIDs are part (or all) ASCII and part binary, and they tend to be
+ * space padded.  The GUID is printed in hex, in colon-separated groups
+ * of four bytes; then, on a new line and in parentheses, any leading
+ * run of printable ASCII (trailing spaces ignored).  When 'tstamp' is
+ * non-zero the big-endian 32-bit value in bytes 16-19 is added as a
+ * local date and time, offset by DECADE.
+ */
+static void print_guid(char *guid, int tstamp)
+{
+	int l = DDF_GUID_LEN;
+	int i;
+
+	for (i=0 ; i<DDF_GUID_LEN ; i++) {
+		if ((i&3)==0 && i != 0) printf(":");
+		printf("%02X", guid[i]&255);
+	}
+
+	printf("\n (");
+	while (l && guid[l-1] == ' ')
+		l--;
+	for (i=0 ; i<l ; i++) {
+		if (guid[i] >= 0x20 && guid[i] < 0x7f)
+			fputc(guid[i], stdout);
+		else
+			break;
+	}
+	if (tstamp) {
+		__u32 raw;
+		time_t then;
+		char tbuf[100];
+		struct tm *tm;
+
+		/* 'guid' is a byte array with no alignment guarantee: copy
+		 * the field out instead of reading it through a __u32 pointer.
+		 */
+		memcpy(&raw, guid+16, sizeof(raw));
+		then = __be32_to_cpu(raw) + DECADE;
+		/* localtime() may fail; print no time stamp in that case */
+		tm = localtime(&then);
+		if (tm != NULL &&
+		    strftime(tbuf, sizeof(tbuf), " %D %T", tm) != 0)
+			fputs(tbuf, stdout);
+	}
+	printf(")");
+}
+
+/*
+ * Print the details of virtual disk number 'n', identified by 'guid',
+ * from every config record in sb->conflist that has a good CRC and
+ * that GUID: member disks with their offsets, chunk size, RAID level,
+ * secondary level information and sizes.
+ */
+static void examine_vd(int n, struct ddf_super *sb, char *guid)
+{
+	const int crl = sb->conf_rec_len;
+	/* the physical disk table does not change while we print */
+	const int n_pd = be16_to_cpu(sb->phys->max_pdes);
+	struct vcl *entry;
+
+	for (entry = sb->conflist ; entry != NULL ; entry = entry->next) {
+		struct vd_config *vc = &entry->conf;
+		unsigned int slot;
+
+		if (!be32_eq(calc_crc(vc, crl*512), vc->crc))
+			continue;
+		if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
+			continue;
+
+		/* Ok, we know about this VD, let's give more details */
+		printf(" Raid Devices[%d] : %d (", n,
+		       be16_to_cpu(vc->prim_elmnt_count));
+		for (slot = 0; slot < be16_to_cpu(vc->prim_elmnt_count); slot++) {
+			int pd = 0;
+
+			/* find the phys entry this slot refers to */
+			while (pd < n_pd &&
+			       !be32_eq(vc->phys_refnum[slot],
+					sb->phys->entries[pd].refnum))
+				pd++;
+
+			if (slot)
+				printf(" ");
+			if (pd < n_pd)
+				printf("%d", pd);
+			else
+				printf("--");
+			printf("@%lluK", (unsigned long long) be64_to_cpu(LBA_OFFSET(sb, vc)[slot])/2);
+		}
+		printf(")\n");
+
+		if (vc->chunk_shift != 255)
+			printf(" Chunk Size[%d] : %d sectors\n", n,
+			       1 << vc->chunk_shift);
+		printf(" Raid Level[%d] : %s\n", n,
+		       map_num(ddf_level, vc->prl)?:"-unknown-");
+		if (vc->sec_elmnt_count != 1) {
+			printf(" Secondary Position[%d] : %d of %d\n", n,
+			       vc->sec_elmnt_seq, vc->sec_elmnt_count);
+			printf(" Secondary Level[%d] : %s\n", n,
+			       map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
+		}
+		printf(" Device Size[%d] : %llu\n", n,
+		       be64_to_cpu(vc->blocks)/2);
+		printf(" Array Size[%d] : %llu\n", n,
+		       be64_to_cpu(vc->array_blocks)/2);
+	}
+}
+
+/*
+ * Print the populated-VD count followed by one block of information
+ * (GUID, unit, state, init state, access, name and the config details
+ * from examine_vd()) for every virtual disk entry whose GUID is not
+ * all 0xff.
+ */
+static void examine_vds(struct ddf_super *sb)
+{
+	const int populated = be16_to_cpu(sb->virt->populated_vdes);
+	unsigned int idx;
+
+	printf(" Virtual Disks : %d\n", populated);
+
+	for (idx = 0; idx < be16_to_cpu(sb->virt->max_vdes); idx++) {
+		struct virtual_entry *ve = &sb->virt->entries[idx];
+		const char *morphing, *consistency;
+
+		if (all_ff(ve->guid))
+			continue;
+
+		morphing = (ve->state & DDF_state_morphing) ? "Morphing, ": "";
+		consistency = (ve->state & DDF_state_inconsistent)
+			? "Not Consistent" : "Consistent";
+
+		printf("\n");
+		printf(" VD GUID[%d] : ", idx);
+		print_guid(ve->guid, 1);
+		printf("\n");
+		printf(" unit[%d] : %d\n", idx, be16_to_cpu(ve->unit));
+		printf(" state[%d] : %s, %s%s\n", idx,
+		       map_num(ddf_state, ve->state & 7),
+		       morphing, consistency);
+		printf(" init state[%d] : %s\n", idx,
+		       map_num(ddf_init_state, ve->init_state&DDF_initstate_mask));
+		printf(" access[%d] : %s\n", idx,
+		       map_num(ddf_access, (ve->init_state & DDF_access_mask) >> 6));
+		printf(" Name[%d] : %.16s\n", idx, ve->name);
+		examine_vd(idx, sb, ve->guid);
+	}
+
+	if (populated)
+		printf("\n");
+}
+/*
+ * Print the physical disk table: one line per phys entry in use with
+ * its index, reference number, configured size, device name (when a
+ * loaded device carries the same refnum and map_dev() knows a name for
+ * it) and decoded type/state bits.  Loaded devices that were matched
+ * to no table entry are listed separately afterwards.
+ */
+static void examine_pds(struct ddf_super *sb)
+{
+ int cnt = be16_to_cpu(sb->phys->max_pdes);
+ int i;
+ struct dl *dl;
+ int unlisted = 0; /* becomes 1 once the "not in metadata" heading was printed */
+ printf(" Physical Disks : %d\n", cnt);
+ printf(" Number RefNo Size Device Type/State\n");
+
+ for (dl = sb->dlist; dl; dl = dl->next)
+ dl->displayed = 0;
+
+ for (i=0 ; i<cnt ; i++) {
+ struct phys_disk_entry *pd = &sb->phys->entries[i];
+ int type = be16_to_cpu(pd->type);
+ int state = be16_to_cpu(pd->state);
+
+ if (be32_to_cpu(pd->refnum) == 0xffffffff)
+ /* Not in use */
+ continue;
+ //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
+ //printf("\n");
+ printf(" %3d %08x ", i,
+ be32_to_cpu(pd->refnum));
+ printf("%8lluK ",
+ be64_to_cpu(pd->config_size)>>1);
+ for (dl = sb->dlist; dl ; dl = dl->next) { /* stops at the first matching device that has a name */
+ if (be32_eq(dl->disk.refnum, pd->refnum)) {
+ char *dv = map_dev(dl->major, dl->minor, 0);
+ if (dv) {
+ printf("%-15s", dv);
+ break;
+ }
+ }
+ }
+ if (!dl) /* loop ran to the end: nothing was printed for the device column */
+ printf("%15s","");
+ else
+ dl->displayed = 1;
+ printf(" %s%s%s%s%s",
+ (type&2) ? "active":"",
+ (type&4) ? "Global-Spare":"",
+ (type&8) ? "spare" : "",
+ (type&16)? ", foreign" : "",
+ (type&32)? "pass-through" : "");
+ if (state & DDF_Failed)
+ /* This over-rides these three */
+ state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
+ printf("/%s%s%s%s%s%s%s",
+ (state&1)? "Online": "Offline",
+ (state&2)? ", Failed": "",
+ (state&4)? ", Rebuilding": "",
+ (state&8)? ", in-transition": "",
+ (state&16)? ", SMART-errors": "",
+ (state&32)? ", Unrecovered-Read-Errors": "",
+ (state&64)? ", Missing" : "");
+ printf("\n");
+ }
+ for (dl = sb->dlist; dl; dl = dl->next) { /* second pass: devices the table loop did not mark */
+ char *dv;
+ if (dl->displayed)
+ continue;
+ if (!unlisted)
+ printf(" Physical disks not in metadata!:\n");
+ unlisted = 1;
+ dv = map_dev(dl->major, dl->minor, 0);
+ printf(" %08x %s\n", be32_to_cpu(dl->disk.refnum),
+ dv ? dv : "-unknown-");
+ }
+ if (unlisted)
+ printf("\n");
+}
+
+/*
+ * Print the full examine report for a DDF container: anchor magic and
+ * revision, controller and container GUIDs, the sequence number of
+ * the active header, whether the secondary header carries the DDF
+ * magic, then the virtual and physical disk tables.
+ * 'homehost' is not used.
+ */
+static void examine_super_ddf(struct supertype *st, char *homehost)
+{
+	struct ddf_super *sb = st->sb;
+	const char *redundant;
+
+	printf(" Magic : %08x\n", be32_to_cpu(sb->anchor.magic));
+	printf(" Version : %.8s\n", sb->anchor.revision);
+
+	printf("Controller GUID : ");
+	print_guid(sb->controller.guid, 0);
+	printf("\n");
+
+	printf(" Container GUID : ");
+	print_guid(sb->anchor.guid, 1);
+	printf("\n");
+
+	printf(" Seq : %08x\n", be32_to_cpu(sb->active->seq));
+
+	if (be32_eq(sb->secondary.magic, DDF_HEADER_MAGIC))
+		redundant = "yes";
+	else
+		redundant = "no";
+	printf(" Redundant hdr : %s\n", redundant);
+
+	examine_vds(sb);
+	examine_pds(sb);
+}
+
+/*
+ * Figure out the VD number for this supertype.
+ * Returns DDF_CONTAINER for the container itself,
+ * and DDF_NOTFOUND on error.
+ *
+ * For a subarray the number is the decimal text after the second '/'
+ * of the sysfs text_version of st->devnm; it must be non-empty, have
+ * no trailing characters and be below max_vd_entries.
+ */
+static unsigned int get_vd_num_of_subarray(struct supertype *st)
+{
+	struct ddf_super *ddf = st->sb;
+	struct mdinfo *sra;
+	char *sub, *end;
+	unsigned long num;
+	unsigned int vcnum = DDF_NOTFOUND;
+
+	if (*st->container_devnm == '\0')
+		return DDF_CONTAINER;
+
+	sra = sysfs_read(-1, st->devnm, GET_VERSION);
+	if (!sra)
+		return DDF_NOTFOUND;
+	if (sra->array.major_version != -1 ||
+	    sra->array.minor_version != -2 ||
+	    !is_subarray(sra->text_version))
+		goto out;
+
+	sub = strchr(sra->text_version + 1, '/');
+	if (sub == NULL)
+		goto out;
+
+	num = strtoul(sub + 1, &end, 10);
+	/* end == sub + 1 means no digit followed the '/' at all */
+	if (end == sub + 1 || *end != '\0' ||
+	    num >= be16_to_cpu(ddf->active->max_vd_entries))
+		goto out;
+	vcnum = num;
+
+out:
+	/* the sysfs snapshot is ours to release on every path */
+	sysfs_free(sra);
+	return vcnum;
+}
+
+/*
+ * Print the generic one-line ARRAY entry for a DDF container,
+ * identified by its UUID only.  'verbose' is not used.
+ */
+static void brief_examine_super_ddf(struct supertype *st, int verbose)
+{
+	struct mdinfo container;
+	char uuid_text[64];
+
+	getinfo_super_ddf(st, &container, NULL);
+	fname_from_uuid(st, &container, uuid_text, ':');
+
+	/* the first 5 characters of the generated name are not printed */
+	printf("ARRAY metadata=ddf UUID=%s\n", uuid_text + 5);
+}
+
+/*
+ * Print one ARRAY line per virtual disk, identifying the container by
+ * UUID and the member by index and UUID.  Entries whose GUID is all
+ * 0xff are skipped.  'verbose' is not used.
+ *
+ * ddf->currentconf is pointed at a temporary vcl while each member
+ * UUID is computed and is put back to its previous value before
+ * returning.
+ */
+static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
+{
+	struct ddf_super *ddf = st->sb;
+	struct vcl *saved_conf = ddf->currentconf;
+	struct mdinfo info;
+	unsigned int i;
+	char nbuf[64];
+	getinfo_super_ddf(st, &info, NULL);
+	fname_from_uuid(st, &info, nbuf, ':');
+
+	for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
+		struct virtual_entry *ve = &ddf->virt->entries[i];
+		struct vcl vcl;
+		char nbuf1[64];
+		char namebuf[17];
+		if (all_ff(ve->guid))
+			continue;
+		/* only the guid and vcnum of this stand-in are filled in */
+		memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
+		ddf->currentconf =&vcl;
+		vcl.vcnum = i;
+		uuid_from_super_ddf(st, info.uuid);
+		fname_from_uuid(st, &info, nbuf1, ':');
+		_ddf_array_name(namebuf, ddf, i);
+		printf("ARRAY%s%s container=%s member=%d UUID=%s\n",
+		       namebuf[0] == '\0' ? "" : " /dev/md/", namebuf,
+		       nbuf+5, i, nbuf1+5);
+	}
+
+	/* 'vcl' lives on this function's stack; never leave the
+	 * superblock pointing at it once we return.
+	 */
+	ddf->currentconf = saved_conf;
+}
+
+/*
+ * Print the KEY=value form of the examine output for a DDF container:
+ * metadata type, level, UUID and the used_pdes count of the physical
+ * disk section.
+ */
+static void export_examine_super_ddf(struct supertype *st)
+{
+	struct ddf_super *ddf = st->sb;
+	struct mdinfo container;
+	char uuid_text[64];
+
+	getinfo_super_ddf(st, &container, NULL);
+	fname_from_uuid(st, &container, uuid_text, ':');
+
+	printf("MD_METADATA=ddf\n");
+	printf("MD_LEVEL=container\n");
+	printf("MD_UUID=%s\n", uuid_text+5);
+	printf("MD_DEVICES=%u\n",
+	       be16_to_cpu(ddf->phys->used_pdes));
+}
+
+/*
+ * Copy the DDF metadata from the device open on 'from' to the same
+ * byte offsets of the device open on 'to'.  'st' is not used.
+ *
+ * The meta consists of an anchor, a primary, and a secondary.
+ * This all lives at the end of the device.
+ * So it is easiest to find the earliest of primary and
+ * secondary, and copy everything from there.
+ *
+ * Anchor is 512 from end.  It contains primary_lba and secondary_lba
+ * we choose one of those
+ *
+ * Returns 0 on success, 1 on any failure (allocation, I/O, or an
+ * anchor with bad magic, CRC or revision).
+ */
+static int copy_metadata_ddf(struct supertype *st, int from, int to)
+{
+	void *buf;
+	unsigned long long dsize, offset;
+	/* byte counts are 64-bit: the distance from the earliest header
+	 * to the end of the device is not guaranteed to fit in an int
+	 */
+	unsigned long long bytes;
+	unsigned long long written = 0;
+	struct ddf_header *ddf;
+
+	if (posix_memalign(&buf, 4096, 4096) != 0)
+		return 1;
+
+	if (!get_dev_size(from, NULL, &dsize))
+		goto err;
+
+	if (lseek64(from, dsize-512, 0) < 0)
+		goto err;
+	if (read(from, buf, 512) != 512)
+		goto err;
+	ddf = buf;
+	if (!be32_eq(ddf->magic, DDF_HEADER_MAGIC) ||
+	    !be32_eq(calc_crc(ddf, 512), ddf->crc) ||
+	    (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
+	     memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
+		goto err;
+
+	/* start at the anchor and move back to the earliest header */
+	offset = dsize - 512;
+	if ((be64_to_cpu(ddf->primary_lba) << 9) < offset)
+		offset = be64_to_cpu(ddf->primary_lba) << 9;
+	if ((be64_to_cpu(ddf->secondary_lba) << 9) < offset)
+		offset = be64_to_cpu(ddf->secondary_lba) << 9;
+
+	bytes = dsize - offset;
+
+	if (lseek64(from, offset, 0) < 0 ||
+	    lseek64(to, offset, 0) < 0)
+		goto err;
+	/* 'buf' is reused from here on; 'ddf' must not be read again */
+	while (written < bytes) {
+		int n = 4096;
+		if (bytes - written < 4096)
+			n = (int)(bytes - written);
+		if (read(from, buf, n) != n)
+			goto err;
+		if (write(to, buf, n) != n)
+			goto err;
+		written += n;
+	}
+	free(buf);
+	return 0;
+err:
+	free(buf);
+	return 1;
+}
+
+/*
+ * Print the container part of the detail output: container GUID,
+ * sequence number of the active header and the populated VD count.
+ * 'homehost' is not used.
+ */
+static void detail_super_ddf(struct supertype *st, char *homehost)
+{
+	struct ddf_super *sb = st->sb;
+	const int populated = be16_to_cpu(sb->virt->populated_vdes);
+
+	printf(" Container GUID : ");
+	print_guid(sb->anchor.guid, 1);
+	printf("\n");
+
+	printf(" Seq : %08x\n", be32_to_cpu(sb->active->seq));
+	printf(" Virtual Disks : %d\n", populated);
+	printf("\n");
+}
+#endif
+
+/* Vendor prefixes (first 8 bytes of the controller GUID) of controllers
+ * whose virtual-disk GUIDs are not stable enough to identify an array.
+ * volume_id_is_reliable() compares exactly 8 bytes, so every entry MUST
+ * be at least 8 characters long: pad short vendor names with spaces.
+ */
+static const char *vendors_with_variable_volume_UUID[] = {
+	"LSI     ",
+};
+
+/* Return 0 if the controller GUID of 'ddf' starts with one of the vendor
+ * prefixes listed above (the VD GUID must then not be used as an
+ * identifier), and 1 otherwise.
+ */
+static int volume_id_is_reliable(const struct ddf_super *ddf)
+{
+	int n = ARRAY_SIZE(vendors_with_variable_volume_UUID);
+	int i;
+	for (i = 0; i < n; i++)
+		if (!memcmp(ddf->controller.guid,
+			    vendors_with_variable_volume_UUID[i], 8))
+			return 0;
+	return 1;
+}
+
+/* Compute the 4-word uuid of virtual disk number 'vcnum' in 'ddf'.
+ *
+ * When the VD GUID is considered reliable the uuid is derived from it
+ * directly.  Otherwise a pseudo-UUID is built: the first 16 bytes of the
+ * SHA1 of (header GUID, 16-byte VD name, 16-bit vcnum).
+ */
+static void uuid_of_ddf_subarray(const struct ddf_super *ddf,
+				 unsigned int vcnum, int uuid[4])
+{
+	char buf[DDF_GUID_LEN+18], sha[20], *p;
+	struct sha1_ctx ctx;
+	__u16 vcnum16;
+
+	if (volume_id_is_reliable(ddf)) {
+		uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, uuid);
+		return;
+	}
+	/*
+	 * Some fake RAID BIOSes (in particular, LSI ones) change the
+	 * VD GUID at every boot. These GUIDs are not suitable for
+	 * identifying an array. Luckily the header GUID appears to
+	 * remain constant.
+	 * We construct a pseudo-UUID from the header GUID and those
+	 * properties of the subarray that we expect to remain constant.
+	 */
+	memset(buf, 0, sizeof(buf));
+	p = buf;
+	memcpy(p, ddf->anchor.guid, DDF_GUID_LEN);
+	p += DDF_GUID_LEN;
+	memcpy(p, ddf->virt->entries[vcnum].name, 16);
+	p += 16;
+	/* Store the low 16 bits of vcnum in host byte order.  memcpy()
+	 * produces the same bytes as a direct __u16 store would, without
+	 * requiring 'p' to be suitably aligned for a __u16 access.
+	 */
+	vcnum16 = vcnum;
+	memcpy(p, &vcnum16, sizeof(vcnum16));
+	sha1_init_ctx(&ctx);
+	sha1_process_bytes(buf, sizeof(buf), &ctx);
+	sha1_finish_ctx(&ctx, sha);
+	memcpy(uuid, sha, 4*4);
+}
+
+#ifndef MDASSEMBLE
+static void brief_detail_super_ddf(struct supertype *st)
+{
+	/* Print " UUID=..." for whatever 'st' refers to: the container
+	 * itself, or one of its subarrays.  Nothing is printed when the
+	 * subarray cannot be found.
+	 */
+	struct ddf_super *ddf = st->sb;
+	unsigned int vcnum = get_vd_num_of_subarray(st);
+	struct mdinfo info;
+	char nbuf[64];
+
+	if (vcnum == DDF_NOTFOUND)
+		return;
+
+	if (vcnum == DDF_CONTAINER)
+		uuid_from_super_ddf(st, info.uuid);
+	else
+		uuid_of_ddf_subarray(ddf, vcnum, info.uuid);
+
+	fname_from_uuid(st, &info, nbuf,':');
+	/* the first 5 characters of the generated name are not printed */
+	printf(" UUID=%s", nbuf + 5);
+}
+#endif
+
+static int match_home_ddf(struct supertype *st, char *homehost)
+{
+	/* The metadata belongs to 'this' host when the controller GUID
+	 * starts with our own vendor id (T10) and vendor_data holds
+	 * exactly the NUL-terminated homehost string.
+	 * It would be nice if we could test against a controller found
+	 * in /sys or somewhere...
+	 *
+	 * Returns 1 on a match and 0 otherwise (including homehost == NULL).
+	 */
+	struct ddf_super *ddf = st->sb;
+	unsigned int hlen;
+
+	if (!homehost)
+		return 0;
+	hlen = strlen(homehost);
+
+	if (memcmp(ddf->controller.guid, T10, 8) != 0)
+		return 0;
+	/* must leave room for the terminating NUL checked below */
+	if (hlen >= sizeof(ddf->controller.vendor_data))
+		return 0;
+	if (memcmp(ddf->controller.vendor_data, homehost, hlen) != 0)
+		return 0;
+	return ddf->controller.vendor_data[hlen] == 0;
+}
+
+#ifndef MDASSEMBLE
+static int find_index_in_bvd(const struct ddf_super *ddf,
+			     const struct vd_config *conf, unsigned int n,
+			     unsigned int *n_bvd)
+{
+	/*
+	 * Locate the n-th used slot of conf->phys_refnum[].
+	 * Slots holding 0xffffffff are unused; they may appear anywhere
+	 * between used slots and are not counted.
+	 * On success the slot index is stored in *n_bvd and 1 is returned,
+	 * otherwise 0 is returned and *n_bvd is left alone.
+	 */
+	unsigned int nprim = be16_to_cpu(conf->prim_elmnt_count);
+	unsigned int slot, used = 0;
+
+	for (slot = 0; slot < ddf->mppe && used < nprim; slot++) {
+		if (be32_to_cpu(conf->phys_refnum[slot]) == 0xffffffff)
+			continue;
+		if (used == n) {
+			*n_bvd = slot;
+			return 1;
+		}
+		used++;
+	}
+	dprintf("%s: couldn't find BVD member %u (total %u)\n",
+		__func__, n, nprim);
+	return 0;
+}
+
+/* Given a member array instance number, and a raid disk within that instance,
+ * find the vd_config structure. The offset of the given disk in the phys_refnum
+ * table is returned in n_bvd.
+ * For two-level members with a secondary raid level the vd_config for
+ * the appropriate BVD is returned: raid disk 'n' lives in the BVD whose
+ * sec_elmnt_seq is n / prim_elmnt_count, at position n % prim_elmnt_count.
+ * On success *vcl is set to the matching list entry.
+ * Returns NULL (after reporting an error) if the disk cannot be found.
+ */
+static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
+				   unsigned int n,
+				   unsigned int *n_bvd, struct vcl **vcl)
+{
+	struct vcl *v;
+
+	for (v = ddf->conflist; v; v = v->next) {
+		unsigned int nprim, nsec, ibvd = 0;
+		struct vd_config *conf;
+		if (inst != v->vcnum)
+			continue;
+		conf = &v->conf;
+		if (conf->sec_elmnt_count == 1) {
+			/* single-level array: only one BVD to look in */
+			if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
+				*vcl = v;
+				return conf;
+			} else
+				goto bad;
+		}
+		if (v->other_bvds == NULL) {
+			pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
+			       __func__, conf->sec_elmnt_count);
+			goto bad;
+		}
+		nprim = be16_to_cpu(conf->prim_elmnt_count);
+		if (nprim == 0)
+			/* corrupt record: avoid dividing by zero */
+			goto bad;
+		nsec = n / nprim;
+		if (conf->sec_elmnt_seq != nsec) {
+			for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
+				if (v->other_bvds[ibvd-1]->sec_elmnt_seq
+				    == nsec)
+					break;
+			}
+			if (ibvd == conf->sec_elmnt_count)
+				goto bad;
+			conf = v->other_bvds[ibvd-1];
+		}
+		/* The position inside the BVD is the remainder of the
+		 * division by the number of primary elements (the same
+		 * convention the other users of other_bvds follow), not
+		 * an offset scaled by the number of secondary elements.
+		 */
+		if (!find_index_in_bvd(ddf, conf, n % nprim, n_bvd))
+			goto bad;
+		dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
+			, __func__, n, *n_bvd, ibvd, inst);
+		*vcl = v;
+		return conf;
+	}
+bad:
+	pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
+	return NULL;
+}
+#endif
+
+static int find_phys(const struct ddf_super *ddf, be32 phys_refnum)
+{
+	/* Return the index of the phys_disk entry whose refnum equals
+	 * 'phys_refnum', or -1 when no entry matches.
+	 */
+	unsigned int limit = be16_to_cpu(ddf->phys->max_pdes);
+	unsigned int idx;
+
+	for (idx = 0; idx < limit; idx++) {
+		if (!be32_eq(ddf->phys->entries[idx].refnum, phys_refnum))
+			continue;
+		return idx;
+	}
+	return -1;
+}
+
+static void uuid_from_ddf_guid(const char *guid, int uuid[4])
+{
+	/* uuid[] receives the first 16 bytes of the SHA1 digest of the
+	 * DDF_GUID_LEN-byte 'guid'.
+	 */
+	struct sha1_ctx sha;
+	char digest[20];
+
+	sha1_init_ctx(&sha);
+	sha1_process_bytes(guid, DDF_GUID_LEN, &sha);
+	sha1_finish_ctx(&sha, digest);
+
+	memcpy(uuid, digest, 4*4);
+}
+
+static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
+{
+	/* The uuid returned here is used for:
+	 *  - the uuid put into a bitmap file (Create, Grow)
+	 *  - the uuid in the backup header when saving a critical
+	 *    section (Grow)
+	 *  - comparing uuids when re-adding a device into an array
+	 *    In these cases the uuid required is that of the data-array,
+	 *    not the device-set.
+	 *  - recognising the same set when adding a missing device back
+	 *    to an array.  This is a uuid for the device-set.
+	 *
+	 * For each of these a truncated or hashed uuid is good enough,
+	 * as long as everyone agrees.
+	 * In the case of SVD we assume the BVD is of interest,
+	 * though that might be the case if a bitmap were made for
+	 * a mirrored SVD - worry about that later.
+	 *
+	 * With a current subarray selected the subarray uuid is returned;
+	 * otherwise the uuid is derived from the anchor (container) GUID.
+	 */
+	struct ddf_super *ddf = st->sb;
+	struct vcl *current = ddf->currentconf;
+
+	if (!current) {
+		uuid_from_ddf_guid(ddf->anchor.guid, uuid);
+		return;
+	}
+	uuid_of_ddf_subarray(ddf, current->vcnum, uuid);
+}
+
+/* Fill in 'info' for the object 'st' currently refers to.
+ *
+ * If a subarray is selected (ddf->currentconf) the work is delegated to
+ * getinfo_super_ddf_bvd().  Otherwise 'info' describes the container.
+ *
+ * 'map', when not NULL, receives one byte per raid disk: 1 if the
+ * corresponding physical disk entry exists and is not failed, else 0.
+ * The number of bytes written is the value info->array.raid_disks held
+ * on entry (it is sampled before 'info' is cleared).
+ */
+static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
+{
+	struct ddf_super *ddf = st->sb;
+	int map_disks = info->array.raid_disks;
+	__u32 *cptr;
+
+	if (ddf->currentconf) {
+		getinfo_super_ddf_bvd(st, info, map);
+		return;
+	}
+	memset(info, 0, sizeof(*info));
+
+	info->array.raid_disks = be16_to_cpu(ddf->phys->used_pdes);
+	info->array.level = LEVEL_CONTAINER;
+	info->array.layout = 0;
+	info->array.md_minor = -1;
+	/* bytes 16..19 of the GUID hold the creation timestamp */
+	cptr = (__u32 *)(ddf->anchor.guid + 16);
+	info->array.ctime = DECADE + __be32_to_cpu(*cptr);
+
+	info->array.chunk_size = 0;
+	info->container_enough = 1;
+
+	info->disk.major = 0;
+	info->disk.minor = 0;
+	if (ddf->dlist) {
+		struct phys_disk_entry *pde = NULL;
+		info->disk.number = be32_to_cpu(ddf->dlist->disk.refnum);
+		info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
+
+		/* find_phys() returns -1 when the refnum is unknown; only
+		 * index the phys table with a valid result.  Without an
+		 * entry, data_offset and component_size stay 0 and the
+		 * disk is reported as faulty below.
+		 */
+		if (info->disk.raid_disk >= 0) {
+			pde = ddf->phys->entries + info->disk.raid_disk;
+			info->data_offset = be64_to_cpu(pde->config_size);
+			info->component_size = ddf->dlist->size -
+				info->data_offset;
+		}
+		if (pde &&
+		    !(be16_to_cpu(pde->state) & DDF_Failed) &&
+		    !(be16_to_cpu(pde->state) & DDF_Missing))
+			info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
+		else
+			info->disk.state = 1 << MD_DISK_FAULTY;
+
+	} else {
+		/* There should always be a dlist, but just in case...*/
+		info->disk.number = -1;
+		info->disk.raid_disk = -1;
+		info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
+	}
+	info->events = be32_to_cpu(ddf->active->seq);
+	info->array.utime = DECADE + be32_to_cpu(ddf->active->timestamp);
+
+	info->recovery_start = MaxSector;
+	info->reshape_active = 0;
+	info->recovery_blocked = 0;
+	info->name[0] = 0;
+
+	info->array.major_version = -1;
+	info->array.minor_version = -2;
+	strcpy(info->text_version, "ddf");
+	info->safe_mode_delay = 0;
+
+	uuid_from_super_ddf(st, info->uuid);
+
+	if (map) {
+		int i, e = 0;
+		int max = be16_to_cpu(ddf->phys->max_pdes);
+		/* 'e' walks the phys table, skipping unused entries
+		 * (refnum 0xffffffff), in step with map index 'i'.
+		 */
+		for (i = e = 0 ; i < map_disks ; i++, e++) {
+			while (e < max &&
+			       be32_to_cpu(ddf->phys->entries[e].refnum) == 0xffffffff)
+				e++;
+			if (i < info->array.raid_disks && e < max &&
+			    !(be16_to_cpu(ddf->phys->entries[e].state)
+			      & DDF_Failed))
+				map[i] = 1;
+			else
+				map[i] = 0;
+		}
+	}
+}
+
+/* Copy the 16-byte name of virtual disk 'i' into 'name' as a C string.
+ * Every space in the name is turned into a NUL, so the result ends at
+ * the first space.  'name' must have room for at least 17 bytes!
+ */
+static void _ddf_array_name(char *name, const struct ddf_super *ddf, int i)
+{
+	char *c;
+
+	memcpy(name, ddf->virt->entries[i].name, 16);
+	name[16] = 0;
+	for (c = name; c < name + 16; c++) {
+		if (*c == ' ')
+			*c = 0;
+	}
+}
+
+/* Fill in 'info' for the currently selected subarray (ddf->currentconf)
+ * and the currently selected device in it (ddf->currentdev).
+ *
+ * 'map', when not NULL, receives one byte per raid disk: 1 if the disk
+ * in that slot is known, Online and not Failed, else 0.  The number of
+ * bytes written is the value info->array.raid_disks held on entry (it is
+ * sampled before 'info' is cleared).
+ *
+ * If the DDF layout cannot be translated, 'info' is left zeroed.
+ */
+static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
+{
+	struct ddf_super *ddf = st->sb;
+	struct vcl *vc = ddf->currentconf;
+	int cd = ddf->currentdev;
+	int n_prim;
+	int j;
+	struct dl *dl = NULL;
+	int map_disks = info->array.raid_disks;
+	__u32 *cptr;
+	struct vd_config *conf;
+
+	memset(info, 0, sizeof(*info));
+	if (layout_ddf2md(&vc->conf, &info->array) == -1)
+		return;
+	info->array.md_minor = -1;
+	/* bytes 16..19 of the GUID hold the creation timestamp */
+	cptr = (__u32 *)(vc->conf.guid + 16);
+	info->array.ctime = DECADE + __be32_to_cpu(*cptr);
+	info->array.utime = DECADE + be32_to_cpu(vc->conf.timestamp);
+	info->array.chunk_size = 512 << vc->conf.chunk_shift;
+	info->custom_array_size = be64_to_cpu(vc->conf.array_blocks);
+
+	/* For a two-level array, switch to the BVD that holds device 'cd'
+	 * and make 'cd' relative to that BVD.
+	 */
+	conf = &vc->conf;
+	n_prim = be16_to_cpu(conf->prim_elmnt_count);
+	if (conf->sec_elmnt_count > 1 && cd >= n_prim) {
+		int ibvd = cd / n_prim - 1;
+		cd %= n_prim;
+		conf = vc->other_bvds[ibvd];
+	}
+
+	if (cd >= 0 && (unsigned)cd < ddf->mppe) {
+		info->data_offset =
+			be64_to_cpu(LBA_OFFSET(ddf, conf)[cd]);
+		if (vc->block_sizes)
+			info->component_size = vc->block_sizes[cd];
+		else
+			info->component_size = be64_to_cpu(conf->blocks);
+
+		for (dl = ddf->dlist; dl ; dl = dl->next)
+			if (be32_eq(dl->disk.refnum, conf->phys_refnum[cd]))
+				break;
+	}
+
+	info->disk.major = 0;
+	info->disk.minor = 0;
+	info->disk.state = 0;
+	if (dl && dl->pdnum >= 0) {
+		info->disk.major = dl->major;
+		info->disk.minor = dl->minor;
+		info->disk.raid_disk = cd + conf->sec_elmnt_seq
+			* be16_to_cpu(conf->prim_elmnt_count);
+		info->disk.number = dl->pdnum;
+		info->disk.state = 0;
+		if (info->disk.number >= 0 &&
+		    (be16_to_cpu(ddf->phys->entries[info->disk.number].state) & DDF_Online) &&
+		    !(be16_to_cpu(ddf->phys->entries[info->disk.number].state) & DDF_Failed))
+			info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
+		info->events = be32_to_cpu(ddf->active->seq);
+	}
+
+	info->container_member = ddf->currentconf->vcnum;
+
+	info->recovery_start = MaxSector;
+	info->resync_start = 0;
+	info->reshape_active = 0;
+	info->recovery_blocked = 0;
+	/* A consistent, fully initialised VD needs no resync */
+	if (!(ddf->virt->entries[info->container_member].state
+	      & DDF_state_inconsistent) &&
+	    (ddf->virt->entries[info->container_member].init_state
+	     & DDF_initstate_mask)
+	    == DDF_init_full)
+		info->resync_start = MaxSector;
+
+	uuid_from_super_ddf(st, info->uuid);
+
+	info->array.major_version = -1;
+	info->array.minor_version = -2;
+	sprintf(info->text_version, "/%s/%d",
+		st->container_devnm,
+		info->container_member);
+	info->safe_mode_delay = DDF_SAFE_MODE_DELAY;
+
+	_ddf_array_name(info->name, ddf, info->container_member);
+
+	if (map)
+		for (j = 0; j < map_disks; j++) {
+			map[j] = 0;
+			if (j < info->array.raid_disks) {
+				int i = find_phys(ddf, vc->conf.phys_refnum[j]);
+				/* 'i' indexes the physical disk table;
+				 * the map is indexed by raid disk 'j'.
+				 */
+				if (i >= 0 &&
+				    (be16_to_cpu(ddf->phys->entries[i].state)
+				     & DDF_Online) &&
+				    !(be16_to_cpu(ddf->phys->entries[i].state)
+				      & DDF_Failed))
+					map[j] = 1;
+			}
+		}
+}
+
+static int update_super_ddf(struct supertype *st, struct mdinfo *info,
+			    char *update,
+			    char *devname, int verbose,
+			    int uuid_set, char *homehost)
+{
+	/* For 'assemble' and 'force' we need to return non-zero if any
+	 * change was made. For others, the return value is ignored.
+	 * Update options are:
+	 *  force-one : This device looks a bit old but needs to be included,
+	 *        update age info appropriately.
+	 *  assemble: clear any 'faulty' flag to allow this device to
+	 *		be assembled.
+	 *  force-array: Array is degraded but being forced, mark it clean
+	 *	   if that will be needed to assemble it.
+	 *
+	 *  newdev:  not used ????
+	 *  grow:  Array has gained a new device - this is currently for
+	 *		linear only
+	 *  resync: mark as dirty so a resync will happen.
+	 *  uuid:  Change the uuid of the array to match what is given
+	 *  homehost:  update the recorded homehost
+	 *  name:  update the name - preserving the homehost
+	 *  _reshape_progress: record new reshape_progress position.
+	 *
+	 * Following are not relevant for this version:
+	 *  sparc2.2 : update from old dodgey metadata
+	 *  super-minor: change the preferred_minor number
+	 *  summaries:  update redundant counters.
+	 *
+	 * We don't need to handle "force-*" or "assemble" as there is no
+	 * need to 'trick' the kernel.  When the metadata is first updated
+	 * to activate the array, all the implied modifications will just
+	 * happen.
+	 *
+	 * Nothing is actually modified by this implementation yet:
+	 *  - "grow" (FIXME), "resync", "_reshape_progress" (reshape is not
+	 *    supported yet) and "assemble" are accepted and return 0;
+	 *  - "homehost" (which would live in controller->vendor_data when
+	 *    we are the vendor), "name" (which would live in
+	 *    virtual_entry->name) and every other update return -1.
+	 */
+	static const char *const accepted[] = {
+		"grow",
+		"resync",
+		"_reshape_progress",
+		"assemble",
+	};
+	unsigned int k;
+
+	for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
+		if (strcmp(update, accepted[k]) == 0)
+			return 0;
+
+	return -1;
+}
+
+static void make_header_guid(char *guid)
+{
+	/* Fill 'guid' with a fresh DDF Header or Virtual Disk GUID.
+	 *
+	 * 24 bytes of fiction are required:
+	 *   bytes  0..7  : 'vendor-id' - "Linux-MD"
+	 *   bytes  8..15 : controller type, here 0xDEADBEEF 0x00000000
+	 *   bytes 16..19 : timestamp (seconds since DECADE), big endian
+	 *   bytes 20..23 : random number
+	 */
+	be32 word;
+
+	/* T10 is copied first; the words written at offset 8 and beyond
+	 * overwrite anything it left there.
+	 */
+	memcpy(guid, T10, sizeof(T10));
+
+	word = cpu_to_be32(0xdeadbeef);
+	memcpy(guid+8, &word, 4);
+
+	word = cpu_to_be32(0);
+	memcpy(guid+12, &word, 4);
+
+	word = cpu_to_be32(time(0) - DECADE);
+	memcpy(guid+16, &word, 4);
+
+	word._v32 = random32();
+	memcpy(guid+20, &word, 4);
+}
+
+static unsigned int find_unused_vde(const struct ddf_super *ddf)
+{
+	/* Return the index of the first virtual disk entry whose GUID is
+	 * all 0xff (i.e. unused), or DDF_NOTFOUND if every entry is taken.
+	 */
+	unsigned int slot = 0;
+
+	while (slot < be16_to_cpu(ddf->virt->max_vdes)) {
+		if (all_ff(ddf->virt->entries[slot].guid))
+			return slot;
+		slot++;
+	}
+	return DDF_NOTFOUND;
+}
+
+static unsigned int find_vde_by_name(const struct ddf_super *ddf,
+				     const char *name)
+{
+	/* Return the index of the first used virtual disk entry whose
+	 * name field matches 'name' (compared with strncmp over the size
+	 * of the field), or DDF_NOTFOUND.  A NULL name matches nothing.
+	 */
+	unsigned int slot;
+
+	if (name == NULL)
+		return DDF_NOTFOUND;
+
+	for (slot = 0; slot < be16_to_cpu(ddf->virt->max_vdes); slot++) {
+		/* unused entries have an all-0xff GUID */
+		if (!all_ff(ddf->virt->entries[slot].guid) &&
+		    strncmp(name, ddf->virt->entries[slot].name,
+			    sizeof(ddf->virt->entries[slot].name)) == 0)
+			return slot;
+	}
+	return DDF_NOTFOUND;
+}
+
+#ifndef MDASSEMBLE
+static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
+				     const char *guid)
+{
+	/* Return the index of the virtual disk entry carrying 'guid',
+	 * or DDF_NOTFOUND.  A NULL or all-0xff guid never matches.
+	 */
+	unsigned int slot;
+
+	if (guid == NULL)
+		return DDF_NOTFOUND;
+	if (all_ff(guid))
+		return DDF_NOTFOUND;
+
+	for (slot = 0; slot < be16_to_cpu(ddf->virt->max_vdes); slot++) {
+		if (memcmp(ddf->virt->entries[slot].guid, guid,
+			   DDF_GUID_LEN) == 0)
+			return slot;
+	}
+	return DDF_NOTFOUND;
+}
+#endif
+
+/* Build a fresh in-memory DDF superblock and attach it to st->sb.
+ *
+ * If st->sb is already set, the call is forwarded to init_super_ddf_bvd()
+ * and its result is returned.
+ * Returns 1 when the superblock was fully set up.
+ * Returns 0 when an allocation fails, and also when info == NULL (in which
+ * case st->sb is left pointing at an all-zero superblock).
+ */
+static int init_super_ddf(struct supertype *st,
+ mdu_array_info_t *info,
+ unsigned long long size, char *name, char *homehost,
+ int *uuid, unsigned long long data_offset)
+{
+ /* This is primarily called by Create when creating a new array.
+ * We will then get add_to_super called for each component, and then
+ * write_init_super called to write it out to each device.
+ * For DDF, Create can create on fresh devices or on a pre-existing
+ * array.
+ * To create on a pre-existing array a different method will be called.
+ * This one is just for fresh drives.
+ *
+ * We need to create the entire 'ddf' structure which includes:
+ * DDF headers - these are easy.
+ * Controller data - a Sector describing this controller .. not that
+ * this is a controller exactly.
+ * Physical Disk Record - one entry per device, so
+ * leave plenty of space.
+ * Virtual Disk Records - again, just leave plenty of space.
+ * This just lists VDs, doesn't give details.
+ * Config records - describe the VDs that use this disk
+ * DiskData - describes 'this' device.
+ * BadBlockManagement - empty
+ * Diag Space - empty
+ * Vendor Logs - Could we put bitmaps here?
+ *
+ */
+ struct ddf_super *ddf;
+ char hostname[17];
+ int hostlen;
+ int max_phys_disks, max_virt_disks;
+ unsigned long long sector;
+ int clen;
+ int i;
+ int pdsize, vdsize;
+ struct phys_disk *pd;
+ struct virtual_disk *vd;
+
+ if (st->sb)
+ return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
+ data_offset);
+
+ if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
+ pr_err("%s could not allocate superblock\n", __func__);
+ return 0;
+ }
+ memset(ddf, 0, sizeof(*ddf));
+ st->sb = ddf;
+
+ if (info == NULL) {
+ /* zeroing superblock */
+ return 0;
+ }
+
+ /* At least 32MB *must* be reserved for the ddf. So let's just
+ * start 32MB from the end, and put the primary header there.
+ * Don't do secondary for now.
+ * We don't know exactly where that will be yet as it could be
+ * different on each device. So just set up the lengths.
+ */
+
+ ddf->anchor.magic = DDF_HEADER_MAGIC;
+ make_header_guid(ddf->anchor.guid);
+
+ memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
+ ddf->anchor.seq = cpu_to_be32(1);
+ ddf->anchor.timestamp = cpu_to_be32(time(0) - DECADE);
+ ddf->anchor.openflag = 0xFF;
+ ddf->anchor.foreignflag = 0;
+ ddf->anchor.enforcegroups = 0; /* Is this best?? */
+ ddf->anchor.pad0 = 0xff;
+ memset(ddf->anchor.pad1, 0xff, 12);
+ memset(ddf->anchor.header_ext, 0xff, 32);
+ /* Header positions are not known yet: all-ones is used as a placeholder */
+ ddf->anchor.primary_lba = cpu_to_be64(~(__u64)0);
+ ddf->anchor.secondary_lba = cpu_to_be64(~(__u64)0);
+ ddf->anchor.type = DDF_HEADER_ANCHOR;
+ memset(ddf->anchor.pad2, 0xff, 3);
+ ddf->anchor.workspace_len = cpu_to_be32(32768); /* Must be reserved */
+ /* Put this at bottom of 32M reserved.. */
+ ddf->anchor.workspace_lba = cpu_to_be64(~(__u64)0);
+ max_phys_disks = 1023; /* Should be enough, 4095 is also allowed */
+ ddf->anchor.max_pd_entries = cpu_to_be16(max_phys_disks);
+ max_virt_disks = 255; /* 15, 63, 255, 1024, 4095 are all allowed */
+ ddf->anchor.max_vd_entries = cpu_to_be16(max_virt_disks);
+ ddf->max_part = 64;
+ ddf->anchor.max_partitions = cpu_to_be16(ddf->max_part);
+ ddf->mppe = 256; /* 16, 64, 256, 1024, 4096 are all allowed */
+ /* One sector plus room for mppe entries of 4+8 bytes each */
+ ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
+ ddf->anchor.config_record_len = cpu_to_be16(ddf->conf_rec_len);
+ ddf->anchor.max_primary_element_entries = cpu_to_be16(ddf->mppe);
+ memset(ddf->anchor.pad3, 0xff, 54);
+ /* Controller section is one sector long immediately
+ * after the ddf header */
+ /* 'sector' tracks the next free sector offset as each section is laid out */
+ sector = 1;
+ ddf->anchor.controller_section_offset = cpu_to_be32(sector);
+ ddf->anchor.controller_section_length = cpu_to_be32(1);
+ sector += 1;
+
+ /* phys is 8 sectors after that */
+ pdsize = ROUND_UP(sizeof(struct phys_disk) +
+ sizeof(struct phys_disk_entry)*max_phys_disks,
+ 512);
+ /* Only these section lengths are accepted; anything else aborts */
+ switch(pdsize/512) {
+ case 2: case 8: case 32: case 128: case 512: break;
+ default: abort();
+ }
+ ddf->anchor.phys_section_offset = cpu_to_be32(sector);
+ ddf->anchor.phys_section_length =
+ cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
+ sector += pdsize/512;
+
+ /* virt is another 32 sectors */
+ vdsize = ROUND_UP(sizeof(struct virtual_disk) +
+ sizeof(struct virtual_entry) * max_virt_disks,
+ 512);
+ switch(vdsize/512) {
+ case 2: case 8: case 32: case 128: case 512: break;
+ default: abort();
+ }
+ ddf->anchor.virt_section_offset = cpu_to_be32(sector);
+ ddf->anchor.virt_section_length =
+ cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
+ sector += vdsize/512;
+
+ /* Config section: (max_part + 1) records of conf_rec_len sectors each */
+ clen = ddf->conf_rec_len * (ddf->max_part+1);
+ ddf->anchor.config_section_offset = cpu_to_be32(sector);
+ ddf->anchor.config_section_length = cpu_to_be32(clen);
+ sector += clen;
+
+ ddf->anchor.data_section_offset = cpu_to_be32(sector);
+ ddf->anchor.data_section_length = cpu_to_be32(1);
+ sector += 1;
+
+ /* The remaining sections are empty: length 0, offset all-ones */
+ ddf->anchor.bbm_section_length = cpu_to_be32(0);
+ ddf->anchor.bbm_section_offset = cpu_to_be32(0xFFFFFFFF);
+ ddf->anchor.diag_space_length = cpu_to_be32(0);
+ ddf->anchor.diag_space_offset = cpu_to_be32(0xFFFFFFFF);
+ ddf->anchor.vendor_length = cpu_to_be32(0);
+ ddf->anchor.vendor_offset = cpu_to_be32(0xFFFFFFFF);
+
+ memset(ddf->anchor.pad4, 0xff, 256);
+
+ /* Primary and secondary headers start as copies of the anchor and
+ * differ only in openflag and type.
+ */
+ memcpy(&ddf->primary, &ddf->anchor, 512);
+ memcpy(&ddf->secondary, &ddf->anchor, 512);
+
+ ddf->primary.openflag = 1; /* I guess.. */
+ ddf->primary.type = DDF_HEADER_PRIMARY;
+
+ ddf->secondary.openflag = 1; /* I guess.. */
+ ddf->secondary.type = DDF_HEADER_SECONDARY;
+
+ ddf->active = &ddf->primary;
+
+ ddf->controller.magic = DDF_CONTROLLER_MAGIC;
+
+ /* 24 more bytes of fiction required.
+ * first 8 are a 'vendor-id' - "Linux-MD"
+ * Remaining 16 are serial number.... maybe a hostname would do?
+ */
+ memcpy(ddf->controller.guid, T10, sizeof(T10));
+ gethostname(hostname, sizeof(hostname));
+ hostname[sizeof(hostname) - 1] = 0;
+ hostlen = strlen(hostname);
+ /* The hostname is right-aligned in the 24-byte GUID; the gap between
+ * the vendor id and the hostname is filled with spaces.
+ */
+ memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
+ for (i = strlen(T10) ; i+hostlen < 24; i++)
+ ddf->controller.guid[i] = ' ';
+
+ ddf->controller.type.vendor_id = cpu_to_be16(0xDEAD);
+ ddf->controller.type.device_id = cpu_to_be16(0xBEEF);
+ ddf->controller.type.sub_vendor_id = cpu_to_be16(0);
+ ddf->controller.type.sub_device_id = cpu_to_be16(0);
+ memcpy(ddf->controller.product_id, "What Is My PID??", 16);
+ memset(ddf->controller.pad, 0xff, 8);
+ memset(ddf->controller.vendor_data, 0xff, 448);
+ if (homehost && strlen(homehost) < 440)
+ strcpy((char*)ddf->controller.vendor_data, homehost);
+
+ /* NOTE(review): on this failure (and the 'vd' one below) st->sb still
+ * points at the partly initialised 'ddf' - confirm callers release it.
+ */
+ if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
+ pr_err("%s could not allocate pd\n", __func__);
+ return 0;
+ }
+ ddf->phys = pd;
+ ddf->pdsize = pdsize;
+
+ /* Entries are filled with 0xff; only the table header is zeroed */
+ memset(pd, 0xff, pdsize);
+ memset(pd, 0, sizeof(*pd));
+ pd->magic = DDF_PHYS_RECORDS_MAGIC;
+ pd->used_pdes = cpu_to_be16(0);
+ pd->max_pdes = cpu_to_be16(max_phys_disks);
+ memset(pd->pad, 0xff, 52);
+ for (i = 0; i < max_phys_disks; i++)
+ memset(pd->entries[i].guid, 0xff, DDF_GUID_LEN);
+
+ if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
+ pr_err("%s could not allocate vd\n", __func__);
+ return 0;
+ }
+ ddf->virt = vd;
+ ddf->vdsize = vdsize;
+ memset(vd, 0, vdsize);
+ vd->magic = DDF_VIRT_RECORDS_MAGIC;
+ vd->populated_vdes = cpu_to_be16(0);
+ vd->max_vdes = cpu_to_be16(max_virt_disks);
+ memset(vd->pad, 0xff, 52);
+
+ /* An all-0xff entry is what find_unused_vde() treats as free */
+ for (i=0; i<max_virt_disks; i++)
+ memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
+
+ st->sb = ddf;
+ ddf_set_updates_pending(ddf, NULL);
+ return 1;
+}
+
+/* Convert a chunk size in bytes to a shift count relative to 512-byte
+ * sectors: the zero-based position of the lowest set bit of
+ * chunksize/512, or -1 when chunksize/512 is 0.
+ */
+static int chunk_to_shift(int chunksize)
+{
+	int sectors = chunksize / 512;
+
+	return ffs(sectors) - 1;
+}
+
+#ifndef MDASSEMBLE
+/* A range of sectors on a physical device */
+struct extent {
+	unsigned long long start, size;
+};
+
+/* qsort() comparison: orders extents by ascending start */
+static int cmp_extent(const void *av, const void *bv)
+{
+	const struct extent *lhs = av;
+	const struct extent *rhs = bv;
+
+	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
+}
+
+static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
+{
+	/* Build the list of extents already in use on physical device 'dl'.
+	 *
+	 * The result is an array allocated with xmalloc(), sorted by start,
+	 * and terminated by an entry of size 0 whose start is the
+	 * config_size of the device.
+	 * NULL is returned if the device has no phys entry (pdnum < 0) or
+	 * its state is anything other than Online without Failed/Missing.
+	 */
+	struct extent *ext;
+	unsigned int part;
+	int used = 0;
+	__u16 state;
+
+	if (dl->pdnum < 0)
+		return NULL;
+
+	state = be16_to_cpu(ddf->phys->entries[dl->pdnum].state);
+	if ((state & (DDF_Online|DDF_Failed|DDF_Missing)) != DDF_Online)
+		return NULL;
+
+	/* room for one extent per partition plus the terminator */
+	ext = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
+
+	for (part = 0; part < ddf->max_part; part++) {
+		struct vcl *v = dl->vlist[part];
+		const struct vd_config *bvd;
+		unsigned int ibvd;
+
+		if (v == NULL)
+			continue;
+		if (get_pd_index_from_refnum(v, dl->disk.refnum, ddf->mppe,
+					     &bvd, &ibvd) == DDF_NOTFOUND)
+			continue;
+
+		ext[used].start = be64_to_cpu(LBA_OFFSET(ddf, bvd)[ibvd]);
+		ext[used].size = be64_to_cpu(bvd->blocks);
+		used++;
+	}
+	qsort(ext, used, sizeof(*ext), cmp_extent);
+
+	ext[used].start = be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
+	ext[used].size = 0;
+	return ext;
+}
+
+/* Find if the requested amount of space (*size sectors) is available
+ * on 'dl'.
+ * If it is, return the start of a suitable free area.
+ * If not, set *size to the largest suitable free area that was seen and
+ * return INVALID_SECTORS.
+ * If data_offset != INVALID_SECTORS, then the space must start
+ * at this location; free areas that do not contain it are ignored.
+ * If the used extents of 'dl' cannot be determined, *size is set to 0
+ * and INVALID_SECTORS is returned.
+ */
+static unsigned long long find_space(
+	struct ddf_super *ddf, struct dl *dl,
+	unsigned long long data_offset,
+	unsigned long long *size)
+{
+	struct extent *e = get_extents(ddf, dl);
+	int fixed = (data_offset != INVALID_SECTORS);
+	int i;
+	unsigned long long pos = 0;
+	unsigned long long max_size = 0;
+
+	if (!e) {
+		*size = 0;
+		return INVALID_SECTORS;
+	}
+	/* 'pos' is the start of the free gap that precedes extent e[i].
+	 * The list ends with an entry of size 0.
+	 */
+	for (i = 0; ; i++) {
+		unsigned long long esize = e[i].start - pos;
+
+		if (fixed && pos <= data_offset && e[i].start > data_offset) {
+			/* the gap contains data_offset: start there */
+			pos = data_offset;
+			esize = e[i].start - pos;
+		}
+		if (!fixed || pos == data_offset) {
+			if (esize >= *size) {
+				/* Found! */
+				free(e);
+				return pos;
+			}
+			if (esize > max_size)
+				max_size = esize;
+		}
+		/* Always step past the used extent, also for gaps that were
+		 * skipped, so that a later gap never appears to begin
+		 * before (or inside) an extent that is in use.
+		 */
+		pos = e[i].start + e[i].size;
+		if (e[i].size == 0)
+			break;
+	}
+	*size = max_size;
+	free(e);
+	return INVALID_SECTORS;
+}
+#endif
+
+/* Create a new BVD inside a pre-existing container (st->sb is already
+ * set): claim a free virtual_entry and build a matching vd_config,
+ * which becomes ddf->currentconf.
+ *
+ * Returns 1 on success.  Returns 0 if the name is already in use, no
+ * virtual entry is free, an allocation fails or the level/layout is not
+ * supported; in those cases the virtual disk table is left as it was.
+ */
+static int init_super_ddf_bvd(struct supertype *st,
+			      mdu_array_info_t *info,
+			      unsigned long long size,
+			      char *name, char *homehost,
+			      int *uuid, unsigned long long data_offset)
+{
+	struct ddf_super *ddf = st->sb;
+	unsigned int venum, i;
+	struct virtual_entry *ve;
+	struct vcl *vcl;
+	struct vd_config *vc;
+
+	if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
+		pr_err("This ddf already has an array called %s\n", name);
+		return 0;
+	}
+	venum = find_unused_vde(ddf);
+	if (venum == DDF_NOTFOUND) {
+		pr_err("Cannot find spare slot for virtual disk\n");
+		return 0;
+	}
+	ve = &ddf->virt->entries[venum];
+
+	/* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
+	 * timestamp, random number
+	 */
+	make_header_guid(ve->guid);
+	ve->unit = cpu_to_be16(info->md_minor);
+	ve->pad0 = 0xFFFF;
+	ve->guid_crc._v16 = crc32(0, (unsigned char *)ddf->anchor.guid,
+				  DDF_GUID_LEN);
+	ve->type = cpu_to_be16(0);
+	ve->state = DDF_state_degraded; /* Will be modified as devices are added */
+	if (info->state & 1) /* clean */
+		ve->init_state = DDF_init_full;
+	else
+		ve->init_state = DDF_init_not;
+
+	memset(ve->pad1, 0xff, 14);
+	memset(ve->name, ' ', 16);
+	if (name)
+		strncpy(ve->name, name, 16);
+	ddf->virt->populated_vdes =
+		cpu_to_be16(be16_to_cpu(ddf->virt->populated_vdes)+1);
+
+	/* Now create a new vd_config */
+	if (posix_memalign((void**)&vcl, 512,
+			   (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
+		pr_err("%s could not allocate vd_config\n", __func__);
+		goto release_ve;
+	}
+	vcl->vcnum = venum;
+	vcl->block_sizes = NULL; /* FIXME not for CONCAT */
+	vc = &vcl->conf;
+
+	vc->magic = DDF_VD_CONF_MAGIC;
+	memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
+	vc->timestamp = cpu_to_be32(time(0)-DECADE);
+	vc->seqnum = cpu_to_be32(1);
+	memset(vc->pad0, 0xff, 24);
+	vc->chunk_shift = chunk_to_shift(info->chunk_size);
+	if (layout_md2ddf(info, vc) == -1 ||
+	    be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
+		pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
+		       __func__, info->level, info->layout, info->raid_disks);
+		free(vcl);
+		goto release_ve;
+	}
+	vc->sec_elmnt_seq = 0;
+	if (alloc_other_bvds(ddf, vcl) != 0) {
+		pr_err("%s could not allocate other bvds\n",
+		       __func__);
+		free(vcl);
+		goto release_ve;
+	}
+	/* info->size * 2 is what is stored as the block count */
+	vc->blocks = cpu_to_be64(info->size * 2);
+	vc->array_blocks = cpu_to_be64(
+		calc_array_size(info->level, info->raid_disks, info->layout,
+				info->chunk_size, info->size*2));
+	memset(vc->pad1, 0xff, 8);
+	/* no dedicated spares: all eight references are 'unset' */
+	for (i = 0; i < 8; i++)
+		vc->spare_refs[i] = cpu_to_be32(0xffffffff);
+	memset(vc->cache_pol, 0, 8);
+	vc->bg_rate = 0x80;
+	memset(vc->pad2, 0xff, 3);
+	memset(vc->pad3, 0xff, 52);
+	memset(vc->pad4, 0xff, 192);
+	memset(vc->v0, 0xff, 32);
+	memset(vc->v1, 0xff, 32);
+	memset(vc->v2, 0xff, 16);
+	memset(vc->v3, 0xff, 16);
+	memset(vc->vendor, 0xff, 32);
+
+	/* mppe 4-byte refnums (all unset), followed by mppe 8-byte
+	 * entries which are zeroed
+	 */
+	memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
+	memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
+
+	/* The other BVDs start as copies that differ only in their
+	 * sequence number
+	 */
+	for (i = 1; i < vc->sec_elmnt_count; i++) {
+		memcpy(vcl->other_bvds[i-1], vc, ddf->conf_rec_len * 512);
+		vcl->other_bvds[i-1]->sec_elmnt_seq = i;
+	}
+
+	vcl->next = ddf->conflist;
+	ddf->conflist = vcl;
+	ddf->currentconf = vcl;
+	ddf_set_updates_pending(ddf, NULL);
+	return 1;
+
+release_ve:
+	/* Hand the virtual entry back: an all-0xff entry is what an unused
+	 * slot looks like, and it must no longer be counted as populated.
+	 */
+	memset(ve, 0xff, sizeof(*ve));
+	ddf->virt->populated_vdes =
+		cpu_to_be16(be16_to_cpu(ddf->virt->populated_vdes)-1);
+	return 0;
+}
+
+#ifndef MDASSEMBLE
+/* Add one device to the BVD that is currently being created
+ * (ddf->currentconf).  Returns nothing; when the device cannot be added
+ * the function simply returns early (see the individual checks below).
+ */
+static void add_to_super_ddf_bvd(struct supertype *st,
+ mdu_disk_info_t *dk, int fd, char *devname,
+ unsigned long long data_offset)
+{
+ /* fd and devname identify a device within the ddf container (st).
+ * dk identifies a location in the new BVD.
+ * We need to find suitable free space in that device and update
+ * the phys_refnum and lba_offset for the newly created vd_config.
+ * We might also want to update the type in the phys_disk
+ * section.
+ *
+ * Alternately: fd == -1 and we have already chosen which device to
+ * use and recorded in dlist->raid_disk;
+ */
+ struct dl *dl;
+ struct ddf_super *ddf = st->sb;
+ struct vd_config *vc;
+ unsigned int i;
+ unsigned long long blocks, pos;
+ unsigned int raid_disk = dk->raid_disk;
+
+ /* Locate the device: by pre-assigned raid disk, or by major/minor */
+ if (fd == -1) {
+ for (dl = ddf->dlist; dl ; dl = dl->next)
+ if (dl->raiddisk == dk->raid_disk)
+ break;
+ } else {
+ for (dl = ddf->dlist; dl ; dl = dl->next)
+ if (dl->major == dk->major &&
+ dl->minor == dk->minor)
+ break;
+ }
+ /* Nothing to do for unknown devices, devices without a phys entry,
+ * or disks not flagged MD_DISK_SYNC.
+ */
+ if (!dl || dl->pdnum < 0 || ! (dk->state & (1<<MD_DISK_SYNC)))
+ return;
+
+ /* For a two-level array pick the BVD holding this raid disk and make
+ * raid_disk relative to that BVD.
+ */
+ vc = &ddf->currentconf->conf;
+ if (vc->sec_elmnt_count > 1) {
+ unsigned int n = be16_to_cpu(vc->prim_elmnt_count);
+ if (raid_disk >= n)
+ vc = ddf->currentconf->other_bvds[raid_disk / n - 1];
+ raid_disk %= n;
+ }
+
+ blocks = be64_to_cpu(vc->blocks);
+ if (ddf->currentconf->block_sizes)
+ blocks = ddf->currentconf->block_sizes[dk->raid_disk];
+
+ pos = find_space(ddf, dl, data_offset, &blocks);
+ if (pos == INVALID_SECTORS)
+ return;
+
+ ddf->currentdev = dk->raid_disk;
+ vc->phys_refnum[raid_disk] = dl->disk.refnum;
+ LBA_OFFSET(ddf, vc)[raid_disk] = cpu_to_be64(pos);
+
+ /* Record the new config in the first free vlist slot of the device.
+ * NOTE(review): if no slot is free we return after phys_refnum and
+ * the LBA offset were already updated above - confirm this is intended.
+ */
+ for (i = 0; i < ddf->max_part ; i++)
+ if (dl->vlist[i] == NULL)
+ break;
+ if (i == ddf->max_part)
+ return;
+ dl->vlist[i] = ddf->currentconf;
+
+ if (fd >= 0)
+ dl->fd = fd;
+ if (devname)
+ dl->devname = devname;
+
+ /* Check if we can mark array as optimal yet */
+ i = ddf->currentconf->vcnum;
+ ddf->virt->entries[i].state =
+ (ddf->virt->entries[i].state & ~DDF_state_mask)
+ | get_svd_state(ddf, ddf->currentconf);
+ /* The disk is now part of a VD rather than a global spare */
+ be16_clear(ddf->phys->entries[dl->pdnum].type,
+ cpu_to_be16(DDF_Global_Spare));
+ be16_set(ddf->phys->entries[dl->pdnum].type,
+ cpu_to_be16(DDF_Active_in_VD));
+ dprintf("%s: added disk %d/%08x to VD %d/%s as disk %d\n",
+ __func__, dl->pdnum, be32_to_cpu(dl->disk.refnum),
+ ddf->currentconf->vcnum, guid_str(vc->guid),
+ dk->raid_disk);
+ ddf_set_updates_pending(ddf, vc);
+}
+
+static unsigned int find_unused_pde(const struct ddf_super *ddf)
+{
+	/* Return the index of the first physical disk entry whose GUID is
+	 * all 0xff (i.e. unused), or DDF_NOTFOUND if every entry is taken.
+	 */
+	unsigned int slot = 0;
+
+	while (slot < be16_to_cpu(ddf->phys->max_pdes)) {
+		if (all_ff(ddf->phys->entries[slot].guid))
+			return slot;
+		slot++;
+	}
+	return DDF_NOTFOUND;
+}
+
+/* Compute the config_size of disk 'dl' and store it (big-endian) in
+ * 'pde'.  The value is the lowest of: device size minus 32*1024*2
+ * blocks, primary_lba, secondary_lba (when not all ones) and
+ * workspace_lba (when the workspace it implies looks plausible).
+ */
+static void _set_config_size(struct phys_disk_entry *pde, const struct dl *dl)
+{
+    __u64 limit, lba;
+
+    limit = min(dl->size - 32*1024*2ULL, be64_to_cpu(dl->primary_lba));
+
+    lba = be64_to_cpu(dl->secondary_lba);
+    if (lba != ~(__u64)0)
+        limit = min(limit, lba);
+
+    /*
+     * Some vendor DDF structures interpret workspace_lba
+     * very differently than we do: sanity-check the value
+     * before letting it lower the limit.
+     */
+    lba = be64_to_cpu(dl->workspace_lba);
+    if (lba < limit) {
+        __u64 wsp = limit - lba;
+        int too_big = (wsp > 1024*1024*2ULL && wsp > dl->size / 16);
+
+        if (!too_big)
+            limit = lba;
+        else
+            pr_err("%s: %x:%x: workspace size 0x%llx too big, ignoring\n",
+                   __func__, dl->major, dl->minor,
+                   (unsigned long long)wsp);
+    }
+
+    pde->config_size = cpu_to_be64(limit);
+    dprintf("%s: %x:%x config_size %llx, DDF structure is %llx blocks\n",
+            __func__, dl->major, dl->minor,
+            (unsigned long long)limit,
+            (unsigned long long)(dl->size-limit));
+}
+
+/* Add a device to a container, either while creating it or while
+ * expanding a pre-existing container.
+ *
+ * When ddf->currentconf is set the request is handed to
+ * add_to_super_ddf_bvd() and 0 is returned.  Otherwise a free
+ * phys_disk entry is claimed and a struct dl describing the device is
+ * built: GUID with a random tail, a refnum not used by any existing
+ * entry, and workspace/primary/secondary LBAs.  The new disk is then
+ * linked into ddf->dlist, or - when st->update_tail is set - into
+ * ddf->add_list with a phys_disk record stored in dd->mdupdate.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int add_to_super_ddf(struct supertype *st,
+                            mdu_disk_info_t *dk, int fd, char *devname,
+                            unsigned long long data_offset)
+{
+    struct ddf_super *ddf = st->sb;
+    struct dl *dd;
+    time_t now;
+    struct tm *tm;
+    unsigned long long size;
+    struct phys_disk_entry *pde;
+    unsigned int n, i;
+    struct stat stb;
+    __u32 *tptr;
+
+    if (ddf->currentconf) {
+        add_to_super_ddf_bvd(st, dk, fd, devname, data_offset);
+        return 0;
+    }
+
+    /* This is device numbered dk->number.  We need to create
+     * a phys_disk entry and a more detailed disk_data entry.
+     */
+    if (fstat(fd, &stb) != 0) {
+        /* stb would otherwise be used uninitialised below */
+        pr_err("%s: cannot stat device, cannot add disk\n",
+               __func__);
+        return 1;
+    }
+    n = find_unused_pde(ddf);
+    if (n == DDF_NOTFOUND) {
+        pr_err("%s: No free slot in array, cannot add disk\n",
+               __func__);
+        return 1;
+    }
+    pde = &ddf->phys->entries[n];
+    if (!get_dev_size(fd, NULL, &size)) {
+        /* size would otherwise be used uninitialised below */
+        pr_err("%s: cannot determine device size\n", __func__);
+        return 1;
+    }
+    if (size <= 32*1024*1024) {
+        pr_err("%s: device size must be at least 32MB\n",
+               __func__);
+        return 1;
+    }
+    size >>= 9;     /* bytes -> 512-byte blocks */
+
+    if (posix_memalign((void**)&dd, 512,
+                       sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
+        pr_err("%s could not allocate buffer for new disk, aborting\n",
+               __func__);
+        return 1;
+    }
+    dd->major = major(stb.st_rdev);
+    dd->minor = minor(stb.st_rdev);
+    dd->devname = devname;
+    dd->fd = fd;
+    dd->spare = NULL;
+
+    dd->disk.magic = DDF_PHYS_DATA_MAGIC;
+    now = time(0);
+    tm = localtime(&now);
+    sprintf(dd->disk.guid, "%8s%04d%02d%02d",
+            T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
+    /* the 8 bytes following the date stamp are random */
+    tptr = (__u32 *)(dd->disk.guid + 16);
+    *tptr++ = random32();
+    *tptr = random32();
+
+    do {
+        /* Cannot be bothered finding a CRC of some irrelevant details.
+         * Keep drawing random refnums until one does not collide
+         * with an existing phys_disk entry.
+         */
+        dd->disk.refnum._v32 = random32();
+        for (i = be16_to_cpu(ddf->active->max_pd_entries);
+             i > 0; i--)
+            if (be32_eq(ddf->phys->entries[i-1].refnum,
+                        dd->disk.refnum))
+                break;
+    } while (i > 0);
+
+    dd->disk.forced_ref = 1;
+    dd->disk.forced_guid = 1;
+    memset(dd->disk.vendor, ' ', 32);
+    memcpy(dd->disk.vendor, "Linux", 5);
+    memset(dd->disk.pad, 0xff, 442);
+    for (i = 0; i < ddf->max_part ; i++)
+        dd->vlist[i] = NULL;
+
+    dd->pdnum = n;
+
+    if (st->update_tail) {
+        /* Build a one-entry phys_disk record; 'pde' is redirected
+         * to it so the shared table is not modified here.
+         */
+        int len = (sizeof(struct phys_disk) +
+                   sizeof(struct phys_disk_entry));
+        struct phys_disk *pd;
+
+        pd = xmalloc(len);
+        pd->magic = DDF_PHYS_RECORDS_MAGIC;
+        pd->used_pdes = cpu_to_be16(n);
+        pde = &pd->entries[0];
+        dd->mdupdate = pd;
+    } else
+        ddf->phys->used_pdes = cpu_to_be16(
+            1 + be16_to_cpu(ddf->phys->used_pdes));
+
+    memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
+    pde->refnum = dd->disk.refnum;
+    pde->type = cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
+    pde->state = cpu_to_be16(DDF_Online);
+    dd->size = size;
+    /*
+     * If there is already a device in dlist, try to reserve the same
+     * amount of workspace.  Otherwise, use 32MB.
+     * We checked disk size above already.
+     */
+#define __calc_lba(new, old, lba, mb) do { \
+        unsigned long long dif; \
+        if ((old) != NULL) \
+            dif = (old)->size - be64_to_cpu((old)->lba); \
+        else \
+            dif = (new)->size; \
+        if ((new)->size > dif) \
+            (new)->lba = cpu_to_be64((new)->size - dif); \
+        else \
+            (new)->lba = cpu_to_be64((new)->size - (mb*1024*2)); \
+    } while (0)
+    __calc_lba(dd, ddf->dlist, workspace_lba, 32);
+    __calc_lba(dd, ddf->dlist, primary_lba, 16);
+    if (ddf->dlist == NULL ||
+        be64_to_cpu(ddf->dlist->secondary_lba) != ~(__u64)0)
+        __calc_lba(dd, ddf->dlist, secondary_lba, 32);
+    else
+        /* The reference disk has no secondary header (all ones).
+         * Mirror that instead of leaving the field uninitialised;
+         * _set_config_size() reads it right below.
+         */
+        dd->secondary_lba = cpu_to_be64(~(__u64)0);
+    _set_config_size(pde, dd);
+
+    sprintf(pde->path, "%17.17s","Information: nil") ;
+    memset(pde->pad, 0xff, 6);
+
+    if (st->update_tail) {
+        dd->next = ddf->add_list;
+        ddf->add_list = dd;
+    } else {
+        dd->next = ddf->dlist;
+        ddf->dlist = dd;
+        ddf_set_updates_pending(ddf, NULL);
+    }
+
+    return 0;
+}
+
+/* mdmon has noticed that the disk dk->major/dk->minor has disappeared
+ * from the container, so it has to disappear from the metadata and
+ * the internal data structures too.
+ * Most of the work is done by ddf_process_update which edits
+ * the metadata and closes the file handle and attaches the memory
+ * where free_updates will free it; here we only queue a one-entry
+ * phys_disk record marking the disk DDF_Missing (and only when
+ * st->update_tail is set).
+ *
+ * Returns 0 on success, -1 if the disk is unknown or has no
+ * phys_disk slot.
+ */
+static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
+{
+    struct ddf_super *ddf = st->sb;
+    struct dl *victim = ddf->dlist;
+    struct phys_disk *pd;
+    int len;
+
+    while (victim != NULL &&
+           !(victim->major == dk->major && victim->minor == dk->minor))
+        victim = victim->next;
+
+    if (victim == NULL || victim->pdnum < 0)
+        return -1;
+
+    if (!st->update_tail)
+        return 0;
+
+    len = (sizeof(struct phys_disk) +
+           sizeof(struct phys_disk_entry));
+    pd = xmalloc(len);
+    pd->magic = DDF_PHYS_RECORDS_MAGIC;
+    pd->used_pdes = cpu_to_be16(victim->pdnum);
+    pd->entries[0].state = cpu_to_be16(DDF_Missing);
+    append_metadata_update(st, pd, len);
+    return 0;
+}
+#endif
+
+/*
+ * Helper for the write_init_super method of a ddf container, which is
+ * called when creating a container or adding another device to a
+ * container.
+ *
+ * Writes one copy of the DDF structure (primary or secondary, chosen
+ * by 'type') to disk 'd': header, controller data, physical and
+ * virtual disk records, the config records relevant to 'd' (plus the
+ * spare record slot) and finally the disk's own data record.
+ * The header is written first with openflag set and rewritten with
+ * openflag cleared after the rest has been attempted.
+ *
+ * Returns 1 if everything was written completely, 0 on any seek,
+ * write or allocation failure, when 'type' is not a known header type
+ * or when the header's LBA for that type is all ones.
+ */
+static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type)
+{
+    unsigned long long sector;
+    struct ddf_header *header;
+    int fd, i, n_config, conf_size, buf_size;
+    int ret = 0;
+    char *conf;
+
+    fd = d->fd;
+
+    switch (type) {
+    case DDF_HEADER_PRIMARY:
+        header = &ddf->primary;
+        sector = be64_to_cpu(header->primary_lba);
+        break;
+    case DDF_HEADER_SECONDARY:
+        header = &ddf->secondary;
+        sector = be64_to_cpu(header->secondary_lba);
+        break;
+    default:
+        return 0;
+    }
+    if (sector == ~(__u64)0)
+        return 0;
+
+    header->type = type;
+    header->openflag = 1;
+    header->crc = calc_crc(header, 512);
+
+    /* A failed seek must not be followed by a write at whatever
+     * position the descriptor happens to be at.
+     */
+    if (lseek64(fd, sector<<9, 0) < 0)
+        goto out;
+    /* Short writes count as failures, like the config-record write
+     * further down already does.
+     */
+    if (write(fd, header, 512) != 512)
+        goto out;
+
+    ddf->controller.crc = calc_crc(&ddf->controller, 512);
+    if (write(fd, &ddf->controller, 512) != 512)
+        goto out;
+
+    ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
+    if (write(fd, ddf->phys, ddf->pdsize) != (ssize_t)ddf->pdsize)
+        goto out;
+    ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
+    if (write(fd, ddf->virt, ddf->vdsize) != (ssize_t)ddf->vdsize)
+        goto out;
+
+    /* Now write lots of config records. */
+    n_config = ddf->max_part;
+    conf_size = ddf->conf_rec_len * 512;
+    conf = ddf->conf;
+    buf_size = conf_size * (n_config + 1);
+    if (!conf) {
+        /* staging buffer is allocated once and cached in ddf->conf */
+        if (posix_memalign((void**)&conf, 512, buf_size) != 0)
+            goto out;
+        ddf->conf = conf;
+    }
+    for (i = 0 ; i <= n_config ; i++) {
+        struct vcl *c;
+        struct vd_config *vdc = NULL;
+        if (i == n_config) {
+            /* the extra, last slot holds the spare record */
+            c = (struct vcl *)d->spare;
+            if (c)
+                vdc = &c->conf;
+        } else {
+            unsigned int dummy;
+            c = d->vlist[i];
+            if (c)
+                get_pd_index_from_refnum(
+                    c, d->disk.refnum,
+                    ddf->mppe,
+                    (const struct vd_config **)&vdc,
+                    &dummy);
+        }
+        if (vdc) {
+            dprintf("writing conf record %i on disk %08x for %s/%u\n",
+                    i, be32_to_cpu(d->disk.refnum),
+                    guid_str(vdc->guid),
+                    vdc->sec_elmnt_seq);
+            vdc->crc = calc_crc(vdc, conf_size);
+            memcpy(conf + i*conf_size, vdc, conf_size);
+        } else
+            /* unused slots are filled with 0xff */
+            memset(conf + i*conf_size, 0xff, conf_size);
+    }
+    if (write(fd, conf, buf_size) != buf_size)
+        goto out;
+
+    d->disk.crc = calc_crc(&d->disk, 512);
+    if (write(fd, &d->disk, 512) != 512)
+        goto out;
+
+    ret = 1;
+out:
+    /* Success or not, try to leave a header with openflag cleared */
+    header->openflag = 0;
+    header->crc = calc_crc(header, 512);
+
+    if (lseek64(fd, sector<<9, 0) < 0 ||
+        write(fd, header, 512) != 512)
+        ret = 0;
+
+    return ret;
+}
+
+/* Write the complete DDF metadata to disk 'd'.
+ *
+ * The anchor is derived from ddf->active with the workspace, primary
+ * and secondary LBAs taken from 'd' (or defaults relative to the end
+ * of the device where 'd' has 0), and copied to the primary and
+ * secondary headers.  Both header copies are written via
+ * __write_ddf_structure(); the anchor goes into the last 512-byte
+ * block of the device.
+ *
+ * Returns 1 on success, 0 on failure or when d->fd is negative.
+ */
+static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d)
+{
+    unsigned long long size;
+    int fd = d->fd;
+    if (fd < 0)
+        return 0;
+
+    /* We need to fill in the primary, (secondary) and workspace
+     * lba's in the headers, set their checksums,
+     * Also checksum phys, virt....
+     *
+     * Then write everything out, finally the anchor is written.
+     */
+    if (!get_dev_size(fd, NULL, &size))
+        /* without a size every offset below would be garbage */
+        return 0;
+    size /= 512;
+    memcpy(&ddf->anchor, ddf->active, 512);
+    if (be64_to_cpu(d->workspace_lba) != 0ULL)
+        ddf->anchor.workspace_lba = d->workspace_lba;
+    else
+        ddf->anchor.workspace_lba =
+            cpu_to_be64(size - 32*1024*2);
+    if (be64_to_cpu(d->primary_lba) != 0ULL)
+        ddf->anchor.primary_lba = d->primary_lba;
+    else
+        ddf->anchor.primary_lba =
+            cpu_to_be64(size - 16*1024*2);
+    if (be64_to_cpu(d->secondary_lba) != 0ULL)
+        ddf->anchor.secondary_lba = d->secondary_lba;
+    else
+        ddf->anchor.secondary_lba =
+            cpu_to_be64(size - 32*1024*2);
+    ddf->anchor.timestamp = cpu_to_be32(time(0) - DECADE);
+    memcpy(&ddf->primary, &ddf->anchor, 512);
+    memcpy(&ddf->secondary, &ddf->anchor, 512);
+
+    ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
+    ddf->anchor.seq = cpu_to_be32(0xFFFFFFFF); /* no sequencing in anchor */
+    ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
+
+    if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY))
+        return 0;
+
+    if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY))
+        return 0;
+
+    /* The anchor lives in the very last block; never write it at an
+     * unknown position and never accept a partial write.
+     */
+    if (lseek64(fd, (size-1)*512, SEEK_SET) < 0)
+        return 0;
+    if (write(fd, &ddf->anchor, 512) != 512)
+        return 0;
+
+    return 1;
+}
+
+#ifndef MDASSEMBLE
+/* Write the metadata to every disk in ddf->dlist.  A failure on one
+ * disk does not stop the others from being tried.
+ * Returns 0 when every write succeeded, 1 otherwise.
+ */
+static int __write_init_super_ddf(struct supertype *st)
+{
+    struct ddf_super *ddf = st->sb;
+    struct dl *disk = ddf->dlist;
+    /* number of disks tried minus sum of the per-disk results */
+    int shortfall = 0;
+
+    pr_state(ddf, __func__);
+
+    while (disk != NULL) {
+        shortfall += 1 - _write_super_to_disk(ddf, disk);
+        disk = disk->next;
+    }
+
+    return shortfall != 0;
+}
+
+/* write_init_super method.  ddf->currentconf is reset first so that
+ * 'st' refers to the container again.
+ *
+ * Without st->update_tail the metadata is written to all disks
+ * directly (after Kill() has been run on every device when no
+ * currentconf was set).  With st->update_tail the change is queued
+ * as metadata updates instead: either the pending phys_disk record
+ * of the first disk on add_list, or - for a newly created VD - a
+ * virtual_disk record followed by all of its vd_config records.
+ */
+static int write_init_super_ddf(struct supertype *st)
+{
+    struct ddf_super *ddf = st->sb;
+    struct vcl *conf = ddf->currentconf;
+    struct virtual_disk *vd;
+    struct vd_config *vc;
+    int len, tlen;
+    unsigned int bvd;
+
+    /* We are done with currentconf - reset it so st refers to the container */
+    ddf->currentconf = NULL;
+
+    if (!st->update_tail) {
+        if (conf == NULL) {
+            struct dl *d;
+
+            for (d = ddf->dlist; d != NULL; d = d->next)
+                while (Kill(d->devname, NULL, 0, -1, 1) == 0)
+                    ;
+        }
+        /* Note: the fd's are not closed here, a subsequent
+         * ->free_super() will do that
+         */
+        return __write_init_super_ddf(st);
+    }
+
+    if (conf == NULL) {
+        /* Must be adding a physical disk to the container */
+        len = (sizeof(struct phys_disk) +
+               sizeof(struct phys_disk_entry));
+
+        if (ddf->add_list != NULL) {
+            append_metadata_update(st, ddf->add_list->mdupdate, len);
+            ddf->add_list->mdupdate = NULL;
+        }
+        return 0;
+    }
+
+    /* Newly created VD: queue the virtual_disk and vd_config */
+
+    /* First the virtual disk.  We have a slightly fake header */
+    len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
+    vd = xmalloc(len);
+    *vd = *ddf->virt;
+    vd->entries[0] = ddf->virt->entries[conf->vcnum];
+    vd->populated_vdes = cpu_to_be16(conf->vcnum);
+    append_metadata_update(st, vd, len);
+
+    /* Then the vd_config: own record first, other BVDs behind it */
+    len = ddf->conf_rec_len * 512;
+    tlen = len * conf->conf.sec_elmnt_count;
+    vc = xmalloc(tlen);
+    memcpy(vc, &conf->conf, len);
+    bvd = 1;
+    while (bvd < conf->conf.sec_elmnt_count) {
+        memcpy((char *)vc + bvd*len, conf->other_bvds[bvd-1], len);
+        bvd++;
+    }
+    append_metadata_update(st, vc, tlen);
+
+    return 0;
+}
+
+#endif
+
+/* Usable size of a device of 'devsize' blocks: everything except the
+ * last 32Meg (32*1024*2 blocks), or 0 if the device is not larger
+ * than that.  'st' and 'data_offset' are not used.
+ */
+static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
+                            unsigned long long data_offset)
+{
+    /* We must reserve the last 32Meg */
+    const __u64 reserved = 32*1024*2;
+
+    return devsize > reserved ? devsize - reserved : 0;
+}
+
+#ifndef MDASSEMBLE
+
+/* Choose the disks of the container that will carry a new array.
+ * On success the chosen disks have dl->raiddisk set to 0..raiddisks-1
+ * (first fit in dlist order) and all others are left at -1.
+ * Returns 1 on success, 0 if not enough disks have room.
+ */
+static int reserve_space(struct supertype *st, int raiddisks,
+ unsigned long long size, int chunk,
+ unsigned long long data_offset,
+ unsigned long long *freesize)
+{
+ /* Find 'raiddisks' spare extents at least 'size' big (but
+ * only caring about multiples of 'chunk') and remember
+ * them. If size==0, find the largest size possible.
+ * Report available size in *freesize
+ * If space cannot be found, fail.
+ * NOTE(review): *freesize is only written in the size==0
+ * branch below - confirm callers do not rely on it otherwise.
+ */
+ struct dl *dl;
+ struct ddf_super *ddf = st->sb;
+ int cnt = 0;
+
+ /* Forget any earlier selection */
+ for (dl = ddf->dlist; dl ; dl=dl->next) {
+ dl->raiddisk = -1;
+ dl->esize = 0;
+ }
+ /* Now find largest extent on each device */
+ for (dl = ddf->dlist ; dl ; dl=dl->next) {
+ unsigned long long minsize = ULLONG_MAX;
+
+ /* presumably find_space() lowers minsize to what is actually
+ * available on this disk - TODO confirm against find_space()
+ */
+ find_space(ddf, dl, data_offset, &minsize);
+ if (minsize >= size && minsize >= (unsigned)chunk) {
+ cnt++;
+ dl->esize = minsize;
+ }
+ }
+ if (cnt < raiddisks) {
+ pr_err("not enough devices with space to create array.\n");
+ return 0; /* Not enough free spaces large enough */
+ }
+ if (size == 0) {
+ /* choose the largest size of which there are at least 'raiddisk' */
+ for (dl = ddf->dlist ; dl ; dl=dl->next) {
+ struct dl *dl2;
+ /* 'size' grows as better candidates are found, so only
+ * strictly larger extents are worth examining
+ */
+ if (dl->esize <= size)
+ continue;
+ /* This is bigger than 'size', see if there are enough */
+ cnt = 0;
+ for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
+ if (dl2->esize >= dl->esize)
+ cnt++;
+ if (cnt >= raiddisks)
+ size = dl->esize;
+ }
+ /* round down to a whole number of chunks */
+ if (chunk) {
+ size = size / chunk;
+ size *= chunk;
+ }
+ *freesize = size;
+ if (size < 32) {
+ pr_err("not enough spare devices to create array.\n");
+ return 0;
+ }
+ }
+ /* We have a 'size' of which there are enough spaces.
+ * We simply do a first-fit */
+ cnt = 0;
+ for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
+ if (dl->esize < size)
+ continue;
+
+ dl->raiddisk = cnt;
+ cnt++;
+ }
+ return 1;
+}
+
+/* validate_geometry method: decide whether an array of the given
+ * level/layout/raiddisks can be created with DDF metadata, optionally
+ * on device 'dev'.  *chunk is set to DEFAULT_CHUNK when it is UnSet.
+ * Container requests are passed to validate_geometry_ddf_container(),
+ * requests on an already loaded container (st->sb set) or on a member
+ * of a ddf container to validate_geometry_ddf_bvd().
+ * Returns 1 if the geometry is acceptable, 0 otherwise.
+ */
+static int validate_geometry_ddf(struct supertype *st,
+ int level, int layout, int raiddisks,
+ int *chunk, unsigned long long size,
+ unsigned long long data_offset,
+ char *dev, unsigned long long *freesize,
+ int verbose)
+{
+ int fd;
+ struct mdinfo *sra;
+ int cfd;
+
+ /* ddf potentially supports lots of things, but it depends on
+ * what devices are offered (and maybe kernel version?)
+ * If given unused devices, we will make a container.
+ * If given devices in a container, we will make a BVD.
+ * If given BVDs, we make an SVD, changing all the GUIDs in the process.
+ */
+
+ if (*chunk == UnSet)
+ *chunk = DEFAULT_CHUNK;
+
+ if (level == LEVEL_NONE)
+ level = LEVEL_CONTAINER;
+ if (level == LEVEL_CONTAINER) {
+ /* Must be a fresh device to add to a container */
+ return validate_geometry_ddf_container(st, level, layout,
+ raiddisks, *chunk,
+ size, data_offset, dev,
+ freesize,
+ verbose);
+ }
+
+ if (!dev) {
+ /* No device given: general check of level/layout only,
+ * using a scratch vd_config for the conversion.
+ */
+ mdu_array_info_t array = {
+ .level = level,
+ .layout = layout,
+ .raid_disks = raiddisks
+ };
+ struct vd_config conf;
+ if (layout_md2ddf(&array, &conf) == -1) {
+ if (verbose)
+ pr_err("DDF does not support level %d /layout %d arrays with %d disks\n",
+ level, layout, raiddisks);
+ return 0;
+ }
+ /* Should check layout? etc */
+
+ if (st->sb && freesize) {
+ /* --create was given a container to create in.
+ * So we need to check that there are enough
+ * free spaces and return the amount of space.
+ * We may as well remember which drives were
+ * chosen so that add_to_super/getinfo_super
+ * can return them.
+ */
+ return reserve_space(st, raiddisks, size, *chunk,
+ data_offset, freesize);
+ }
+ return 1;
+ }
+
+ if (st->sb) {
+ /* A container has already been opened, so we are
+ * creating in there. Maybe a BVD, maybe an SVD.
+ * Should make a distinction one day.
+ */
+ return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
+ chunk, size, data_offset, dev,
+ freesize,
+ verbose);
+ }
+ /* This is the first device for the array.
+ * If it is a container, we read it in and do automagic allocations,
+ * no other devices should be given.
+ * Otherwise it must be a member device of a container, and we
+ * do manual allocation.
+ * Later we should check for a BVD and make an SVD.
+ */
+ /* An exclusive open that succeeds means nobody is using the device */
+ fd = open(dev, O_RDONLY|O_EXCL, 0);
+ if (fd >= 0) {
+ close(fd);
+ /* Just a bare device, no good to us */
+ if (verbose)
+ pr_err("ddf: Cannot create this array "
+ "on device %s - a container is required.\n",
+ dev);
+ return 0;
+ }
+ /* Only EBUSY is expected here; then retry without O_EXCL */
+ if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
+ if (verbose)
+ pr_err("ddf: Cannot open %s: %s\n",
+ dev, strerror(errno));
+ return 0;
+ }
+ /* Well, it is in use by someone, maybe a 'ddf' container. */
+ cfd = open_container(fd);
+ if (cfd < 0) {
+ close(fd);
+ if (verbose)
+ pr_err("ddf: Cannot use %s: %s\n",
+ dev, strerror(EBUSY));
+ return 0;
+ }
+ /* NOTE(review): 'sra' is never released in this function and cfd
+ * stays open on the early return in the else branch below -
+ * confirm whether that is intended.
+ */
+ sra = sysfs_read(cfd, NULL, GET_VERSION);
+ close(fd);
+ if (sra && sra->array.major_version == -1 &&
+ strcmp(sra->text_version, "ddf") == 0) {
+ /* This is a member of a ddf container. Load the container
+ * and try to create a bvd
+ */
+ struct ddf_super *ddf;
+ if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
+ st->sb = ddf;
+ strcpy(st->container_devnm, fd2devnm(cfd));
+ close(cfd);
+ return validate_geometry_ddf_bvd(st, level, layout,
+ raiddisks, chunk, size,
+ data_offset,
+ dev, freesize,
+ verbose);
+ }
+ /* loading failed: falls through to the final 'return 1' */
+ close(cfd);
+ } else /* device may belong to a different container */
+ return 0;
+
+ return 1;
+}
+
+/* Geometry check for creating a container (level must be
+ * LEVEL_CONTAINER).  Without 'dev' the request is simply accepted.
+ * With 'dev' the device must be openable exclusively and larger than
+ * the area avail_size_ddf() reserves; the usable size is stored in
+ * *freesize.  Returns 1 if acceptable, 0 otherwise.
+ */
+static int
+validate_geometry_ddf_container(struct supertype *st,
+                                int level, int layout, int raiddisks,
+                                int chunk, unsigned long long size,
+                                unsigned long long data_offset,
+                                char *dev, unsigned long long *freesize,
+                                int verbose)
+{
+    unsigned long long ldsize;
+    int fd;
+    int size_known;
+
+    if (level != LEVEL_CONTAINER)
+        return 0;
+    if (dev == NULL)
+        return 1;
+
+    fd = open(dev, O_RDONLY|O_EXCL, 0);
+    if (fd < 0) {
+        if (verbose)
+            pr_err("ddf: Cannot open %s: %s\n",
+                   dev, strerror(errno));
+        return 0;
+    }
+    /* the descriptor is only needed for the size query */
+    size_known = get_dev_size(fd, dev, &ldsize);
+    close(fd);
+    if (!size_known)
+        return 0;
+
+    *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
+    return *freesize != 0;
+}
+
+/* Geometry check for creating an array inside an already loaded
+ * container (st->sb).
+ * Without 'dev': succeed when at least 'raiddisks' disks of the
+ * container have room according to find_space().
+ * With 'dev': the block device must be one of the container's disks;
+ * the size find_space() reports for it is stored in *freesize.
+ * Returns 1 if acceptable, 0 otherwise.
+ */
+static int validate_geometry_ddf_bvd(struct supertype *st,
+                                     int level, int layout, int raiddisks,
+                                     int *chunk, unsigned long long size,
+                                     unsigned long long data_offset,
+                                     char *dev, unsigned long long *freesize,
+                                     int verbose)
+{
+    struct ddf_super *ddf = st->sb;
+    struct stat stb;
+    struct dl *member;
+    unsigned long long maxsize;
+
+    /* ddf/bvd supports lots of things, but not containers */
+    if (level == LEVEL_CONTAINER) {
+        if (verbose)
+            pr_err("DDF cannot create a container within an container\n");
+        return 0;
+    }
+    /* We must have the container info already read in. */
+    if (ddf == NULL)
+        return 0;
+
+    if (dev == NULL) {
+        /* General test: make sure there is space for
+         * 'raiddisks' device extents of size 'size'.
+         */
+        unsigned long long minsize = size ? size : 8;
+        int dcnt = 0;
+
+        member = ddf->dlist;
+        while (member != NULL) {
+            if (find_space(ddf, member, data_offset, &minsize)
+                != INVALID_SECTORS)
+                dcnt++;
+            member = member->next;
+        }
+        if (dcnt >= raiddisks)
+            return 1;
+        if (verbose)
+            pr_err("ddf: Not enough devices with "
+                   "space for this array (%d < %d)\n",
+                   dcnt, raiddisks);
+        return 0;
+    }
+
+    /* This device must be a member of the set */
+    if (stat(dev, &stb) < 0 ||
+        (S_IFMT & stb.st_mode) != S_IFBLK)
+        return 0;
+
+    member = ddf->dlist;
+    while (member != NULL &&
+           !(member->major == (int)major(stb.st_rdev) &&
+             member->minor == (int)minor(stb.st_rdev)))
+        member = member->next;
+    if (member == NULL) {
+        if (verbose)
+            pr_err("ddf: %s is not in the "
+                   "same DDF set\n",
+                   dev);
+        return 0;
+    }
+
+    maxsize = ULLONG_MAX;
+    find_space(ddf, member, data_offset, &maxsize);
+    *freesize = maxsize;
+
+    return 1;
+}
+
+/* Load the DDF metadata of the container open on 'fd'.
+ *
+ * The member devices are taken from sysfs.  The headers of every
+ * member are read to find the one with the highest sequence number
+ * (decremented by one when its openflag is set); the global sections
+ * are loaded from that device and the device-local sections from
+ * every member.  On success the new ddf_super is stored in *sbp and
+ * st->ss / st->container_devnm are filled in.
+ *
+ * Returns 0 on success, 1 on failure and 2 when a member device
+ * cannot be opened.  'devname' is not used.
+ */
+static int load_super_ddf_all(struct supertype *st, int fd,
+                              void **sbp, char *devname)
+{
+    struct mdinfo *sra;
+    struct ddf_super *super;
+    struct mdinfo *sd, *best = NULL;
+    int bestseq = 0;
+    int seq;
+    char nm[20];
+    int dfd;
+
+    /* NOTE(review): 'sra' is not released on any path of this
+     * function; the matching release helper is not visible here.
+     */
+    sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
+    if (!sra)
+        return 1;
+    if (sra->array.major_version != -1 ||
+        sra->array.minor_version != -2 ||
+        strcmp(sra->text_version, "ddf") != 0)
+        return 1;
+
+    if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
+        return 1;
+    memset(super, 0, sizeof(*super));
+
+    /* first, try each device, and choose the best ddf */
+    for (sd = sra->devs ; sd ; sd = sd->next) {
+        int rv;
+        snprintf(nm, sizeof(nm), "%d:%d",
+                 sd->disk.major, sd->disk.minor);
+        dfd = dev_open(nm, O_RDONLY);
+        if (dfd < 0) {
+            /* nothing refers to 'super' yet: do not leak it */
+            free(super);
+            return 2;
+        }
+        rv = load_ddf_headers(dfd, super, NULL);
+        close(dfd);
+        if (rv == 0) {
+            seq = be32_to_cpu(super->active->seq);
+            if (super->active->openflag)
+                seq--;
+            if (!best || seq > bestseq) {
+                bestseq = seq;
+                best = sd;
+            }
+        }
+    }
+    if (!best) {
+        free(super);
+        return 1;
+    }
+    /* OK, load this ddf */
+    snprintf(nm, sizeof(nm), "%d:%d",
+             best->disk.major, best->disk.minor);
+    dfd = dev_open(nm, O_RDONLY);
+    if (dfd < 0) {
+        free(super);
+        return 1;
+    }
+    load_ddf_headers(dfd, super, NULL);
+    load_ddf_global(dfd, super, NULL);
+    close(dfd);
+    /* Now we need the device-local bits.
+     * NOTE(review): from here on 'super' may own further buffers
+     * and descriptors; the failure paths below still abandon it
+     * because the proper teardown helper is not visible here.
+     */
+    for (sd = sra->devs ; sd ; sd = sd->next) {
+        int rv;
+
+        snprintf(nm, sizeof(nm), "%d:%d",
+                 sd->disk.major, sd->disk.minor);
+        dfd = dev_open(nm, O_RDWR);
+        if (dfd < 0)
+            return 2;
+        rv = load_ddf_headers(dfd, super, NULL);
+        if (rv) {
+            /* the descriptor was not handed to anybody */
+            close(dfd);
+            return 1;
+        }
+        /* last argument 1: presumably the descriptor is kept in
+         * the new disk entry - TODO confirm in load_ddf_local()
+         */
+        rv = load_ddf_local(dfd, super, NULL, 1);
+        if (rv)
+            return 1;
+    }
+
+    *sbp = super;
+    if (st->ss == NULL) {
+        st->ss = &super_ddf;
+        st->minor_version = 0;
+        st->max_devs = 512;
+    }
+    strcpy(st->container_devnm, fd2devnm(fd));
+    return 0;
+}
+
+/* load_container method: load the whole container open on 'fd' and
+ * store the resulting superblock in st->sb.  Returns whatever
+ * load_super_ddf_all() returns.
+ */
+static int load_container_ddf(struct supertype *st, int fd,
+                              char *devname)
+{
+    void **sbp = &st->sb;
+
+    return load_super_ddf_all(st, fd, sbp, devname);
+}
+
+#endif /* MDASSEMBLE */
+
+/* Check whether the secondary RAID level of 'vc' can be handled.
+ * Returns 0 if it can, -1 otherwise (missing or inconsistent BVDs,
+ * unsupported primary or secondary level).
+ */
+static int check_secondary(const struct vcl *vc)
+{
+    const struct vd_config *conf = &vc->conf;
+    int i;
+
+    /* The only DDF secondary RAID level md can support is
+     * RAID 10, if the stripe sizes and Basic volume sizes
+     * are all equal.
+     * Other configurations could in theory be supported by exposing
+     * the BVDs to user space and using device mapper for the secondary
+     * mapping. So far we don't support that.
+     */
+
+    /* Bitmap of the sec_elmnt_seq values seen, 64 per word.  The
+     * shifted constant must be 64 bits wide: shifting a plain int
+     * by 32..63 is undefined and loses the upper half of each word.
+     */
+    __u64 sec_elements[4] = {0, 0, 0, 0};
+#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1ULL<<((n)&63)))
+#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1ULL<<((n)&63))) != 0)
+
+    if (vc->other_bvds == NULL) {
+        pr_err("No BVDs for secondary RAID found\n");
+        return -1;
+    }
+    if (conf->prl != DDF_RAID1) {
+        pr_err("Secondary RAID level only supported for mirrored BVD\n");
+        return -1;
+    }
+    if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
+        pr_err("Secondary RAID level %d is unsupported\n",
+               conf->srl);
+        return -1;
+    }
+    __set_sec_seen(conf->sec_elmnt_seq);
+    for (i = 0; i < conf->sec_elmnt_count-1; i++) {
+        const struct vd_config *bvd = vc->other_bvds[i];
+        /* slots not filled in yet are skipped, not rejected */
+        if (bvd->sec_elmnt_seq == DDF_UNUSED_BVD)
+            continue;
+        if (bvd->srl != conf->srl) {
+            pr_err("Inconsistent secondary RAID level across BVDs\n");
+            return -1;
+        }
+        if (bvd->prl != conf->prl) {
+            pr_err("Different RAID levels for BVDs are unsupported\n");
+            return -1;
+        }
+        if (!be16_eq(bvd->prim_elmnt_count, conf->prim_elmnt_count)) {
+            pr_err("All BVDs must have the same number of primary elements\n");
+            return -1;
+        }
+        if (bvd->chunk_shift != conf->chunk_shift) {
+            pr_err("Different strip sizes for BVDs are unsupported\n");
+            return -1;
+        }
+        if (!be64_eq(bvd->array_blocks, conf->array_blocks)) {
+            pr_err("Different BVD sizes are unsupported\n");
+            return -1;
+        }
+        __set_sec_seen(bvd->sec_elmnt_seq);
+    }
+    /* every sequence number 0..sec_elmnt_count-1 must be present */
+    for (i = 0; i < conf->sec_elmnt_count; i++) {
+        if (!__was_sec_seen(i)) {
+            /* pr_err("BVD %d is missing\n", i); */
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/* Look for 'refnum' in the first 'nmax' phys_refnum slots of vc's own
+ * config record and then of each of its other BVDs.
+ * On a hit, *bvd is set to the record containing it, *idx to the slot
+ * inside that record, and the return value is
+ *   sec_elmnt_seq * prim_elmnt_count + number of used slots before it
+ * (with sec_elmnt_seq taken as 0 when sec_elmnt_count is 1).
+ * On a miss *bvd is set to NULL, *idx is left untouched and
+ * DDF_NOTFOUND is returned.
+ */
+static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
+                                             be32 refnum, unsigned int nmax,
+                                             const struct vd_config **bvd,
+                                             unsigned int *idx)
+{
+    const struct vd_config *rec = &vc->conf;
+    unsigned int slot, used, n, seq, per_bvd;
+
+    per_bvd = be16_to_cpu(vc->conf.prim_elmnt_count);
+    seq = 0;
+    if (vc->conf.sec_elmnt_count != 1)
+        seq = vc->conf.sec_elmnt_seq;
+
+    /* 'used' counts valid entries for this BVD */
+    used = 0;
+    for (slot = 0; slot < nmax; slot++) {
+        if (be32_eq(rec->phys_refnum[slot], refnum)) {
+            *bvd = rec;
+            *idx = slot;
+            return seq * per_bvd + used;
+        }
+        if (be32_to_cpu(rec->phys_refnum[slot]) != 0xffffffff)
+            used++;
+    }
+
+    if (vc->other_bvds != NULL) {
+        for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
+            rec = vc->other_bvds[n-1];
+            seq = rec->sec_elmnt_seq;
+            if (seq == DDF_UNUSED_BVD)
+                continue;
+            used = 0;
+            for (slot = 0; slot < nmax; slot++) {
+                if (be32_eq(rec->phys_refnum[slot], refnum)) {
+                    *bvd = rec;
+                    *idx = slot;
+                    return seq * per_bvd + used;
+                }
+                if (be32_to_cpu(rec->phys_refnum[slot]) != 0xffffffff)
+                    used++;
+            }
+        }
+    }
+
+    *bvd = NULL;
+    return DDF_NOTFOUND;
+}
+
+/* container_content method: build a list of mdinfo structures, one per
+ * array (VD) found in the loaded container, each with a list of its
+ * member devices.  When 'subarray' is given only the VD whose number
+ * matches that decimal string is reported, and ddf->currentconf is
+ * left pointing at it.  Returns the head of the list (NULL if empty).
+ */
+static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
+{
+ /* Given a container loaded by load_super_ddf_all,
+ * extract information about all the arrays into
+ * an mdinfo tree.
+ *
+ * For each vcl in conflist: create an mdinfo, fill it in,
+ * then look for matching devices (phys_refnum) in dlist
+ * and create appropriate device mdinfo.
+ */
+ struct ddf_super *ddf = st->sb;
+ struct mdinfo *rest = NULL;
+ struct vcl *vc;
+
+ for (vc = ddf->conflist ; vc ; vc=vc->next) {
+ unsigned int i;
+ struct mdinfo *this;
+ char *ep;
+ __u32 *cptr;
+ unsigned int pd;
+
+ /* skip VDs other than the requested one */
+ if (subarray &&
+ (strtoul(subarray, &ep, 10) != vc->vcnum ||
+ *ep != '\0'))
+ continue;
+
+ /* VDs with a secondary level are only reported when
+ * check_secondary() accepts them
+ */
+ if (vc->conf.sec_elmnt_count > 1) {
+ if (check_secondary(vc) != 0)
+ continue;
+ }
+
+ this = xcalloc(1, sizeof(*this));
+ this->next = rest;
+ rest = this;
+
+ /* NOTE(review): 'this' is already linked into the result when
+ * the conversion fails, so the entry stays in the list with
+ * the remaining fields not filled in - confirm this is intended.
+ */
+ if (layout_ddf2md(&vc->conf, &this->array))
+ continue;
+ this->array.md_minor = -1;
+ this->array.major_version = -1;
+ this->array.minor_version = -2;
+ this->safe_mode_delay = DDF_SAFE_MODE_DELAY;
+ /* creation time is read from the 32-bit word at offset 16 of the GUID */
+ cptr = (__u32 *)(vc->conf.guid + 16);
+ this->array.ctime = DECADE + __be32_to_cpu(*cptr);
+ this->array.utime = DECADE +
+ be32_to_cpu(vc->conf.timestamp);
+ this->array.chunk_size = 512 << vc->conf.chunk_shift;
+
+ i = vc->vcnum;
+ /* inconsistent or not fully initialised: resync from the start */
+ if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
+ (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
+ DDF_init_full) {
+ this->array.state = 0;
+ this->resync_start = 0;
+ } else {
+ this->array.state = 1;
+ this->resync_start = MaxSector;
+ }
+ _ddf_array_name(this->name, ddf, i);
+ memset(this->uuid, 0, sizeof(this->uuid));
+ this->component_size = be64_to_cpu(vc->conf.blocks);
+ this->array.size = this->component_size / 2;
+ this->container_member = i;
+
+ /* currentconf is set around the uuid_from_super_ddf() call and
+ * reset afterwards unless a specific subarray was requested
+ */
+ ddf->currentconf = vc;
+ uuid_from_super_ddf(st, this->uuid);
+ if (!subarray)
+ ddf->currentconf = NULL;
+
+ sprintf(this->text_version, "/%s/%d",
+ st->container_devnm, this->container_member);
+
+ /* collect the member devices of this VD */
+ for (pd = 0; pd < be16_to_cpu(ddf->phys->max_pdes); pd++) {
+ struct mdinfo *dev;
+ struct dl *d;
+ const struct vd_config *bvd;
+ unsigned int iphys;
+ int stt;
+
+ /* unused phys_disk slot */
+ if (be32_to_cpu(ddf->phys->entries[pd].refnum)
+ == 0xFFFFFFFF)
+ continue;
+
+ /* only disks that are online and neither failed nor rebuilding */
+ stt = be16_to_cpu(ddf->phys->entries[pd].state);
+ if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
+ != DDF_Online)
+ continue;
+
+ i = get_pd_index_from_refnum(
+ vc, ddf->phys->entries[pd].refnum,
+ ddf->mppe, &bvd, &iphys);
+ if (i == DDF_NOTFOUND)
+ continue;
+
+ /* counted even if the disk is not in dlist (see below) */
+ this->array.working_disks++;
+
+ for (d = ddf->dlist; d ; d=d->next)
+ if (be32_eq(d->disk.refnum,
+ ddf->phys->entries[pd].refnum))
+ break;
+ if (d == NULL)
+ /* Haven't found that one yet, maybe there are others */
+ continue;
+
+ dev = xcalloc(1, sizeof(*dev));
+ dev->next = this->devs;
+ this->devs = dev;
+
+ dev->disk.number = be32_to_cpu(d->disk.refnum);
+ dev->disk.major = d->major;
+ dev->disk.minor = d->minor;
+ dev->disk.raid_disk = i;
+ dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
+ dev->recovery_start = MaxSector;
+
+ dev->events = be32_to_cpu(ddf->active->seq);
+ dev->data_offset =
+ be64_to_cpu(LBA_OFFSET(ddf, bvd)[iphys]);
+ dev->component_size = be64_to_cpu(bvd->blocks);
+ /* NOTE(review): unbounded copy - confirm dev->name is large
+ * enough for any devname stored in dlist.
+ */
+ if (d->devname)
+ strcpy(dev->name, d->devname);
+ }
+ }
+ return rest;
+}
+
+/* store_super method.
+ * If the superblock knows any disks or configs, the device behind
+ * 'fd' must be one of the disks in dlist; the full metadata is then
+ * written to it through 'fd' (the disk's own fd is restored
+ * afterwards).  Otherwise the last 512 bytes of the device are
+ * overwritten with zeroes.
+ * Returns 0 on success, 1 on failure.
+ */
+static int store_super_ddf(struct supertype *st, int fd)
+{
+    struct ddf_super *ddf = st->sb;
+    unsigned long long dsize;
+    struct stat sta;
+    struct dl *disk;
+    int saved_fd, failed;
+
+    if (ddf == NULL)
+        return 1;
+
+    if (!get_dev_size(fd, NULL, &dsize))
+        return 1;
+
+    if (ddf->dlist == NULL && ddf->conflist == NULL) {
+        /* Nothing loaded: zero the last block of the device */
+        void *zeroes;
+        int written;
+
+        if (posix_memalign(&zeroes, 512, 512) != 0)
+            return 1;
+        memset(zeroes, 0, 512);
+
+        lseek64(fd, dsize-512, 0);
+        written = write(fd, zeroes, 512);
+        free(zeroes);
+        return written < 0 ? 1 : 0;
+    }
+
+    if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
+        pr_err("%s: file descriptor for invalid device\n",
+               __func__);
+        return 1;
+    }
+
+    disk = ddf->dlist;
+    while (disk != NULL &&
+           !(disk->major == (int)major(sta.st_rdev) &&
+             disk->minor == (int)minor(sta.st_rdev)))
+        disk = disk->next;
+    if (disk == NULL) {
+        pr_err("%s: couldn't find disk %d/%d\n", __func__,
+               (int)major(sta.st_rdev),
+               (int)minor(sta.st_rdev));
+        return 1;
+    }
+
+    /* write through the caller's descriptor, then put ours back */
+    saved_fd = disk->fd;
+    disk->fd = fd;
+    failed = (_write_super_to_disk(ddf, disk) != 1);
+    disk->fd = saved_fd;
+    return failed;
+}
+
+/* compare_super method: check whether 'tst' belongs to the same DDF
+ * set as 'st' and, if so, merge the configs and disks that only 'tst'
+ * knows about into st's superblock.
+ *
+ * When 'st' has no superblock yet, it simply takes over tst's.
+ * When the sequence numbers differ, 0 is returned without merging.
+ */
+static int compare_super_ddf(struct supertype *st, struct supertype *tst)
+{
+    /*
+     * return:
+     * 0 same, or first was empty, and second was copied
+     * 1 second had wrong magic number - but that isn't possible
+     * 2 wrong uuid
+     * 3 wrong other info
+     */
+    struct ddf_super *first = st->sb;
+    struct ddf_super *second = tst->sb;
+    struct dl *dl1, *dl2;
+    struct vcl *vl1, *vl2;
+    unsigned int max_vds, max_pds, pd, vd;
+
+    if (!first) {
+        st->sb = tst->sb;
+        tst->sb = NULL;
+        return 0;
+    }
+
+    if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
+        return 2;
+
+    /* It is only OK to compare info in the anchor.  Anything else
+     * could be changing due to a reconfig so must be ignored.
+     * guid really should be enough anyway.
+     */
+
+    if (!be32_eq(first->active->seq, second->active->seq)) {
+        dprintf("%s: sequence number mismatch %u<->%u\n", __func__,
+                be32_to_cpu(first->active->seq),
+                be32_to_cpu(second->active->seq));
+        return 0;
+    }
+
+    /*
+     * At this point we are fairly sure that the meta data matches.
+     * But the new disk may contain additional local data.
+     * Add it to the super block.
+     */
+    max_vds = be16_to_cpu(first->active->max_vd_entries);
+    max_pds = be16_to_cpu(first->phys->max_pdes);
+    for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
+        for (vl1 = first->conflist; vl1; vl1 = vl1->next)
+            if (!memcmp(vl1->conf.guid, vl2->conf.guid,
+                        DDF_GUID_LEN))
+                break;
+        if (vl1) {
+            /* VD already known: at most a new BVD of it */
+            if (vl1->other_bvds != NULL &&
+                vl1->conf.sec_elmnt_seq !=
+                vl2->conf.sec_elmnt_seq) {
+                dprintf("%s: adding BVD %u\n", __func__,
+                        vl2->conf.sec_elmnt_seq);
+                add_other_bvd(vl1, &vl2->conf,
+                              first->conf_rec_len*512);
+            }
+            continue;
+        }
+
+        /* VD only known to 'second': copy its config over */
+        if (posix_memalign((void **)&vl1, 512,
+                           (first->conf_rec_len*512 +
+                            offsetof(struct vcl, conf))) != 0) {
+            pr_err("%s could not allocate vcl buf\n",
+                   __func__);
+            return 3;
+        }
+
+        vl1->next = first->conflist;
+        vl1->block_sizes = NULL;
+        memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
+        if (alloc_other_bvds(first, vl1) != 0) {
+            pr_err("%s could not allocate other bvds\n",
+                   __func__);
+            free(vl1);
+            return 3;
+        }
+        /* vcnum is the index of the matching virtual disk entry
+         * (max_vds if there is none)
+         */
+        for (vd = 0; vd < max_vds; vd++)
+            if (!memcmp(first->virt->entries[vd].guid,
+                        vl1->conf.guid, DDF_GUID_LEN))
+                break;
+        vl1->vcnum = vd;
+        dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
+        first->conflist = vl1;
+    }
+
+    for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
+        for (dl1 = first->dlist; dl1; dl1 = dl1->next)
+            if (be32_eq(dl1->disk.refnum, dl2->disk.refnum))
+                break;
+        if (dl1)
+            continue;
+
+        /* Disk only known to 'second': clone its description */
+        if (posix_memalign((void **)&dl1, 512,
+                           sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
+            != 0) {
+            pr_err("%s could not allocate disk info buffer\n",
+                   __func__);
+            return 3;
+        }
+        memcpy(dl1, dl2, sizeof(*dl1));
+        dl1->mdupdate = NULL;
+        dl1->next = first->dlist;
+        dl1->fd = -1;
+        for (pd = 0; pd < max_pds; pd++)
+            if (be32_eq(first->phys->entries[pd].refnum,
+                        dl1->disk.refnum))
+                break;
+        dl1->pdnum = pd < max_pds ? (int)pd : -1;
+        if (dl2->spare) {
+            if (posix_memalign((void **)&dl1->spare, 512,
+                               first->conf_rec_len*512) != 0) {
+                pr_err("%s could not allocate spare info buf\n",
+                       __func__);
+                /* dl1 is not linked anywhere yet */
+                free(dl1);
+                return 3;
+            }
+            memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
+        }
+        /* Re-point every vlist slot at first's copy of the same
+         * config (matched by GUID).
+         */
+        for (vd = 0 ; vd < first->max_part ; vd++) {
+            if (!dl2->vlist[vd]) {
+                dl1->vlist[vd] = NULL;
+                continue;
+            }
+            for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
+                if (!memcmp(vl1->conf.guid,
+                            dl2->vlist[vd]->conf.guid,
+                            DDF_GUID_LEN))
+                    break;
+            }
+            /* The assignment has to follow the search: vl1 is
+             * the matching config, or NULL when there is none.
+             */
+            dl1->vlist[vd] = vl1;
+        }
+        first->dlist = dl1;
+        dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
+                be32_to_cpu(dl1->disk.refnum));
+    }
+
+    return 0;
+}
+
+#ifndef MDASSEMBLE
+/*
+ * A new array 'a' has been started which claims to be instance 'inst'
+ * within container 'c'.
+ * We need to confirm that the array matches the metadata in 'c' so
+ * that we don't corrupt any metadata.
+ *
+ * 'inst' is converted with atoi() and rejected when it is not a valid
+ * slot of the virtual disk table or when that slot is unused (all_ff guid).
+ * Every device of the array must be present in ddf->dlist with a
+ * phys-disk slot; a device whose phys-disk state is not exactly Online
+ * (out of Online/Missing/Failed) gets "faulty" written to its state_fd.
+ *
+ * Returns 0 and records the slot in a->info.container_member on success,
+ * -ENODEV if the subarray does not exist, -1 if a device is unknown.
+ */
+static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
+{
+	struct ddf_super *ddf = c->sb;
+	int n = atoi(inst);
+	struct mdinfo *dev;
+	struct dl *dl;
+	static const char faulty[] = "faulty";
+
+	/* Range-check before indexing: never read outside the VD table */
+	if (n < 0 || n >= (int)be16_to_cpu(ddf->active->max_vd_entries) ||
+	    all_ff(ddf->virt->entries[n].guid)) {
+		pr_err("%s: subarray %d doesn't exist\n", __func__, n);
+		return -ENODEV;
+	}
+	dprintf("%s: new subarray %d, GUID: %s\n", __func__, n,
+		guid_str(ddf->virt->entries[n].guid));
+	for (dev = a->info.devs; dev; dev = dev->next) {
+		for (dl = ddf->dlist; dl; dl = dl->next)
+			if (dl->major == dev->disk.major &&
+			    dl->minor == dev->disk.minor)
+				break;
+		if (!dl || dl->pdnum < 0) {
+			pr_err("%s: device %d/%d of subarray %d not found in meta data\n",
+				__func__, dev->disk.major, dev->disk.minor, n);
+			return -1;
+		}
+		if ((be16_to_cpu(ddf->phys->entries[dl->pdnum].state) &
+			(DDF_Online|DDF_Missing|DDF_Failed)) != DDF_Online) {
+			pr_err("%s: new subarray %d contains broken device %d/%d (%02x)\n",
+				__func__, n, dl->major, dl->minor,
+				be16_to_cpu(
+					ddf->phys->entries[dl->pdnum].state));
+			/* Best effort: a failed write is reported, not fatal */
+			if (write(dev->state_fd, faulty, sizeof(faulty)-1) !=
+			    sizeof(faulty) - 1)
+				pr_err("Write to state_fd failed\n");
+			dev->curr_state = DS_FAULTY;
+		}
+	}
+	a->info.container_member = n;
+	return 0;
+}
+
+static void handle_missing(struct ddf_super *ddf, struct active_array *a, int inst)
+{
+ /* This member array is being activated. If any devices
+ * are missing they must now be marked as failed.
+ *
+ * Walks the raid-disk slots n = 0, 1, 2, ... of virtual disk 'inst'
+ * until find_vdcr() returns NULL. A slot counts as missing when
+ * no entry of ddf->dlist carries its phys_refnum. For such a slot:
+ * - if the phys-disk entry is found and still Online, it is switched
+ * to Failed|Missing and the slot's phys_refnum is zeroed;
+ * - the VD state is recomputed with get_svd_state() and, when it
+ * differs from the stored one, written to
+ * ddf->virt->entries[inst].state and a->check_degraded is set.
+ * Each change is flagged through ddf_set_updates_pending().
+ */
+ struct vd_config *vc;
+ unsigned int n_bvd;
+ struct vcl *vcl;
+ struct dl *dl;
+ int pd;
+ int n;
+ int state;
+
+ for (n = 0; ; n++) {
+ vc = find_vdcr(ddf, inst, n, &n_bvd, &vcl);
+ if (!vc)
+ break;
+ for (dl = ddf->dlist; dl; dl = dl->next)
+ if (be32_eq(dl->disk.refnum, vc->phys_refnum[n_bvd]))
+ break;
+ if (dl)
+ /* Found this disk, so not missing */
+ continue;
+
+ /* Mark the device as failed/missing. */
+ pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
+ if (pd >= 0 && be16_and(ddf->phys->entries[pd].state,
+ cpu_to_be16(DDF_Online))) {
+ be16_clear(ddf->phys->entries[pd].state,
+ cpu_to_be16(DDF_Online));
+ be16_set(ddf->phys->entries[pd].state,
+ cpu_to_be16(DDF_Failed|DDF_Missing));
+ vc->phys_refnum[n_bvd] = cpu_to_be32(0);
+ ddf_set_updates_pending(ddf, vc);
+ }
+
+ /* Mark the array as Degraded */
+ state = get_svd_state(ddf, vcl);
+ if (ddf->virt->entries[inst].state !=
+ ((ddf->virt->entries[inst].state & ~DDF_state_mask)
+ | state)) {
+ ddf->virt->entries[inst].state =
+ (ddf->virt->entries[inst].state & ~DDF_state_mask)
+ | state;
+ a->check_degraded = 1;
+ ddf_set_updates_pending(ddf, vc);
+ }
+ }
+}
+
+/*
+ * The array 'a' is to be marked clean in the metadata.
+ * If '->resync_start' is not ~(unsigned long long)0, then the array is only
+ * clean up to the point (in sectors). If that cannot be recorded in the
+ * metadata, then leave it as dirty.
+ *
+ * For DDF, we need to clear the DDF_state_inconsistent bit in the
+ * !global! virtual_disk.virtual_entry structure.
+ *
+ * A 'consistent' value of 2 first runs handle_missing() and is then
+ * treated as 1, or as 0 when is_resync_complete() is false.
+ * The init_state field is refreshed as well: DDF_init_full when the
+ * resync is complete, DDF_init_not when resync_start is 0, and
+ * DDF_init_quick otherwise. Updates are flagged pending only when
+ * the state or init_state value actually changed.
+ * Returns the (possibly adjusted) 'consistent' value.
+ */
+static int ddf_set_array_state(struct active_array *a, int consistent)
+{
+ struct ddf_super *ddf = a->container->sb;
+ int inst = a->info.container_member;
+ int old = ddf->virt->entries[inst].state;
+ if (consistent == 2) {
+ handle_missing(ddf, a, inst);
+ consistent = 1;
+ if (!is_resync_complete(&a->info))
+ consistent = 0;
+ }
+ if (consistent)
+ ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
+ else
+ ddf->virt->entries[inst].state |= DDF_state_inconsistent;
+ if (old != ddf->virt->entries[inst].state)
+ ddf_set_updates_pending(ddf, NULL);
+
+ old = ddf->virt->entries[inst].init_state;
+ ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
+ if (is_resync_complete(&a->info))
+ ddf->virt->entries[inst].init_state |= DDF_init_full;
+ else if (a->info.resync_start == 0)
+ ddf->virt->entries[inst].init_state |= DDF_init_not;
+ else
+ ddf->virt->entries[inst].init_state |= DDF_init_quick;
+ if (old != ddf->virt->entries[inst].init_state)
+ ddf_set_updates_pending(ddf, NULL);
+
+ dprintf("ddf mark %d/%s (%d) %s %llu\n", inst,
+ guid_str(ddf->virt->entries[inst].guid), a->curr_state,
+ consistent?"clean":"dirty",
+ a->info.resync_start);
+ return consistent;
+}
+
+/*
+ * Work out the DDF state (optimal / part_optimal / degraded / failed)
+ * of the single BVD described by 'vc'.
+ *
+ * A primary element counts as working when its phys-disk entry is
+ * Online and neither Failed nor Rebuilding. With all elements working
+ * the result is DDF_state_optimal; otherwise it depends on the primary
+ * RAID level (vc->prl), defaulting to DDF_state_degraded.
+ */
+static int get_bvd_state(const struct ddf_super *ddf,
+			 const struct vd_config *vc)
+{
+	unsigned int i, n_bvd, working = 0;
+	unsigned int n_prim = be16_to_cpu(vc->prim_elmnt_count);
+	int pd, st, state;
+	/* per-element availability map, only consulted by enough() */
+	char *avail = xcalloc(1, n_prim);
+	mdu_array_info_t array;
+
+	layout_ddf2md(vc, &array);
+
+	for (i = 0; i < n_prim; i++) {
+		if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
+			continue;
+		pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
+		if (pd < 0)
+			continue;
+		st = be16_to_cpu(ddf->phys->entries[pd].state);
+		if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
+		    == DDF_Online) {
+			working++;
+			avail[i] = 1;
+		}
+	}
+
+	state = DDF_state_degraded;
+	if (working == n_prim)
+		state = DDF_state_optimal;
+	else
+		switch (vc->prl) {
+		case DDF_RAID0:
+		case DDF_CONCAT:
+		case DDF_JBOD:
+			state = DDF_state_failed;
+			break;
+		case DDF_RAID1:
+			if (working == 0)
+				state = DDF_state_failed;
+			else if (working >= 2)
+				state = DDF_state_part_optimal;
+			break;
+		case DDF_RAID1E:
+			if (!enough(10, n_prim, array.layout, 1, avail))
+				state = DDF_state_failed;
+			break;
+		case DDF_RAID4:
+		case DDF_RAID5:
+			if (working < n_prim - 1)
+				state = DDF_state_failed;
+			break;
+		case DDF_RAID6:
+			if (working < n_prim - 2)
+				state = DDF_state_failed;
+			else if (working == n_prim - 1)
+				state = DDF_state_part_optimal;
+			break;
+		default:
+			/* unknown level: keep DDF_state_degraded */
+			break;
+		}
+	/* The map was leaked on every call before; release it here. */
+	free(avail);
+	return state;
+}
+
+/*
+ * Combine the states of two BVDs ('state' and 'other') into the state
+ * of the secondary-level array whose secondary RAID level is 'seclevel'.
+ * Two optimal halves are optimal for every level; DDF_2MIRRORED
+ * tolerates one bad half, any other level does not.
+ */
+static int secondary_state(int state, int other, int seclevel)
+{
+	const int state_opt = (state == DDF_state_optimal);
+	const int other_opt = (other == DDF_state_optimal);
+
+	if (state_opt && other_opt)
+		return DDF_state_optimal;
+
+	if (seclevel != DDF_2MIRRORED) {
+		/* Non-mirrored: the worse half decides. */
+		if (state == DDF_state_failed || other == DDF_state_failed)
+			return DDF_state_failed;
+		if (state == DDF_state_degraded || other == DDF_state_degraded)
+			return DDF_state_degraded;
+		return DDF_state_part_optimal;
+	}
+
+	/* Mirrored: one optimal half is enough for partially optimal. */
+	if (state_opt || other_opt)
+		return DDF_state_part_optimal;
+	if (state == DDF_state_failed && other == DDF_state_failed)
+		return DDF_state_failed;
+	return DDF_state_degraded;
+}
+
+/*
+ * State of a whole virtual disk: start from the BVD stored in
+ * vcl->conf and fold in every BVD from vcl->other_bvds using
+ * secondary_state() with the secondary RAID level vcl->conf.srl.
+ */
+static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
+{
+	int combined = get_bvd_state(ddf, &vcl->conf);
+	unsigned int seq = 1;
+
+	while (seq < vcl->conf.sec_elmnt_count) {
+		int bvd = get_bvd_state(ddf, vcl->other_bvds[seq - 1]);
+
+		combined = secondary_state(combined, bvd, vcl->conf.srl);
+		seq++;
+	}
+	return combined;
+}
+
+/*
+ * The state of each disk is stored in the global phys_disk structure
+ * in phys_disk.entries[n].state.
+ * This makes various combinations awkward.
+ * - When a device fails in any array, it must be failed in all arrays
+ * that include a part of this device.
+ * - When a component is rebuilding, we cannot include it officially in the
+ * array unless this is the only array that uses the device.
+ *
+ * So: when transitioning:
+ * Online -> failed, just set failed flag. monitor will propagate
+ * spare -> online, the device might need to be added to the array.
+ * spare -> failed, just set failed. Don't worry if in array or not.
+ *
+ * 'n' is the raid-disk slot within array 'a', 'state' a mask of DS_*
+ * bits. After the phys-disk entry has been adjusted, the state of the
+ * virtual disk is recomputed with get_svd_state() and an update is
+ * flagged pending when anything changed.
+ */
+static void ddf_set_disk(struct active_array *a, int n, int state)
+{
+	struct ddf_super *ddf = a->container->sb;
+	unsigned int inst = a->info.container_member, n_bvd;
+	struct vcl *vcl;
+	struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
+					 &n_bvd, &vcl);
+	int pd;
+	struct mdinfo *mdi;
+	struct dl *dl;
+	int update = 0;
+
+	dprintf("%s: %d to %x\n", __func__, n, state);
+	if (vc == NULL) {
+		dprintf("ddf: cannot find instance %d!!\n", inst);
+		return;
+	}
+	/* Find the matching slot in 'info'. */
+	for (mdi = a->info.devs; mdi; mdi = mdi->next)
+		if (mdi->disk.raid_disk == n)
+			break;
+	if (!mdi) {
+		pr_err("%s: cannot find raid disk %d\n",
+		       __func__, n);
+		return;
+	}
+
+	/* and find the 'dl' entry corresponding to that. */
+	for (dl = ddf->dlist; dl; dl = dl->next)
+		if (mdi->state_fd >= 0 &&
+		    mdi->disk.major == dl->major &&
+		    mdi->disk.minor == dl->minor)
+			break;
+	if (!dl) {
+		pr_err("%s: cannot find raid disk %d (%d/%d)\n",
+		       __func__, n,
+		       mdi->disk.major, mdi->disk.minor);
+		return;
+	}
+
+	pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
+	if (pd < 0 || pd != dl->pdnum) {
+		/* disk doesn't currently exist or has changed.
+		 * If it is now in_sync, insert it. */
+		dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
+			__func__, dl->pdnum, dl->major, dl->minor,
+			be32_to_cpu(dl->disk.refnum));
+		dprintf("%s: array %u disk %u ref %08x pd %d\n",
+			__func__, inst, n_bvd,
+			be32_to_cpu(vc->phys_refnum[n_bvd]), pd);
+		if ((state & DS_INSYNC) && ! (state & DS_FAULTY) &&
+		    dl->pdnum >= 0) {
+			pd = dl->pdnum;
+			vc->phys_refnum[n_bvd] = dl->disk.refnum;
+			LBA_OFFSET(ddf, vc)[n_bvd] =
+				cpu_to_be64(mdi->data_offset);
+			be16_clear(ddf->phys->entries[pd].type,
+				   cpu_to_be16(DDF_Global_Spare));
+			be16_set(ddf->phys->entries[pd].type,
+				 cpu_to_be16(DDF_Active_in_VD));
+			update = 1;
+		}
+	} else {
+		be16 old = ddf->phys->entries[pd].state;
+		if (state & DS_FAULTY)
+			be16_set(ddf->phys->entries[pd].state,
+				 cpu_to_be16(DDF_Failed));
+		if (state & DS_INSYNC) {
+			be16_set(ddf->phys->entries[pd].state,
+				 cpu_to_be16(DDF_Online));
+			be16_clear(ddf->phys->entries[pd].state,
+				   cpu_to_be16(DDF_Rebuilding));
+		}
+		if (!be16_eq(old, ddf->phys->entries[pd].state))
+			update = 1;
+	}
+
+	/* 'pd' is still negative when the slot had no phys disk and none
+	 * was inserted above; entries[pd] must not be read in that case.
+	 */
+	if (pd >= 0) {
+		dprintf("ddf: set_disk %d (%08x) to %x->%02x\n", n,
+			be32_to_cpu(dl->disk.refnum), state,
+			be16_to_cpu(ddf->phys->entries[pd].state));
+	} else {
+		dprintf("ddf: set_disk %d (%08x) to %x\n", n,
+			be32_to_cpu(dl->disk.refnum), state);
+	}
+
+	/* Now we need to check the state of the array and update
+	 * virtual_disk.entries[n].state.
+	 * It needs to be one of "optimal", "degraded", "failed".
+	 * I don't understand 'deleted' or 'missing'.
+	 */
+	state = get_svd_state(ddf, vcl);
+
+	if (ddf->virt->entries[inst].state !=
+	    ((ddf->virt->entries[inst].state & ~DDF_state_mask)
+	     | state)) {
+		ddf->virt->entries[inst].state =
+			(ddf->virt->entries[inst].state & ~DDF_state_mask)
+			| state;
+		update = 1;
+	}
+	if (update)
+		ddf_set_updates_pending(ddf, vc);
+}
+
+/*
+ * Flush the metadata when something is pending.
+ * Everything is written to all devices; no attempt is made to tell
+ * local changes from global ones, since ddf probably always changes
+ * global data anyway.
+ */
+static void ddf_sync_metadata(struct supertype *st)
+{
+	struct ddf_super *sb = st->sb;
+
+	if (sb->updates_pending) {
+		/* clear the flag first, then write */
+		sb->updates_pending = 0;
+		__write_init_super_ddf(st);
+		dprintf("ddf: sync_metadata\n");
+	}
+}
+
+/*
+ * Unlink every entry whose conf.guid equals 'guid' from the list that
+ * starts at *list. Returns 1 if at least one entry was unlinked, else 0.
+ * The unlinked entries are not freed here.
+ */
+static int del_from_conflist(struct vcl **list, const char *guid)
+{
+	struct vcl **p = list;
+	int found = 0;
+
+	while (p && *p) {
+		if (!memcmp((*p)->conf.guid, guid, DDF_GUID_LEN)) {
+			found = 1;
+			/* Unlink and stay put: *p is now the successor,
+			 * which may be NULL and must be examined itself.
+			 */
+			*p = (*p)->next;
+		} else
+			p = &((*p)->next);
+	}
+	return found;
+}
+
+/*
+ * Remove the virtual disk identified by 'guid' from the in-memory
+ * metadata: drop its configuration from ddf->conflist, clear every
+ * reference to it in the per-disk vlist arrays and mark its virtual
+ * disk entry unused by filling the GUID with 0xff.
+ * Returns 0 on success, -1 if either the VD entry or its configuration
+ * cannot be found.
+ */
+static int _kill_subarray_ddf(struct ddf_super *ddf, const char *guid)
+{
+	struct dl *disk;
+	unsigned int slot, part;
+
+	slot = find_vde_by_guid(ddf, guid);
+	if (slot == DDF_NOTFOUND) {
+		pr_err("%s: could not find VD %s\n", __func__,
+		       guid_str(guid));
+		return -1;
+	}
+	if (!del_from_conflist(&ddf->conflist, guid)) {
+		pr_err("%s: could not find conf %s\n", __func__,
+		       guid_str(guid));
+		return -1;
+	}
+	for (disk = ddf->dlist; disk != NULL; disk = disk->next) {
+		for (part = 0; part < ddf->max_part; part++) {
+			struct vcl *ref = disk->vlist[part];
+
+			if (ref == NULL)
+				continue;
+			if (memcmp(ref->conf.guid, guid, DDF_GUID_LEN) == 0)
+				disk->vlist[part] = NULL;
+		}
+	}
+	memset(ddf->virt->entries[slot].guid, 0xff, DDF_GUID_LEN);
+	dprintf("%s: deleted %s\n", __func__, guid_str(guid));
+	return 0;
+}
+
+/*
+ * Delete the subarray selected in ddf->currentconf.
+ * With st->update_tail set, a one-entry virtual_disk record carrying
+ * the victim's GUID and DDF_state_deleted is queued as a metadata
+ * update; otherwise the metadata is changed and synced directly.
+ * Returns 0 on success, -1 when nothing is selected, the VD entry
+ * cannot be found or the update record cannot be allocated.
+ */
+static int kill_subarray_ddf(struct supertype *st)
+{
+	struct ddf_super *ddf = st->sb;
+	/*
+	 * currentconf is set in container_content_ddf,
+	 * called with subarray arg
+	 */
+	struct vcl *victim = ddf->currentconf;
+	struct vd_config *conf;
+	struct virtual_disk *vd;
+	int len;
+
+	/* the selection is consumed whatever the outcome */
+	ddf->currentconf = NULL;
+	if (victim == NULL) {
+		pr_err("%s: nothing to kill\n", __func__);
+		return -1;
+	}
+	conf = &victim->conf;
+	if (find_vde_by_guid(ddf, conf->guid) == DDF_NOTFOUND) {
+		pr_err("%s: could not find VD %s\n", __func__,
+		       guid_str(conf->guid));
+		return -1;
+	}
+
+	if (!st->update_tail) {
+		/* no update queue: apply and write out right away */
+		_kill_subarray_ddf(ddf, conf->guid);
+		ddf_set_updates_pending(ddf, NULL);
+		ddf_sync_metadata(st);
+		return 0;
+	}
+
+	len = sizeof(struct virtual_disk)
+		+ sizeof(struct virtual_entry);
+	vd = xmalloc(len);
+	if (vd == NULL) {
+		pr_err("%s: failed to allocate %d bytes\n", __func__,
+		       len);
+		return -1;
+	}
+	memset(vd, 0 , len);
+	vd->magic = DDF_VIRT_RECORDS_MAGIC;
+	vd->populated_vdes = cpu_to_be16(0);
+	memcpy(vd->entries[0].guid, conf->guid, DDF_GUID_LEN);
+	/* we use DDF_state_deleted as marker */
+	vd->entries[0].state = DDF_state_deleted;
+	append_metadata_update(st, vd, len);
+	return 0;
+}
+
+/*
+ * Scan the config records packed in update->buf (one every
+ * ddf->conf_rec_len * 512 bytes) for the one whose sec_elmnt_seq
+ * matches 'conf', and copy its phys_refnum area into 'conf'.
+ * The copy spans max_primary_element_entries pairs of a 32-bit and a
+ * 64-bit value. An error is logged when no record matches.
+ */
+static void copy_matching_bvd(struct ddf_super *ddf,
+			      struct vd_config *conf,
+			      const struct metadata_update *update)
+{
+	const unsigned int n_elem =
+		be16_to_cpu(ddf->anchor.max_primary_element_entries);
+	const unsigned int rec_bytes = ddf->conf_rec_len * 512;
+	char *cursor = update->buf;
+
+	while (cursor < update->buf + update->len) {
+		struct vd_config *cand = (struct vd_config *) cursor;
+
+		if (cand->sec_elmnt_seq == conf->sec_elmnt_seq) {
+			memcpy(conf->phys_refnum, cand->phys_refnum,
+			       n_elem * (sizeof(__u32) + sizeof(__u64)));
+			return;
+		}
+		cursor += rec_bytes;
+	}
+	pr_err("%s: no match for BVD %d of %s in update\n", __func__,
+	       conf->sec_elmnt_seq, guid_str(conf->guid));
+}
+
+static void ddf_process_phys_update(struct supertype *st,
+ struct metadata_update *update)
+{ /*
+ * Apply a physical-disk update. update->buf is read as a phys_disk
+ * record whose used_pdes field names the target slot 'ent' and whose
+ * entries[0] is the new entry. Slots >= max_pdes are ignored.
+ *
+ * - entries[0] has DDF_Missing set: the slot is flagged Missing and
+ * the dl with that pdnum is closed and unlinked from ddf->dlist.
+ * Its devname goes to update->space and the dl itself is chained
+ * onto update->space_list (presumably so the update's owner
+ * releases them later - confirm against the caller).
+ * - otherwise the entry is only stored if the slot is unused
+ * (all_ff guid); used_pdes is incremented and, if ddf->add_list is
+ * non-empty, its head is moved to ddf->dlist and every array in
+ * st->arrays gets check_degraded set.
+ */
+ struct ddf_super *ddf = st->sb;
+ struct phys_disk *pd;
+ unsigned int ent;
+
+ pd = (struct phys_disk*)update->buf;
+ ent = be16_to_cpu(pd->used_pdes);
+ if (ent >= be16_to_cpu(ddf->phys->max_pdes))
+ return;
+ if (be16_and(pd->entries[0].state, cpu_to_be16(DDF_Missing))) {
+ struct dl **dlp;
+ /* removing this disk. */
+ be16_set(ddf->phys->entries[ent].state,
+ cpu_to_be16(DDF_Missing));
+ for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
+ struct dl *dl = *dlp;
+ if (dl->pdnum == (signed)ent) {
+ close(dl->fd);
+ dl->fd = -1;
+ *dlp = dl->next;
+ update->space = dl->devname;
+ *(void**)dl = update->space_list;
+ update->space_list = (void**)dl;
+ break;
+ }
+ }
+ ddf_set_updates_pending(ddf, NULL);
+ return;
+ }
+ if (!all_ff(ddf->phys->entries[ent].guid))
+ return;
+ ddf->phys->entries[ent] = pd->entries[0];
+ ddf->phys->used_pdes = cpu_to_be16
+ (1 + be16_to_cpu(ddf->phys->used_pdes));
+ ddf_set_updates_pending(ddf, NULL);
+ if (ddf->add_list) {
+ struct active_array *a;
+ struct dl *al = ddf->add_list;
+ ddf->add_list = al->next;
+
+ al->next = ddf->dlist;
+ ddf->dlist = al;
+
+ /* As a device has been added, we should check
+ * for any degraded devices that might make
+ * use of this spare */
+ for (a = st->arrays ; a; a=a->next)
+ a->check_degraded = 1;
+ }
+}
+
+/*
+ * Apply a virtual-disk update. update->buf is read as a virtual_disk
+ * record with a single entry:
+ * - state DDF_state_deleted: remove the VD with that GUID;
+ * - anything else: add the entry in the first unused slot, unless a
+ * VD with the same GUID already exists or no slot is free.
+ * An update is flagged pending only when something was changed.
+ */
+static void ddf_process_virt_update(struct supertype *st,
+				    struct metadata_update *update)
+{
+	struct ddf_super *ddf = st->sb;
+	struct virtual_disk *vd = (struct virtual_disk*)update->buf;
+	unsigned int ent;
+
+	if (vd->entries[0].state == DDF_state_deleted) {
+		if (_kill_subarray_ddf(ddf, vd->entries[0].guid) == 0)
+			ddf_set_updates_pending(ddf, NULL);
+		return;
+	}
+
+	ent = find_vde_by_guid(ddf, vd->entries[0].guid);
+	if (ent != DDF_NOTFOUND) {
+		dprintf("%s: VD %s exists already in slot %d\n",
+			__func__, guid_str(vd->entries[0].guid),
+			ent);
+		return;
+	}
+
+	ent = find_unused_vde(ddf);
+	if (ent == DDF_NOTFOUND)
+		return;
+
+	ddf->virt->entries[ent] = vd->entries[0];
+	ddf->virt->populated_vdes =
+		cpu_to_be16(1 + be16_to_cpu(ddf->virt->populated_vdes));
+	dprintf("%s: added VD %s in slot %d(s=%02x i=%02x)\n",
+		__func__, guid_str(vd->entries[0].guid), ent,
+		ddf->virt->entries[ent].state,
+		ddf->virt->entries[ent].init_state);
+	ddf_set_updates_pending(ddf, NULL);
+}
+
+static void ddf_remove_failed(struct ddf_super *ddf)
+{
+ /* Now remove any 'Failed' devices that are not part
+ * of any VD. They will have the Transition flag set.
+ * Once done, we need to update all dl->pdnum numbers.
+ *
+ * The phys-disk table is compacted in place: 'pdnum' is the read
+ * index over all max_pdes slots, 'pd2' the write index. A slot is
+ * dropped when its refnum is 0xFFFFFFFF, or when it has both
+ * Failed and Transition set and no dl in ddf->dlist refers to it.
+ * Kept entries are moved down to 'pd2' and every dl pointing at
+ * the old index is renumbered. Finally used_pdes becomes the
+ * number of kept entries and the GUIDs of all slots behind them
+ * are filled with 0xff.
+ */
+ unsigned int pdnum;
+ unsigned int pd2 = 0;
+ struct dl *dl;
+
+ for (pdnum = 0; pdnum < be16_to_cpu(ddf->phys->max_pdes);
+ pdnum++) {
+ if (be32_to_cpu(ddf->phys->entries[pdnum].refnum) ==
+ 0xFFFFFFFF)
+ continue;
+ if (be16_and(ddf->phys->entries[pdnum].state,
+ cpu_to_be16(DDF_Failed))
+ && be16_and(ddf->phys->entries[pdnum].state,
+ cpu_to_be16(DDF_Transition))) {
+ /* skip this one unless in dlist*/
+ for (dl = ddf->dlist; dl; dl = dl->next)
+ if (dl->pdnum == (int)pdnum)
+ break;
+ if (!dl)
+ continue;
+ }
+ if (pdnum == pd2)
+ pd2++;
+ else {
+ ddf->phys->entries[pd2] =
+ ddf->phys->entries[pdnum];
+ for (dl = ddf->dlist; dl; dl = dl->next)
+ if (dl->pdnum == (int)pdnum)
+ dl->pdnum = pd2;
+ pd2++;
+ }
+ }
+ ddf->phys->used_pdes = cpu_to_be16(pd2);
+ while (pd2 < pdnum) {
+ memset(ddf->phys->entries[pd2].guid, 0xff,
+ DDF_GUID_LEN);
+ pd2++;
+ }
+}
+
+static void ddf_update_vlist(struct ddf_super *ddf, struct dl *dl)
+{ /*
+ * Rebuild dl->vlist and the type flags of the disk's phys entry.
+ * Does nothing when the disk has no phys-disk slot (pdnum < 0).
+ *
+ * Every configuration in ddf->conflist that references the disk's
+ * refnum is appended to dl->vlist; the remaining slots up to
+ * ddf->max_part are set to NULL. For each such configuration the
+ * Transition flag is cleared if the disk is Failed.
+ * Type flags afterwards:
+ * - used by a VD: Global_Spare cleared, Active_in_VD set; when the
+ * latter is newly set and one of the VDs is degraded or partially
+ * optimal, the disk is also marked Rebuilding;
+ * - has a spare record: Global_Spare cleared, Spare set;
+ * - neither: Global_Spare set, Spare and Active_in_VD cleared.
+ *
+ * NOTE(review): 'vn' is not checked against ddf->max_part before
+ * the store into dl->vlist[] - confirm that a disk can never be
+ * referenced by more than max_part configurations.
+ */
+ struct vcl *vcl;
+ unsigned int vn = 0;
+ int in_degraded = 0;
+
+ if (dl->pdnum < 0)
+ return;
+ for (vcl = ddf->conflist; vcl ; vcl = vcl->next) {
+ unsigned int dn, ibvd;
+ const struct vd_config *conf;
+ int vstate;
+ dn = get_pd_index_from_refnum(vcl,
+ dl->disk.refnum,
+ ddf->mppe,
+ &conf, &ibvd);
+ if (dn == DDF_NOTFOUND)
+ continue;
+ dprintf("dev %d/%08x has %s (sec=%u) at %d\n",
+ dl->pdnum,
+ be32_to_cpu(dl->disk.refnum),
+ guid_str(conf->guid),
+ conf->sec_elmnt_seq, vn);
+ /* Clear the Transition flag */
+ if (be16_and
+ (ddf->phys->entries[dl->pdnum].state,
+ cpu_to_be16(DDF_Failed)))
+ be16_clear(ddf->phys
+ ->entries[dl->pdnum].state,
+ cpu_to_be16(DDF_Transition));
+ dl->vlist[vn++] = vcl;
+ vstate = ddf->virt->entries[vcl->vcnum].state
+ & DDF_state_mask;
+ if (vstate == DDF_state_degraded ||
+ vstate == DDF_state_part_optimal)
+ in_degraded = 1;
+ }
+ while (vn < ddf->max_part)
+ dl->vlist[vn++] = NULL;
+ if (dl->vlist[0]) {
+ be16_clear(ddf->phys->entries[dl->pdnum].type,
+ cpu_to_be16(DDF_Global_Spare));
+ if (!be16_and(ddf->phys
+ ->entries[dl->pdnum].type,
+ cpu_to_be16(DDF_Active_in_VD))) {
+ be16_set(ddf->phys
+ ->entries[dl->pdnum].type,
+ cpu_to_be16(DDF_Active_in_VD));
+ if (in_degraded)
+ be16_set(ddf->phys
+ ->entries[dl->pdnum]
+ .state,
+ cpu_to_be16
+ (DDF_Rebuilding));
+ }
+ }
+ if (dl->spare) {
+ be16_clear(ddf->phys->entries[dl->pdnum].type,
+ cpu_to_be16(DDF_Global_Spare));
+ be16_set(ddf->phys->entries[dl->pdnum].type,
+ cpu_to_be16(DDF_Spare));
+ }
+ if (!dl->vlist[0] && !dl->spare) {
+ be16_set(ddf->phys->entries[dl->pdnum].type,
+ cpu_to_be16(DDF_Global_Spare));
+ be16_clear(ddf->phys->entries[dl->pdnum].type,
+ cpu_to_be16(DDF_Spare));
+ be16_clear(ddf->phys->entries[dl->pdnum].type,
+ cpu_to_be16(DDF_Active_in_VD));
+ }
+}
+
+/*
+ * Apply a VD configuration update. update->buf holds one config record
+ * of ddf->conf_rec_len * 512 bytes per BVD; the update is rejected when
+ * its length does not match sec_elmnt_count records.
+ *
+ * - A configuration with the same GUID already exists: only the
+ * phys_refnum / lba_offset area of each BVD is refreshed.
+ * - Otherwise the buffer pre-allocated in update->space becomes the new
+ * list entry. The VD entry is looked up before the buffer is claimed,
+ * so that on failure it stays in update->space instead of being
+ * orphaned (presumably the update's owner releases it - confirm).
+ *
+ * Afterwards Failed disks are tagged with Transition, every disk's
+ * vlist is rebuilt, unused Failed disks are dropped and an update is
+ * flagged pending.
+ */
+static void ddf_process_conf_update(struct supertype *st,
+				    struct metadata_update *update)
+{
+	struct ddf_super *ddf = st->sb;
+	struct vd_config *vc;
+	struct vcl *vcl;
+	struct dl *dl;
+	unsigned int ent;
+	unsigned int pdnum, len;
+
+	vc = (struct vd_config*)update->buf;
+	len = ddf->conf_rec_len * 512;
+	if ((unsigned int)update->len != len * vc->sec_elmnt_count) {
+		pr_err("%s: %s: insufficient data (%d) for %u BVDs\n",
+		       __func__, guid_str(vc->guid), update->len,
+		       vc->sec_elmnt_count);
+		return;
+	}
+	for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
+		if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
+			break;
+	dprintf("%s: conf update for %s (%s)\n", __func__,
+		guid_str(vc->guid), (vcl ? "old" : "new"));
+	if (vcl) {
+		/* An update, just copy the phys_refnum and lba_offset
+		 * fields
+		 */
+		unsigned int i;
+		unsigned int k;
+		copy_matching_bvd(ddf, &vcl->conf, update);
+		for (k = 0; k < be16_to_cpu(vc->prim_elmnt_count); k++)
+			dprintf("BVD %u has %08x at %llu\n", 0,
+				be32_to_cpu(vcl->conf.phys_refnum[k]),
+				be64_to_cpu(LBA_OFFSET(ddf,
+						       &vcl->conf)[k]));
+		for (i = 1; i < vc->sec_elmnt_count; i++) {
+			copy_matching_bvd(ddf, vcl->other_bvds[i-1],
+					  update);
+			for (k = 0; k < be16_to_cpu(
+				     vc->prim_elmnt_count); k++)
+				dprintf("BVD %u has %08x at %llu\n", i,
+					be32_to_cpu
+					(vcl->other_bvds[i-1]->
+					 phys_refnum[k]),
+					be64_to_cpu
+					(LBA_OFFSET
+					 (ddf,
+					  vcl->other_bvds[i-1])[k]));
+		}
+	} else {
+		/* A new VD_CONF */
+		unsigned int i;
+		if (!update->space)
+			return;
+		/* Look up the VD entry first: bailing out after the
+		 * buffer has been claimed would orphan it.
+		 */
+		ent = find_vde_by_guid(ddf, vc->guid);
+		if (ent == DDF_NOTFOUND)
+			return;
+		vcl = update->space;
+		update->space = NULL;
+		vcl->next = ddf->conflist;
+		memcpy(&vcl->conf, vc, len);
+		vcl->vcnum = ent;
+		ddf->conflist = vcl;
+		for (i = 1; i < vc->sec_elmnt_count; i++)
+			memcpy(vcl->other_bvds[i-1],
+			       update->buf + len * i, len);
+	}
+	/* Set DDF_Transition on all Failed devices - to help
+	 * us detect those that are no longer in use
+	 */
+	for (pdnum = 0; pdnum < be16_to_cpu(ddf->phys->max_pdes);
+	     pdnum++)
+		if (be16_and(ddf->phys->entries[pdnum].state,
+			     cpu_to_be16(DDF_Failed)))
+			be16_set(ddf->phys->entries[pdnum].state,
+				 cpu_to_be16(DDF_Transition));
+
+	/* Now make sure vlist is correct for each dl. */
+	for (dl = ddf->dlist; dl; dl = dl->next)
+		ddf_update_vlist(ddf, dl);
+	ddf_remove_failed(ddf);
+
+	ddf_set_updates_pending(ddf, vc);
+}
+
+/*
+ * Apply one metadata update. The first four bytes of update->buf are a
+ * DDF_*_MAGIC value that selects the handler:
+ *
+ * DDF_PHYS_RECORDS_MAGIC
+ * Add a new physical device or remove an old one; used_pdes is
+ * the device number. Changes to this record only happen
+ * implicitly.
+ * DDF_VIRT_RECORDS_MAGIC
+ * Add a new VD, possibly also change the 'access' bits;
+ * populated_vdes is the entry number.
+ * DDF_VD_CONF_MAGIC
+ * New or updated VD; the VIRT_RECORD must already exist. For an
+ * update, phys_refnum and lba_offset (at least) are updated, and
+ * the VD_CONF must be written to precisely those devices listed
+ * with a phys_refnum.
+ * DDF_SPARE_ASSIGN_MAGIC
+ * Replacement Spare Assignment Record... but for which device?
+ * Not handled here.
+ *
+ * So, e.g.:
+ * - to create a new array, a VIRT_RECORD and a VD_CONF are sent, then
+ * the array is assembled and started;
+ * - to activate a spare, a VD_CONF adding the phys_refnum and offset is
+ * sent. This will also mark the spare as active with a
+ * spare-assignment record.
+ *
+ * Phys and virt updates are only accepted when their length is exactly
+ * one record header plus one entry.
+ */
+static void ddf_process_update(struct supertype *st,
+			       struct metadata_update *update)
+{
+	const size_t phys_len = sizeof(struct phys_disk) +
+		sizeof(struct phys_disk_entry);
+	const size_t virt_len = sizeof(struct virtual_disk) +
+		sizeof(struct virtual_entry);
+	be32 *magic = (be32 *)update->buf;
+
+	dprintf("Process update %x\n", be32_to_cpu(*magic));
+
+	if (be32_eq(*magic, DDF_PHYS_RECORDS_MAGIC)) {
+		if (update->len == phys_len)
+			ddf_process_phys_update(st, update);
+		return;
+	}
+	if (be32_eq(*magic, DDF_VIRT_RECORDS_MAGIC)) {
+		if (update->len == virt_len)
+			ddf_process_virt_update(st, update);
+		return;
+	}
+	if (be32_eq(*magic, DDF_VD_CONF_MAGIC))
+		ddf_process_conf_update(st, update);
+	/* case DDF_SPARE_ASSIGN_MAGIC */
+}
+
+/*
+ * This update arrived at managemon and is about to be passed to
+ * monitor; any allocation it will need is made here.
+ *
+ * Only DDF_VD_CONF_MAGIC updates need memory: a 512-byte aligned vcl
+ * large enough for one config record is placed in update->space, and
+ * alloc_other_bvds() is run on it with the update's sec_elmnt_count.
+ * Returns 1 when the update may proceed, 0 when it is too short or an
+ * allocation failed (update->space is NULL in the latter case).
+ */
+static int ddf_prepare_update(struct supertype *st,
+			      struct metadata_update *update)
+{
+	struct ddf_super *ddf = st->sb;
+	struct vd_config *conf;
+	struct vcl *vcl;
+	be32 *magic;
+
+	if (update->len < 4)
+		return 0;
+	magic = (be32 *)update->buf;
+	if (!be32_eq(*magic, DDF_VD_CONF_MAGIC))
+		return 1;	/* nothing to allocate for other updates */
+
+	if (update->len < (int)sizeof(*conf))
+		return 0;
+	conf = (struct vd_config *) update->buf;
+	if (posix_memalign(&update->space, 512,
+			   offsetof(struct vcl, conf)
+			   + ddf->conf_rec_len * 512) != 0) {
+		update->space = NULL;
+		return 0;
+	}
+	vcl = update->space;
+	vcl->conf.sec_elmnt_count = conf->sec_elmnt_count;
+	if (alloc_other_bvds(ddf, vcl) != 0) {
+		free(update->space);
+		update->space = NULL;
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Check degraded state of a RAID10.
+ * returns 2 for good, 1 for degraded, 0 for failed, and -1 for error
+ *
+ * The number of devices per BVD is taken from the layout with bit
+ * 0x100 masked off. A BVD with no device having state_fd > 0 makes the
+ * array failed; one with fewer than all of them makes it degraded.
+ * A layout that yields no devices per BVD, or a raid disk number
+ * outside the array, is reported as an error.
+ */
+static int raid10_degraded(struct mdinfo *info)
+{
+	int n_prim, n_bvds;
+	int i;
+	struct mdinfo *d;
+	char *found;
+	int ret = -1;
+
+	n_prim = info->array.layout & ~0x100;
+	if (n_prim <= 0) {
+		/* would divide by zero below */
+		pr_err("%s: BUG: invalid layout\n", __func__);
+		return ret;
+	}
+	n_bvds = info->array.raid_disks / n_prim;
+	found = xmalloc(n_bvds);
+	if (found == NULL)
+		return ret;
+	memset(found, 0, n_bvds);
+	for (d = info->devs; d; d = d->next) {
+		i = d->disk.raid_disk / n_prim;
+		/* reject indices on either side of found[] */
+		if (i < 0 || i >= n_bvds) {
+			pr_err("%s: BUG: invalid raid disk\n", __func__);
+			goto out;
+		}
+		if (d->state_fd > 0)
+			found[i]++;
+	}
+	ret = 2;
+	for (i = 0; i < n_bvds; i++)
+		if (!found[i]) {
+			dprintf("%s: BVD %d/%d failed\n", __func__, i, n_bvds);
+			ret = 0;
+			goto out;
+		} else if (found[i] < n_prim) {
+			dprintf("%s: BVD %d/%d degraded\n", __func__, i,
+				n_bvds);
+			ret = 1;
+		}
+out:
+	free(found);
+	return ret;
+}
+
+/*
+ * Check if the array 'a' is degraded but not failed.
+ * If it is, find as many spares as are available and needed and
+ * arrange for their inclusion.
+ * We only choose devices which are not already in the array,
+ * and prefer those with a spare-assignment to this array.
+ * Otherwise we choose global spares - assuming always that
+ * there is enough room.
+ * For each spare that we assign, we return an 'mdinfo' which
+ * describes the position for the device in the array.
+ * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
+ * the new phys_refnum and lba_offset values.
+ *
+ * Only worry about BVDs at the moment.
+ *
+ * Returns the list of chosen spares, or NULL when nothing is to be
+ * done or an error occurred. On the error paths everything allocated
+ * here is released and '*updates' is left untouched.
+ */
+static struct mdinfo *ddf_activate_spare(struct active_array *a,
+					 struct metadata_update **updates)
+{
+	int working = 0;
+	struct mdinfo *d;
+	struct ddf_super *ddf = a->container->sb;
+	int global_ok = 0;
+	struct mdinfo *rv = NULL;
+	struct mdinfo *di;
+	struct metadata_update *mu;
+	struct dl *dl;
+	int i;
+	unsigned int j;
+	struct vcl *vcl;
+	struct vd_config *vc;
+	unsigned int n_bvd;
+
+	for (d = a->info.devs ; d ; d = d->next) {
+		if ((d->curr_state & DS_FAULTY) &&
+		    d->state_fd >= 0)
+			/* wait for Removal to happen */
+			return NULL;
+		if (d->state_fd >= 0)
+			working ++;
+	}
+
+	dprintf("%s: working=%d (%d) level=%d\n", __func__, working,
+		a->info.array.raid_disks,
+		a->info.array.level);
+	if (working == a->info.array.raid_disks)
+		return NULL; /* array not degraded */
+	switch (a->info.array.level) {
+	case 1:
+		if (working == 0)
+			return NULL; /* failed */
+		break;
+	case 4:
+	case 5:
+		if (working < a->info.array.raid_disks - 1)
+			return NULL; /* failed */
+		break;
+	case 6:
+		if (working < a->info.array.raid_disks - 2)
+			return NULL; /* failed */
+		break;
+	case 10:
+		if (raid10_degraded(&a->info) < 1)
+			return NULL;
+		break;
+	default: /* concat or stripe */
+		return NULL; /* failed */
+	}
+
+	/* For each slot, if it is not working, find a spare */
+	dl = ddf->dlist;
+	for (i = 0; i < a->info.array.raid_disks; i++) {
+		for (d = a->info.devs ; d ; d = d->next)
+			if (d->disk.raid_disk == i)
+				break;
+		dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
+		if (d && (d->state_fd >= 0))
+			continue;
+
+		/* OK, this device needs recovery. Find a spare */
+	again:
+		for ( ; dl ; dl = dl->next) {
+			unsigned long long esize;
+			unsigned long long pos;
+			struct mdinfo *d2;
+			int is_global = 0;
+			int is_dedicated = 0;
+			be16 state;
+
+			if (dl->pdnum < 0)
+				continue;
+			state = ddf->phys->entries[dl->pdnum].state;
+			if (be16_and(state,
+				     cpu_to_be16(DDF_Failed|DDF_Missing)) ||
+			    !be16_and(state,
+				      cpu_to_be16(DDF_Online)))
+				continue;
+
+			/* If in this array, skip */
+			for (d2 = a->info.devs ; d2 ; d2 = d2->next)
+				if (d2->state_fd >= 0 &&
+				    d2->disk.major == dl->major &&
+				    d2->disk.minor == dl->minor) {
+					dprintf("%x:%x (%08x) already in array\n",
+						dl->major, dl->minor,
+						be32_to_cpu(dl->disk.refnum));
+					break;
+				}
+			if (d2)
+				continue;
+			if (be16_and(ddf->phys->entries[dl->pdnum].type,
+				     cpu_to_be16(DDF_Spare))) {
+				/* Check spare assign record */
+				if (dl->spare) {
+					if (dl->spare->type & DDF_spare_dedicated) {
+						/* check spare_ents for guid */
+						unsigned int k;
+						for (k = 0 ;
+						     k < be16_to_cpu
+							     (dl->spare
+							      ->populated);
+						     k++) {
+							if (memcmp(dl->spare->spare_ents[k].guid,
+								   ddf->virt->entries[a->info.container_member].guid,
+								   DDF_GUID_LEN) == 0)
+								is_dedicated = 1;
+						}
+					} else
+						is_global = 1;
+				}
+			} else if (be16_and(ddf->phys->entries[dl->pdnum].type,
+					    cpu_to_be16(DDF_Global_Spare))) {
+				is_global = 1;
+			} else if (!be16_and(ddf->phys
+					     ->entries[dl->pdnum].state,
+					     cpu_to_be16(DDF_Failed))) {
+				/* we can possibly use some of this */
+				is_global = 1;
+			}
+			if ( ! (is_dedicated ||
+				(is_global && global_ok))) {
+				dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
+					is_dedicated, is_global);
+				continue;
+			}
+
+			/* We are allowed to use this device - is there space?
+			 * We need a->info.component_size sectors */
+			esize = a->info.component_size;
+			pos = find_space(ddf, dl, INVALID_SECTORS, &esize);
+
+			if (esize < a->info.component_size) {
+				dprintf("%x:%x has no room: %llu %llu\n",
+					dl->major, dl->minor,
+					esize, a->info.component_size);
+				/* No room */
+				continue;
+			}
+
+			/* Cool, we have a device with some space at pos */
+			di = xcalloc(1, sizeof(*di));
+			di->disk.number = i;
+			di->disk.raid_disk = i;
+			di->disk.major = dl->major;
+			di->disk.minor = dl->minor;
+			di->disk.state = 0;
+			di->recovery_start = 0;
+			di->data_offset = pos;
+			di->component_size = a->info.component_size;
+			di->next = rv;
+			rv = di;
+			dprintf("%x:%x (%08x) to be %d at %llu\n",
+				dl->major, dl->minor,
+				be32_to_cpu(dl->disk.refnum), i, pos);
+
+			break;
+		}
+		if (!dl && ! global_ok) {
+			/* not enough dedicated spares, try global */
+			global_ok = 1;
+			dl = ddf->dlist;
+			goto again;
+		}
+	}
+
+	if (!rv)
+		/* No spares found */
+		return rv;
+	/* Now 'rv' has a list of devices to return.
+	 * Create a metadata_update record to update the
+	 * phys_refnum and lba_offset values
+	 */
+	vc = find_vdcr(ddf, a->info.container_member, rv->disk.raid_disk,
+		       &n_bvd, &vcl);
+	if (vc == NULL)
+		goto abort_spares;
+
+	/* The update only refreshes an existing configuration, so it
+	 * carries no pre-allocated space. The buffer that used to be
+	 * allocated here was overwritten with NULL right away, and a
+	 * failed allocation led to a NULL dereference.
+	 */
+	mu = xmalloc(sizeof(*mu));
+	mu->len = ddf->conf_rec_len * 512 * vcl->conf.sec_elmnt_count;
+	mu->buf = xmalloc(mu->len);
+	mu->space = NULL;
+	mu->space_list = NULL;
+	mu->next = *updates;
+	memcpy(mu->buf, &vcl->conf, ddf->conf_rec_len * 512);
+	for (j = 1; j < vcl->conf.sec_elmnt_count; j++)
+		memcpy(mu->buf + j * ddf->conf_rec_len * 512,
+		       vcl->other_bvds[j-1], ddf->conf_rec_len * 512);
+
+	for (di = rv ; di ; di = di->next) {
+		unsigned int i_sec, i_prim;
+		i_sec = di->disk.raid_disk
+			/ be16_to_cpu(vcl->conf.prim_elmnt_count);
+		i_prim = di->disk.raid_disk
+			% be16_to_cpu(vcl->conf.prim_elmnt_count);
+		/* the copy of the BVD this spare goes into */
+		vc = (struct vd_config *)(mu->buf
+					  + i_sec * ddf->conf_rec_len * 512);
+		for (dl = ddf->dlist; dl; dl = dl->next)
+			if (dl->major == di->disk.major
+			    && dl->minor == di->disk.minor)
+				break;
+		if (!dl || dl->pdnum < 0) {
+			pr_err("%s: BUG: can't find disk %d (%d/%d)\n",
+			       __func__, di->disk.raid_disk,
+			       di->disk.major, di->disk.minor);
+			free(mu->buf);
+			free(mu);
+			goto abort_spares;
+		}
+		vc->phys_refnum[i_prim] = ddf->phys->entries[dl->pdnum].refnum;
+		LBA_OFFSET(ddf, vc)[i_prim] = cpu_to_be64(di->data_offset);
+		dprintf("BVD %u gets %u: %08x at %llu\n", i_sec, i_prim,
+			be32_to_cpu(vc->phys_refnum[i_prim]),
+			be64_to_cpu(LBA_OFFSET(ddf, vc)[i_prim]));
+	}
+	*updates = mu;
+	return rv;
+
+abort_spares:
+	/* Nothing is handed back: release the spare descriptions */
+	while (rv) {
+		di = rv->next;
+		free(rv);
+		rv = di;
+	}
+	return NULL;
+}
+#endif /* MDASSEMBLE */
+
+/*
+ * Default md layout for a RAID level: 0 for levels 0 and 1,
+ * left-symmetric for 5, rotating-N-continue for 6, 0x102 for 10,
+ * and UnSet for every other level.
+ */
+static int ddf_level_to_layout(int level)
+{
+	if (level == 0 || level == 1)
+		return 0;
+	if (level == 5)
+		return ALGORITHM_LEFT_SYMMETRIC;
+	if (level == 6)
+		return ALGORITHM_ROTATING_N_CONTINUE;
+	if (level == 10)
+		return 0x102;
+	return UnSet;
+}
+
+/*
+ * Fill in defaults for values still UnSet: the level becomes
+ * LEVEL_CONTAINER and the layout the default for the resulting level.
+ * Nothing is touched when 'level' is NULL; 'st' and 'chunk' are unused.
+ */
+static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
+{
+	if (!level)
+		return;
+
+	if (*level == UnSet)
+		*level = LEVEL_CONTAINER;
+
+	if (layout && *layout == UnSet)
+		*layout = ddf_level_to_layout(*level);
+}
+
+struct superswitch super_ddf = { /* handler table for the "ddf" external metadata format */
+#ifndef MDASSEMBLE
+ .examine_super = examine_super_ddf,
+ .brief_examine_super = brief_examine_super_ddf,
+ .brief_examine_subarrays = brief_examine_subarrays_ddf,
+ .export_examine_super = export_examine_super_ddf,
+ .detail_super = detail_super_ddf,
+ .brief_detail_super = brief_detail_super_ddf,
+ .validate_geometry = validate_geometry_ddf,
+ .write_init_super = write_init_super_ddf,
+ .add_to_super = add_to_super_ddf,
+ .remove_from_super = remove_from_super_ddf,
+ .load_container = load_container_ddf,
+ .copy_metadata = copy_metadata_ddf,
+ .kill_subarray = kill_subarray_ddf,
+#endif
+ .match_home = match_home_ddf,
+ .uuid_from_super= uuid_from_super_ddf,
+ .getinfo_super = getinfo_super_ddf,
+ .update_super = update_super_ddf,
+
+ .avail_size = avail_size_ddf,
+
+ .compare_super = compare_super_ddf,
+
+ .load_super = load_super_ddf,
+ .init_super = init_super_ddf,
+ .store_super = store_super_ddf,
+ .free_super = free_super_ddf,
+ .match_metadata_desc = match_metadata_desc_ddf,
+ .container_content = container_content_ddf,
+ .default_geometry = default_geometry_ddf,
+
+ .external = 1,
+
+#ifndef MDASSEMBLE
+/* for mdmon: these handlers are left out of MDASSEMBLE builds */
+ .open_new = ddf_open_new,
+ .set_array_state= ddf_set_array_state,
+ .set_disk = ddf_set_disk,
+ .sync_metadata = ddf_sync_metadata,
+ .process_update = ddf_process_update,
+ .prepare_update = ddf_prepare_update,
+ .activate_spare = ddf_activate_spare,
+#endif
+ .name = "ddf",
+};
diff --git a/super-gpt.c b/super-gpt.c
new file mode 100644
index 00000000..6a2f749e
--- /dev/null
+++ b/super-gpt.c
@@ -0,0 +1,217 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2010 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neil@brown.name>
+ *
+ */
+
+/*
+ * 'gpt' is a pseudo metadata type for devices which have a
+ * GPT partition table.
+ *
+ * Obviously arrays cannot be created or assembled for this type.
+ * It is used to allow a new bare device to have a partition table
+ * added so the member partitions can then be included in other
+ * arrays as relevant.
+ *
+ * The meaningful operations are:
+ * examine_super, but not brief_examine_super or export_examine
+ * load_super
+ * store_super
+ */
+
+#include "mdadm.h"
+#include "part.h"
+
+/* Release the buffer held in st->sb and leave st->sb set to NULL. */
+static void free_gpt(struct supertype *st)
+{
+	void *buf = st->sb;
+
+	st->sb = NULL;
+	free(buf);
+}
+
+#ifndef MDASSEMBLE
+/*
+ * Print the GPT header magic and revision, then one line per partition
+ * entry giving its length in sectors and its starting LBA.
+ *
+ * The GPT header is taken from byte offset 512 of st->sb and the
+ * partition entries from byte offset 1024.  'homehost' is unused.
+ */
+static void examine_gpt(struct supertype *st, char *homehost)
+{
+	struct GPT *gpt = st->sb + 512;
+	struct GPT_part_entry *gpe = st->sb + 1024;
+	unsigned int i;
+
+	printf(" GPT Magic : %llx\n", (unsigned long long)__le64_to_cpu(gpt->magic));
+	printf(" GPT Revision : %ld\n", (long)__le32_to_cpu(gpt->revision));
+	for (i = 0; i < __le32_to_cpu(gpt->part_cnt); i++) {
+		unsigned long long first =
+			(unsigned long long)__le64_to_cpu(gpe[i].starting_lba);
+		unsigned long long last =
+			(unsigned long long)__le64_to_cpu(gpe[i].ending_lba);
+
+		/* the format reads "<count> sectors at <start>", so the
+		 * sector count must come before the starting LBA
+		 */
+		printf(" Partition[%02u] : %12llu sectors at %12llu\n",
+		       i, last - first + 1, first);
+	}
+}
+#endif /* MDASSEMBLE */
+
+/*
+ * Read the MBR, the GPT header and the GPT partition entries from 'fd'
+ * into one freshly allocated buffer and attach it to st->sb.
+ *
+ * Buffer layout: struct MBR, then struct GPT, then the partition entries
+ * (rounded up to a multiple of 512 bytes).
+ *
+ * Returns 0 on success.  Returns 1, after freeing the buffer, when the
+ * allocation or a read fails, when the MBR/GPT signatures do not match,
+ * or when the partition table would not fit in the buffer.  Messages
+ * naming the device are printed only when 'devname' is not NULL.
+ */
+static int load_gpt(struct supertype *st, int fd, char *devname)
+{
+	/* total size of the buffer that holds MBR + header + entries */
+	const size_t buf_len = 32*512;
+	struct MBR *super;
+	struct GPT *gpt_head;
+	unsigned long long entries_len;
+	int to_read;
+
+	free_gpt(st);
+
+	if (posix_memalign((void**)&super, 4096, buf_len) != 0) {
+		pr_err("%s could not allocate superblock\n",
+		       __func__);
+		return 1;
+	}
+
+	if (lseek(fd, 0, SEEK_SET) != 0)
+		goto no_read;
+	if (read(fd, super, sizeof(*super)) != sizeof(*super))
+		goto no_read;
+
+	if (super->magic != MBR_SIGNATURE_MAGIC ||
+	    super->parts[0].part_type != MBR_GPT_PARTITION_TYPE)
+		goto not_found;
+
+	/* Seem to have GPT, load the header */
+	gpt_head = (struct GPT*)(super+1);
+	if (read(fd, gpt_head, sizeof(*gpt_head)) != sizeof(*gpt_head))
+		goto no_read;
+	if (gpt_head->magic != GPT_SIGNATURE_MAGIC)
+		goto not_found;
+	if (__le32_to_cpu(gpt_head->part_cnt) >= 128)
+		goto not_found;
+
+	entries_len = (unsigned long long)__le32_to_cpu(gpt_head->part_cnt) *
+		sizeof(struct GPT_part_entry);
+	entries_len = ((entries_len+511)/512) * 512;
+	/* the entries land after the MBR and the header: refuse a table
+	 * that would run past the end of the allocation
+	 */
+	if (entries_len > buf_len - sizeof(*super) - sizeof(*gpt_head))
+		goto not_found;
+	to_read = entries_len;
+	if (read(fd, gpt_head+1, to_read) != to_read)
+		goto no_read;
+
+	st->sb = super;
+
+	if (st->ss == NULL) {
+		st->ss = &gpt;
+		st->minor_version = 0;
+		st->max_devs = 1;
+		st->info = NULL;
+	}
+	return 0;
+
+ no_read:
+	if (devname)
+		pr_err("Cannot read partition table on %s\n",
+		       devname);
+	free(super);
+	return 1;
+
+ not_found:
+	if (devname)
+		pr_err("No partition table found on %s\n",
+		       devname);
+	free(super);
+	return 1;
+}
+
+/*
+ * Write the partition-table image held in st->sb to the start of 'fd',
+ * flush it, and ask the kernel to re-read the partition table.
+ *
+ * The image starts with the MBR, followed by the GPT header and then the
+ * partition entries, so the write covers all three parts.
+ *
+ * Returns 0 on success, 4 when the image cannot be written in full.
+ */
+static int store_gpt(struct supertype *st, int fd)
+{
+	/* FIXME should I save the boot loader */
+	/* need to write two copies! */
+	/* FIXME allow for blocks != 512 bytes
+	 *etc
+	 */
+	struct MBR *super = st->sb;
+	struct GPT *gpt;
+	int to_write;
+
+	gpt = (struct GPT*)(super+1);
+
+	to_write = __le32_to_cpu(gpt->part_cnt) * sizeof(struct GPT_part_entry);
+	to_write = ((to_write+511)/512) * 512;
+	/* the entries are preceded by the MBR and the GPT header */
+	to_write += sizeof(*super) + sizeof(*gpt);
+	/* load_gpt() allocates 32*512 bytes for st->sb: never read past it */
+	if (to_write > 32*512)
+		return 4;
+
+	lseek(fd, 0, 0);
+	if (write(fd, st->sb, to_write) != to_write)
+		return 4;
+
+	fsync(fd);
+	ioctl(fd, BLKRRPART, 0);
+	return 0;
+}
+
+/*
+ * Fill 'info' for a device carrying a GPT: the array and disk details
+ * are zeroed, text_version and name are set to "gpt", and component_size
+ * becomes the largest ending_lba among the partition entries (0 when
+ * there are none).  'map' is unused.
+ */
+static void getinfo_gpt(struct supertype *st, struct mdinfo *info, char *map)
+{
+	struct GPT *header = st->sb + 512;
+	struct GPT_part_entry *entry = st->sb + 1024;
+	unsigned int n;
+
+	memset(&info->disk, 0, sizeof(info->disk));
+	memset(&info->array, 0, sizeof(info->array));
+	strcpy(info->name, "gpt");
+	strcpy(info->text_version, "gpt");
+	info->component_size = 0;
+
+	for (n = 0; n < __le32_to_cpu(header->part_cnt); n++) {
+		unsigned long long end =
+			(unsigned long long)__le64_to_cpu(entry[n].ending_lba);
+
+		if (info->component_size < end)
+			info->component_size = end;
+	}
+}
+
+/*
+ * Return a new supertype bound to the gpt handler when 'arg' is exactly
+ * "gpt", otherwise NULL.  The name is checked before anything is
+ * allocated, and the supertype is zero-filled so that no member is left
+ * uninitialised.
+ */
+static struct supertype *match_metadata_desc(char *arg)
+{
+	struct supertype *st;
+
+	if (strcmp(arg, "gpt") != 0)
+		return NULL;
+
+	st = xcalloc(1, sizeof(*st));
+	st->ss = &gpt;
+	st->info = NULL;
+	st->minor_version = 0;
+	st->max_devs = 1;
+	st->sb = NULL;
+	return st;
+}
+
+#ifndef MDASSEMBLE
+/*
+ * gpt is not an array format: print an error and return 0 whatever the
+ * requested geometry is.  None of the arguments are examined.
+ */
+static int validate_geometry(struct supertype *st, int level,
+			     int layout, int raiddisks,
+			     int *chunk, unsigned long long size,
+			     unsigned long long data_offset,
+			     char *subdev, unsigned long long *freesize,
+			     int verbose)
+{
+	const int rejected = 0;
+
+	pr_err("gpt metadata cannot be used this way\n");
+	return rejected;
+}
+#endif
+
+/*
+ * Method table for the "gpt" pseudo metadata type.  examine_super and
+ * validate_geometry are only present when MDASSEMBLE is not defined.
+ */
+struct superswitch gpt = {
+#ifndef MDASSEMBLE
+ .examine_super = examine_gpt,
+ .validate_geometry = validate_geometry,
+#endif
+ .match_metadata_desc = match_metadata_desc,
+ .load_super = load_gpt,
+ .store_super = store_gpt,
+ .getinfo_super = getinfo_gpt,
+ .free_super = free_gpt,
+ .name = "gpt",
+};
diff --git a/super-intel.c b/super-intel.c
new file mode 100644
index 00000000..e28ac7d3
--- /dev/null
+++ b/super-intel.c
@@ -0,0 +1,10626 @@
+/*
+ * mdadm - Intel(R) Matrix Storage Manager Support
+ *
+ * Copyright (C) 2002-2008 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define HAVE_STDINT_H 1
+#include "mdadm.h"
+#include "mdmon.h"
+#include "sha1.h"
+#include "platform-intel.h"
+#include <values.h>
+#include <scsi/sg.h>
+#include <ctype.h>
+#include <dirent.h>
+
+/* MPB == Metadata Parameter Block */
+#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
+#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
+#define MPB_VERSION_RAID0 "1.0.00"
+#define MPB_VERSION_RAID1 "1.1.00"
+#define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
+#define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
+#define MPB_VERSION_RAID5 "1.2.02"
+#define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
+#define MPB_VERSION_CNG "1.2.06"
+#define MPB_VERSION_ATTRIBS "1.3.00"
+#define MAX_SIGNATURE_LENGTH 32
+#define MAX_RAID_SERIAL_LEN 16
+
+/* supports RAID0 */
+#define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
+/* supports RAID1 */
+#define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
+/* supports RAID10 */
+#define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
+/* supports RAID1E */
+#define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
+/* supports RAID5 */
+#define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
+/* supports RAID CNG */
+#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)
+/* supports expanded stripe sizes of 256K, 512K and 1MB */
+#define MPB_ATTRIB_EXP_STRIPE_SIZE __cpu_to_le32(0x00000040)
+
+/* The OROM Support RST Caching of Volumes */
+#define MPB_ATTRIB_NVM __cpu_to_le32(0x02000000)
+/* The OROM supports creating disks greater than 2TB */
+#define MPB_ATTRIB_2TB_DISK __cpu_to_le32(0x04000000)
+/* The OROM supports Bad Block Management */
+#define MPB_ATTRIB_BBM __cpu_to_le32(0x08000000)
+
+/* The OROM supports NVM caching of volumes */
+#define MPB_ATTRIB_NEVER_USE2 __cpu_to_le32(0x10000000)
+/* The OROM supports creating volumes greater than 2TB */
+#define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
+/* originally for PMP, now it's wasted b/c. Never use this bit! */
+#define MPB_ATTRIB_NEVER_USE __cpu_to_le32(0x40000000)
+/* Verify MPB contents against checksum after reading MPB */
+#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
+
+/* Define all supported attributes that have to be accepted by mdadm
+ */
+#define MPB_ATTRIB_SUPPORTED (MPB_ATTRIB_CHECKSUM_VERIFY | \
+ MPB_ATTRIB_2TB | \
+ MPB_ATTRIB_2TB_DISK | \
+ MPB_ATTRIB_RAID0 | \
+ MPB_ATTRIB_RAID1 | \
+ MPB_ATTRIB_RAID10 | \
+ MPB_ATTRIB_RAID5 | \
+ MPB_ATTRIB_EXP_STRIPE_SIZE)
+
+/* Define attributes that are unused but not harmful */
+#define MPB_ATTRIB_IGNORED (MPB_ATTRIB_NEVER_USE)
+
+#define MPB_SECTOR_CNT 2210
+#define IMSM_RESERVED_SECTORS 4096
+#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056
+#define SECT_PER_MB_SHIFT 11
+
+/* Disk configuration info. */
+#define IMSM_MAX_DEVICES 255
+/*
+ * One entry of the disk table that starts at offset 0xD8 of struct
+ * imsm_super.  The offsets in the member comments are those of the first
+ * table entry.  total_blocks_lo/total_blocks_hi form one 64-bit count
+ * (see total_blocks()).
+ */
+struct imsm_disk {
+ __u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
+ __u32 total_blocks_lo; /* 0xE8 - 0xEB total blocks lo */
+ __u32 scsi_id; /* 0xEC - 0xEF scsi ID */
+#define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
+#define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
+#define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
+ __u32 status; /* 0xF0 - 0xF3 */
+ __u32 owner_cfg_num; /* 0xF4 - 0xF7 which config 0,1,2... owns this disk */
+ __u32 total_blocks_hi; /* 0xF8 - 0xFB total blocks hi */
+#define IMSM_DISK_FILLERS 3
+ __u32 filler[IMSM_DISK_FILLERS]; /* 0xFC - 0x107 MPB_DISK_FILLERS for future expansion */
+};
+
+/* map selector for map management
+ */
+#define MAP_0 0
+#define MAP_1 1
+#define MAP_X -1
+
+/*
+ * RAID map configuration.
+ *
+ * The *_lo / *_hi member pairs each hold one 64-bit value (see
+ * pba_of_lba0(), blocks_per_member() and num_data_stripes()).
+ * disk_ord_tbl is declared with one element but holds num_members
+ * entries; sizeof_imsm_map() gives the real size of a map.
+ */
+struct imsm_map {
+ __u32 pba_of_lba0_lo; /* start address of partition */
+ __u32 blocks_per_member_lo;/* blocks per member */
+ __u32 num_data_stripes_lo; /* number of data stripes */
+ __u16 blocks_per_strip;
+ __u8 map_state; /* Normal, Uninitialized, Degraded, Failed */
+#define IMSM_T_STATE_NORMAL 0
+#define IMSM_T_STATE_UNINITIALIZED 1
+#define IMSM_T_STATE_DEGRADED 2
+#define IMSM_T_STATE_FAILED 3
+ __u8 raid_level;
+#define IMSM_T_RAID0 0
+#define IMSM_T_RAID1 1
+#define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
+ __u8 num_members; /* number of member disks */
+ __u8 num_domains; /* number of parity domains */
+ __u8 failed_disk_num; /* valid only when state is degraded */
+ __u8 ddf;
+ __u32 pba_of_lba0_hi; /* upper half of pba_of_lba0 */
+ __u32 blocks_per_member_hi; /* upper half of blocks_per_member */
+ __u32 num_data_stripes_hi; /* upper half of num_data_stripes */
+ __u32 filler[4]; /* expansion area */
+#define IMSM_ORD_REBUILD (1 << 24)
+ __u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members],
+ * top byte contains some flags
+ */
+} __attribute__ ((packed));
+
+/*
+ * Volume state of a raid device.  map[0] is always present; when
+ * migr_state is non-zero a second map follows it directly in memory
+ * (see get_imsm_map()).
+ */
+struct imsm_vol {
+ __u32 curr_migr_unit;
+ __u32 checkpoint_id; /* id to access curr_migr_unit */
+ __u8 migr_state; /* Normal or Migrating */
+#define MIGR_INIT 0
+#define MIGR_REBUILD 1
+#define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
+#define MIGR_GEN_MIGR 3
+#define MIGR_STATE_CHANGE 4
+#define MIGR_REPAIR 5
+ __u8 migr_type; /* Initializing, Rebuilding, ... */
+ __u8 dirty;
+ __u8 fs_state; /* fast-sync state for CnG (0xff == disabled) */
+ __u16 verify_errors; /* number of mismatches */
+ __u16 bad_blocks; /* number of bad blocks during verify */
+ __u32 filler[4];
+ struct imsm_map map[1];
+ /* here comes another one if migr_state */
+} __attribute__ ((packed));
+
+/*
+ * Description of one raid device (volume).  The structure is variable
+ * sized because 'vol' ends with one or two variable-length maps;
+ * sizeof_imsm_dev() computes the real size.
+ */
+struct imsm_dev {
+ __u8 volume[MAX_RAID_SERIAL_LEN];
+ __u32 size_low;
+ __u32 size_high;
+#define DEV_BOOTABLE __cpu_to_le32(0x01)
+#define DEV_BOOT_DEVICE __cpu_to_le32(0x02)
+#define DEV_READ_COALESCING __cpu_to_le32(0x04)
+#define DEV_WRITE_COALESCING __cpu_to_le32(0x08)
+#define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
+#define DEV_HIDDEN_AT_BOOT __cpu_to_le32(0x20)
+#define DEV_CURRENTLY_HIDDEN __cpu_to_le32(0x40)
+#define DEV_VERIFY_AND_FIX __cpu_to_le32(0x80)
+#define DEV_MAP_STATE_UNINIT __cpu_to_le32(0x100)
+#define DEV_NO_AUTO_RECOVERY __cpu_to_le32(0x200)
+#define DEV_CLONE_N_GO __cpu_to_le32(0x400)
+#define DEV_CLONE_MAN_SYNC __cpu_to_le32(0x800)
+#define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
+ __u32 status; /* Persistent RaidDev status */
+ __u32 reserved_blocks; /* Reserved blocks at beginning of volume */
+ __u8 migr_priority;
+ __u8 num_sub_vols;
+ __u8 tid;
+ __u8 cng_master_disk;
+ __u16 cache_policy;
+ __u8 cng_state;
+ __u8 cng_sub_state;
+#define IMSM_DEV_FILLERS 10
+ __u32 filler[IMSM_DEV_FILLERS];
+ struct imsm_vol vol;
+} __attribute__ ((packed));
+
+/*
+ * Anchor of the metadata block (MPB).  The fixed part is followed by
+ * num_disks disk entries, then num_raid_devs variable-sized imsm_dev
+ * records (see __get_imsm_dev()), then the BBM log.
+ */
+struct imsm_super {
+ __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
+ __u32 check_sum; /* 0x20 - 0x23 MPB Checksum */
+ __u32 mpb_size; /* 0x24 - 0x27 Size of MPB */
+ __u32 family_num; /* 0x28 - 0x2B Checksum from first time this config was written */
+ __u32 generation_num; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
+ __u32 error_log_size; /* 0x30 - 0x33 in bytes */
+ __u32 attributes; /* 0x34 - 0x37 */
+ __u8 num_disks; /* 0x38 Number of configured disks */
+ __u8 num_raid_devs; /* 0x39 Number of configured volumes */
+ __u8 error_log_pos; /* 0x3A */
+ __u8 fill[1]; /* 0x3B */
+ __u32 cache_size; /* 0x3C - 0x3F in mb */
+ __u32 orig_family_num; /* 0x40 - 0x43 original family num */
+ __u32 pwr_cycle_count; /* 0x44 - 0x47 simulated power cycle count for array */
+ __u32 bbm_log_size; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
+#define IMSM_FILLERS 35
+ __u32 filler[IMSM_FILLERS]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
+ struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
+ /* here comes imsm_dev[num_raid_devs] */
+ /* here comes BBM logs */
+} __attribute__ ((packed));
+
+#define BBM_LOG_MAX_ENTRIES 254
+
+/* One element of bbm_log.mapped_block_entries. */
+struct bbm_log_entry {
+ __u64 defective_block_start;
+#define UNREADABLE 0xFFFFFFFF
+ __u32 spare_block_offset;
+ __u16 remapped_marked_count;
+ __u16 disk_ordinal;
+} __attribute__ ((__packed__));
+
+/*
+ * Bad Block Management log: a header followed by a fixed array of
+ * BBM_LOG_MAX_ENTRIES entries.
+ */
+struct bbm_log {
+ __u32 signature; /* 0xABADB10C */
+ __u32 entry_count;
+ __u32 reserved_spare_block_count; /* 0 */
+ __u32 reserved; /* 0xFFFF */
+ __u64 first_spare_lba;
+ struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
+} __attribute__ ((__packed__));
+
+#ifndef MDASSEMBLE
+/* printable names, in the same order as the IMSM_T_STATE_* values 0..3 */
+static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
+#endif
+
+#define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
+
+#define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */
+
+#define MIGR_REC_BUF_SIZE 512 /* size of migr_record i/o buffer */
+#define MIGR_REC_POSITION 512 /* migr_record position offset on disk,
+ * MIGR_REC_BUF_SIZE <= MIGR_REC_POSITION
+ */
+
+#define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must
+ * be recovered using srcMap */
+#define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has
+ * already been migrated and must
+ * be recovered from checkpoint area */
+/*
+ * Migration record.  struct intel_super refers to one through its
+ * migr_rec / migr_rec_buf union.
+ */
+struct migr_record {
+ __u32 rec_status; /* Status used to determine how to restart
+ * migration in case it aborts
+ * in some fashion */
+ __u32 curr_migr_unit; /* 0..numMigrUnits-1 */
+ __u32 family_num; /* Family number of MPB
+ * containing the RaidDev
+ * that is migrating */
+ __u32 ascending_migr; /* True if migrating in increasing
+ * order of lbas */
+ __u32 blocks_per_unit; /* Num disk blocks per unit of operation */
+ __u32 dest_depth_per_unit; /* Num member blocks each destMap
+ * member disk
+ * advances per unit-of-operation */
+ __u32 ckpt_area_pba; /* Pba of first block of ckpt copy area */
+ __u32 dest_1st_member_lba; /* First member lba on first
+ * stripe of destination */
+ __u32 num_migr_units; /* Total num migration units-of-op */
+ __u32 post_migr_vol_cap; /* Size of volume after
+ * migration completes */
+ __u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
+ __u32 ckpt_read_disk_num; /* Which member disk in destSubMap[0] the
+ * migration ckpt record was read from
+ * (for recovered migrations) */
+} __attribute__ ((__packed__));
+
+/* Node of a singly linked list (through 'next') of devices by name. */
+struct md_list {
+ /* usage marker:
+ * 1: load metadata
+ * 2: metadata does not match
+ * 4: already checked
+ */
+ int used;
+ char *devname;
+ int found;
+ int container;
+ dev_t st_rdev;
+ struct md_list *next;
+};
+
+#define pr_vrb(fmt, arg...) (void) (verbose && pr_err(fmt, ##arg))
+
+/*
+ * Report the migration type of 'dev'.  A stored MIGR_VERIFY combined
+ * with the DEV_VERIFY_AND_FIX status bit is reported as MIGR_REPAIR;
+ * every other stored value is returned unchanged.
+ */
+static __u8 migr_type(struct imsm_dev *dev)
+{
+	if (dev->vol.migr_type != MIGR_VERIFY)
+		return dev->vol.migr_type;
+	if (!(dev->status & DEV_VERIFY_AND_FIX))
+		return dev->vol.migr_type;
+	return MIGR_REPAIR;
+}
+
+/*
+ * Record 'migr_type' in 'dev'.  MIGR_REPAIR is not stored as such: for
+ * compatibility with older oroms it is written as MIGR_VERIFY plus the
+ * DEV_VERIFY_AND_FIX status bit.  Any other type is stored directly and
+ * the bit is cleared.
+ */
+static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
+{
+	if (migr_type != MIGR_REPAIR) {
+		dev->vol.migr_type = migr_type;
+		dev->status &= ~DEV_VERIFY_AND_FIX;
+		return;
+	}
+
+	dev->vol.migr_type = MIGR_VERIFY;
+	dev->status |= DEV_VERIFY_AND_FIX;
+}
+
+/* Number of 512-byte sectors needed to hold 'bytes' bytes (rounded up). */
+static unsigned int sector_count(__u32 bytes)
+{
+	unsigned int sectors = ROUND_UP(bytes, 512) / 512;
+
+	return sectors;
+}
+
+/* Size of the metadata block, as recorded in mpb->mpb_size, in sectors. */
+static unsigned int mpb_sectors(struct imsm_super *mpb)
+{
+	__u32 mpb_bytes = __le32_to_cpu(mpb->mpb_size);
+
+	return sector_count(mpb_bytes);
+}
+
+/*
+ * Node of the intel_super.devlist list: one raid device together with
+ * the index get_imsm_dev() looks it up by.
+ */
+struct intel_dev {
+ struct imsm_dev *dev;
+ struct intel_dev *next;
+ unsigned index;
+};
+
+/*
+ * A controller attached to an intel_super (list linked through 'next').
+ * pci_id points into 'path', just after its last '/' (see
+ * alloc_intel_hba()).
+ */
+struct intel_hba {
+ enum sys_dev_type type;
+ char *path;
+ char *pci_id;
+ struct intel_hba *next;
+};
+
+/* value of the 'action' member of struct dl */
+enum action {
+ DISK_REMOVE = 1,
+ DISK_ADD
+};
+/* internal representation of IMSM metadata
+ *
+ * 'buf'/'anchor' and 'migr_rec_buf'/'migr_rec' are two views of the same
+ * pointer each: the raw I/O buffer and its typed interpretation.
+ */
+struct intel_super {
+ union {
+ void *buf; /* O_DIRECT buffer for reading/writing metadata */
+ struct imsm_super *anchor; /* immovable parameters */
+ };
+ union {
+ void *migr_rec_buf; /* buffer for I/O operations */
+ struct migr_record *migr_rec; /* migration record */
+ };
+ int clean_migration_record_by_mdmon; /* when reshape is switched to next
+ array, it indicates that mdmon is allowed to clean migration
+ record */
+ size_t len; /* size of the 'buf' allocation */
+ void *next_buf; /* for realloc'ing buf from the manager */
+ size_t next_len;
+ int updates_pending; /* count of pending updates for mdmon */
+ int current_vol; /* index of raid device undergoing creation */
+ unsigned long long create_offset; /* common start for 'current_vol' */
+ __u32 random; /* random data for seeding new family numbers */
+ struct intel_dev *devlist;
+ struct dl {
+ struct dl *next;
+ int index;
+ __u8 serial[MAX_RAID_SERIAL_LEN];
+ int major, minor;
+ char *devname;
+ struct imsm_disk disk;
+ int fd;
+ int extent_cnt;
+ struct extent *e; /* for determining freespace @ create */
+ int raiddisk; /* slot to fill in autolayout */
+ enum action action;
+ } *disks, *current_disk;
+ struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
+ active */
+ struct dl *missing; /* disks removed while we weren't looking */
+ struct bbm_log *bbm_log;
+ struct intel_hba *hba; /* device path of the raid controller for this metadata */
+ const struct imsm_orom *orom; /* platform firmware support */
+ struct intel_super *next; /* (temp) list for disambiguating family_num */
+};
+
+/*
+ * List node pairing a disk record with an 'owner' value;
+ * IMSM_UNKNOWN_OWNER (-1) is the value for "no known owner".
+ */
+struct intel_disk {
+ struct imsm_disk disk;
+ #define IMSM_UNKNOWN_OWNER (-1)
+ int owner;
+ struct intel_disk *next;
+};
+
+/*
+ * A used region of a member disk.  get_extents() fills 'start' from
+ * pba_of_lba0() and 'size' from blocks_per_member(); an entry with
+ * size 0 ends the list it returns.
+ */
+struct extent {
+ unsigned long long start, size;
+};
+
+/* definitions of reshape process types */
+enum imsm_reshape_type {
+ CH_TAKEOVER, /* NOTE(review): presumably pairs with update_takeover - confirm */
+ CH_MIGRATION,
+ CH_ARRAY_SIZE,
+};
+
+/* definition of messages passed to imsm_process_update */
+/* tag stored in the leading 'type' member of each imsm_update_* struct */
+enum imsm_update_type {
+ update_activate_spare,
+ update_create_array,
+ update_kill_array,
+ update_rename_array,
+ update_add_remove_disk,
+ update_reshape_container_disks,
+ update_reshape_migration,
+ update_takeover,
+ update_general_migration_checkpoint,
+ update_size_change,
+};
+
+/*
+ * Update message naming a disk ('dl'), a slot and an array index;
+ * several messages can be chained through 'next'.
+ */
+struct imsm_update_activate_spare {
+ enum imsm_update_type type;
+ struct dl *dl;
+ int slot;
+ int array;
+ struct imsm_update_activate_spare *next;
+};
+
+/* Bundle of array geometry values plus the device name they apply to. */
+struct geo_params {
+ char devnm[32];
+ char *dev_name;
+ unsigned long long size;
+ int level;
+ int layout;
+ int chunksize;
+ int raid_disks;
+};
+
+/* value of the 'direction' member of struct imsm_update_takeover */
+enum takeover_direction {
+ R10_TO_R0,
+ R0_TO_R10
+};
+/* Update message: takeover of one subarray in the given direction. */
+struct imsm_update_takeover {
+ enum imsm_update_type type;
+ int subarray;
+ enum takeover_direction direction;
+};
+
+/*
+ * Update message: change in the number of raid disks.  new_disks is
+ * declared with one element but is a variable-length tail, as its own
+ * comment describes.
+ */
+struct imsm_update_reshape {
+ enum imsm_update_type type;
+ int old_raid_disks;
+ int new_raid_disks;
+
+ int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
+};
+
+/*
+ * Update message: like imsm_update_reshape, with the additional
+ * level/layout/chunk size targets for one subdev.
+ */
+struct imsm_update_reshape_migration {
+ enum imsm_update_type type;
+ int old_raid_disks;
+ int new_raid_disks;
+ /* fields for array migration changes
+ */
+ int subdev;
+ int new_level;
+ int new_layout;
+ int new_chunksize;
+
+ int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
+};
+
+/* Update message: new size for one subdev. */
+struct imsm_update_size_change {
+ enum imsm_update_type type;
+ int subdev;
+ long long new_size;
+};
+
+/* Update message carrying a curr_migr_unit value. */
+struct imsm_update_general_migration_checkpoint {
+ enum imsm_update_type type;
+ __u32 curr_migr_unit;
+};
+
+/*
+ * Disk serial number as carried behind an imsm_update_create_array
+ * message (see get_disk_info()).
+ */
+struct disk_info {
+ __u8 serial[MAX_RAID_SERIAL_LEN];
+};
+
+/*
+ * Update message: a raid device to create.  'dev' is variable sized, so
+ * the message is too; get_disk_info() locates the data that follows it.
+ */
+struct imsm_update_create_array {
+ enum imsm_update_type type;
+ int dev_idx;
+ struct imsm_dev dev;
+};
+
+/* Update message identifying a raid device by index. */
+struct imsm_update_kill_array {
+ enum imsm_update_type type;
+ int dev_idx;
+};
+
+/* Update message: a name for the raid device with index dev_idx. */
+struct imsm_update_rename_array {
+ enum imsm_update_type type;
+ __u8 name[MAX_RAID_SERIAL_LEN];
+ int dev_idx;
+};
+
+/* Update message that carries only its type tag. */
+struct imsm_update_add_remove_disk {
+ enum imsm_update_type type;
+};
+
+/* printable names for enum sys_dev_type, read by get_sys_dev_type() */
+static const char *_sys_dev_type[] = {
+ [SYS_DEV_UNKNOWN] = "Unknown",
+ [SYS_DEV_SAS] = "SAS",
+ [SYS_DEV_SATA] = "SATA"
+};
+
+/*
+ * Return the printable name of a sys_dev_type value.  Values at or
+ * above SYS_DEV_MAX are looked up as SYS_DEV_UNKNOWN.
+ */
+const char *get_sys_dev_type(enum sys_dev_type type)
+{
+	enum sys_dev_type known = type;
+
+	if (known >= SYS_DEV_MAX)
+		known = SYS_DEV_UNKNOWN;
+
+	return _sys_dev_type[known];
+}
+
+/*
+ * Allocate an intel_hba describing 'device': its type and a copy of its
+ * path are recorded and 'next' is NULL.  pci_id points just past the
+ * last '/' of the copied path, or is NULL when there is no path or no
+ * '/' in it.
+ */
+static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
+{
+	struct intel_hba *result = xmalloc(sizeof(*result));
+
+	result->type = device->type;
+	result->path = xstrdup(device->path);
+	result->next = NULL;
+	/* never leave pci_id indeterminate, even without a path */
+	result->pci_id = NULL;
+	if (result->path) {
+		char *slash = strrchr(result->path, '/');
+
+		if (slash != NULL)
+			result->pci_id = slash + 1;
+	}
+
+	return result;
+}
+
+/*
+ * Search the list starting at 'hba' for an entry with the same type and
+ * path as 'device'.  Returns the entry, or NULL when there is none.
+ */
+static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
+{
+	struct intel_hba *cur = hba;
+
+	while (cur) {
+		if (cur->type == device->type &&
+		    strcmp(cur->path, device->path) == 0)
+			return cur;
+		cur = cur->next;
+	}
+	return NULL;
+}
+
+/*
+ * Associate the controller 'device' with 'super'.
+ *
+ * Returns 1 when the controller is already recorded or has just been
+ * recorded as the first one.  Returns 2 when a different controller is
+ * already recorded: IMSM metadata disallows attaching disks to multiple
+ * controllers.
+ */
+static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
+{
+	/* this controller is already known to the super */
+	if (find_intel_hba(super->hba, device) != NULL)
+		return 1;
+
+	/* some other controller is recorded already */
+	if (super->hba != NULL)
+		return 2;
+
+	super->hba = alloc_intel_hba(device);
+	return 1;
+}
+
+/*
+ * Find the Intel controller a disk is attached to.
+ *
+ * The disk is identified by 'fd' when fd >= 0 (its path is then obtained
+ * from diskfd_to_devpath()), otherwise 'devname' itself is used as the
+ * path.  Returns the matching element of the find_intel_devices() list,
+ * or NULL when there are no Intel devices, no path could be obtained, or
+ * no controller matches.
+ */
+static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
+{
+	struct sys_dev *list, *elem;
+	char *disk_path;
+
+	if ((list = find_intel_devices()) == NULL)
+		return NULL;
+
+	if (fd < 0)
+		disk_path = (char *) devname;
+	else
+		disk_path = diskfd_to_devpath(fd);
+
+	if (!disk_path)
+		return NULL;
+
+	for (elem = list; elem; elem = elem->next)
+		if (path_attached_to_hba(disk_path, elem->path))
+			break;
+
+	/* release the path on every exit, but only when it did not come
+	 * from the caller
+	 */
+	if (disk_path != devname)
+		free(disk_path);
+
+	return elem;
+}
+
+static int find_intel_hba_capability(int fd, struct intel_super *super,
+ char *devname);
+
+/*
+ * Return a new zero-filled supertype bound to super_imsm when 'arg' is
+ * "imsm" or "default"; return NULL for any other name.
+ */
+static struct supertype *match_metadata_desc_imsm(char *arg)
+{
+	struct supertype *st;
+	int wanted = strcmp(arg, "imsm") == 0 || strcmp(arg, "default") == 0;
+
+	if (!wanted)
+		return NULL;
+
+	st = xcalloc(1, sizeof(*st));
+	st->sb = NULL;
+	st->minor_version = 0;
+	st->max_devs = IMSM_MAX_DEVICES;
+	st->ss = &super_imsm;
+	return st;
+}
+
+#ifndef MDASSEMBLE
+/* Return a pointer to the bytes of mpb->sig that follow the MPB_SIGNATURE text. */
+static __u8 *get_imsm_version(struct imsm_super *mpb)
+{
+	__u8 *sig = mpb->sig;
+
+	return sig + MPB_SIG_LEN;
+}
+#endif
+
+/* Fetch disk number 'index' straight from the anchor's disk table.  Only
+ * valid while the anchor is known to be up-to-date, which currently is
+ * the case at load time only.  Returns NULL when 'index' is not below
+ * num_disks.
+ */
+static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
+{
+	return index < mpb->num_disks ? &mpb->disk[index] : NULL;
+}
+
+/* Find the entry of super->disks whose 'index' member equals 'index'.
+ * Returns NULL when no entry matches.
+ */
+static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
+{
+	struct dl *cur = super->disks;
+
+	while (cur && cur->index != index)
+		cur = cur->next;
+
+	return cur;
+}
+/* Return the imsm_disk record of the parsed disk with the given index,
+ * or NULL when get_imsm_dl_disk() finds no such disk.
+ */
+static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
+{
+	struct dl *found = get_imsm_dl_disk(super, index);
+
+	return found ? &found->disk : NULL;
+}
+
+/* generate a checksum directly from the anchor when the anchor is known to be
+ * up-to-date, currently only at load or write_super after coalescing
+ *
+ * The result is the sum of all 32-bit little-endian words in the first
+ * mpb_size bytes of the anchor, minus the stored check_sum word.
+ */
+static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
+{
+	/* mpb_size is an on-disk field: convert it like the summed words
+	 * and like mpb_sectors() does
+	 */
+	__u32 end = __le32_to_cpu(mpb->mpb_size) / sizeof(end);
+	__u32 *p = (__u32 *) mpb;
+	__u32 sum = 0;
+
+	while (end--) {
+		sum += __le32_to_cpu(*p);
+		p++;
+	}
+
+	return sum - __le32_to_cpu(mpb->check_sum);
+}
+
+/*
+ * Real size in bytes of 'map': the declared structure (which already
+ * contains one disk_ord_tbl slot) plus one __u32 for each further member.
+ */
+static size_t sizeof_imsm_map(struct imsm_map *map)
+{
+	size_t extra_slots = map->num_members - 1;
+
+	return sizeof(struct imsm_map) + sizeof(__u32) * extra_slots;
+}
+
+/*
+ * Select one of the maps of 'dev'.  A device has a second map, stored
+ * directly after the first, while vol.migr_state is set.
+ *
+ * second_map:
+ *   MAP_0 - the first map
+ *   MAP_1 - the second map, or NULL when there is none
+ *   MAP_X - the second map when there is one, otherwise the first
+ * Any other selector yields NULL.
+ */
+struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
+{
+	struct imsm_map *first = &dev->vol.map[0];
+	struct imsm_map *second = NULL;
+
+	if (dev->vol.migr_state)
+		second = (void *)first + sizeof_imsm_map(first);
+
+	if (second_map == MAP_0)
+		return first;
+	if (second_map == MAP_1)
+		return second;
+	if (second_map == MAP_X)
+		return second ? second : first;
+
+	return NULL;
+}
+
+/* Size in bytes of 'dev' including its variable-length map(s).
+ * When the device is migrating the second map is counted as well.
+ * When it is not, a non-zero 'migr_state' argument asks for the size the
+ * device would have if map[0] were duplicated.
+ */
+static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
+{
+	struct imsm_map *first = get_imsm_map(dev, MAP_0);
+	size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
+		sizeof_imsm_map(first);
+
+	if (dev->vol.migr_state)
+		return size + sizeof_imsm_map(get_imsm_map(dev, MAP_1));
+	if (migr_state)
+		return size + sizeof_imsm_map(first);
+
+	return size;
+}
+
+#ifndef MDASSEMBLE
+/* Locate the disk serial number list of a create-array update: it
+ * starts right after the variable-sized 'dev' at the end of the message.
+ */
+static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
+{
+	/* bytes of the message that precede 'dev' */
+	size_t header = sizeof(*update) - sizeof(struct imsm_dev);
+	void *base = update;
+
+	return base + header + sizeof_imsm_dev(&update->dev, 0);
+}
+#endif
+
+/*
+ * Return raid device number 'index' straight from the anchor, or NULL
+ * when 'index' is not below num_raid_devs.  The devices are stored back
+ * to back after the disk table and are variable sized, so all preceding
+ * devices have to be stepped over.
+ */
+static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
+{
+	void *cursor;
+	int i;
+
+	if (index >= mpb->num_raid_devs)
+		return NULL;
+
+	/* devices start after all disks */
+	cursor = &mpb->disk[mpb->num_disks];
+
+	for (i = 0; i < index; i++)
+		cursor += sizeof_imsm_dev(cursor, 0);
+
+	return cursor;
+}
+
+/*
+ * Return raid device number 'index' from the parsed device list of
+ * 'super'.  Returns NULL when 'index' is not below the anchor's
+ * num_raid_devs or when the list has no entry with that index.
+ */
+static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
+{
+	struct intel_dev *node = super->devlist;
+
+	if (index >= super->anchor->num_raid_devs)
+		return NULL;
+
+	while (node) {
+		if (node->index == index)
+			return node->dev;
+		node = node->next;
+	}
+	return NULL;
+}
+
+/*
+ * Return the raw disk_ord_tbl entry for 'slot', converted to host order.
+ * The top byte of the entry identifies a disk under rebuild.
+ *
+ * second_map selects the map, as for get_imsm_map():
+ *   MAP_0 - first map
+ *   MAP_1 - second map
+ *   MAP_X - the map that matches the current migr_state
+ */
+static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
+				   int slot,
+				   int second_map)
+{
+	struct imsm_map *chosen = get_imsm_map(dev, second_map);
+
+	return __le32_to_cpu(chosen->disk_ord_tbl[slot]);
+}
+
+#define ord_to_idx(ord) (((ord) << 8) >> 8)
+/* Disk index stored in 'slot' of the selected map: the ord table entry
+ * with its top byte stripped by ord_to_idx().
+ */
+static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
+{
+	__u32 entry = get_imsm_ord_tbl_ent(dev, slot, second_map);
+	__u32 idx = ord_to_idx(entry);
+
+	return idx;
+}
+
+/* Store 'ord' (host order) into disk_ord_tbl[slot] of 'map' in little-endian form. */
+static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
+{
+	__u32 stored = __cpu_to_le32(ord);
+
+	map->disk_ord_tbl[slot] = stored;
+}
+
+/*
+ * Return the first slot of 'map' whose ord table entry refers to disk
+ * index 'idx', or -1 when the disk is not a member of the map.
+ */
+static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
+{
+	int slot = 0;
+
+	while (slot < map->num_members) {
+		__u32 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
+
+		if (ord_to_idx(ord) == idx)
+			return slot;
+		slot++;
+	}
+
+	return -1;
+}
+
+/*
+ * Translate the raid_level stored in 'map' to the level reported to the
+ * caller: a stored level 1 is returned as 1 with exactly two members and
+ * as 10 otherwise; every other stored level is returned unchanged.
+ */
+static int get_imsm_raid_level(struct imsm_map *map)
+{
+	if (map->raid_level != 1)
+		return map->raid_level;
+
+	return map->num_members == 2 ? 1 : 10;
+}
+
+/* qsort() comparator ordering struct extent elements by ascending 'start'. */
+static int cmp_extent(const void *av, const void *bv)
+{
+	const struct extent *lhs = av;
+	const struct extent *rhs = bv;
+
+	if (lhs->start == rhs->start)
+		return 0;
+	return lhs->start < rhs->start ? -1 : 1;
+}
+
+/*
+ * Count the raid devices of 'super' whose first map (MAP_0) contains the
+ * disk 'dl'.
+ */
+static int count_memberships(struct dl *dl, struct intel_super *super)
+{
+	int found = 0;
+	int n = 0;
+
+	while (n < super->anchor->num_raid_devs) {
+		struct imsm_dev *dev = get_imsm_dev(super, n);
+		struct imsm_map *map = get_imsm_map(dev, MAP_0);
+
+		if (get_imsm_disk_slot(map, dl->index) >= 0)
+			found++;
+		n++;
+	}
+
+	return found;
+}
+
+static __u32 imsm_min_reserved_sectors(struct intel_super *super);
+
+/*
+ * Split the 64-bit value 'n' into its low and high 32-bit halves and
+ * store each through 'lo' and 'hi' in little-endian (on-disk) form.
+ * This is the counterpart of join_u32().
+ *
+ * Returns 0 on success, 1 when either pointer is NULL (nothing is
+ * written in that case).
+ */
+static int split_ull(unsigned long long n, __u32 *lo, __u32 *hi)
+{
+	if (lo == NULL || hi == NULL)
+		return 1;
+	/* the values are being written to metadata: convert cpu -> le */
+	*lo = __cpu_to_le32((unsigned)n);
+	*hi = __cpu_to_le32((unsigned)(n >> 32));
+	return 0;
+}
+
+/*
+ * Combine two little-endian 32-bit halves into one host-order 64-bit
+ * value: 'lo' supplies bits 0-31 and 'hi' bits 32-63.
+ */
+static unsigned long long join_u32(__u32 lo, __u32 hi)
+{
+	unsigned long long low = __le32_to_cpu(lo);
+	unsigned long long high = __le32_to_cpu(hi);
+
+	return low | (high << 32);
+}
+
+/* 64-bit total block count of 'disk', or 0 when 'disk' is NULL. */
+static unsigned long long total_blocks(struct imsm_disk *disk)
+{
+	if (!disk)
+		return 0;
+
+	return join_u32(disk->total_blocks_lo, disk->total_blocks_hi);
+}
+
+/* 64-bit pba_of_lba0 value of 'map', or 0 when 'map' is NULL. */
+static unsigned long long pba_of_lba0(struct imsm_map *map)
+{
+	if (!map)
+		return 0;
+
+	return join_u32(map->pba_of_lba0_lo, map->pba_of_lba0_hi);
+}
+
+/* 64-bit blocks-per-member value of 'map', or 0 when 'map' is NULL. */
+static unsigned long long blocks_per_member(struct imsm_map *map)
+{
+	if (!map)
+		return 0;
+
+	return join_u32(map->blocks_per_member_lo, map->blocks_per_member_hi);
+}
+
+#ifndef MDASSEMBLE
+/* 64-bit number of data stripes of 'map', or 0 when 'map' is NULL. */
+static unsigned long long num_data_stripes(struct imsm_map *map)
+{
+	if (!map)
+		return 0;
+
+	return join_u32(map->num_data_stripes_lo, map->num_data_stripes_hi);
+}
+
+/* Store 'n' into the total_blocks lo/hi pair of 'disk' via split_ull(). */
+static void set_total_blocks(struct imsm_disk *disk, unsigned long long n)
+{
+	(void) split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi);
+}
+#endif
+
+/* Store 'n' into the pba_of_lba0 lo/hi pair of 'map' via split_ull(). */
+static void set_pba_of_lba0(struct imsm_map *map, unsigned long long n)
+{
+	(void) split_ull(n, &map->pba_of_lba0_lo, &map->pba_of_lba0_hi);
+}
+
+/* Store 'n' into the blocks_per_member lo/hi pair of 'map' via split_ull(). */
+static void set_blocks_per_member(struct imsm_map *map, unsigned long long n)
+{
+	(void) split_ull(n, &map->blocks_per_member_lo, &map->blocks_per_member_hi);
+}
+
+/* Store 'n' into the num_data_stripes lo/hi pair of 'map' via split_ull(). */
+static void set_num_data_stripes(struct imsm_map *map, unsigned long long n)
+{
+	(void) split_ull(n, &map->num_data_stripes_lo, &map->num_data_stripes_hi);
+}
+
+/*
+ * Build the list of used extents on the physical device 'dl'.
+ *
+ * The result is a newly allocated array holding one entry per raid
+ * device whose first map contains the disk, sorted by ascending start,
+ * followed by a terminating entry with size 0.  The start of that last
+ * entry is the total block count of the disk minus the reserved
+ * metadata area.  The array is released with free().
+ */
+static struct extent *get_extents(struct intel_super *super, struct dl *dl)
+{
+	/* find a list of used extents on the given physical device */
+	struct extent *rv, *e;
+	int i;
+	int memberships = count_memberships(dl, super);
+	__u32 reservation;
+
+	/* trim the reserved area for spares, so they can join any array
+	 * regardless of whether the OROM has assigned sectors from the
+	 * IMSM_RESERVED_SECTORS region
+	 */
+	if (dl->index == -1)
+		reservation = imsm_min_reserved_sectors(super);
+	else
+		reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
+
+	/* one entry per membership plus the terminating entry */
+	rv = xcalloc(memberships + 1, sizeof(struct extent));
+	e = rv;
+
+	for (i = 0; i < super->anchor->num_raid_devs; i++) {
+		struct imsm_dev *dev = get_imsm_dev(super, i);
+		struct imsm_map *map = get_imsm_map(dev, MAP_0);
+
+		if (get_imsm_disk_slot(map, dl->index) >= 0) {
+			e->start = pba_of_lba0(map);
+			e->size = blocks_per_member(map);
+			e++;
+		}
+	}
+	qsort(rv, memberships, sizeof(*rv), cmp_extent);
+
+	/* determine the start of the metadata
+	 * when no raid devices are defined use the default
+	 * ...otherwise allow the metadata to truncate the value
+	 * as is the case with older versions of imsm
+	 */
+	if (memberships) {
+		struct extent *last = &rv[memberships - 1];
+		unsigned long long remainder;
+
+		remainder = total_blocks(&dl->disk) - (last->start + last->size);
+		/* round down to 1k block to satisfy precision of the kernel
+		 * 'size' interface; the mask must be 64 bits wide so the
+		 * upper half of 'remainder' survives where long is 32 bits
+		 */
+		remainder &= ~1ULL;
+		/* make sure remainder is still sane */
+		if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
+			remainder = ROUND_UP(super->len, 512) >> 9;
+		if (reservation > remainder)
+			reservation = remainder;
+	}
+	e->start = total_blocks(&dl->disk) - reservation;
+	e->size = 0;
+	return rv;
+}
+
+/* Report how many sectors at the end of @dl are reserved for metadata.
+ * The value is the distance from the terminating get_extents() entry to
+ * the end of the disk; the default reservation is used when no extent
+ * list is available.
+ */
+static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
+{
+	struct extent *extents, *tail;
+	__u32 reserved;
+
+	/* a spare only gets a minimal reservation, which will grow once
+	 * the spare is picked up by an array
+	 */
+	if (dl->index == -1)
+		return MPB_SECTOR_CNT;
+
+	extents = get_extents(super, dl);
+	if (extents == NULL)
+		return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
+
+	/* advance to the zero-size terminator */
+	tail = extents;
+	while (tail->size)
+		tail++;
+
+	reserved = total_blocks(&dl->disk) - tail->start;
+	free(extents);
+
+	return reserved;
+}
+
+/* Non-zero when all SPARE_DISK bits are set in the disk's status. */
+static int is_spare(struct imsm_disk *disk)
+{
+	int all_set = (disk->status & SPARE_DISK) == SPARE_DISK;
+
+	return all_set;
+}
+
+/* Non-zero when all CONFIGURED_DISK bits are set in the disk's status. */
+static int is_configured(struct imsm_disk *disk)
+{
+	int all_set = (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
+
+	return all_set;
+}
+
+/* Non-zero when all FAILED_DISK bits are set in the disk's status. */
+static int is_failed(struct imsm_disk *disk)
+{
+	int all_set = (disk->status & FAILED_DISK) == FAILED_DISK;
+
+	return all_set;
+}
+
+/* Work out the metadata reservation to require from a spare.
+ *
+ * The disk with the fewest total blocks among those with index >= 0 is
+ * located, and the space between its terminating get_extents() entry and
+ * its end is compared against MPB_SECTOR_CNT +
+ * NUM_BLOCKS_DIRTY_STRIPE_REGION; the smaller of the two is returned.
+ * With no super, no such disk, or no extent list the default
+ * MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS is returned instead.
+ */
+static __u32 imsm_min_reserved_sectors(struct intel_super *super)
+{
+	struct extent *extents;
+	struct dl *cur, *smallest = NULL;
+	unsigned long long smallest_blocks = 0;
+	__u32 fallback = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
+	__u32 left, wanted;
+	int idx = 0;
+
+	if (super == NULL)
+		return fallback;
+
+	for (cur = super->disks; cur != NULL; cur = cur->next) {
+		unsigned long long blocks;
+
+		if (cur->index < 0)
+			continue;
+		blocks = total_blocks(&cur->disk);
+		if (smallest_blocks == 0 || blocks < smallest_blocks) {
+			smallest = cur;
+			smallest_blocks = blocks;
+		}
+	}
+	if (smallest == NULL)
+		return fallback;
+
+	/* find last lba used by subarrays on the smallest active disk */
+	extents = get_extents(super, smallest);
+	if (extents == NULL)
+		return fallback;
+	while (extents[idx].size)
+		idx++;
+
+	left = smallest_blocks - extents[idx].start;
+	free(extents);
+
+	/* to give priority to recovery we should not require full
+	 * IMSM_RESERVED_SECTORS from the spare
+	 */
+	wanted = MPB_SECTOR_CNT + NUM_BLOCKS_DIRTY_STRIPE_REGION;
+
+	/* if real reservation is smaller use that value */
+	if (left < wanted)
+		return left;
+	return wanted;
+}
+
+/* Return the minimum size of a spare that can be used in this array.
+ *
+ * The result is the end of the last extent on the first disk that is
+ * neither failed nor a spare (index -1), plus
+ * imsm_min_reserved_sectors(), multiplied by 512.  0 is returned when
+ * there is no super, no such disk, or no extent list.
+ */
+static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
+{
+	struct intel_super *super = st->sb;
+	struct extent *extents;
+	struct dl *disk;
+	unsigned long long sectors = 0;
+	int count = 0;
+
+	if (super == NULL)
+		return 0;
+
+	/* find first active disk in array */
+	for (disk = super->disks; disk != NULL; disk = disk->next)
+		if (!is_failed(&disk->disk) && disk->index != -1)
+			break;
+	if (disk == NULL)
+		return 0;
+
+	/* find last lba used by subarrays */
+	extents = get_extents(super, disk);
+	if (extents == NULL)
+		return 0;
+	while (extents[count].size)
+		count++;
+	if (count > 0)
+		sectors = extents[count - 1].start + extents[count - 1].size;
+	free(extents);
+
+	/* add the amount of space needed for metadata */
+	sectors += imsm_min_reserved_sectors(super);
+
+	return sectors * 512;
+}
+
+static int is_gen_migration(struct imsm_dev *dev);
+
+#ifndef MDASSEMBLE
+static __u64 blocks_per_migr_unit(struct intel_super *super,
+ struct imsm_dev *dev);
+
+/* Print a human-readable description of one volume to stdout.
+ *
+ * @super:    container the volume belongs to (used for the checkpoint size)
+ * @dev:      the volume to describe
+ * @uuid:     preformatted uuid string to print
+ * @disk_idx: index of the disk being examined, used to report "This Slot"
+ *
+ * When a second map (MAP_1) exists its values are shown after a "<--"
+ * marker next to the MAP_0 values.
+ */
+static void print_imsm_dev(struct intel_super *super,
+			   struct imsm_dev *dev,
+			   char *uuid,
+			   int disk_idx)
+{
+	__u64 sz;
+	int slot, i;
+	struct imsm_map *map = get_imsm_map(dev, MAP_0);
+	struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
+	__u32 ord;
+
+	printf("\n");
+	printf("[%.16s]:\n", dev->volume);
+	printf(" UUID : %s\n", uuid);
+	printf(" RAID Level : %d", get_imsm_raid_level(map));
+	if (map2)
+		printf(" <-- %d", get_imsm_raid_level(map2));
+	printf("\n");
+	printf(" Members : %d", map->num_members);
+	if (map2)
+		printf(" <-- %d", map2->num_members);
+	printf("\n");
+	printf(" Slots : [");
+	for (i = 0; i < map->num_members; i++) {
+		ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
+		printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
+	}
+	printf("]");
+	if (map2) {
+		printf(" <-- [");
+		for (i = 0; i < map2->num_members; i++) {
+			ord = get_imsm_ord_tbl_ent(dev, i, MAP_1);
+			printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
+		}
+		printf("]");
+	}
+	printf("\n");
+	printf(" Failed disk : ");
+	if (map->failed_disk_num == 0xff)
+		printf("none");
+	else
+		printf("%i", map->failed_disk_num);
+	printf("\n");
+	slot = get_imsm_disk_slot(map, disk_idx);
+	if (slot >= 0) {
+		ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
+		printf(" This Slot : %d%s\n", slot,
+		       ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
+	} else
+		printf(" This Slot : ?\n");
+	/* the volume size is stored as two 32-bit halves */
+	sz = __le32_to_cpu(dev->size_high);
+	sz <<= 32;
+	sz += __le32_to_cpu(dev->size_low);
+	printf(" Array Size : %llu%s\n", (unsigned long long)sz,
+	       human_size(sz * 512));
+	sz = blocks_per_member(map);
+	printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
+	       human_size(sz * 512));
+	printf(" Sector Offset : %llu\n",
+	       pba_of_lba0(map));
+	printf(" Num Stripes : %llu\n",
+	       num_data_stripes(map));
+	printf(" Chunk Size : %u KiB",
+	       __le16_to_cpu(map->blocks_per_strip) / 2);
+	if (map2)
+		printf(" <-- %u KiB",
+		       __le16_to_cpu(map2->blocks_per_strip) / 2);
+	printf("\n");
+	/* the converted value is unsigned, so print it with %u */
+	printf(" Reserved : %u\n", __le32_to_cpu(dev->reserved_blocks));
+	printf(" Migrate State : ");
+	if (dev->vol.migr_state) {
+		if (migr_type(dev) == MIGR_INIT)
+			printf("initialize\n");
+		else if (migr_type(dev) == MIGR_REBUILD)
+			printf("rebuild\n");
+		else if (migr_type(dev) == MIGR_VERIFY)
+			printf("check\n");
+		else if (migr_type(dev) == MIGR_GEN_MIGR)
+			printf("general migration\n");
+		else if (migr_type(dev) == MIGR_STATE_CHANGE)
+			printf("state change\n");
+		else if (migr_type(dev) == MIGR_REPAIR)
+			printf("repair\n");
+		else
+			printf("<unknown:%d>\n", migr_type(dev));
+	} else
+		printf("idle\n");
+	printf(" Map State : %s", map_state_str[map->map_state]);
+	if (dev->vol.migr_state) {
+		/* distinct name: do not shadow the MAP_0 pointer above */
+		struct imsm_map *prev_map = get_imsm_map(dev, MAP_1);
+
+		printf(" <-- %s", map_state_str[prev_map->map_state]);
+		printf("\n Checkpoint : %u ",
+		       __le32_to_cpu(dev->vol.curr_migr_unit));
+		/* for a general migration the unit size is only shown
+		 * when this disk sits in slot 0 or 1
+		 */
+		if ((is_gen_migration(dev)) && ((slot > 1) || (slot < 0)))
+			printf("(N/A)");
+		else
+			printf("(%llu)", (unsigned long long)
+			       blocks_per_migr_unit(super, dev));
+	}
+	printf("\n");
+	printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
+}
+
+/* Print serial, state, id and usable size of one disk to stdout.
+ *
+ * @disk:     disk record to describe; nothing is printed when NULL
+ * @index:    disk index; -1 prints the serial without a number, values
+ *            below -1 print nothing
+ * @reserved: sectors subtracted from the total to get the usable size
+ */
+static void print_imsm_disk(struct imsm_disk *disk, int index, __u32 reserved)
+{
+	char str[MAX_RAID_SERIAL_LEN + 1];
+	__u64 sz;
+
+	if (index < -1 || !disk)
+		return;
+
+	printf("\n");
+	/* bound the read to MAX_RAID_SERIAL_LEN bytes: a plain "%s" would
+	 * keep scanning past the serial field if it holds no NUL byte
+	 */
+	snprintf(str, sizeof(str), "%.*s", MAX_RAID_SERIAL_LEN,
+		 (const char *)disk->serial);
+	if (index >= 0)
+		printf(" Disk%02d Serial : %s\n", index, str);
+	else
+		printf(" Disk Serial : %s\n", str);
+	printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
+	       is_configured(disk) ? " active" : "",
+	       is_failed(disk) ? " failed" : "");
+	printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
+	sz = total_blocks(disk) - reserved;
+	printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
+	       human_size(sz * 512));
+}
+
+/* Print the migration record of the first volume that is under general
+ * migration.  Nothing is printed when no volume is in that state.  The
+ * record is only shown when the first disk in super->disks occupies slot 0
+ * or 1 of the volume's first map; otherwise an "Empty" note is printed.
+ */
+void examine_migr_rec_imsm(struct intel_super *super)
+{
+	struct migr_record *rec = super->migr_rec;
+	struct imsm_super *mpb = super->anchor;
+	struct imsm_dev *dev = NULL;
+	struct imsm_map *map;
+	int slot = -1;
+	int idx;
+
+	/* locate the first volume under general migration */
+	for (idx = 0; idx < mpb->num_raid_devs; idx++) {
+		dev = __get_imsm_dev(mpb, idx);
+		if (is_gen_migration(dev) != 0)
+			break;
+	}
+	if (idx >= mpb->num_raid_devs)
+		return;
+
+	printf("\nMigration Record Information:");
+
+	/* first map under migration */
+	map = get_imsm_map(dev, MAP_0);
+	if (map != NULL)
+		slot = get_imsm_disk_slot(map, super->disks->index);
+	if (map == NULL || slot < 0 || slot > 1) {
+		printf(" Empty\n ");
+		printf("Examine one of first two disks in array\n");
+		return;
+	}
+
+	printf("\n Status : ");
+	printf(__le32_to_cpu(rec->rec_status) == UNIT_SRC_NORMAL ?
+	       "Normal\n" : "Contains Data\n");
+	printf(" Current Unit : %u\n",
+	       __le32_to_cpu(rec->curr_migr_unit));
+	printf(" Family : %u\n",
+	       __le32_to_cpu(rec->family_num));
+	printf(" Ascending : %u\n",
+	       __le32_to_cpu(rec->ascending_migr));
+	printf(" Blocks Per Unit : %u\n",
+	       __le32_to_cpu(rec->blocks_per_unit));
+	printf(" Dest. Depth Per Unit : %u\n",
+	       __le32_to_cpu(rec->dest_depth_per_unit));
+	printf(" Checkpoint Area pba : %u\n",
+	       __le32_to_cpu(rec->ckpt_area_pba));
+	printf(" First member lba : %u\n",
+	       __le32_to_cpu(rec->dest_1st_member_lba));
+	printf(" Total Number of Units : %u\n",
+	       __le32_to_cpu(rec->num_migr_units));
+	printf(" Size of volume : %u\n",
+	       __le32_to_cpu(rec->post_migr_vol_cap));
+	printf(" Expansion space for LBA64 : %u\n",
+	       __le32_to_cpu(rec->post_migr_vol_cap_hi));
+	printf(" Record was read from : %u\n",
+	       __le32_to_cpu(rec->ckpt_read_disk_num));
+}
+#endif /* MDASSEMBLE */
+/*******************************************************************************
+ * function: imsm_check_attributes
+ * Description: Function checks if features represented by attributes flags
+ *	are supported by mdadm.  Flags outside MPB_ATTRIB_SUPPORTED and
+ *	MPB_ATTRIB_IGNORED are reported: a summary through pr_err(), each
+ *	recognised flag name through dprintf(), and whatever is left over as
+ *	"Unknown attributes".
+ * Parameters:
+ *	attributes - Attributes read from metadata
+ * Returns:
+ *	0 - passed attributes contains unsupported features flags
+ *	1 - all features are supported
+ ******************************************************************************/
+static int imsm_check_attributes(__u32 attributes)
+{
+	int ret_val = 1;
+	__u32 not_supported = MPB_ATTRIB_SUPPORTED^0xffffffff;
+
+	not_supported &= ~MPB_ATTRIB_IGNORED;
+
+	not_supported &= attributes;
+	if (not_supported) {
+		pr_err("(IMSM): Unsupported attributes : %x\n",
+		       (unsigned)__le32_to_cpu(not_supported));
+		/* each recognised flag is named and then cleared, so only
+		 * unrecognised bits remain at the end
+		 */
+		if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
+			dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY \n");
+			not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
+		}
+		if (not_supported & MPB_ATTRIB_2TB) {
+			dprintf("\t\tMPB_ATTRIB_2TB\n");
+			not_supported ^= MPB_ATTRIB_2TB;
+		}
+		if (not_supported & MPB_ATTRIB_RAID0) {
+			dprintf("\t\tMPB_ATTRIB_RAID0\n");
+			not_supported ^= MPB_ATTRIB_RAID0;
+		}
+		if (not_supported & MPB_ATTRIB_RAID1) {
+			dprintf("\t\tMPB_ATTRIB_RAID1\n");
+			not_supported ^= MPB_ATTRIB_RAID1;
+		}
+		if (not_supported & MPB_ATTRIB_RAID10) {
+			dprintf("\t\tMPB_ATTRIB_RAID10\n");
+			not_supported ^= MPB_ATTRIB_RAID10;
+		}
+		if (not_supported & MPB_ATTRIB_RAID1E) {
+			dprintf("\t\tMPB_ATTRIB_RAID1E\n");
+			not_supported ^= MPB_ATTRIB_RAID1E;
+		}
+		if (not_supported & MPB_ATTRIB_RAID5) {
+			dprintf("\t\tMPB_ATTRIB_RAID5\n");
+			not_supported ^= MPB_ATTRIB_RAID5;
+		}
+		if (not_supported & MPB_ATTRIB_RAIDCNG) {
+			dprintf("\t\tMPB_ATTRIB_RAIDCNG\n");
+			not_supported ^= MPB_ATTRIB_RAIDCNG;
+		}
+		if (not_supported & MPB_ATTRIB_BBM) {
+			dprintf("\t\tMPB_ATTRIB_BBM\n");
+			not_supported ^= MPB_ATTRIB_BBM;
+		}
+		/* MPB_ATTRIB_CHECKSUM_VERIFY was already reported and
+		 * cleared above, so it is not tested a second time
+		 */
+		if (not_supported & MPB_ATTRIB_EXP_STRIPE_SIZE) {
+			dprintf("\t\tMPB_ATTRIB_EXP_STRIP_SIZE\n");
+			not_supported ^= MPB_ATTRIB_EXP_STRIPE_SIZE;
+		}
+		if (not_supported & MPB_ATTRIB_2TB_DISK) {
+			dprintf("\t\tMPB_ATTRIB_2TB_DISK\n");
+			not_supported ^= MPB_ATTRIB_2TB_DISK;
+		}
+		if (not_supported & MPB_ATTRIB_NEVER_USE2) {
+			dprintf("\t\tMPB_ATTRIB_NEVER_USE2\n");
+			not_supported ^= MPB_ATTRIB_NEVER_USE2;
+		}
+		if (not_supported & MPB_ATTRIB_NEVER_USE) {
+			dprintf("\t\tMPB_ATTRIB_NEVER_USE\n");
+			not_supported ^= MPB_ATTRIB_NEVER_USE;
+		}
+
+		if (not_supported)
+			dprintf(Name "(IMSM): Unknown attributes : %x\n", not_supported);
+
+		ret_val = 0;
+	}
+
+	return ret_val;
+}
+
+#ifndef MDASSEMBLE
+static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
+
+/* Print the full --examine report for an IMSM container to stdout: the
+ * anchor fields, the disk being examined, the bad block log (if present),
+ * every volume, every other disk in the metadata, any disks with index -1
+ * and finally the migration record.
+ *
+ * Note: super->current_vol is left at the index of the last volume printed.
+ */
+static void examine_super_imsm(struct supertype *st, char *homehost)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb = super->anchor;
+	char str[MAX_SIGNATURE_LENGTH];
+	int i;
+	struct mdinfo info;
+	char nbuf[64];
+	__u32 sum;
+	__u32 reserved = imsm_reserved_sectors(super, super->disks);
+	struct dl *dl;
+
+	snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
+	printf(" Magic : %s\n", str);
+	/* the version is printed straight from the metadata; the copy
+	 * that used to be made into 'str' here was never used
+	 */
+	printf(" Version : %s\n", get_imsm_version(mpb));
+	printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
+	printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
+	printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
+	printf(" Attributes : ");
+	if (imsm_check_attributes(mpb->attributes))
+		printf("All supported\n");
+	else
+		printf("not supported\n");
+	getinfo_super_imsm(st, &info, NULL);
+	fname_from_uuid(st, &info, nbuf, ':');
+	/* the first 5 characters of the formatted name are skipped */
+	printf(" UUID : %s\n", nbuf + 5);
+	sum = __le32_to_cpu(mpb->check_sum);
+	printf(" Checksum : %08x %s\n", sum,
+	       __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
+	printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
+	printf(" Disks : %d\n", mpb->num_disks);
+	printf(" RAID Devices : %d\n", mpb->num_raid_devs);
+	print_imsm_disk(__get_imsm_disk(mpb, super->disks->index), super->disks->index, reserved);
+	if (super->bbm_log) {
+		struct bbm_log *log = super->bbm_log;
+
+		printf("\n");
+		printf("Bad Block Management Log:\n");
+		printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
+		printf(" Signature : %x\n", __le32_to_cpu(log->signature));
+		printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
+		printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count));
+		printf(" First Spare : %llx\n",
+		       (unsigned long long) __le64_to_cpu(log->first_spare_lba));
+	}
+	for (i = 0; i < mpb->num_raid_devs; i++) {
+		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
+
+		/* getinfo reports on the volume selected by current_vol;
+		 * the outer 'info' is reused rather than shadowed
+		 */
+		super->current_vol = i;
+		getinfo_super_imsm(st, &info, NULL);
+		fname_from_uuid(st, &info, nbuf, ':');
+		print_imsm_dev(super, dev, nbuf + 5, super->disks->index);
+	}
+	for (i = 0; i < mpb->num_disks; i++) {
+		/* the examined disk was already printed above */
+		if (i == super->disks->index)
+			continue;
+		print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved);
+	}
+
+	for (dl = super->disks; dl; dl = dl->next)
+		if (dl->index == -1)
+			print_imsm_disk(&dl->disk, -1, reserved);
+
+	examine_migr_rec_imsm(super);
+}
+
+/* Print a single generic "ARRAY metadata=imsm" line for the container.
+ * The UUID is only appended when the anchor defines at least one raid
+ * device.
+ */
+static void brief_examine_super_imsm(struct supertype *st, int verbose)
+{
+	struct intel_super *super = st->sb;
+	struct mdinfo info;
+	char uuid_name[64];
+
+	if (super->anchor->num_raid_devs == 0) {
+		printf("ARRAY metadata=imsm\n");
+		return;
+	}
+
+	getinfo_super_imsm(st, &info, NULL);
+	fname_from_uuid(st, &info, uuid_name, ':');
+	printf("ARRAY metadata=imsm UUID=%s\n", uuid_name + 5);
+}
+
+/* Print one "ARRAY /dev/md/<volume> ..." line per raid device in the
+ * container.  The uuid obtained before the loop is printed as
+ * "container=", the per-volume uuid as "UUID=".
+ *
+ * Note: super->current_vol is left at the index of the last volume.
+ */
+static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
+{
+	struct intel_super *super = st->sb;
+	struct mdinfo info;
+	char container_name[64];
+	char member_name[64];
+	int vol = 0;
+
+	if (super->anchor->num_raid_devs == 0)
+		return;
+
+	getinfo_super_imsm(st, &info, NULL);
+	fname_from_uuid(st, &info, container_name, ':');
+
+	while (vol < super->anchor->num_raid_devs) {
+		struct imsm_dev *dev = get_imsm_dev(super, vol);
+
+		super->current_vol = vol;
+		getinfo_super_imsm(st, &info, NULL);
+		fname_from_uuid(st, &info, member_name, ':');
+		printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
+		       dev->volume, container_name + 5, vol, member_name + 5);
+		vol++;
+	}
+}
+
+/* Print the container description as KEY=value lines (MD_METADATA,
+ * MD_LEVEL, MD_UUID, MD_DEVICES) on stdout.
+ */
+static void export_examine_super_imsm(struct supertype *st)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_super *anchor = super->anchor;
+	struct mdinfo info;
+	char uuid_name[64];
+
+	getinfo_super_imsm(st, &info, NULL);
+	fname_from_uuid(st, &info, uuid_name, ':');
+
+	printf("MD_METADATA=imsm\n");
+	printf("MD_LEVEL=container\n");
+	printf("MD_UUID=%s\n", uuid_name + 5);
+	printf("MD_DEVICES=%u\n", anchor->num_disks);
+}
+
+/* Copy the IMSM metadata area from file descriptor @from to @to.
+ *
+ * The second last 512byte sector of the device contains
+ * the "struct imsm_super" metadata.
+ * This contains mpb_size which is the size in bytes of the
+ * extended metadata.  This is located immediately before
+ * the imsm_super.
+ * We want to read all that, plus the last sector which
+ * may contain a migration record, and write it all
+ * to the target at the same offset.
+ *
+ * Returns 0 on success and 1 on any failure (allocation, size query,
+ * seek, short read/write, signature mismatch, or a metadata size that
+ * does not fit on the device).
+ */
+static int copy_metadata_imsm(struct supertype *st, int from, int to)
+{
+	void *buf;
+	unsigned long long dsize, offset;
+	unsigned long long total, copied = 0;
+	struct imsm_super *sb;
+
+	if (posix_memalign(&buf, 4096, 4096) != 0)
+		return 1;
+
+	if (!get_dev_size(from, NULL, &dsize))
+		goto err;
+
+	/* the anchor sits 1024 bytes from the end; a smaller device
+	 * would make the offset below wrap around
+	 */
+	if (dsize < 1024)
+		goto err;
+
+	if (lseek64(from, dsize-1024, SEEK_SET) < 0)
+		goto err;
+	if (read(from, buf, 512) != 512)
+		goto err;
+	sb = buf;
+	if (strncmp((char*)sb->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0)
+		goto err;
+
+	/* metadata sectors plus the anchor and the last sector; computed
+	 * in 64 bits and checked against the device size because the
+	 * sector count comes from on-disk data
+	 */
+	total = ((unsigned long long)mpb_sectors(sb) + 2) * 512;
+	if (total > dsize)
+		goto err;
+	offset = dsize - total;
+	if (lseek64(from, offset, SEEK_SET) < 0 ||
+	    lseek64(to, offset, SEEK_SET) < 0)
+		goto err;
+	while (copied < total) {
+		unsigned long long left = total - copied;
+		int n = left > 4096 ? 4096 : (int)left;
+
+		if (read(from, buf, n) != n)
+			goto err;
+		if (write(to, buf, n) != n)
+			goto err;
+		copied += n;
+	}
+	free(buf);
+	return 0;
+err:
+	free(buf);
+	return 1;
+}
+
+/* Print the UUID line of the --detail output, preceded by a blank line. */
+static void detail_super_imsm(struct supertype *st, char *homehost)
+{
+	char uuid_name[64];
+	struct mdinfo info;
+
+	getinfo_super_imsm(st, &info, NULL);
+	fname_from_uuid(st, &info, uuid_name, ':');
+
+	printf("\n UUID : %s\n", uuid_name + 5);
+}
+
+/* Print " UUID=<uuid>" with no trailing newline, for the brief --detail
+ * output.
+ */
+static void brief_detail_super_imsm(struct supertype *st)
+{
+	char uuid_name[64];
+	struct mdinfo info;
+
+	getinfo_super_imsm(st, &info, NULL);
+	fname_from_uuid(st, &info, uuid_name, ':');
+
+	printf(" UUID=%s", uuid_name + 5);
+}
+
+static int imsm_read_serial(int fd, char *devname, __u8 *serial);
+static void fd2devname(int fd, char *name);
+
+/* Copy @src into @dst (capacity @len, always NUL-terminated) and strip
+ * trailing whitespace.  Safe for empty and all-whitespace input.
+ */
+static void ahci_copy_trimmed(char *dst, size_t len, const char *src)
+{
+	size_t n;
+
+	snprintf(dst, len, "%s", src);
+	n = strlen(dst);
+	while (n > 0 && isspace((unsigned char)dst[n - 1]))
+		dst[--n] = '\0';
+}
+
+/* Dump an unsorted list of the devices attached to the AHCI controller at
+ * @hba_path, followed by the ports that have nothing attached.
+ *
+ * @port_count: number of ports on the controller
+ * @host_base:  lowest "hostN" number of the controller; port = N - host_base
+ * @verbose:    error messages are printed only when > 0
+ *
+ * Returns 0 on success and 2 on any error.  Unattached ports are listed
+ * only when no error occurred.
+ */
+static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
+{
+	int hba_len = strlen(hba_path) + 1;
+	struct dirent *ent;
+	DIR *dir;
+	char *path = NULL;
+	int err = 0;
+	unsigned long port_mask;
+
+	/* validate before shifting: a shift by a negative amount or by the
+	 * full width of the type is undefined
+	 */
+	if (port_count < 0 || port_count > (int)sizeof(port_mask) * 8) {
+		if (verbose > 0)
+			pr_err("port_count %d out of range\n", port_count);
+		return 2;
+	}
+	if (port_count == (int)sizeof(port_mask) * 8)
+		port_mask = ~0UL;
+	else
+		port_mask = (1UL << port_count) - 1;
+
+	/* scroll through /sys/dev/block looking for devices attached to
+	 * this hba
+	 */
+	dir = opendir("/sys/dev/block");
+	for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
+		int fd;
+		char model[64];
+		char vendor[64];
+		char buf[1024];
+		int major, minor;
+		char *device;
+		char *c;
+		int port;
+		int type;
+
+		if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
+			continue;
+		path = devt_to_devpath(makedev(major, minor));
+		if (!path)
+			continue;
+		if (!path_attached_to_hba(path, hba_path)) {
+			free(path);
+			path = NULL;
+			continue;
+		}
+
+		/* retrieve the scsi device type; the placeholder suffix
+		 * sizes the allocation so the later sprintf() calls for
+		 * "type", "vendor" and "model" fit
+		 */
+		if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
+			if (verbose > 0)
+				pr_err("failed to allocate 'device'\n");
+			err = 2;
+			break;
+		}
+		sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
+		if (load_sys(device, buf) != 0) {
+			if (verbose > 0)
+				pr_err("failed to read device type for %s\n",
+				       path);
+			err = 2;
+			free(device);
+			break;
+		}
+		type = strtoul(buf, NULL, 10);
+
+		/* if it's not a disk print the vendor and model */
+		if (!(type == 0 || type == 7 || type == 14)) {
+			vendor[0] = '\0';
+			model[0] = '\0';
+			sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
+			if (load_sys(device, buf) == 0)
+				ahci_copy_trimmed(vendor, sizeof(vendor), buf);
+			sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
+			if (load_sys(device, buf) == 0)
+				ahci_copy_trimmed(model, sizeof(model), buf);
+
+			if (vendor[0] && model[0])
+				sprintf(buf, "%.64s %.64s", vendor, model);
+			else
+				switch (type) { /* numbers from hald/linux/device.c */
+				case 1: sprintf(buf, "tape"); break;
+				case 2: sprintf(buf, "printer"); break;
+				case 3: sprintf(buf, "processor"); break;
+				case 4:
+				case 5: sprintf(buf, "cdrom"); break;
+				case 6: sprintf(buf, "scanner"); break;
+				case 8: sprintf(buf, "media_changer"); break;
+				case 9: sprintf(buf, "comm"); break;
+				case 12: sprintf(buf, "raid"); break;
+				default: sprintf(buf, "unknown");
+				}
+		} else
+			buf[0] = '\0';
+		free(device);
+
+		/* chop device path to 'host%d' and calculate the port number */
+		c = strchr(&path[hba_len], '/');
+		if (!c) {
+			if (verbose > 0)
+				pr_err("%s - invalid path name\n", path + hba_len);
+			err = 2;
+			break;
+		}
+		*c = '\0';
+		if (sscanf(&path[hba_len], "host%d", &port) == 1)
+			port -= host_base;
+		else {
+			if (verbose > 0) {
+				*c = '/'; /* repair the full string */
+				pr_err("failed to determine port number for %s\n",
+				       path);
+			}
+			err = 2;
+			break;
+		}
+
+		/* mark this port as used; ports outside the mask are left
+		 * alone instead of shifting by an invalid amount
+		 */
+		if (port >= 0 && port < port_count)
+			port_mask &= ~(1UL << port);
+
+		/* print out the device information */
+		if (buf[0]) {
+			printf(" Port%d : - non-disk device (%s) -\n", port, buf);
+			/* release the path here too, otherwise it is
+			 * overwritten (and leaked) by the next iteration
+			 */
+			free(path);
+			path = NULL;
+			continue;
+		}
+
+		fd = dev_open(ent->d_name, O_RDONLY);
+		if (fd < 0)
+			printf(" Port%d : - disk info unavailable -\n", port);
+		else {
+			fd2devname(fd, buf);
+			printf(" Port%d : %s", port, buf);
+			if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
+				printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf);
+			else
+				printf(" ()\n");
+			close(fd);
+		}
+		free(path);
+		path = NULL;
+	}
+	/* non-NULL only when the loop was left through an error break */
+	free(path);
+	if (dir)
+		closedir(dir);
+	if (err == 0) {
+		int i;
+
+		for (i = 0; i < port_count; i++)
+			if (port_mask & (1UL << i))
+				printf(" Port%d : - no device attached -\n", i);
+	}
+
+	return err;
+}
+
+/* Report every controller in the list starting at @elem on stderr, one
+ * line each, naming the type (SATA/SAS) when recognised and the PCI id
+ * when one is set.
+ */
+static void print_found_intel_controllers(struct sys_dev *elem)
+{
+	while (elem != NULL) {
+		const char *kind = "";
+
+		if (elem->type == SYS_DEV_SATA)
+			kind = "SATA ";
+		else if (elem->type == SYS_DEV_SAS)
+			kind = "SAS ";
+
+		pr_err("found Intel(R) ");
+		fprintf(stderr, "%s", kind);
+		fprintf(stderr, "RAID controller");
+		if (elem->pci_id)
+			fprintf(stderr, " at %s", elem->pci_id);
+		fprintf(stderr, ".\n");
+
+		elem = elem->next;
+	}
+	fflush(stderr);
+}
+
+/* Count the ports of the AHCI controller at @hba_path by scanning the
+ * directory for "hostN" entries.
+ *
+ * On return *port_count is (highest N - lowest N + 1), or 0 when no such
+ * entry exists.  The return value is the lowest N, or -1 when the
+ * directory cannot be opened or holds no "hostN" entry.
+ *
+ * Lowest and highest host numbers are tracked independently so the result
+ * does not depend on the order in which readdir() returns the entries.
+ */
+static int ahci_get_port_count(const char *hba_path, int *port_count)
+{
+	struct dirent *ent;
+	DIR *dir;
+	int host_min = 0;
+	int host_max = 0;
+	int found = 0;
+
+	*port_count = 0;
+	if ((dir = opendir(hba_path)) == NULL)
+		return -1;
+
+	for (ent = readdir(dir); ent; ent = readdir(dir)) {
+		int host;
+
+		if (sscanf(ent->d_name, "host%d", &host) != 1)
+			continue;
+		if (!found) {
+			host_min = host;
+			host_max = host;
+			found = 1;
+			continue;
+		}
+		if (host < host_min)
+			host_min = host;
+		if (host > host_max)
+			host_max = host;
+	}
+	closedir(dir);
+
+	if (!found)
+		return -1;
+
+	*port_count = host_max - host_min + 1;
+	return host_min;
+}
+
+/* Print the capabilities advertised by @orom in human-readable form:
+ * version, supported RAID levels and chunk sizes, 2TB support and the
+ * disk/volume limits.
+ */
+static void print_imsm_capability(const struct imsm_orom *orom)
+{
+	static const struct {
+		int kib;
+		const char *label;
+	} chunks[] = {
+		{ 2, "2k" }, { 4, "4k" }, { 8, "8k" }, { 16, "16k" },
+		{ 32, "32k" }, { 64, "64k" }, { 128, "128k" },
+		{ 256, "256k" }, { 512, "512k" },
+		{ 1024*1, "1M" }, { 1024*2, "2M" }, { 1024*4, "4M" },
+		{ 1024*8, "8M" }, { 1024*16, "16M" }, { 1024*32, "32M" },
+		{ 1024*64, "64M" },
+	};
+	unsigned int n;
+
+	printf(" Platform : Intel(R) Matrix Storage Manager\n");
+	printf(" Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
+	       orom->hotfix_ver, orom->build);
+
+	printf(" RAID Levels :");
+	if (imsm_orom_has_raid0(orom))
+		printf(" raid0");
+	if (imsm_orom_has_raid1(orom))
+		printf(" raid1");
+	if (imsm_orom_has_raid1e(orom))
+		printf(" raid1e");
+	if (imsm_orom_has_raid10(orom))
+		printf(" raid10");
+	if (imsm_orom_has_raid5(orom))
+		printf(" raid5");
+	printf("\n");
+
+	printf(" Chunk Sizes :");
+	for (n = 0; n < sizeof(chunks) / sizeof(chunks[0]); n++)
+		if (imsm_orom_has_chunk(orom, chunks[n].kib))
+			printf(" %s", chunks[n].label);
+	printf("\n");
+
+	printf(" 2TB volumes :%s supported\n",
+	       (orom->attr & IMSM_OROM_ATTR_2TB) ? "" : " not");
+	printf(" 2TB disks :%s supported\n",
+	       (orom->attr & IMSM_OROM_ATTR_2TB_DISK) ? "" : " not");
+	printf(" Max Disks : %d\n", orom->tds);
+	printf(" Max Volumes : %d per array, %d per controller\n",
+	       orom->vpa, orom->vphba);
+}
+
+/* Print the capabilities advertised by @orom as KEY=value lines on
+ * stdout.  List values are space-separated, each item followed by a space.
+ */
+static void print_imsm_capability_export(const struct imsm_orom *orom)
+{
+	printf("MD_FIRMWARE_TYPE=imsm\n");
+	printf("IMSM_VERSION=%d.%d.%d.%d\n",orom->major_ver, orom->minor_ver,
+	       orom->hotfix_ver, orom->build);
+	/* each label must be gated by its own capability check; raid10
+	 * and raid5 used to be crossed over
+	 */
+	printf("IMSM_SUPPORTED_RAID_LEVELS=%s%s%s%s%s\n",
+	       imsm_orom_has_raid0(orom) ? "raid0 " : "",
+	       imsm_orom_has_raid1(orom) ? "raid1 " : "",
+	       imsm_orom_has_raid1e(orom) ? "raid1e " : "",
+	       imsm_orom_has_raid10(orom) ? "raid10 " : "",
+	       imsm_orom_has_raid5(orom) ? "raid5 " : "");
+	printf("IMSM_SUPPORTED_CHUNK_SIZES=%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+	       imsm_orom_has_chunk(orom, 2) ? "2k " : "",
+	       imsm_orom_has_chunk(orom, 4) ? "4k " : "",
+	       imsm_orom_has_chunk(orom, 8) ? "8k " : "",
+	       imsm_orom_has_chunk(orom, 16) ? "16k " : "",
+	       imsm_orom_has_chunk(orom, 32) ? "32k " : "",
+	       imsm_orom_has_chunk(orom, 64) ? "64k " : "",
+	       imsm_orom_has_chunk(orom, 128) ? "128k " : "",
+	       imsm_orom_has_chunk(orom, 256) ? "256k " : "",
+	       imsm_orom_has_chunk(orom, 512) ? "512k " : "",
+	       imsm_orom_has_chunk(orom, 1024*1) ? "1M " : "",
+	       imsm_orom_has_chunk(orom, 1024*2) ? "2M " : "",
+	       imsm_orom_has_chunk(orom, 1024*4) ? "4M " : "",
+	       imsm_orom_has_chunk(orom, 1024*8) ? "8M " : "",
+	       imsm_orom_has_chunk(orom, 1024*16) ? "16M " : "",
+	       imsm_orom_has_chunk(orom, 1024*32) ? "32M " : "",
+	       imsm_orom_has_chunk(orom, 1024*64) ? "64M " : "");
+	printf("IMSM_2TB_VOLUMES=%s\n",(orom->attr & IMSM_OROM_ATTR_2TB) ? "yes" : "no");
+	printf("IMSM_2TB_DISKS=%s\n",(orom->attr & IMSM_OROM_ATTR_2TB_DISK) ? "yes" : "no");
+	printf("IMSM_MAX_DISKS=%d\n",orom->tds);
+	printf("IMSM_MAX_VOLUMES_PER_ARRAY=%d\n",orom->vpa);
+	printf("IMSM_MAX_VOLUMES_PER_CONTROLLER=%d\n",orom->vphba);
+}
+
+/* Report (or, with @enumerate_only, merely probe for) IMSM platform
+ * support.
+ *
+ * There are two components to imsm platform support, the ahci SATA
+ * controller and the option-rom.  To find the SATA controller we
+ * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
+ * controller with the Intel vendor id is present.  This approach
+ * allows mdadm to leverage the kernel's ahci detection logic, with the
+ * caveat that if ahci.ko is not loaded mdadm will not be able to
+ * detect platform raid capabilities.  The option-rom resides in a
+ * platform "Adapter ROM".  We scan for its signature to retrieve the
+ * platform capabilities.  If raid support is disabled in the BIOS the
+ * option-rom capability structure will not be available.
+ *
+ * @verbose:         diagnostics are printed when > 0
+ * @enumerate_only:  only check that every controller has capabilities
+ * @controller_path: when non-NULL, restrict the report to this controller
+ *
+ * Returns 0 when capabilities were found, 2 when no controller (or, in
+ * enumerate mode, no capability) was found or port enumeration failed,
+ * and 1 when no controller produced any capability report.
+ */
+static int detail_platform_imsm(int verbose, int enumerate_only, char *controller_path)
+{
+	const struct imsm_orom *orom;
+	struct sys_dev *list, *hba;
+	int host_base = 0;
+	int port_count = 0;
+	int result = 1;
+
+	if (enumerate_only) {
+		if (check_env("IMSM_NO_PLATFORM"))
+			return 0;
+		list = find_intel_devices();
+		if (list == NULL)
+			return 2;
+		/* one controller without capabilities fails the probe */
+		for (hba = list; hba != NULL; hba = hba->next)
+			if (find_imsm_capability(hba->type) == NULL)
+				return 2;
+		return 0;
+	}
+
+	list = find_intel_devices();
+	if (list == NULL) {
+		if (verbose > 0)
+			pr_err("no active Intel(R) RAID controller found.\n");
+		return 2;
+	}
+	if (verbose > 0)
+		print_found_intel_controllers(list);
+
+	for (hba = list; hba != NULL; hba = hba->next) {
+		if (controller_path &&
+		    compare_paths(hba->path, controller_path) != 0)
+			continue;
+
+		orom = find_imsm_capability(hba->type);
+		if (orom == NULL) {
+			pr_err("imsm capabilities not found for controller: %s (type %s)\n",
+			       hba->path, get_sys_dev_type(hba->type));
+			continue;
+		}
+
+		result = 0;
+		print_imsm_capability(orom);
+		printf(" I/O Controller : %s (%s)\n",
+		       hba->path, get_sys_dev_type(hba->type));
+
+		/* ports are only enumerated for SATA controllers */
+		if (hba->type != SYS_DEV_SATA)
+			continue;
+
+		host_base = ahci_get_port_count(hba->path, &port_count);
+		if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose) == 0)
+			continue;
+
+		if (verbose > 0)
+			pr_err("failed to enumerate ports on SATA controller at %s.\n",
+			       hba->pci_id);
+		result |= 2;
+	}
+
+	if (controller_path && result == 1)
+		pr_err("no active Intel(R) RAID controller found under %s\n",
+		       controller_path);
+
+	return result;
+}
+
+/* Print the capabilities of every matching Intel controller as KEY=value
+ * lines.
+ *
+ * @verbose:         error markers are printed when > 0
+ * @controller_path: when non-NULL, restrict the output to this controller
+ *
+ * Returns 0 when at least one controller had capabilities, 2 when no Intel
+ * device was found at all, and 1 otherwise.
+ */
+static int export_detail_platform_imsm(int verbose, char *controller_path)
+{
+	struct sys_dev *hba;
+	struct sys_dev *list = find_intel_devices();
+	int result = 1;
+
+	if (list == NULL) {
+		if (verbose > 0)
+			pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_INTEL_DEVICES\n");
+		return 2;
+	}
+
+	for (hba = list; hba != NULL; hba = hba->next) {
+		const struct imsm_orom *orom;
+
+		if (controller_path &&
+		    compare_paths(hba->path, controller_path) != 0)
+			continue;
+
+		orom = find_imsm_capability(hba->type);
+		if (orom != NULL) {
+			print_imsm_capability_export(orom);
+			result = 0;
+		} else if (verbose > 0) {
+			pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_IMSM_CAPABLE_DEVICE_UNDER_%s\n",hba->path);
+		}
+	}
+
+	return result;
+}
+
+#endif
+
+/* Always returns -1.
+ *
+ * The imsm metadata format carries no host identification, so it can
+ * neither be confirmed nor denied that an array is "meant" for this
+ * host.  Member disks that do not belong are excluded through
+ * compare_super and the 'family_num' fields, and mdadm.conf names the
+ * arrays that should be assembled.  Auto-assembly may still pick up
+ * "foreign" arrays.
+ */
+static int match_home_imsm(struct supertype *st, char *homehost)
+{
+	return -1;
+}
+
+/* Synthesise a uuid for the container, or for the current volume when
+ * super->current_vol selects one.
+ *
+ * The uuid returned here is used for:
+ *  - the bitmap file (Create, Grow)
+ *  - the backup header when saving a critical section (Grow)
+ *  - comparing uuids when re-adding a device into an array
+ *  - recognising the same set when adding a missing device back
+ * A truncated or hashed uuid is good enough for all of these as long as
+ * everyone agrees on it.
+ *
+ * imsm does not track uuids, so one is built with sha1 over
+ *  - the signature (constant for all imsm arrays, but no matter)
+ *  - the orig_family_num of the container
+ *  - the index number of the volume
+ *  - the 'serial' number (volume name field) of the volume
+ * Hopefully these are all constant.  The first 16 bytes of the digest
+ * are copied into @uuid.
+ */
+static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
+{
+	struct intel_super *super = st->sb;
+	struct imsm_super *anchor = super->anchor;
+	struct imsm_dev *dev = NULL;
+	struct sha1_ctx ctx;
+	char digest[20];
+	__u32 family_num;
+
+	/* some mdadm versions failed to set ->orig_family_num, in which
+	 * case fall back to ->family_num.  orig_family_num will be
+	 * fixed up with the first metadata update.
+	 */
+	family_num = anchor->orig_family_num;
+	if (!family_num)
+		family_num = anchor->family_num;
+
+	sha1_init_ctx(&ctx);
+	sha1_process_bytes(anchor->sig, MPB_SIG_LEN, &ctx);
+	sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
+
+	/* the volume-specific parts are only mixed in for a volume */
+	if (super->current_vol >= 0)
+		dev = get_imsm_dev(super, super->current_vol);
+	if (dev != NULL) {
+		__u32 vol = super->current_vol;
+
+		sha1_process_bytes(&vol, sizeof(vol), &ctx);
+		sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
+	}
+
+	sha1_finish_ctx(&ctx, digest);
+	memcpy(uuid, digest, 4*4);
+}
+
+#if 0
+/* (compiled out) Parse the dotted version string that get_imsm_version()
+ * returns into up to three two-character fields and report the second
+ * field through @m and the third through @p.
+ */
+static void
+get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
+{
+	__u8 *v = get_imsm_version(mpb);
+	__u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
+	char major[] = { 0, 0, 0 };
+	char minor[] = { 0 ,0, 0 };
+	char patch[] = { 0, 0, 0 };
+	char *ver_parse[] = { major, minor, patch };
+	int i, j;
+
+	i = j = 0;
+	/* check the bound before dereferencing, and stop once all three
+	 * fields have been consumed so ver_parse[] is never overrun
+	 */
+	while (v < end && *v != '\0' && i < 3) {
+		if (*v == '.') {
+			/* only a dot moves on to the next field */
+			i++;
+			j = 0;
+		} else if (j < 2)
+			ver_parse[i][j++] = *v;
+		/* else: characters beyond the two-character field are
+		 * dropped rather than spilling into the next field
+		 */
+		v++;
+	}
+
+	*m = strtol(minor, NULL, 0);
+	*p = strtol(patch, NULL, 0);
+}
+#endif
+
+static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
+{
+	/* migr_strip_size when repairing or initializing parity:
+	 * the strip size of MAP_0 for raid5 and raid10, otherwise a fixed
+	 * 128KiB expressed in 512-byte units (>> 9).
+	 */
+	struct imsm_map *map0 = get_imsm_map(dev, MAP_0);
+	__u32 strip_blocks = __le32_to_cpu(map0->blocks_per_strip);
+	int level = get_imsm_raid_level(map0);
+
+	if (level == 5 || level == 10)
+		return strip_blocks;
+
+	return 128*1024 >> 9;
+}
+
+static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
+{
+	/* migr_strip_size when rebuilding a degraded disk.  It is not
+	 * known why this differs from migr_strip_blocks_resync(), but
+	 * it is good to stay compatible.  All values come from MAP_1.
+	 */
+	struct imsm_map *map1 = get_imsm_map(dev, MAP_1);
+	__u32 strip_blocks = __le32_to_cpu(map1->blocks_per_strip);
+	int level = get_imsm_raid_level(map1);
+
+	if (level == 1 || level == 10) {
+		/* fixed 128KiB only when members divide evenly into domains */
+		if (map1->num_members % map1->num_domains == 0)
+			return 128*1024 >> 9;
+		return strip_blocks;
+	}
+
+	if (level == 5)
+		/* never less than 64KiB */
+		return max((__u32) 64*1024 >> 9, strip_blocks);
+
+	return 128*1024 >> 9;
+}
+
+static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
+{
+	/* ratio of the MAP_1 strip size to the MAP_0 strip size,
+	 * but never less than one stripe per unit
+	 */
+	struct imsm_map *map0 = get_imsm_map(dev, MAP_0);
+	struct imsm_map *map1 = get_imsm_map(dev, MAP_1);
+	__u32 chunk0 = __le32_to_cpu(map0->blocks_per_strip);
+	__u32 chunk1 = __le32_to_cpu(map1->blocks_per_strip);
+	__u32 ratio = chunk1 / chunk0;
+
+	return max((__u32) 1, ratio);
+}
+
+static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
+{
+	/* raid1 and raid10 (level taken from MAP_0) rebuild num_domains
+	 * (taken from MAP_1) stripes per unit; every other level uses
+	 * the resync value
+	 */
+	struct imsm_map *map0 = get_imsm_map(dev, MAP_0);
+
+	switch (get_imsm_raid_level(map0)) {
+	case 1:
+	case 10:
+		return get_imsm_map(dev, MAP_1)->num_domains;
+	default:
+		return num_stripes_per_unit_resync(dev);
+	}
+}
+
+static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
+{
+	/* Number of data-bearing members in the map selected by
+	 * second_map.  Named 'imsm_' because raid0, raid1 and raid10
+	 * counter-intuitively have the same number of data disks.
+	 * Returns 0 (after a debug message) for any other raid level
+	 * than 0, 1, 5 and 10.
+	 */
+	struct imsm_map *map = get_imsm_map(dev, second_map);
+	int level = get_imsm_raid_level(map);
+
+	if (level == 0)
+		return map->num_members;
+	if (level == 1 || level == 10)
+		return map->num_members/2;
+	if (level == 5)
+		return map->num_members - 1;
+
+	dprintf("%s: unsupported raid level\n", __func__);
+	return 0;
+}
+
+static __u32 parity_segment_depth(struct imsm_dev *dev)
+{
+	/* MAP_0 strip size scaled by num_domains for raid1/raid10, by
+	 * num_members for raid5, and left unscaled for anything else
+	 */
+	struct imsm_map *map0 = get_imsm_map(dev, MAP_0);
+	__u32 strip_blocks = __le32_to_cpu(map0->blocks_per_strip);
+	__u32 factor = 1;
+
+	switch (get_imsm_raid_level(map0)) {
+	case 1:
+	case 10:
+		factor = map0->num_domains;
+		break;
+	case 5:
+		factor = map0->num_members;
+		break;
+	default:
+		break;
+	}
+
+	return strip_blocks * factor;
+}
+
+static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
+{
+	/* Translate 'block' using the geometry of MAP_1: find the strip
+	 * it falls in, convert that strip to a stripe number according
+	 * to the raid level, and return stripe * chunk plus the offset
+	 * of the block within its strip.  Levels other than 1, 5 and 10
+	 * map to 0.
+	 */
+	struct imsm_map *map1 = get_imsm_map(dev, MAP_1);
+	__u32 chunk = __le32_to_cpu(map1->blocks_per_strip);
+	__u32 strip = block / chunk;
+	__u32 offset = block % chunk;
+	int level = get_imsm_raid_level(map1);
+
+	if (level == 1 || level == 10) {
+		__u32 vol_strip = (strip * map1->num_domains) + 1;
+		__u32 vol_stripe = vol_strip / map1->num_members;
+
+		return vol_stripe * chunk + offset;
+	}
+
+	if (level == 5) {
+		__u32 stripe = strip / (map1->num_members - 1);
+
+		return stripe * chunk + offset;
+	}
+
+	return 0;
+}
+
+static __u64 blocks_per_migr_unit(struct intel_super *super,
+				  struct imsm_dev *dev)
+{
+	/* calculate the conversion factor between per member 'blocks'
+	 * (md/{resync,rebuild}_start) and imsm migration units, return
+	 * 0 for the 'not migrating' and 'unsupported migration' cases
+	 *
+	 *   MIGR_GEN_MIGR            : taken from the migration record
+	 *   MIGR_VERIFY/REPAIR/INIT  : derived from the resync geometry
+	 *   MIGR_REBUILD             : rebuild strip size * stripes per unit
+	 *   anything else            : 0
+	 */
+	if (!dev->vol.migr_state)
+		return 0;
+
+	switch (migr_type(dev)) {
+	case MIGR_GEN_MIGR: {
+		struct migr_record *migr_rec = super->migr_rec;
+		return __le32_to_cpu(migr_rec->blocks_per_unit);
+	}
+	case MIGR_VERIFY:
+	case MIGR_REPAIR:
+	case MIGR_INIT: {
+		struct imsm_map *map = get_imsm_map(dev, MAP_0);
+		__u32 stripes_per_unit;
+		__u32 blocks_per_unit;
+		__u32 parity_depth;
+		__u32 migr_chunk;
+		__u32 block_map;
+		__u32 block_rel;
+		__u32 segment;
+		__u32 stripe;
+		__u8  disks;
+
+		/* yes, this is really the translation of migr_units to
+		 * per-member blocks in the 'resync' case
+		 */
+		stripes_per_unit = num_stripes_per_unit_resync(dev);
+		migr_chunk = migr_strip_blocks_resync(dev);
+		disks = imsm_num_data_members(dev, MAP_0);
+		blocks_per_unit = stripes_per_unit * migr_chunk * disks;
+		stripe = __le16_to_cpu(map->blocks_per_strip) * disks;
+		/* imsm_num_data_members() reports 0 data disks for raid
+		 * levels it does not support; treat that (or a zero strip
+		 * size) as an unsupported migration instead of dividing
+		 * by zero below
+		 */
+		if (stripe == 0)
+			return 0;
+		segment = blocks_per_unit / stripe;
+		block_rel = blocks_per_unit - segment * stripe;
+		parity_depth = parity_segment_depth(dev);
+		block_map = map_migr_block(dev, block_rel);
+		return block_map + parity_depth * segment;
+	}
+	case MIGR_REBUILD: {
+		__u32 stripes_per_unit;
+		__u32 migr_chunk;
+
+		stripes_per_unit = num_stripes_per_unit_rebuild(dev);
+		migr_chunk = migr_strip_blocks_rebuild(dev);
+		return migr_chunk * stripes_per_unit;
+	}
+	case MIGR_STATE_CHANGE:
+	default:
+		return 0;
+	}
+}
+
+/* md layout value reported for a given raid level; UnSet for any level
+ * that is not listed here
+ */
+static int imsm_level_to_layout(int level)
+{
+	if (level == 0 || level == 1)
+		return 0;
+	if (level == 5 || level == 6)
+		return ALGORITHM_LEFT_ASYMMETRIC;
+	if (level == 10)
+		return 0x102;
+
+	return UnSet;
+}
+
+/*******************************************************************************
+ * Function:	read_imsm_migr_rec
+ * Description:	Reads the imsm migration record from a member disk into
+ *		super->migr_rec_buf.  MIGR_REC_BUF_SIZE bytes are read
+ *		starting MIGR_REC_POSITION bytes before the end of the
+ *		device, as reported by get_dev_size().
+ * Parameters:
+ *	fd	: open descriptor of the disk to read from
+ *	super	: metadata info that owns the destination buffer
+ * Returns:
+ *	 0 : success,
+ *	-1 : seek failed or the record could not be read completely
+ ******************************************************************************/
+static int read_imsm_migr_rec(int fd, struct intel_super *super)
+{
+	unsigned long long dsize;
+
+	get_dev_size(fd, NULL, &dsize);
+
+	if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) {
+		pr_err("Cannot seek to anchor block: %s\n",
+		       strerror(errno));
+		return -1;
+	}
+
+	if (read(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) !=
+	    MIGR_REC_BUF_SIZE) {
+		pr_err("Cannot read migr record block: %s\n",
+		       strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Return the first raid device in super->devlist for which
+ * is_gen_migration() holds, or NULL when there is none.
+ */
+static struct imsm_dev *imsm_get_device_during_migration(
+	struct intel_super *super)
+{
+	struct intel_dev *cur = super->devlist;
+
+	while (cur) {
+		if (is_gen_migration(cur->dev))
+			return cur->dev;
+		cur = cur->next;
+	}
+
+	return NULL;
+}
+
+/* Open, read-only, the block device dev_major:dev_minor if it is a usable
+ * source for the migration record: its disk index must not be negative
+ * (spare and failed disks are skipped) and it must occupy slot 0 or 1 of
+ * 'map'.
+ * Returns the descriptor from dev_open(), or -1 when the device is skipped.
+ */
+static int open_migr_rec_source(struct imsm_map *map, int disk_idx,
+				int dev_major, int dev_minor)
+{
+	char nm[30];
+	int slot;
+
+	if (disk_idx < 0 || map == NULL)
+		return -1;
+
+	/* read only from one of the first two slots */
+	slot = get_imsm_disk_slot(map, disk_idx);
+	if (slot < 0 || slot > 1)
+		return -1;
+
+	sprintf(nm, "%d:%d", dev_major, dev_minor);
+	return dev_open(nm, O_RDONLY);
+}
+
+/*******************************************************************************
+ * Function:	load_imsm_migr_rec
+ * Description:	Loads the imsm migration record of the volume that is under
+ *		general migration.  The record is read from the first
+ *		device that can be opened among the members in slot 0 or 1
+ *		of that volume's MAP_0: the devices listed in info->devs
+ *		are tried first (when info is given), then super->disks.
+ * Parameters:
+ *	super	: imsm internal array info
+ *	info	: general array info, may be NULL
+ * Returns:
+ *	 0 : success
+ *	-1 : fail (no suitable device could be opened, or the read failed)
+ *	-2 : no migration in progress
+ ******************************************************************************/
+static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
+{
+	struct imsm_dev *migr_dev = imsm_get_device_during_migration(super);
+	struct imsm_map *map;
+	struct mdinfo *sd;
+	struct dl *dl;
+	int fd = -1;
+	int retval;
+
+	/* nothing to load, no migration in progress? */
+	if (migr_dev == NULL)
+		return -2;
+	map = get_imsm_map(migr_dev, MAP_0);
+
+	/* first choice: the devices the caller told us about */
+	for (sd = info ? info->devs : NULL; sd && fd < 0; sd = sd->next)
+		fd = open_migr_rec_source(map, sd->disk.raid_disk,
+					  sd->disk.major, sd->disk.minor);
+
+	/* second choice: the disks recorded in the super block */
+	for (dl = super->disks; dl && fd < 0; dl = dl->next)
+		fd = open_migr_rec_source(map, dl->index,
+					  dl->major, dl->minor);
+
+	if (fd < 0)
+		return -1;
+
+	retval = read_imsm_migr_rec(fd, super);
+	close(fd);
+	return retval;
+}
+
+#ifndef MDASSEMBLE
+/*******************************************************************************
+ * Function:	imsm_create_metadata_checkpoint_update
+ * Description:	Allocates and fills a general-migration checkpoint update
+ *		carrying the curr_migr_unit of super->migr_rec.
+ * Parameters:
+ *	super	: imsm internal array info
+ *	u	: receives the pointer to the prepared update
+ * Returns:
+ *	Update length in bytes.
+ *	When the length is 0 no update was prepared (*u is NULL whenever
+ *	u itself is not NULL).
+ ******************************************************************************/
+static int imsm_create_metadata_checkpoint_update(
+	struct intel_super *super,
+	struct imsm_update_general_migration_checkpoint **u)
+{
+	struct imsm_update_general_migration_checkpoint *upd;
+	int update_memory_size;
+
+	dprintf("imsm_create_metadata_checkpoint_update(enter)\n");
+
+	if (u == NULL)
+		return 0;
+	*u = NULL;
+
+	/* size of all update data without anchor */
+	update_memory_size = sizeof(*upd);
+
+	upd = xcalloc(1, update_memory_size);
+	*u = upd;
+	if (upd == NULL) {
+		dprintf("error: cannot get memory for "
+			"imsm_create_metadata_checkpoint_update update\n");
+		return 0;
+	}
+
+	upd->type = update_general_migration_checkpoint;
+	upd->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit);
+	dprintf("imsm_create_metadata_checkpoint_update: prepared for %u\n",
+		upd->curr_migr_unit);
+
+	return update_memory_size;
+}
+
+static void imsm_update_metadata_locally(struct supertype *st,
+ void *buf, int len);
+
+/*******************************************************************************
+ * Function: write_imsm_migr_rec
+ * Description: Function writes imsm migration record
+ * (MIGR_REC_BUF_SIZE bytes of super->migr_rec_buf, placed
+ * MIGR_REC_POSITION bytes before the end of each target disk)
+ * and then records the current checkpoint in the metadata.
+ * Only disks that occupy slot 0 or 1 of MAP_0 of the chosen
+ * volume are written; disks that cannot be opened are skipped.
+ * Parameters:
+ * st : supertype whose ->sb is the imsm internal array info
+ * Returns:
+ * 0 : success
+ * -1 : if fail (seek or write error on a disk, or the checkpoint
+ * update could not be prepared)
+ ******************************************************************************/
+static int write_imsm_migr_rec(struct supertype *st)
+{
+ struct intel_super *super = st->sb;
+ unsigned long long dsize;
+ char nm[30];
+ int fd = -1;
+ int retval = -1;
+ struct dl *sd;
+ int len;
+ struct imsm_update_general_migration_checkpoint *u;
+ struct imsm_dev *dev;
+ struct imsm_map *map = NULL;
+
+ /* find map under migration */
+ dev = imsm_get_device_during_migration(super);
+ /* if no migration, write buffer anyway to clear migr_record
+ * on disk based on first available device
+ */
+ if (dev == NULL)
+ dev = get_imsm_dev(super, super->current_vol < 0 ? 0 :
+ super->current_vol);
+
+ /* NOTE(review): dev is not checked for NULL before this call -
+ * confirm get_imsm_dev() cannot fail here
+ */
+ map = get_imsm_map(dev, MAP_0);
+
+ for (sd = super->disks ; sd ; sd = sd->next) {
+ int slot = -1;
+
+ /* skip failed and spare devices */
+ if (sd->index < 0)
+ continue;
+ /* write to 2 first slots only */
+ if (map)
+ slot = get_imsm_disk_slot(map, sd->index);
+ if ((map == NULL) || (slot > 1) || (slot < 0))
+ continue;
+
+ sprintf(nm, "%d:%d", sd->major, sd->minor);
+ fd = dev_open(nm, O_RDWR);
+ /* a disk that cannot be opened is silently skipped */
+ if (fd < 0)
+ continue;
+ get_dev_size(fd, NULL, &dsize);
+ if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) {
+ pr_err("Cannot seek to anchor block: %s\n",
+ strerror(errno));
+ goto out;
+ }
+ if (write(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) !=
+ MIGR_REC_BUF_SIZE) {
+ pr_err("Cannot write migr record block: %s\n",
+ strerror(errno));
+ goto out;
+ }
+ close(fd);
+ /* mark as closed so the cleanup at 'out' does not close it again */
+ fd = -1;
+ }
+ /* update checkpoint information in metadata */
+ len = imsm_create_metadata_checkpoint_update(super, &u);
+
+ if (len <= 0) {
+ dprintf("imsm: Cannot prepare update\n");
+ goto out;
+ }
+ /* update metadata locally */
+ imsm_update_metadata_locally(st, u, len);
+ /* and possibly remotely */
+ if (st->update_tail) {
+ /* u is handed to the update queue here and is not freed by
+ * this function
+ */
+ append_metadata_update(st, u, len);
+ /* during reshape we do all work inside metadata handler
+ * manage_reshape(), so metadata update has to be triggered
+ * inside it
+ */
+ flush_metadata_updates(st);
+ st->update_tail = &st->updates;
+ } else
+ free(u);
+
+ retval = 0;
+ out:
+ /* only reached with an open fd after a seek/write failure */
+ if (fd >= 0)
+ close(fd);
+ return retval;
+}
+#endif /* MDASSEMBLE */
+
+/* spare/missing disk activations are not allowed while an
+ * array/container performs a reshape operation, because
+ * all arrays in a container work on the same set of disks.
+ *
+ * Returns 1 when any raid device of the container is under general
+ * migration (so no repair may be started), 0 otherwise.
+ */
+int imsm_reshape_blocks_arrays_changes(struct intel_super *super)
+{
+	struct intel_dev *entry;
+
+	/* check the whole container */
+	for (entry = super->devlist; entry != NULL; entry = entry->next)
+		if (is_gen_migration(entry->dev))
+			return 1;
+
+	return 0;
+}
+/* Round component_size (in 512-byte sectors) down to a whole number of
+ * chunks, except for raid1 and for an UnSet level, where the size is
+ * returned untouched.  chunk_size is given in bytes.
+ * A chunk_size smaller than one sector cannot be used for alignment, so
+ * the size is returned unchanged in that case as well.
+ */
+static unsigned long long imsm_component_size_aligment_check(int level,
+					      int chunk_size,
+					      unsigned long long component_size)
+{
+	unsigned int component_size_alligment;
+	int chunk_sectors = chunk_size / 512;
+
+	/* the remainder below would otherwise divide by zero, and it is
+	 * computed before the raid level is looked at
+	 */
+	if (chunk_sectors <= 0)
+		return component_size;
+
+	/* check component size aligment
+	 */
+	component_size_alligment = component_size % chunk_sectors;
+
+	dprintf("imsm_component_size_aligment_check(Level: %i, "
+		"chunk_size = %i, component_size = %llu), "
+		"component_size_alligment = %u\n",
+		level, chunk_size, component_size,
+		component_size_alligment);
+
+	if (component_size_alligment && (level != 1) && (level != UnSet)) {
+		dprintf("imsm: reported component size alligned from %llu ",
+			component_size);
+		component_size -= component_size_alligment;
+		/* %u: component_size_alligment is unsigned */
+		dprintf("to %llu (%u).\n",
+			component_size, component_size_alligment);
+	}
+
+	return component_size;
+}
+
+/* Fill *info with the md view of the volume selected by super->current_vol.
+ *
+ * *info is cleared first; the only input taken from it is
+ * info->array.raid_disks, which bounds the optional dmap[] output.
+ * When dmap is not NULL, dmap[i] is set to 1 if slot i resolves to a disk
+ * with CONFIGURED_DISK set in its status, and to 0 otherwise.
+ *
+ * Level, chunk size, data offset and component size are reported from
+ * the second map (MAP_1) when it exists, otherwise from MAP_0.
+ */
+static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
+{
+ struct intel_super *super = st->sb;
+ struct migr_record *migr_rec = super->migr_rec;
+ struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ struct imsm_map *prev_map = get_imsm_map(dev, MAP_1);
+ struct imsm_map *map_to_analyse = map;
+ struct dl *dl;
+ /* must be captured here: the memset below wipes *info */
+ int map_disks = info->array.raid_disks;
+
+ memset(info, 0, sizeof(*info));
+ if (prev_map)
+ map_to_analyse = prev_map;
+
+ dl = super->current_disk;
+
+ info->container_member = super->current_vol;
+ info->array.raid_disks = map->num_members;
+ info->array.level = get_imsm_raid_level(map_to_analyse);
+ info->array.layout = imsm_level_to_layout(info->array.level);
+ info->array.md_minor = -1;
+ info->array.ctime = 0;
+ info->array.utime = 0;
+ info->array.chunk_size =
+ __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
+ info->array.state = !dev->vol.dirty;
+ /* 64-bit array size assembled from its two 32-bit halves */
+ info->custom_array_size = __le32_to_cpu(dev->size_high);
+ info->custom_array_size <<= 32;
+ info->custom_array_size |= __le32_to_cpu(dev->size_low);
+ info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
+
+ if (is_gen_migration(dev)) {
+ /* MAP_0 describes the target geometry of the reshape */
+ info->reshape_active = 1;
+ info->new_level = get_imsm_raid_level(map);
+ info->new_layout = imsm_level_to_layout(info->new_level);
+ info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
+ /* NOTE(review): prev_map is dereferenced without a NULL check;
+ * presumably a second map always exists during general
+ * migration - confirm
+ */
+ info->delta_disks = map->num_members - prev_map->num_members;
+ if (info->delta_disks) {
+ /* this needs to be applied to every array
+ * in the container.
+ */
+ info->reshape_active = CONTAINER_RESHAPE;
+ }
+ /* The shape of the information that we give to md might have
+ * to be modified to cope with md's requirements for reshaping
+ * arrays.
+ * For example, when reshaping a RAID0, md requires it to be
+ * presented as a degraded RAID4.
+ * Also if a RAID0 is migrating to a RAID5 we need to specify
+ * the array as already being RAID5, but the 'before' layout
+ * is a RAID4-like layout.
+ */
+ switch (info->array.level) {
+ case 0:
+ switch(info->new_level) {
+ case 0:
+ /* conversion is happening as RAID4 */
+ info->array.level = 4;
+ info->array.raid_disks += 1;
+ break;
+ case 5:
+ /* conversion is happening as RAID5 */
+ info->array.level = 5;
+ info->array.layout = ALGORITHM_PARITY_N;
+ info->delta_disks -= 1;
+ break;
+ default:
+ /* FIXME error message */
+ info->array.level = UnSet;
+ break;
+ }
+ break;
+ }
+ } else {
+ info->new_level = UnSet;
+ info->new_layout = UnSet;
+ info->new_chunk = info->array.chunk_size;
+ info->delta_disks = 0;
+ }
+
+ if (dl) {
+ info->disk.major = dl->major;
+ info->disk.minor = dl->minor;
+ info->disk.number = dl->index;
+ info->disk.raid_disk = get_imsm_disk_slot(map_to_analyse,
+ dl->index);
+ }
+
+ info->data_offset = pba_of_lba0(map_to_analyse);
+ info->component_size = blocks_per_member(map_to_analyse);
+
+ info->component_size = imsm_component_size_aligment_check(
+ info->array.level,
+ info->array.chunk_size,
+ info->component_size);
+
+ memset(info->uuid, 0, sizeof(info->uuid));
+ info->recovery_start = MaxSector;
+
+ info->reshape_progress = 0;
+ info->resync_start = MaxSector;
+ /* an uninitialized or dirty volume needs a resync from the start,
+ * unless a reshape in the container blocks array changes
+ */
+ if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
+ dev->vol.dirty) &&
+ imsm_reshape_blocks_arrays_changes(super) == 0) {
+ info->resync_start = 0;
+ }
+ if (dev->vol.migr_state) {
+ switch (migr_type(dev)) {
+ case MIGR_REPAIR:
+ case MIGR_INIT: {
+ /* resume from the checkpoint stored in the volume */
+ __u64 blocks_per_unit = blocks_per_migr_unit(super,
+ dev);
+ __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
+
+ info->resync_start = blocks_per_unit * units;
+ break;
+ }
+ case MIGR_GEN_MIGR: {
+ /* reshape progress comes from the migration record */
+ __u64 blocks_per_unit = blocks_per_migr_unit(super,
+ dev);
+ __u64 units = __le32_to_cpu(migr_rec->curr_migr_unit);
+ unsigned long long array_blocks;
+ int used_disks;
+
+ /* count one more unit for an ascending migration that is
+ * not on its last unit and whose record status is
+ * UNIT_SRC_IN_CP_AREA
+ */
+ if (__le32_to_cpu(migr_rec->ascending_migr) &&
+ (units <
+ (__le32_to_cpu(migr_rec->num_migr_units)-1)) &&
+ (super->migr_rec->rec_status ==
+ __cpu_to_le32(UNIT_SRC_IN_CP_AREA)))
+ units++;
+
+ info->reshape_progress = blocks_per_unit * units;
+
+ dprintf("IMSM: General Migration checkpoint : %llu "
+ "(%llu) -> read reshape progress : %llu\n",
+ (unsigned long long)units,
+ (unsigned long long)blocks_per_unit,
+ info->reshape_progress);
+
+ used_disks = imsm_num_data_members(dev, MAP_1);
+ if (used_disks > 0) {
+ array_blocks = blocks_per_member(map) *
+ used_disks;
+ /* round array size down to closest MB
+ */
+ info->custom_array_size = (array_blocks
+ >> SECT_PER_MB_SHIFT)
+ << SECT_PER_MB_SHIFT;
+ }
+ }
+ /* fallthrough: there is no break above, so a general migration
+ * also gets resync_start reset to MaxSector below
+ */
+ case MIGR_VERIFY:
+ /* we could emulate the checkpointing of
+ * 'sync_action=check' migrations, but for now
+ * we just immediately complete them
+ */
+ case MIGR_REBUILD:
+ /* this is handled by container_content_imsm() */
+ case MIGR_STATE_CHANGE:
+ /* FIXME handle other migrations */
+ default:
+ /* we are not dirty, so... */
+ info->resync_start = MaxSector;
+ }
+ }
+
+ /* dev->volume holds up to MAX_RAID_SERIAL_LEN bytes; terminate the
+ * copy explicitly
+ */
+ strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
+ info->name[MAX_RAID_SERIAL_LEN] = 0;
+
+ info->array.major_version = -1;
+ info->array.minor_version = -2;
+ sprintf(info->text_version, "/%s/%d", st->container_devnm, info->container_member);
+ info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */
+ uuid_from_super_imsm(st, info->uuid);
+
+ if (dmap) {
+ int i, j;
+ /* map_disks is the caller's raid_disks saved before the memset */
+ for (i=0; i<map_disks; i++) {
+ dmap[i] = 0;
+ if (i < info->array.raid_disks) {
+ struct imsm_disk *dsk;
+ j = get_imsm_disk_idx(dev, i, MAP_X);
+ dsk = get_imsm_disk(super, j);
+ if (dsk && (dsk->status & CONFIGURED_DISK))
+ dmap[i] = 1;
+ }
+ }
+ }
+}
+
+static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
+ int failed, int look_in_map);
+
+static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
+ int look_in_map);
+
+#ifndef MDASSEMBLE
+/* While 'dev' is under general migration, recompute the state of its
+ * second map (MAP_1) from the current number of failed disks and, when
+ * it differs from the stored state, store it and flag a pending update.
+ */
+static void manage_second_map(struct intel_super *super, struct imsm_dev *dev)
+{
+	struct imsm_map *map2;
+	__u8 map_state;
+	int failed;
+
+	if (!is_gen_migration(dev))
+		return;
+
+	map2 = get_imsm_map(dev, MAP_1);
+	failed = imsm_count_failed(super, dev, MAP_1);
+	map_state = imsm_check_degraded(super, dev, failed, MAP_1);
+
+	if (map2->map_state == map_state)
+		return;
+
+	map2->map_state = map_state;
+	super->updates_pending++;
+}
+#endif
+
+/* Look up 'index' in the list of missing disks; returns the imsm_disk of
+ * the first match, or NULL when no missing disk has that index.
+ */
+static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
+{
+	struct dl *cur = super->missing;
+
+	while (cur && cur->index != index)
+		cur = cur->next;
+
+	return cur ? &cur->disk : NULL;
+}
+
+/* Fill *info for the imsm container, or delegate to
+ * getinfo_super_imsm_volume() when a volume is selected
+ * (super->current_vol >= 0).
+ *
+ * For the container, *info is cleared and then populated with container
+ * level defaults, info->container_enough and - from the first entry of
+ * super->disks, when there is one - the disk state and reserved area.
+ * When 'map' is not NULL its first info->array.raid_disks entries (value
+ * taken before *info is cleared) are all set to 1.
+ */
+static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
+{
+ struct intel_super *super = st->sb;
+ struct imsm_disk *disk;
+ /* must be captured here: the memset below wipes *info */
+ int map_disks = info->array.raid_disks;
+ int max_enough = -1;
+ int i;
+ struct imsm_super *mpb;
+
+ if (super->current_vol >= 0) {
+ getinfo_super_imsm_volume(st, info, map);
+ return;
+ }
+ memset(info, 0, sizeof(*info));
+
+ /* Set raid_disks to zero so that Assemble will always pull in valid
+ * spares
+ */
+ info->array.raid_disks = 0;
+ info->array.level = LEVEL_CONTAINER;
+ info->array.layout = 0;
+ info->array.md_minor = -1;
+ info->array.ctime = 0; /* N/A for imsm */
+ info->array.utime = 0;
+ info->array.chunk_size = 0;
+
+ info->disk.major = 0;
+ info->disk.minor = 0;
+ info->disk.raid_disk = -1;
+ info->reshape_active = 0;
+ info->array.major_version = -1;
+ info->array.minor_version = -2;
+ strcpy(info->text_version, "imsm");
+ info->safe_mode_delay = 0;
+ info->disk.number = -1;
+ info->disk.state = 0;
+ info->name[0] = 0;
+ info->recovery_start = MaxSector;
+ info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
+
+ /* do we have all the insync disks that we expect? */
+ mpb = super->anchor;
+
+ /* per volume: enough is -1 (failed), 0 (newly degraded) or 1 (normal
+ * or already degraded); the container reports the best value seen
+ */
+ for (i = 0; i < mpb->num_raid_devs; i++) {
+ struct imsm_dev *dev = get_imsm_dev(super, i);
+ int failed, enough, j, missing = 0;
+ /* note: shadows the 'char *map' parameter inside this loop */
+ struct imsm_map *map;
+ __u8 state;
+
+ failed = imsm_count_failed(super, dev, MAP_0);
+ state = imsm_check_degraded(super, dev, failed, MAP_0);
+ map = get_imsm_map(dev, MAP_0);
+
+ /* any newly missing disks?
+ * (catches single-degraded vs double-degraded)
+ */
+ for (j = 0; j < map->num_members; j++) {
+ __u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0);
+ __u32 idx = ord_to_idx(ord);
+
+ /* a missing disk not already flagged IMSM_ORD_REBUILD */
+ if (!(ord & IMSM_ORD_REBUILD) &&
+ get_imsm_missing(super, idx)) {
+ missing = 1;
+ break;
+ }
+ }
+
+ if (state == IMSM_T_STATE_FAILED)
+ enough = -1;
+ else if (state == IMSM_T_STATE_DEGRADED &&
+ (state != map->map_state || missing))
+ enough = 0;
+ else /* we're normal, or already degraded */
+ enough = 1;
+ if (is_gen_migration(dev) && missing) {
+ /* during general migration we need all disks
+ * that process is running on.
+ * No new missing disk is allowed.
+ */
+ max_enough = -1;
+ enough = -1;
+ /* no more checks necessary
+ */
+ break;
+ }
+ /* in the missing/failed disk case check to see
+ * if at least one array is runnable
+ */
+ max_enough = max(max_enough, enough);
+ }
+ dprintf("%s: enough: %d\n", __func__, max_enough);
+ info->container_enough = max_enough;
+
+ if (super->disks) {
+ /* only the first disk in the list is described */
+ __u32 reserved = imsm_reserved_sectors(super, super->disks);
+
+ disk = &super->disks->disk;
+ info->data_offset = total_blocks(&super->disks->disk) - reserved;
+ info->component_size = reserved;
+ info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
+ /* we don't change info->disk.raid_disk here because
+ * this state will be finalized in mdmon after we have
+ * found the 'most fresh' version of the metadata
+ */
+ info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
+ info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
+ }
+
+ /* only call uuid_from_super_imsm when this disk is part of a populated container,
+ * ->compare_super may have updated the 'num_raid_devs' field for spares
+ */
+ if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
+ uuid_from_super_imsm(st, info->uuid);
+ else
+ memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
+
+ /* I don't know how to compute 'map' on imsm, so use safe default */
+ if (map) {
+ /* this 'i' shadows the function-level one */
+ int i;
+ for (i = 0; i < map_disks; i++)
+ map[i] = 1;
+ }
+
+}
+
+/* Allocates an mdinfo and attaches to its ->devs list one mdinfo entry
+ * for every disk in super->disks (major, minor, state and a running
+ * disk.number; raid_disk is always -1).  Entries are pushed at the head,
+ * so the list ends up in reverse order of super->disks.
+ * Returns NULL when there is no super block or it has no disks.
+ */
+struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
+{
+	struct intel_super *super = st->sb;
+	struct mdinfo *mddev;
+	struct dl *dl;
+	int count = 0;
+
+	if (!super || !super->disks)
+		return NULL;
+
+	mddev = xcalloc(1, sizeof(*mddev));
+
+	for (dl = super->disks; dl; dl = dl->next) {
+		struct imsm_disk *disk = &dl->disk;
+		struct mdinfo *entry = xcalloc(1, sizeof(*entry));
+
+		/* push at the head of the list */
+		if (mddev->devs)
+			entry->next = mddev->devs;
+		mddev->devs = entry;
+
+		entry->disk.number = count++;
+		entry->disk.major = dl->major;
+		entry->disk.minor = dl->minor;
+
+		entry->disk.state = 0;
+		if (is_configured(disk))
+			entry->disk.state |= (1 << MD_DISK_ACTIVE);
+		if (is_failed(disk))
+			entry->disk.state |= (1 << MD_DISK_FAULTY);
+		if (!is_spare(disk))
+			entry->disk.state |= (1 << MD_DISK_SYNC);
+
+		entry->disk.raid_disk = -1;
+	}
+
+	return mddev;
+}
+
+static int update_super_imsm(struct supertype *st, struct mdinfo *info,
+			     char *update, char *devname, int verbose,
+			     int uuid_set, char *homehost)
+{
+	/* For 'assemble' and 'force' we need to return non-zero if any
+	 * change was made.  For others, the return value is ignored.
+	 *
+	 * Generic update options are:
+	 *  force-one  : This device looks a bit old but needs to be
+	 *               included, update age info appropriately.
+	 *  assemble   : clear any 'faulty' flag to allow this device to
+	 *               be assembled.
+	 *  force-array: Array is degraded but being forced, mark it clean
+	 *               if that will be needed to assemble it.
+	 *  newdev     : not used ????
+	 *  grow       : Array has gained a new device - this is currently
+	 *               for linear only
+	 *  resync     : mark as dirty so a resync will happen.
+	 *  name       : update the name - preserving the homehost
+	 *  uuid       : Change the uuid of the array to match what is given
+	 *
+	 * Not relevant for imsm:
+	 *  sparc2.2   : update from old dodgey metadata
+	 *  super-minor: change the preferred_minor number
+	 *  summaries  : update redundant counters.
+	 *  homehost   : update the recorded homehost
+	 *  _reshape_progress: record new reshape_progress position.
+	 *
+	 * What this function actually does: only "uuid" and "assemble" are
+	 * accepted, and only on a container.  It returns 1 when there is
+	 * nothing to update (no super block, no anchor, or a volume is
+	 * selected), 0 after a successful update (the checksum has been
+	 * recomputed) and -1 for a rejected uuid or any other update name.
+	 */
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb;
+
+	/* we can only update container info */
+	if (!super || super->current_vol >= 0 || !super->anchor)
+		return 1;
+
+	mpb = super->anchor;
+
+	if (strcmp(update, "uuid") == 0) {
+		/* We take this to mean that the family_num should be
+		 * updated.  That is much smaller than a uuid, so an
+		 * explicit uuid cannot really be honoured, and it is hard
+		 * to know reliably whether one was given.
+		 * If !uuid_set the current uuid is random: use its first
+		 * 'int' and copy it to the other 3 positions.
+		 * Otherwise require the 4 'int's to be the same, as they
+		 * would be for a random uuid.  An explicit uuid is thus
+		 * accepted as long as all four ints are equal... which
+		 * shouldn't hurt.
+		 */
+		if (!uuid_set)
+			info->uuid[1] = info->uuid[2] = info->uuid[3] = info->uuid[0];
+		else if (info->uuid[0] != info->uuid[1] ||
+			 info->uuid[1] != info->uuid[2] ||
+			 info->uuid[2] != info->uuid[3])
+			return -1;
+
+		mpb->orig_family_num = info->uuid[0];
+	} else if (strcmp(update, "assemble") != 0) {
+		return -1;
+	}
+
+	/* successful update: recompute checksum */
+	mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
+
+	return 0;
+}
+
+/* Size in bytes of an mpb laid out for 'disks' disks: the imsm_super
+ * itself plus (disks - 1) further disk entries, two raid devices with
+ * two maps each, and four disk_ord_tbl's.
+ */
+static size_t disks_to_mpb_size(int disks)
+{
+	size_t total = sizeof(struct imsm_super);
+
+	total += (disks - 1) * sizeof(struct imsm_disk);
+	total += 2 * sizeof(struct imsm_dev);
+	/* up to 2 maps per raid device (-2 for the imsm_maps that are
+	 * already part of imsm_dev)
+	 */
+	total += (4 - 2) * sizeof(struct imsm_map);
+	/* 4 possible disk_ord_tbl's */
+	total += 4 * (disks - 1) * sizeof(__u32);
+
+	return total;
+}
+
+/* Space left on a device of 'devsize' once the MPB_SECTOR_CNT and
+ * IMSM_RESERVED_SECTORS areas are taken off; 0 when the device is too
+ * small to hold them.  st and data_offset are not used.
+ */
+static __u64 avail_size_imsm(struct supertype *st, __u64 devsize,
+			     unsigned long long data_offset)
+{
+	const __u64 overhead = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
+
+	return devsize < overhead ? 0 : devsize - overhead;
+}
+
+/* Free every entry of super->devlist together with the imsm_dev it
+ * points to, leaving the list empty.
+ */
+static void free_devlist(struct intel_super *super)
+{
+	struct intel_dev *cur = super->devlist;
+
+	while (cur) {
+		struct intel_dev *next = cur->next;
+
+		free(cur->dev);
+		free(cur);
+		cur = next;
+	}
+	super->devlist = NULL;
+}
+
+/* Copy the raid device 'src' into 'dest'; the number of bytes copied is
+ * sizeof_imsm_dev(src, 0).
+ */
+static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
+{
+	size_t len = sizeof_imsm_dev(src, 0);
+
+	memcpy(dest, src, len);
+}
+
+/* Decide whether the super block loaded in 'tst' may belong to the same
+ * container as the one in 'st'.  When 'st' has no super block yet, it
+ * takes over the one from 'tst'.  When 'st' describes a spare and 'tst'
+ * a populated container, the spare is promoted with copies of tst's raid
+ * devices, signature and family numbers.
+ */
+static int compare_super_imsm(struct supertype *st, struct supertype *tst)
+{
+ /*
+ * return:
+ * 0 same, or first was empty, and second was copied
+ * 1 second had wrong number
+ * 2 wrong uuid
+ * 3 wrong other info
+ *
+ * (only 0 and 3 are actually returned by the code below)
+ */
+ struct intel_super *first = st->sb;
+ struct intel_super *sec = tst->sb;
+
+ if (!first) {
+ /* ownership of the super block moves from tst to st */
+ st->sb = tst->sb;
+ tst->sb = NULL;
+ return 0;
+ }
+ /* in platform dependent environment test if the disks
+ * use the same Intel hba
+ * If not on Intel hba at all, allow anything.
+ */
+ if (!check_env("IMSM_NO_PLATFORM")) {
+ if (first->hba && sec->hba &&
+ strcmp(first->hba->path, sec->hba->path) != 0) {
+ /* both hba pointers are known to be non-NULL here, so
+ * the NULL alternatives below are never taken
+ */
+ fprintf(stderr,
+ "HBAs of devices does not match %s != %s\n",
+ first->hba ? first->hba->path : NULL,
+ sec->hba ? sec->hba->path : NULL);
+ return 3;
+ }
+ }
+
+ /* if an anchor does not have num_raid_devs set then it is a free
+ * floating spare
+ */
+ if (first->anchor->num_raid_devs > 0 &&
+ sec->anchor->num_raid_devs > 0) {
+ /* Determine if these disks might ever have been
+ * related. Further disambiguation can only take place
+ * in load_super_imsm_all
+ */
+ __u32 first_family = first->anchor->orig_family_num;
+ __u32 sec_family = sec->anchor->orig_family_num;
+
+ if (memcmp(first->anchor->sig, sec->anchor->sig,
+ MAX_SIGNATURE_LENGTH) != 0)
+ return 3;
+
+ /* orig_family_num may be unset (0); fall back to family_num */
+ if (first_family == 0)
+ first_family = first->anchor->family_num;
+ if (sec_family == 0)
+ sec_family = sec->anchor->family_num;
+
+ if (first_family != sec_family)
+ return 3;
+
+ }
+
+ /* if 'first' is a spare promote it to a populated mpb with sec's
+ * family number
+ */
+ if (first->anchor->num_raid_devs == 0 &&
+ sec->anchor->num_raid_devs > 0) {
+ int i;
+ struct intel_dev *dv;
+ struct imsm_dev *dev;
+
+ /* we need to copy raid device info from sec if an allocation
+ * fails here we don't associate the spare
+ */
+ for (i = 0; i < sec->anchor->num_raid_devs; i++) {
+ dv = xmalloc(sizeof(*dv));
+ dev = xmalloc(sizeof_imsm_dev(get_imsm_dev(sec, i), 1));
+ dv->dev = dev;
+ dv->index = i;
+ /* pushed at the head of first->devlist */
+ dv->next = first->devlist;
+ first->devlist = dv;
+ }
+ /* NOTE(review): the loop above has no early exit, so it always
+ * ends with i == num_raid_devs and this branch cannot be taken
+ * as written
+ */
+ if (i < sec->anchor->num_raid_devs) {
+ /* allocation failure */
+ free_devlist(first);
+ pr_err("imsm: failed to associate spare\n");
+ return 3;
+ }
+ first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
+ first->anchor->orig_family_num = sec->anchor->orig_family_num;
+ first->anchor->family_num = sec->anchor->family_num;
+ memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
+ /* fill the freshly allocated devices with sec's contents */
+ for (i = 0; i < sec->anchor->num_raid_devs; i++)
+ imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
+ }
+
+ return 0;
+}
+
+/* Derive a "/dev/<name>" string for the block device open on 'fd' by
+ * resolving its /sys/dev/block/<major>:<minor> link and keeping the last
+ * path component.  At most MAX_RAID_SERIAL_LEN bytes, terminator
+ * included, are written to 'name'.  'name' is left as an empty string
+ * when the device cannot be stat'ed, the link cannot be read or the link
+ * target contains no '/'.
+ */
+static void fd2devname(int fd, char *name)
+{
+	struct stat st;
+	char path[256];
+	char dname[PATH_MAX];
+	char *base;
+	int len;
+
+	name[0] = '\0';
+
+	if (fstat(fd, &st) != 0)
+		return;
+
+	sprintf(path, "/sys/dev/block/%d:%d",
+		major(st.st_rdev), minor(st.st_rdev));
+
+	/* readlink() does not terminate the result, so leave room for it */
+	len = readlink(path, dname, sizeof(dname)-1);
+	if (len <= 0)
+		return;
+	dname[len] = '\0';
+
+	base = strrchr(dname, '/');
+	if (!base)
+		return;
+
+	snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", base + 1);
+}
+
+extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
+
+/* Read the serial number of the device open on 'fd' into 'serial'
+ * (MAX_RAID_SERIAL_LEN bytes, zero padded, not necessarily terminated).
+ *
+ * The serial comes from scsi_get_serial(); whitespace and control
+ * characters are dropped, ':' is replaced by ';' and, when the result is
+ * too long, only its last MAX_RAID_SERIAL_LEN characters are kept.
+ * If the query fails and IMSM_DEVNAME_AS_SERIAL is set in the
+ * environment, the device name from fd2devname() is used instead.
+ *
+ * 'devname' is only used for error messages and may be NULL.
+ *
+ * Returns 0 on success, the non-zero result of scsi_get_serial() when the
+ * query failed, or 2 when the device reported an empty serial.
+ */
+static int imsm_read_serial(int fd, char *devname,
+			    __u8 serial[MAX_RAID_SERIAL_LEN])
+{
+	unsigned char scsi_serial[255];
+	int rv;
+	int rsp_len;
+	int len;
+	char *dest;
+	char *src;
+	char *rsp_buf;
+	int i;
+
+	memset(scsi_serial, 0, sizeof(scsi_serial));
+
+	rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
+
+	if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
+		memset(serial, 0, MAX_RAID_SERIAL_LEN);
+		fd2devname(fd, (char *) serial);
+		return 0;
+	}
+
+	if (rv != 0) {
+		if (devname)
+			pr_err("Failed to retrieve serial for %s\n",
+			       devname);
+		return rv;
+	}
+
+	/* byte 3 of the response is the length of the payload that
+	 * starts at byte 4
+	 */
+	rsp_len = scsi_serial[3];
+	if (!rsp_len) {
+		if (devname)
+			pr_err("Failed to retrieve serial for %s\n",
+			       devname);
+		return 2;
+	}
+	rsp_buf = (char *) &scsi_serial[4];
+
+	/* the length byte comes from the device and may claim up to 255
+	 * bytes, but only sizeof(scsi_serial) - 4 payload bytes fit in the
+	 * buffer: never scan past its end
+	 */
+	if (rsp_len > (int) sizeof(scsi_serial) - 4)
+		rsp_len = sizeof(scsi_serial) - 4;
+
+	/* trim all whitespace and non-printable characters and convert
+	 * ':' to ';'  (compaction is done in place: dest never runs
+	 * ahead of src)
+	 */
+	for (i = 0, dest = rsp_buf; i < rsp_len; i++) {
+		src = &rsp_buf[i];
+		if (*src > 0x20) {
+			/* ':' is reserved for use in placeholder serial
+			 * numbers for missing disks
+			 */
+			if (*src == ':')
+				*dest++ = ';';
+			else
+				*dest++ = *src;
+		}
+	}
+	len = dest - rsp_buf;
+	dest = rsp_buf;
+
+	/* truncate leading characters */
+	if (len > MAX_RAID_SERIAL_LEN) {
+		dest += len - MAX_RAID_SERIAL_LEN;
+		len = MAX_RAID_SERIAL_LEN;
+	}
+
+	memset(serial, 0, MAX_RAID_SERIAL_LEN);
+	memcpy(serial, dest, len);
+
+	return 0;
+}
+
+ /* compare two fixed-width serial fields; only the first
+  * MAX_RAID_SERIAL_LEN bytes take part, with strncmp() semantics
+  */
+ static int serialcmp(__u8 *s1, __u8 *s2)
+ {
+ 	const char *lhs = (const char *) s1;
+ 	const char *rhs = (const char *) s2;
+
+ 	return strncmp(lhs, rhs, MAX_RAID_SERIAL_LEN);
+ }
+
+ /* copy a fixed-width serial field; strncpy() zero pads short serials
+  * and leaves a full-width serial without a terminating NUL
+  */
+ static void serialcpy(__u8 *dest, __u8 *src)
+ {
+ 	char *to = (char *) dest;
+ 	const char *from = (const char *) src;
+
+ 	strncpy(to, from, MAX_RAID_SERIAL_LEN);
+ }
+
+ /* return the entry of super->disks whose serial matches @serial,
+  * or NULL when there is none
+  */
+ static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
+ {
+ 	struct dl *cur = super->disks;
+
+ 	while (cur && serialcmp(cur->serial, serial) != 0)
+ 		cur = cur->next;
+
+ 	return cur;
+ }
+
+ /* look up @serial in the disk table of @mpb
+  *
+  * Returns the matching record, storing its position in *@idx when
+  * @idx is not NULL, or NULL (leaving *@idx untouched) when no disk
+  * carries that serial.
+  */
+ static struct imsm_disk *
+ __serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
+ {
+ 	struct imsm_disk *candidate;
+ 	int pos = 0;
+
+ 	while (pos < mpb->num_disks) {
+ 		candidate = __get_imsm_disk(mpb, pos);
+ 		if (serialcmp(candidate->serial, serial) != 0) {
+ 			pos++;
+ 			continue;
+ 		}
+
+ 		if (idx)
+ 			*idx = pos;
+ 		return candidate;
+ 	}
+
+ 	return NULL;
+ }
+
+ /* load_imsm_disk - create the 'struct dl' describing the disk behind @fd
+  * @fd:      open descriptor of the disk
+  * @super:   super to attach the disk to; must not have any disks yet
+  * @devname: user supplied name, may be NULL (the sysfs derived name
+  *           is recorded instead)
+  * @keep_fd: when non-zero @fd is stored in the new entry
+  *
+  * Returns 0 on success, 2 when the serial number or the device numbers
+  * cannot be read.
+  */
+ static int
+ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
+ {
+ 	struct imsm_disk *disk;
+ 	struct dl *dl;
+ 	struct stat stb;
+ 	int rv;
+ 	char name[40];
+ 	__u8 serial[MAX_RAID_SERIAL_LEN];
+
+ 	rv = imsm_read_serial(fd, devname, serial);
+
+ 	if (rv != 0)
+ 		return 2;
+
+ 	/* do not record garbage device numbers from an unfilled stat buffer */
+ 	if (fstat(fd, &stb) != 0) {
+ 		if (devname)
+ 			pr_err("Cannot stat %s: %s\n",
+ 			       devname, strerror(errno));
+ 		return 2;
+ 	}
+
+ 	dl = xcalloc(1, sizeof(*dl));
+
+ 	dl->major = major(stb.st_rdev);
+ 	dl->minor = minor(stb.st_rdev);
+ 	dl->next = super->disks;
+ 	dl->fd = keep_fd ? fd : -1;
+ 	assert(super->disks == NULL);
+ 	super->disks = dl;
+ 	serialcpy(dl->serial, serial);
+ 	/* -2 until proven to be a member or a spare below */
+ 	dl->index = -2;
+ 	dl->e = NULL;
+ 	fd2devname(fd, name);
+ 	if (devname)
+ 		dl->devname = xstrdup(devname);
+ 	else
+ 		dl->devname = xstrdup(name);
+
+ 	/* look up this disk's index in the current anchor */
+ 	disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
+ 	if (disk) {
+ 		dl->disk = *disk;
+ 		/* only set index on disks that are a member of a
+ 		 * populated container, i.e. one with raid_devs
+ 		 */
+ 		if (is_failed(&dl->disk))
+ 			dl->index = -2;
+ 		else if (is_spare(&dl->disk))
+ 			dl->index = -1;
+ 	}
+
+ 	return 0;
+ }
+
+#ifndef MDASSEMBLE
+ /* When migrating map0 contains the 'destination' state while map1
+ * contains the current state. When not migrating map0 contains the
+ * current state. This routine assumes that map[0].map_state is set to
+ * the current array state before being called.
+ *
+ * Migration is indicated by one of the following states
+ * 1/ Idle (migr_state=0 map0state=normal||uninitialized||degraded||failed)
+ * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
+ * map1state=uninitialized)
+ * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
+ * map1state=normal)
+ * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
+ * map1state=degraded)
+ * 5/ Migration (migr_state=1 migr_type=MIGR_GEN_MIGR map0state=normal
+ * map1state=normal)
+ *
+ * Parameters:
+ * dev : volume to put into the migrating state
+ * super : only used to clear super->migr_rec for MIGR_GEN_MIGR
+ * to_state : map_state stored in map0 once the copy has been made
+ * migr_type : one of the MIGR_* values listed above
+ */
+ static void migrate(struct imsm_dev *dev, struct intel_super *super,
+ __u8 to_state, int migr_type)
+ {
+ struct imsm_map *dest;
+ struct imsm_map *src = get_imsm_map(dev, MAP_0);
+
+ dev->vol.migr_state = 1;
+ set_migr_type(dev, migr_type);
+ dev->vol.curr_migr_unit = 0;
+ /* NOTE(review): map 1 is fetched only after migr_state has been set;
+ * presumably get_imsm_map() depends on it - keep this ordering
+ */
+ dest = get_imsm_map(dev, MAP_1);
+
+ /* duplicate and then set the target end state in map[0] */
+ memcpy(dest, src, sizeof_imsm_map(src));
+ if ((migr_type == MIGR_REBUILD) ||
+ (migr_type == MIGR_GEN_MIGR)) {
+ __u32 ord;
+ int i;
+
+ /* rewrite every map0 entry with ord_to_idx() of itself;
+ * presumably this drops the flag bits and keeps the disk index
+ */
+ for (i = 0; i < src->num_members; i++) {
+ ord = __le32_to_cpu(src->disk_ord_tbl[i]);
+ set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord));
+ }
+ }
+
+ if (migr_type == MIGR_GEN_MIGR)
+ /* Clear migration record */
+ memset(super->migr_rec, 0, sizeof(struct migr_record));
+
+ src->map_state = to_state;
+ }
+
+ /* end_migration - leave the migrating state and record the final map state
+ * dev : volume whose migration is finished
+ * super : passed to imsm_count_failed()/imsm_check_degraded()
+ * map_state : state the caller expects map0 to end up in; it is
+ * recomputed when the maps have to be merged (see below)
+ */
+ static void end_migration(struct imsm_dev *dev, struct intel_super *super,
+ __u8 map_state)
+ {
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ /* 'prev' is map0 itself when no migration is in progress */
+ struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state == 0 ?
+ MAP_0 : MAP_1);
+ int i, j;
+
+ /* merge any IMSM_ORD_REBUILD bits that were not successfully
+ * completed in the last migration.
+ *
+ * FIXME add support for raid-level-migration
+ */
+ if ((map_state != map->map_state) && (is_gen_migration(dev) == 0) &&
+ (prev->map_state != IMSM_T_STATE_UNINITIALIZED)) {
+ /* when final map state is other than expected
+ * merge maps (not for migration)
+ */
+ int failed;
+
+ for (i = 0; i < prev->num_members; i++)
+ for (j = 0; j < map->num_members; j++)
+ /* during online capacity expansion
+ * disks position can be changed
+ * if takeover is used
+ */
+ if (ord_to_idx(map->disk_ord_tbl[j]) ==
+ ord_to_idx(prev->disk_ord_tbl[i])) {
+ map->disk_ord_tbl[j] |=
+ prev->disk_ord_tbl[i];
+ break;
+ }
+ /* the merged table decides the real end state */
+ failed = imsm_count_failed(super, dev, MAP_0);
+ map_state = imsm_check_degraded(super, dev, failed, MAP_0);
+ }
+
+ dev->vol.migr_state = 0;
+ set_migr_type(dev, 0);
+ dev->vol.curr_migr_unit = 0;
+ map->map_state = map_state;
+ }
+#endif
+
+ /* parse_raid_devices - build super->devlist from the loaded anchor
+  *
+  * Each raid device found in the anchor is copied into a private
+  * buffer and pushed onto the front of super->devlist.  While doing so
+  * the extra room needed should every device gain a second map is
+  * accumulated; super->buf is re-allocated (512 byte aligned, zero
+  * filled tail) when the current buffer is too small for that.
+  *
+  * Returns 0 on success, 1 when the larger buffer cannot be allocated.
+  */
+ static int parse_raid_devices(struct intel_super *super)
+ {
+ 	struct imsm_super *mpb = super->anchor;
+ 	size_t largest = 0;
+ 	size_t extra = 0;
+ 	size_t wanted;
+ 	size_t new_len;
+ 	void *bigger;
+ 	int idx;
+
+ 	for (idx = 0; idx < mpb->num_raid_devs; idx++) {
+ 		struct imsm_dev *on_disk = __get_imsm_dev(mpb, idx);
+ 		size_t plain = sizeof_imsm_dev(on_disk, 0);
+ 		size_t migrating = sizeof_imsm_dev(on_disk, 1);
+ 		struct intel_dev *node;
+ 		struct imsm_dev *copy;
+
+ 		if (migrating > plain)
+ 			extra += migrating - plain;
+
+ 		node = xmalloc(sizeof(*node));
+ 		if (largest < migrating)
+ 			largest = migrating;
+ 		if (largest > migrating)
+ 			extra += largest - migrating;
+ 		copy = xmalloc(largest);
+ 		imsm_copy_dev(copy, on_disk);
+
+ 		node->dev = copy;
+ 		node->index = idx;
+ 		node->next = super->devlist;
+ 		super->devlist = node;
+ 	}
+
+ 	/* ensure that super->buf is large enough when all raid devices
+ 	 * are migrating
+ 	 */
+ 	wanted = __le32_to_cpu(mpb->mpb_size) + extra;
+ 	if (wanted <= super->len)
+ 		return 0;
+
+ 	new_len = ROUND_UP(wanted, 512);
+ 	if (posix_memalign(&bigger, 512, new_len) != 0)
+ 		return 1;
+
+ 	memcpy(bigger, super->buf, super->len);
+ 	memset((char *) bigger + super->len, 0, new_len - super->len);
+ 	free(super->buf);
+ 	super->buf = bigger;
+ 	super->len = new_len;
+
+ 	return 0;
+ }
+
+ /* retrieve a pointer to the bbm log which starts after all raid devices
+  *
+  * The log occupies the last bbm_log_size bytes of the mpb.  Both size
+  * fields are stored little-endian on disk and are converted before
+  * use.  Returns NULL when there is no log or when the recorded log
+  * size does not fit inside the mpb.
+  */
+ struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
+ {
+ 	__u32 log_size = __le32_to_cpu(mpb->bbm_log_size);
+ 	__u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
+
+ 	if (!log_size || log_size > mpb_size)
+ 		return NULL;
+
+ 	return (struct bbm_log *) ((char *) mpb + (mpb_size - log_size));
+ }
+
+/*******************************************************************************
+ * Function: check_mpb_migr_compatibility
+ * Description: Function checks for unsupported migration features:
+ * - migration optimization area (pba_of_lba0)
+ * - descending reshape (ascending_migr)
+ * Parameters:
+ * super : imsm metadata information
+ * Returns:
+ * 0 : migration is compatible
+ * -1 : migration is not compatible
+ ******************************************************************************/
+int check_mpb_migr_compatibility(struct intel_super *super)
+{
+ struct imsm_map *map0, *map1;
+ struct migr_record *migr_rec = super->migr_rec;
+ int i;
+
+ for (i = 0; i < super->anchor->num_raid_devs; i++) {
+ struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
+
+ if (dev_iter &&
+ dev_iter->vol.migr_state == 1 &&
+ dev_iter->vol.migr_type == MIGR_GEN_MIGR) {
+ /* This device is migrating */
+ map0 = get_imsm_map(dev_iter, MAP_0);
+ map1 = get_imsm_map(dev_iter, MAP_1);
+ if (pba_of_lba0(map0) != pba_of_lba0(map1))
+ /* migration optimization area was used */
+ return -1;
+ if (migr_rec->ascending_migr == 0
+ && migr_rec->dest_depth_per_unit > 0)
+ /* descending reshape not supported yet */
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static void __free_imsm(struct intel_super *super, int free_disks);
+
+ /* load_imsm_mpb - read matrix metadata
+  * @fd:      open descriptor of a member disk
+  * @super:   super to fill; everything but its disk lists is released
+  *           first via __free_imsm(super, 0)
+  * @devname: name used in error messages, NULL keeps most of them quiet
+  *
+  * Allocates super->buf and super->migr_rec_buf, both to be freed by
+  * free_imsm.  The anchor is read from the second to last 512 byte
+  * sector of the device, the remainder of the mpb from the sectors just
+  * before it.  Pointers that have been freed here are reset to NULL so
+  * that a later free_imsm() cannot free them a second time.
+  *
+  * Returns:
+  *	0 : mpb loaded and checksum verified
+  *	1 : device too small, seek failure, or anchor allocation/read failure
+  *	2 : no IMSM signature, implausible mpb size, buffer allocation
+  *	    failure, short read of the extended mpb, or checksum mismatch
+  *	    of a single-sector mpb
+  *	3 : checksum mismatch of a multi-sector mpb (callers retry on this
+  *	    value in case the read raced against mdmon)
+  */
+ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
+ {
+ 	/* stays 0, and is rejected below, if the size cannot be obtained */
+ 	unsigned long long dsize = 0;
+ 	unsigned long long sectors;
+ 	struct imsm_super *anchor;
+ 	size_t ext_len;
+ 	__u32 check_sum;
+
+ 	get_dev_size(fd, NULL, &dsize);
+ 	if (dsize < 1024) {
+ 		if (devname)
+ 			pr_err("%s: device to small for imsm\n",
+ 			       devname);
+ 		return 1;
+ 	}
+
+ 	if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
+ 		if (devname)
+ 			pr_err("Cannot seek to anchor block on %s: %s\n",
+ 			       devname, strerror(errno));
+ 		return 1;
+ 	}
+
+ 	if (posix_memalign((void**)&anchor, 512, 512) != 0) {
+ 		if (devname)
+ 			pr_err("Failed to allocate imsm anchor buffer"
+ 			       " on %s\n", devname);
+ 		return 1;
+ 	}
+ 	if (read(fd, anchor, 512) != 512) {
+ 		if (devname)
+ 			pr_err("Cannot read anchor block on %s: %s\n",
+ 			       devname, strerror(errno));
+ 		free(anchor);
+ 		return 1;
+ 	}
+
+ 	if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
+ 		if (devname)
+ 			pr_err("no IMSM anchor on %s\n", devname);
+ 		free(anchor);
+ 		return 2;
+ 	}
+
+ 	__free_imsm(super, 0);
+ 	/* reload capability and hba */
+
+ 	/* capability and hba must be updated with new super allocation */
+ 	find_intel_hba_capability(fd, super, devname);
+ 	/* mpb_size is little-endian on disk */
+ 	super->len = ROUND_UP(__le32_to_cpu(anchor->mpb_size), 512);
+ 	if (super->len < 512) {
+ 		/* zero or wrapped size: the anchor sector alone would
+ 		 * not fit into the buffer
+ 		 */
+ 		if (devname)
+ 			pr_err("invalid mpb size on %s\n", devname);
+ 		free(anchor);
+ 		return 2;
+ 	}
+ 	if (posix_memalign(&super->buf, 512, super->len) != 0) {
+ 		if (devname)
+ 			pr_err("unable to allocate %zu byte mpb buffer\n",
+ 			       super->len);
+ 		super->buf = NULL;
+ 		free(anchor);
+ 		return 2;
+ 	}
+ 	memcpy(super->buf, anchor, 512);
+
+ 	sectors = mpb_sectors(anchor) - 1;
+ 	free(anchor);
+
+ 	if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) {
+ 		pr_err("%s could not allocate migr_rec buffer\n", __func__);
+ 		free(super->buf);
+ 		/* callers release 'super' with free_imsm() on failure */
+ 		super->buf = NULL;
+ 		super->migr_rec_buf = NULL;
+ 		return 2;
+ 	}
+ 	super->clean_migration_record_by_mdmon = 0;
+
+ 	if (!sectors) {
+ 		/* the whole mpb fits in the anchor sector */
+ 		check_sum = __gen_imsm_checksum(super->anchor);
+ 		if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
+ 			if (devname)
+ 				pr_err("IMSM checksum %x != %x on %s\n",
+ 				       check_sum,
+ 				       __le32_to_cpu(super->anchor->check_sum),
+ 				       devname);
+ 			return 2;
+ 		}
+
+ 		return 0;
+ 	}
+
+ 	/* read the extended mpb */
+ 	if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
+ 		if (devname)
+ 			pr_err("Cannot seek to extended mpb on %s: %s\n",
+ 			       devname, strerror(errno));
+ 		return 1;
+ 	}
+
+ 	ext_len = super->len - 512;
+ 	if (read(fd, super->buf + 512, ext_len) != (ssize_t) ext_len) {
+ 		if (devname)
+ 			pr_err("Cannot read extended mpb on %s: %s\n",
+ 			       devname, strerror(errno));
+ 		return 2;
+ 	}
+
+ 	check_sum = __gen_imsm_checksum(super->anchor);
+ 	if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
+ 		if (devname)
+ 			pr_err("IMSM checksum %x != %x on %s\n",
+ 			       check_sum, __le32_to_cpu(super->anchor->check_sum),
+ 			       devname);
+ 		return 3;
+ 	}
+
+ 	/* FIXME the BBM log is disk specific so we cannot use this global
+ 	 * buffer for all disks. Ok for now since we only look at the global
+ 	 * bbm_log_size parameter to gate assembly
+ 	 */
+ 	super->bbm_log = __get_imsm_bbm_log(super->anchor);
+
+ 	return 0;
+ }
+
+static int read_imsm_migr_rec(int fd, struct intel_super *super);
+
+ /* clear_hi - zero the upper halves of the 64-bit size fields
+  *
+  * Nothing is touched when MPB_ATTRIB_2TB_DISK is set in the anchor.
+  * Otherwise total_blocks_hi of every disk and the *_hi members of
+  * both maps of every volume are reset.  Processing stops at the first
+  * volume that cannot be looked up.
+  */
+ static void clear_hi(struct intel_super *super)
+ {
+ 	struct imsm_super *mpb = super->anchor;
+ 	int d, v, m;
+
+ 	if (mpb->attributes & MPB_ATTRIB_2TB_DISK)
+ 		return;
+
+ 	for (d = 0; d < mpb->num_disks; ++d)
+ 		mpb->disk[d].total_blocks_hi = 0;
+
+ 	for (v = 0; v < mpb->num_raid_devs; ++v) {
+ 		struct imsm_dev *vol = get_imsm_dev(super, v);
+
+ 		if (!vol)
+ 			return;
+
+ 		for (m = 0; m < 2; ++m) {
+ 			struct imsm_map *map = get_imsm_map(vol, m);
+
+ 			if (map) {
+ 				map->pba_of_lba0_hi = 0;
+ 				map->blocks_per_member_hi = 0;
+ 				map->num_data_stripes_hi = 0;
+ 			}
+ 		}
+ 	}
+ }
+
+ /* load_and_parse_mpb - load the mpb, the disk record and the volume list
+  *
+  * Runs load_imsm_mpb(), load_imsm_disk() and parse_raid_devices() in
+  * that order and returns the first non-zero result of the first two.
+  * clear_hi() is applied once parsing has been attempted, whatever
+  * parse_raid_devices() returned.
+  */
+ static int
+ load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
+ {
+ 	int rc = load_imsm_mpb(fd, super, devname);
+
+ 	if (rc == 0)
+ 		rc = load_imsm_disk(fd, super, devname, keep_fd);
+ 	if (rc != 0)
+ 		return rc;
+
+ 	rc = parse_raid_devices(super);
+ 	clear_hi(super);
+
+ 	return rc;
+ }
+
+ /* release one disk entry: close its descriptor when one is held and
+  * free the name, the 'e' member and the entry itself
+  */
+ static void __free_imsm_disk(struct dl *d)
+ {
+ 	if (d->fd >= 0)
+ 		close(d->fd);
+
+ 	/* free(NULL) is a no-op, no need to test the members first */
+ 	free(d->devname);
+ 	free(d->e);
+ 	free(d);
+ }
+
+ /* empty the three disk lists hanging off @super (disks,
+  * disk_mgmt_list and missing, in that order), releasing every entry
+  */
+ static void free_imsm_disks(struct intel_super *super)
+ {
+ 	struct dl **heads[] = {
+ 		&super->disks,
+ 		&super->disk_mgmt_list,
+ 		&super->missing,
+ 	};
+ 	size_t h;
+
+ 	for (h = 0; h < sizeof(heads) / sizeof(heads[0]); h++) {
+ 		struct dl **head = heads[h];
+ 		struct dl *victim;
+
+ 		while ((victim = *head) != NULL) {
+ 			*head = victim->next;
+ 			__free_imsm_disk(victim);
+ 		}
+ 	}
+ }
+
+ /* free all the pieces hanging off of a super pointer
+  *
+  * The metadata buffer, the migration record buffer, the device list
+  * and the hba list are always released; the disk lists only when
+  * @free_disks is non-zero.  The super itself is not freed.
+  */
+ static void __free_imsm(struct intel_super *super, int free_disks)
+ {
+ 	struct intel_hba *hba;
+
+ 	free(super->buf);
+ 	super->buf = NULL;
+
+ 	/* unlink capability description */
+ 	super->orom = NULL;
+
+ 	free(super->migr_rec_buf);
+ 	super->migr_rec_buf = NULL;
+
+ 	if (free_disks)
+ 		free_imsm_disks(super);
+ 	free_devlist(super);
+
+ 	hba = super->hba;
+ 	while (hba) {
+ 		struct intel_hba *following = hba->next;
+
+ 		free((void *)hba->path);
+ 		free(hba);
+ 		hba = following;
+ 	}
+ 	super->hba = NULL;
+ }
+
+ /* release @super together with everything it owns, disk lists included */
+ static void free_imsm(struct intel_super *super)
+ {
+ 	const int with_disks = 1;
+
+ 	__free_imsm(super, with_disks);
+ 	free(super);
+ }
+
+ /* drop the imsm super attached to @st, if there is one */
+ static void free_super_imsm(struct supertype *st)
+ {
+ 	struct intel_super *sb = st->sb;
+
+ 	if (sb) {
+ 		free_imsm(sb);
+ 		st->sb = NULL;
+ 	}
+ }
+
+ /* allocate a zeroed super with no current volume (-1) and
+  * create_offset set to all ones
+  */
+ static struct intel_super *alloc_super(void)
+ {
+ 	struct intel_super *fresh = xcalloc(1, sizeof(*fresh));
+
+ 	fresh->create_offset = ~0ULL;
+ 	fresh->current_vol = -1;
+
+ 	return fresh;
+ }
+
+/*
+ * find and allocate hba and OROM/EFI based on valid fd of RAID component device
+ */
+static int find_intel_hba_capability(int fd, struct intel_super *super, char *devname)
+{
+ struct sys_dev *hba_name;
+ int rv = 0;
+
+ if ((fd < 0) || check_env("IMSM_NO_PLATFORM")) {
+ super->orom = NULL;
+ super->hba = NULL;
+ return 0;
+ }
+ hba_name = find_disk_attached_hba(fd, NULL);
+ if (!hba_name) {
+ if (devname)
+ pr_err("%s is not attached to Intel(R) RAID controller.\n",
+ devname);
+ return 1;
+ }
+ rv = attach_hba_to_super(super, hba_name);
+ if (rv == 2) {
+ if (devname) {
+ struct intel_hba *hba = super->hba;
+
+ pr_err("%s is attached to Intel(R) %s RAID "
+ "controller (%s),\n"
+ " but the container is assigned to Intel(R) "
+ "%s RAID controller (",
+ devname,
+ hba_name->path,
+ hba_name->pci_id ? : "Err!",
+ get_sys_dev_type(hba_name->type));
+
+ while (hba) {
+ fprintf(stderr, "%s", hba->pci_id ? : "Err!");
+ if (hba->next)
+ fprintf(stderr, ", ");
+ hba = hba->next;
+ }
+
+ fprintf(stderr, ").\n");
+ cont_err("Mixing devices attached to multiple controllers "
+ "is not allowed.\n");
+ }
+ return 2;
+ }
+ super->orom = find_imsm_capability(hba_name->type);
+ if (!super->orom)
+ return 3;
+ return 0;
+}
+
+ /* find_missing - helper routine for load_super_imsm_all that identifies
+  * disks that have disappeared from the system. This routine relies on
+  * the mpb being uptodate, which it is at load time.
+  *
+  * Every disk recorded in the anchor that has no entry in super->disks
+  * gets a placeholder named "missing" (no device numbers, fd -1) pushed
+  * onto super->missing.  Always returns 0.
+  */
+ static int find_missing(struct intel_super *super)
+ {
+ 	int i;
+ 	struct imsm_super *mpb = super->anchor;
+ 	struct dl *dl;
+ 	struct imsm_disk *disk;
+
+ 	for (i = 0; i < mpb->num_disks; i++) {
+ 		disk = __get_imsm_disk(mpb, i);
+ 		dl = serial_to_dl(disk->serial, super);
+ 		if (dl)
+ 			continue;
+
+ 		/* zeroed like the entries made by load_imsm_disk() so
+ 		 * that no member is left uninitialised
+ 		 */
+ 		dl = xcalloc(1, sizeof(*dl));
+ 		dl->major = 0;
+ 		dl->minor = 0;
+ 		dl->fd = -1;
+ 		dl->devname = xstrdup("missing");
+ 		dl->index = i;
+ 		serialcpy(dl->serial, disk->serial);
+ 		dl->disk = *disk;
+ 		dl->e = NULL;
+ 		dl->next = super->missing;
+ 		super->missing = dl;
+ 	}
+
+ 	return 0;
+ }
+
+#ifndef MDASSEMBLE
+ /* find the entry of @disk_list carrying @serial, NULL when absent */
+ static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
+ {
+ 	struct intel_disk *cur;
+
+ 	for (cur = disk_list; cur; cur = cur->next) {
+ 		if (serialcmp(cur->disk.serial, serial) == 0)
+ 			return cur;
+ 	}
+
+ 	return NULL;
+ }
+
+ /* __prep_thunderdome - enter one loaded super into the candidate table
+ * table / tbl_size : candidates collected so far, at most one per family_num
+ * super : the super to enter; super->disks is the disk it was read from
+ * disk_list : merged list of the disk records seen in all accepted mpbs
+ *
+ * Returns the new number of table entries. 'super' is appended when its
+ * family is not in the table yet, replaces the entry of its family when it
+ * has the same checksum or is judged the better candidate, and is left out
+ * (table and size unchanged) when the existing entry is preferred.
+ */
+ static int __prep_thunderdome(struct intel_super **table, int tbl_size,
+ struct intel_super *super,
+ struct intel_disk **disk_list)
+ {
+ struct imsm_disk *d = &super->disks->disk;
+ struct imsm_super *mpb = super->anchor;
+ int i, j;
+
+ for (i = 0; i < tbl_size; i++) {
+ struct imsm_super *tbl_mpb = table[i]->anchor;
+ struct imsm_disk *tbl_d = &table[i]->disks->disk;
+
+ if (tbl_mpb->family_num == mpb->family_num) {
+ if (tbl_mpb->check_sum == mpb->check_sum) {
+ dprintf("%s: mpb from %d:%d matches %d:%d\n",
+ __func__, super->disks->major,
+ super->disks->minor,
+ table[i]->disks->major,
+ table[i]->disks->minor);
+ break;
+ }
+
+ if (((is_configured(d) && !is_configured(tbl_d)) ||
+ is_configured(d) == is_configured(tbl_d)) &&
+ tbl_mpb->generation_num < mpb->generation_num) {
+ /* current version of the mpb is a
+ * better candidate than the one in
+ * super_table, but copy over "cross
+ * generational" status
+ */
+ struct intel_disk *idisk;
+
+ dprintf("%s: mpb from %d:%d replaces %d:%d\n",
+ __func__, super->disks->major,
+ super->disks->minor,
+ table[i]->disks->major,
+ table[i]->disks->minor);
+
+ idisk = disk_list_get(tbl_d->serial, *disk_list);
+ if (idisk && is_failed(&idisk->disk))
+ tbl_d->status |= FAILED_DISK;
+ break;
+ } else {
+ struct intel_disk *idisk;
+ struct imsm_disk *disk;
+
+ /* tbl_mpb is more up to date, but copy
+ * over cross generational status before
+ * returning
+ */
+ disk = __serial_to_disk(d->serial, mpb, NULL);
+ if (disk && is_failed(disk))
+ d->status |= FAILED_DISK;
+
+ idisk = disk_list_get(d->serial, *disk_list);
+ if (idisk) {
+ idisk->owner = i;
+ if (disk && is_configured(disk))
+ idisk->disk.status |= CONFIGURED_DISK;
+ }
+
+ dprintf("%s: mpb from %d:%d prefer %d:%d\n",
+ __func__, super->disks->major,
+ super->disks->minor,
+ table[i]->disks->major,
+ table[i]->disks->minor);
+
+ return tbl_size;
+ }
+ }
+ }
+
+ /* i == tbl_size: family not seen before, append; otherwise the loop
+ * stopped at the slot that 'super' takes over
+ */
+ if (i >= tbl_size)
+ table[tbl_size++] = super;
+ else
+ table[i] = super;
+
+ /* update/extend the merged list of imsm_disk records */
+ for (j = 0; j < mpb->num_disks; j++) {
+ struct imsm_disk *disk = __get_imsm_disk(mpb, j);
+ struct intel_disk *idisk;
+
+ idisk = disk_list_get(disk->serial, *disk_list);
+ if (idisk) {
+ idisk->disk.status |= disk->status;
+ if (is_configured(&idisk->disk) ||
+ is_failed(&idisk->disk))
+ idisk->disk.status &= ~(SPARE_DISK);
+ } else {
+ idisk = xcalloc(1, sizeof(*idisk));
+ idisk->owner = IMSM_UNKNOWN_OWNER;
+ idisk->disk = *disk;
+ idisk->next = *disk_list;
+ *disk_list = idisk;
+ }
+
+ /* the disk this mpb was read from is owned by table slot i */
+ if (serialcmp(idisk->disk.serial, d->serial) == 0)
+ idisk->owner = i;
+ }
+
+ return tbl_size;
+ }
+
+ /* validate_members - check that @super may claim all disks of its mpb
+  *
+  * Each disk of the mpb has to be present in @disk_list and be owned
+  * either by @owner or by nobody yet (IMSM_UNKNOWN_OWNER).  The scan
+  * stops at the first disk that is not in the list at all.
+  *
+  * Returns @super when every disk passed, NULL otherwise.
+  */
+ static struct intel_super *
+ validate_members(struct intel_super *super, struct intel_disk *disk_list,
+ 		 const int owner)
+ {
+ 	struct imsm_super *mpb = super->anchor;
+ 	int accepted = 0;
+ 	int pos;
+
+ 	for (pos = 0; pos < mpb->num_disks; pos++) {
+ 		struct imsm_disk *disk = __get_imsm_disk(mpb, pos);
+ 		struct intel_disk *known = disk_list_get(disk->serial, disk_list);
+
+ 		if (!known) {
+ 			dprintf("%s: unknown disk %x [%d]: %.16s\n",
+ 				__func__, __le32_to_cpu(mpb->family_num), pos,
+ 				disk->serial);
+ 			break;
+ 		}
+
+ 		if (known->owner == owner ||
+ 		    known->owner == IMSM_UNKNOWN_OWNER) {
+ 			accepted++;
+ 		} else {
+ 			dprintf("%s: '%.16s' owner %d != %d\n",
+ 				__func__, disk->serial, known->owner,
+ 				owner);
+ 		}
+ 	}
+
+ 	return accepted == mpb->num_disks ? super : NULL;
+ }
+
+ /* report every super of @super_list that belongs to @family_num as
+  * being taken offline
+  */
+ static void show_conflicts(__u32 family_num, struct intel_super *super_list)
+ {
+ 	struct intel_super *cand = super_list;
+
+ 	while (cand) {
+ 		if (cand->anchor->family_num == family_num) {
+ 			pr_err("Conflict, offlining family %#x on '%s'\n",
+ 			       __le32_to_cpu(family_num), cand->disks->devname);
+ 		}
+ 		cand = cand->next;
+ 	}
+ }
+
+ /* imsm_thunderdome - pick the one super ("champion") to assemble from
+ * super_list : in/out list of all loaded supers; the champion is unlinked
+ * from it, the caller keeps ownership of what remains
+ * len : number of entries the candidate table must be able to hold
+ *
+ * Returns the champion with the disks of the other supers that belong to
+ * it moved onto its disk list, or NULL when no candidate survived.
+ */
+ static struct intel_super *
+ imsm_thunderdome(struct intel_super **super_list, int len)
+ {
+ struct intel_super *super_table[len];
+ struct intel_disk *disk_list = NULL;
+ struct intel_super *champion, *spare;
+ struct intel_super *s, **del;
+ int tbl_size = 0;
+ int conflict;
+ int i;
+
+ /* pass 1: reduce the list to one candidate per family and build the
+ * merged disk list
+ */
+ memset(super_table, 0, sizeof(super_table));
+ for (s = *super_list; s; s = s->next)
+ tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);
+
+ /* pass 2: drop candidates whose membership cannot be validated */
+ for (i = 0; i < tbl_size; i++) {
+ struct imsm_disk *d;
+ struct intel_disk *idisk;
+ struct imsm_super *mpb = super_table[i]->anchor;
+
+ s = super_table[i];
+ d = &s->disks->disk;
+
+ /* 'd' must appear in merged disk list for its
+ * configuration to be valid
+ */
+ idisk = disk_list_get(d->serial, disk_list);
+ if (idisk && idisk->owner == i)
+ s = validate_members(s, disk_list, i);
+ else
+ s = NULL;
+
+ if (!s)
+ dprintf("%s: marking family: %#x from %d:%d offline\n",
+ __func__, mpb->family_num,
+ super_table[i]->disks->major,
+ super_table[i]->disks->minor);
+ super_table[i] = s;
+ }
+
+ /* This is where the mdadm implementation differs from the Windows
+ * driver which has no strict concept of a container. We can only
+ * assemble one family from a container, so when returning a prodigal
+ * array member to this system the code will not be able to disambiguate
+ * the container contents that should be assembled ("foreign" versus
+ * "local"). It requires user intervention to set the orig_family_num
+ * to a new value to establish a new container. The Windows driver in
+ * this situation fixes up the volume name in place and manages the
+ * foreign array as an independent entity.
+ */
+ s = NULL;
+ spare = NULL;
+ conflict = 0;
+ /* the first surviving candidate with raid devices wins; a candidate
+ * without any is only remembered as a fallback
+ */
+ for (i = 0; i < tbl_size; i++) {
+ struct intel_super *tbl_ent = super_table[i];
+ int is_spare = 0;
+
+ if (!tbl_ent)
+ continue;
+
+ if (tbl_ent->anchor->num_raid_devs == 0) {
+ spare = tbl_ent;
+ is_spare = 1;
+ }
+
+ if (s && !is_spare) {
+ show_conflicts(tbl_ent->anchor->family_num, *super_list);
+ conflict++;
+ } else if (!s && !is_spare)
+ s = tbl_ent;
+ }
+
+ if (!s)
+ s = spare;
+ if (!s) {
+ champion = NULL;
+ goto out;
+ }
+ champion = s;
+
+ if (conflict)
+ pr_err("Chose family %#x on '%s', "
+ "assemble conflicts to new container with '--update=uuid'\n",
+ __le32_to_cpu(s->anchor->family_num), s->disks->devname);
+
+ /* collect all dl's onto 'champion', and update them to
+ * champion's version of the status
+ */
+ for (s = *super_list; s; s = s->next) {
+ struct imsm_super *mpb = champion->anchor;
+ struct dl *dl = s->disks;
+
+ if (s == champion)
+ continue;
+
+ mpb->attributes |= s->anchor->attributes & MPB_ATTRIB_2TB_DISK;
+
+ for (i = 0; i < mpb->num_disks; i++) {
+ struct imsm_disk *disk;
+
+ disk = __serial_to_disk(dl->serial, mpb, &dl->index);
+ if (disk) {
+ dl->disk = *disk;
+ /* only set index on disks that are a member of
+ * a populated container, i.e. one with
+ * raid_devs
+ */
+ if (is_failed(&dl->disk))
+ dl->index = -2;
+ else if (is_spare(&dl->disk))
+ dl->index = -1;
+ break;
+ }
+ }
+
+ /* not known to the champion: only an unconfigured, non-failed
+ * spare is taken over, anything else stays with its super
+ */
+ if (i >= mpb->num_disks) {
+ struct intel_disk *idisk;
+
+ idisk = disk_list_get(dl->serial, disk_list);
+ if (idisk && is_spare(&idisk->disk) &&
+ !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
+ dl->index = -1;
+ else {
+ dl->index = -2;
+ continue;
+ }
+ }
+
+ dl->next = champion->disks;
+ champion->disks = dl;
+ s->disks = NULL;
+ }
+
+ /* delete 'champion' from super_list */
+ for (del = super_list; *del; ) {
+ if (*del == champion) {
+ *del = (*del)->next;
+ break;
+ } else
+ del = &(*del)->next;
+ }
+ champion->next = NULL;
+
+ out:
+ /* the merged disk list is only needed during selection */
+ while (disk_list) {
+ struct intel_disk *idisk = disk_list;
+
+ disk_list = disk_list->next;
+ free(idisk);
+ }
+
+ return champion;
+ }
+
+static int
+get_sra_super_block(int fd, struct intel_super **super_list, char *devname, int *max, int keep_fd);
+static int get_super_block(struct intel_super **super_list, char *devnm, char *devname,
+ int major, int minor, int keep_fd);
+static int
+get_devlist_super_block(struct md_list *devlist, struct intel_super **super_list,
+ int *max, int keep_fd);
+
+ /* load_super_imsm_all - load the metadata of all members of a container
+  * @st:      supertype to fill in on success
+  * @fd:      open container, or negative to take the members from @devlist
+  * @sbp:     receives the selected super on success, untouched on failure
+  * @devname: name used in messages, may be NULL
+  * @devlist: member list used when @fd is negative
+  * @keep_fd: passed on to the loaders, non-zero keeps the member
+  *           descriptors open in the disk entries
+  *
+  * Returns 0 on success.  On failure nothing is handed to the caller:
+  * the supers left on the list and, if one was already selected, the
+  * chosen super are released.  Error values:
+  *	1 : no usable super could be selected
+  *	2 : find_missing() failed
+  *	4 : migration in progress but the migration record cannot be loaded
+  *	5 : unsupported migration detected
+  *	other : error reported while collecting the member supers
+  */
+ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
+ 			       char *devname, struct md_list *devlist,
+ 			       int keep_fd)
+ {
+ 	struct intel_super *super_list = NULL;
+ 	struct intel_super *super = NULL;
+ 	int err = 0;
+ 	int i = 0;
+
+ 	if (fd >= 0)
+ 		/* 'fd' is an opened container */
+ 		err = get_sra_super_block(fd, &super_list, devname, &i, keep_fd);
+ 	else
+ 		/* get super block from devlist devices */
+ 		err = get_devlist_super_block(devlist, &super_list, &i, keep_fd);
+ 	if (err)
+ 		goto error;
+ 	if (!super_list) {
+ 		/* nothing was loaded; do not size a table with zero */
+ 		err = 1;
+ 		goto error;
+ 	}
+ 	/* all mpbs enter, maybe one leaves */
+ 	super = imsm_thunderdome(&super_list, i);
+ 	if (!super) {
+ 		err = 1;
+ 		goto error;
+ 	}
+
+ 	if (find_missing(super) != 0) {
+ 		err = 2;
+ 		goto error;
+ 	}
+
+ 	/* load migration record */
+ 	err = load_imsm_migr_rec(super, NULL);
+ 	if (err == -1) {
+ 		/* migration is in progress,
+ 		 * but migr_rec cannot be loaded,
+ 		 */
+ 		err = 4;
+ 		goto error;
+ 	}
+
+ 	/* Check migration compatibility */
+ 	if ((err == 0) && (check_mpb_migr_compatibility(super) != 0)) {
+ 		pr_err("Unsupported migration detected");
+ 		if (devname)
+ 			fprintf(stderr, " on %s\n", devname);
+ 		else
+ 			fprintf(stderr, " (IMSM).\n");
+
+ 		err = 5;
+ 		goto error;
+ 	}
+
+ 	err = 0;
+
+  error:
+ 	while (super_list) {
+ 		struct intel_super *s = super_list;
+
+ 		super_list = super_list->next;
+ 		free_imsm(s);
+ 	}
+
+ 	if (err) {
+ 		/* the champion was unlinked from super_list, so it has
+ 		 * to be released separately
+ 		 */
+ 		if (super)
+ 			free_imsm(super);
+ 		return err;
+ 	}
+
+ 	*sbp = super;
+ 	if (fd >= 0)
+ 		strcpy(st->container_devnm, fd2devnm(fd));
+ 	else
+ 		st->container_devnm[0] = 0;
+ 	if (st->ss == NULL) {
+ 		st->ss = &super_imsm;
+ 		st->minor_version = 0;
+ 		st->max_devs = IMSM_MAX_DEVICES;
+ 	}
+ 	return 0;
+ }
+
+ /* get_devlist_super_block - load the supers of the devices in @devlist
+  *
+  * Only entries with 'used' == 1 are considered.  A container entry is
+  * opened and all of its members are loaded, any other entry is loaded
+  * directly by device number.  Loading stops at the first failure.
+  *
+  * *@max receives the number of loads accounted for.  Returns 0 on
+  * success, 8 when a container cannot be opened, 7 when loading the
+  * members of a container failed and 6 when loading a plain device
+  * failed.
+  */
+ static int
+ get_devlist_super_block(struct md_list *devlist, struct intel_super **super_list,
+ 			int *max, int keep_fd)
+ {
+ 	struct md_list *ent;
+ 	int loaded = 0;
+ 	int err = 0;
+
+ 	for (ent = devlist; ent; ent = ent->next) {
+ 		if (ent->used != 1)
+ 			continue;
+
+ 		if (ent->container != 1) {
+ 			err = get_super_block(super_list,
+ 					      NULL,
+ 					      ent->devname,
+ 					      major(ent->st_rdev),
+ 					      minor(ent->st_rdev),
+ 					      keep_fd);
+ 			loaded++;
+ 			if (err) {
+ 				err = 6;
+ 				break;
+ 			}
+ 		} else {
+ 			int members = 0;
+ 			int cfd = dev_open(ent->devname, O_RDONLY|O_EXCL);
+
+ 			if (cfd < 0) {
+ 				pr_err("cannot open device %s: %s\n",
+ 				       ent->devname, strerror(errno));
+ 				err = 8;
+ 				break;
+ 			}
+ 			err = get_sra_super_block(cfd, super_list,
+ 						  ent->devname, &members,
+ 						  keep_fd);
+ 			loaded += members;
+ 			close(cfd);
+ 			if (err) {
+ 				err = 7;
+ 				break;
+ 			}
+ 		}
+ 	}
+
+ 	*max = loaded;
+ 	return err;
+ }
+
+ /* get_super_block - load the super of one member and add it to the list
+  * @super_list: list the new super is pushed onto on success
+  * @devnm:      container name used for the mdmon race check, may be NULL
+  * @devname:    name passed to the hba lookup for its messages
+  * @major/@minor: device numbers of the member to open
+  * @keep_fd:    non-zero leaves the descriptor open inside the disk entry
+  *
+  * Returns 0 on success, 1 when no super could be allocated, 2 when the
+  * device cannot be opened, otherwise the load_and_parse_mpb() error.
+  * On failure the half built super is released with free_imsm() and the
+  * descriptor is closed exactly once.
+  */
+ static int get_super_block(struct intel_super **super_list, char *devnm, char *devname,
+ 			   int major, int minor, int keep_fd)
+ {
+ 	struct intel_super *s;
+ 	char nm[32];
+ 	int dfd = -1;
+ 	int err = 0;
+ 	int retry;
+
+ 	s = alloc_super();
+ 	if (!s)
+ 		return 1;
+
+ 	sprintf(nm, "%d:%d", major, minor);
+ 	dfd = dev_open(nm, O_RDWR);
+ 	if (dfd < 0) {
+ 		err = 2;
+ 		goto error;
+ 	}
+
+ 	find_intel_hba_capability(dfd, s, devname);
+ 	err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
+
+ 	/* retry the load if we might have raced against mdmon */
+ 	if (err == 3 && devnm && mdmon_running(devnm))
+ 		for (retry = 0; retry < 3; retry++) {
+ 			usleep(3000);
+ 			err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
+ 			if (err != 3)
+ 				break;
+ 		}
+  error:
+ 	if (!err) {
+ 		s->next = *super_list;
+ 		*super_list = s;
+ 		/* with keep_fd the disk entry owns the descriptor */
+ 		if (!keep_fd)
+ 			close(dfd);
+ 	} else {
+ 		/* a disk entry created before the failure may already
+ 		 * hold dfd; free_imsm() closes it in that case
+ 		 */
+ 		int fd_owned = dfd >= 0 && s->disks && s->disks->fd == dfd;
+
+ 		free_imsm(s);
+ 		if (dfd >= 0 && !fd_owned)
+ 			close(dfd);
+ 	}
+ 	return err;
+ }
+
+ /* get_sra_super_block - load the supers of all members of a container
+  * @fd: open container device
+  *
+  * The container has to be reported by sysfs as external metadata
+  * (major -1, minor -2) with text version "imsm".  *@max receives the
+  * number of members loaded successfully; it is not written when sysfs
+  * cannot be read at all.
+  *
+  * Returns 0 on success, 1 when sysfs cannot be read or the array is
+  * not an imsm container, 7 when a member fails to load.
+  */
+ static int
+ get_sra_super_block(int fd, struct intel_super **super_list, char *devname, int *max, int keep_fd)
+ {
+ 	struct mdinfo *sra;
+ 	struct mdinfo *member;
+ 	char *container;
+ 	int loaded = 0;
+ 	int err = 0;
+
+ 	sra = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
+ 	if (!sra)
+ 		return 1;
+
+ 	if (sra->array.major_version == -1 &&
+ 	    sra->array.minor_version == -2 &&
+ 	    strcmp(sra->text_version, "imsm") == 0) {
+ 		/* load all mpbs */
+ 		container = fd2devnm(fd);
+ 		for (member = sra->devs; member; member = member->next) {
+ 			if (get_super_block(super_list, container, devname,
+ 					    member->disk.major,
+ 					    member->disk.minor,
+ 					    keep_fd) != 0) {
+ 				err = 7;
+ 				break;
+ 			}
+ 			loaded++;
+ 		}
+ 	} else {
+ 		err = 1;
+ 	}
+
+ 	sysfs_free(sra);
+ 	*max = loaded;
+ 	return err;
+ }
+
+ /* load a whole container through its open descriptor, keeping the
+  * member descriptors open; the result is stored in st->sb
+  */
+ static int load_container_imsm(struct supertype *st, int fd, char *devname)
+ {
+ 	const int keep_fd = 1;
+
+ 	return load_super_imsm_all(st, fd, &st->sb, devname, NULL, keep_fd);
+ }
+#endif
+
+ /* load_super_imsm - load the imsm metadata found on a single device
+  * @st:      supertype; any super it holds is released first
+  * @fd:      open descriptor of the device
+  * @devname: name used in messages, may be NULL
+  *
+  * Returns:
+  *	0 : metadata loaded, st->sb set
+  *	1 : @fd is a partition
+  *	2 : no usable hba/OROM and st->ignore_hw_compat is not set
+  *	3 : unsupported migration detected (st->sb stays set)
+  *	other : error from load_and_parse_mpb()
+  */
+ static int load_super_imsm(struct supertype *st, int fd, char *devname)
+ {
+ 	struct intel_super *super;
+ 	int attempt;
+ 	int rv;
+
+ 	/* IMSM not allowed on partitions */
+ 	if (test_partition(fd))
+ 		return 1;
+
+ 	free_super_imsm(st);
+
+ 	super = alloc_super();
+ 	/* Load hba and capabilities if they exist.
+ 	 * But do not preclude loading metadata in case capabilities or hba are
+ 	 * non-compliant and ignore_hw_compat is set.
+ 	 */
+ 	rv = find_intel_hba_capability(fd, super, devname);
+ 	if (rv != 0 && st->ignore_hw_compat == 0) {
+ 		/* no orom/efi or non-intel hba of the disk */
+ 		if (devname)
+ 			pr_err("No OROM/EFI properties for %s\n", devname);
+ 		free_imsm(super);
+ 		return 2;
+ 	}
+
+ 	rv = load_and_parse_mpb(fd, super, devname, 0);
+ 	if (rv == 3) {
+ 		/* retry the load if we might have raced against mdmon */
+ 		struct mdstat_ent *mdstat = mdstat_by_component(fd2devnm(fd));
+
+ 		if (mdstat && mdmon_running(mdstat->devnm) &&
+ 		    getpid() != mdmon_pid(mdstat->devnm)) {
+ 			for (attempt = 0; rv == 3 && attempt < 3; attempt++) {
+ 				usleep(3000);
+ 				rv = load_and_parse_mpb(fd, super, devname, 0);
+ 			}
+ 		}
+
+ 		free_mdstat(mdstat);
+ 	}
+
+ 	if (rv) {
+ 		if (devname)
+ 			pr_err("Failed to load all information "
+ 			       "sections on %s\n", devname);
+ 		free_imsm(super);
+ 		return rv;
+ 	}
+
+ 	st->sb = super;
+ 	if (st->ss == NULL) {
+ 		st->ss = &super_imsm;
+ 		st->minor_version = 0;
+ 		st->max_devs = IMSM_MAX_DEVICES;
+ 	}
+
+ 	/* load migration record; without one there is nothing to check */
+ 	if (load_imsm_migr_rec(super, NULL) != 0)
+ 		return 0;
+
+ 	/* Check for unsupported migration features */
+ 	if (check_mpb_migr_compatibility(super) == 0)
+ 		return 0;
+
+ 	pr_err("Unsupported migration detected");
+ 	if (devname)
+ 		fprintf(stderr, " on %s\n", devname);
+ 	else
+ 		fprintf(stderr, " (IMSM).\n");
+ 	return 3;
+ }
+
+ /* strip size in 512 byte blocks: fixed at 128 for level 1, otherwise
+  * the chunk size in bytes divided by 512
+  */
+ static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
+ {
+ 	return info->level == 1 ? 128 : info->chunk_size >> 9;
+ }
+
+ /* twice @size, which for every level other than 1 is additionally
+  * masked with ~(blocks per strip - 1)
+  */
+ static unsigned long long info_to_blocks_per_member(mdu_array_info_t *info,
+ 						    unsigned long long size)
+ {
+ 	unsigned long long blocks = size * 2;
+
+ 	if (info->level != 1)
+ 		blocks &= ~(info_to_blocks_per_strip(info) - 1);
+
+ 	return blocks;
+ }
+
+ /* imsm_update_version_info - refresh the attribute bits and the version
+ * string stored behind the signature prefix in mpb->sig
+ *
+ * The version string is rewritten once per raid device, so the value left
+ * in the anchor is the one chosen for the last device. Nothing is written
+ * when the container has no raid devices.
+ */
+ static void imsm_update_version_info(struct intel_super *super)
+ {
+ /* update the version and attributes */
+ struct imsm_super *mpb = super->anchor;
+ char *version;
+ struct imsm_dev *dev;
+ struct imsm_map *map;
+ int i;
+
+ for (i = 0; i < mpb->num_raid_devs; i++) {
+ dev = get_imsm_dev(super, i);
+ map = get_imsm_map(dev, MAP_0);
+ if (__le32_to_cpu(dev->size_high) > 0)
+ mpb->attributes |= MPB_ATTRIB_2TB;
+
+ /* FIXME detect when an array spans a port multiplier */
+ #if 0
+ mpb->attributes |= MPB_ATTRIB_PM;
+ #endif
+
+ /* more than one volume, or any attribute other than exactly
+ * MPB_ATTRIB_CHECKSUM_VERIFY, selects the attribute based version
+ */
+ if (mpb->num_raid_devs > 1 ||
+ mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
+ version = MPB_VERSION_ATTRIBS;
+ switch (get_imsm_raid_level(map)) {
+ case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
+ case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
+ case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
+ case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
+ }
+ } else {
+ /* the first matching test wins, so the order matters */
+ if (map->num_members >= 5)
+ version = MPB_VERSION_5OR6_DISK_ARRAY;
+ else if (dev->status == DEV_CLONE_N_GO)
+ version = MPB_VERSION_CNG;
+ else if (get_imsm_raid_level(map) == 5)
+ version = MPB_VERSION_RAID5;
+ else if (map->num_members >= 3)
+ version = MPB_VERSION_3OR4_DISK_ARRAY;
+ else if (get_imsm_raid_level(map) == 1)
+ version = MPB_VERSION_RAID1;
+ else
+ version = MPB_VERSION_RAID0;
+ }
+ strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
+ }
+ }
+
+ /* check_name - validate a name for a new volume
+  *
+  * A name is rejected when it is longer than MAX_RAID_SERIAL_LEN or
+  * when an existing volume already carries it; when both apply the
+  * duplicate is the reason reported.  The reason is printed unless
+  * @quiet is set.
+  *
+  * Returns 1 when the name is acceptable, 0 otherwise.
+  */
+ static int check_name(struct intel_super *super, char *name, int quiet)
+ {
+ 	struct imsm_super *mpb = super->anchor;
+ 	char *why = NULL;
+ 	int vol = 0;
+
+ 	if (strlen(name) > MAX_RAID_SERIAL_LEN)
+ 		why = "must be 16 characters or less";
+
+ 	while (vol < mpb->num_raid_devs) {
+ 		struct imsm_dev *dev = get_imsm_dev(super, vol);
+
+ 		if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
+ 			why = "already exists";
+ 			break;
+ 		}
+ 		vol++;
+ 	}
+
+ 	if (why && !quiet)
+ 		pr_err("imsm volume name %s\n", why);
+
+ 	return why == NULL;
+ }
+
+ /* Create a new volume inside a pre-existing container (st->sb is already
+  * set).
+  *
+  * Steps, in order:
+  *  - refuse when the option ROM volume limit (orom->vpa) is reached;
+  *  - grow the in-memory mpb when info->nr_disks needs more room;
+  *  - create dummy 'missing' disk entries (first volume) or validate the
+  *    requested failed_disks count (later volumes);
+  *  - validate the name, then allocate and fill the imsm_dev and its map;
+  *  - link the new device into super->devlist and refresh version info.
+  *
+  * Returns 1 on success, 0 on failure. On failure st->sb is left intact so
+  * the caller can still release it.
+  */
+ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
+ 				  unsigned long long size, char *name,
+ 				  char *homehost, int *uuid,
+ 				  long long data_offset)
+ {
+ 	struct intel_super *super = st->sb;
+ 	struct imsm_super *mpb = super->anchor;
+ 	struct intel_dev *dv;
+ 	struct imsm_dev *dev;
+ 	struct imsm_vol *vol;
+ 	struct imsm_map *map;
+ 	int idx = mpb->num_raid_devs;
+ 	int i;
+ 	unsigned long long array_blocks;
+ 	size_t size_old, size_new;
+ 	unsigned long long num_data_stripes;
+
+ 	if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
+ 		pr_err("This imsm-container already has the "
+ 		       "maximum of %d volumes\n", super->orom->vpa);
+ 		return 0;
+ 	}
+
+ 	/* ensure the mpb is large enough for the new data */
+ 	size_old = __le32_to_cpu(mpb->mpb_size);
+ 	size_new = disks_to_mpb_size(info->nr_disks);
+ 	if (size_new > size_old) {
+ 		void *mpb_new;
+ 		void *migr_rec_new;
+ 		size_t size_round = ROUND_UP(size_new, 512);
+
+ 		if (posix_memalign(&mpb_new, 512, size_round) != 0) {
+ 			pr_err("could not allocate new mpb\n");
+ 			return 0;
+ 		}
+ 		if (posix_memalign(&migr_rec_new, 512,
+ 				   MIGR_REC_BUF_SIZE) != 0) {
+ 			pr_err("%s could not allocate migr_rec buffer\n",
+ 			       __func__);
+ 			/* 'super' and its mpb are still referenced through
+ 			 * st->sb, so only undo the allocation made here.
+ 			 */
+ 			free(mpb_new);
+ 			return 0;
+ 		}
+ 		/* NOTE(review): as before, the previous migr_rec_buf pointer
+ 		 * is overwritten without being freed - confirm whether it
+ 		 * can be released here.
+ 		 */
+ 		super->migr_rec_buf = migr_rec_new;
+ 		memcpy(mpb_new, mpb, size_old);
+ 		free(mpb);
+ 		mpb = mpb_new;
+ 		super->anchor = mpb_new;
+ 		mpb->mpb_size = __cpu_to_le32(size_new);
+ 		/* zero the part of the new buffer beyond the copied mpb */
+ 		memset((char *) mpb_new + size_old, 0, size_round - size_old);
+ 	}
+ 	super->current_vol = idx;
+
+ 	/* handle 'failed_disks' by either:
+ 	 * a) create dummy disk entries in the table if this the first
+ 	 *    volume in the array.  We add them here as this is the only
+ 	 *    opportunity to add them. add_to_super_imsm_volume()
+ 	 *    handles the non-failed disks and continues incrementing
+ 	 *    mpb->num_disks.
+ 	 * b) validate that 'failed_disks' matches the current number
+ 	 *    of missing disks if the container is populated
+ 	 */
+ 	if (super->current_vol == 0) {
+ 		mpb->num_disks = 0;
+ 		for (i = 0; i < info->failed_disks; i++) {
+ 			struct imsm_disk *disk;
+
+ 			mpb->num_disks++;
+ 			disk = __get_imsm_disk(mpb, i);
+ 			disk->status = CONFIGURED_DISK | FAILED_DISK;
+ 			disk->scsi_id = __cpu_to_le32(~(__u32)0);
+ 			snprintf((char *) disk->serial, MAX_RAID_SERIAL_LEN,
+ 				 "missing:%d", i);
+ 		}
+ 		find_missing(super);
+ 	} else {
+ 		int missing = 0;
+ 		struct dl *d;
+
+ 		for (d = super->missing; d; d = d->next)
+ 			missing++;
+ 		if (info->failed_disks > missing) {
+ 			pr_err("unable to add 'missing' disk to container\n");
+ 			return 0;
+ 		}
+ 	}
+
+ 	if (!check_name(super, name, 0))
+ 		return 0;
+ 	dv = xmalloc(sizeof(*dv));
+ 	/* imsm_dev is followed by one ord-table entry per raid disk; the
+ 	 * structure itself already accounts for the first one.
+ 	 */
+ 	dev = xcalloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
+ 	/* dev->volume is a fixed-width field; a name of exactly
+ 	 * MAX_RAID_SERIAL_LEN bytes is stored without a terminator.
+ 	 */
+ 	strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
+ 	array_blocks = calc_array_size(info->level, info->raid_disks,
+ 				       info->layout, info->chunk_size,
+ 				       size * 2);
+ 	/* round array size down to closest MB */
+ 	array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
+
+ 	dev->size_low = __cpu_to_le32((__u32) array_blocks);
+ 	dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
+ 	dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING);
+ 	vol = &dev->vol;
+ 	vol->migr_state = 0;
+ 	set_migr_type(dev, MIGR_INIT);
+ 	vol->dirty = !info->state;
+ 	vol->curr_migr_unit = 0;
+ 	map = get_imsm_map(dev, MAP_0);
+ 	set_pba_of_lba0(map, super->create_offset);
+ 	set_blocks_per_member(map, info_to_blocks_per_member(info, size));
+ 	map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
+ 	map->failed_disk_num = ~0;
+ 	if (info->level > 0)
+ 		map->map_state = (info->state ? IMSM_T_STATE_NORMAL
+ 				  : IMSM_T_STATE_UNINITIALIZED);
+ 	else
+ 		map->map_state = info->failed_disks ? IMSM_T_STATE_FAILED :
+ 						      IMSM_T_STATE_NORMAL;
+ 	map->ddf = 1;
+
+ 	if (info->level == 1 && info->raid_disks > 2) {
+ 		free(dev);
+ 		free(dv);
+ 		pr_err("imsm does not support more than 2 disks "
+ 		       "in a raid1 volume\n");
+ 		return 0;
+ 	}
+
+ 	/* raid10 is recorded as raid level 1 with raid_disks / 2 domains */
+ 	map->raid_level = info->level;
+ 	if (info->level == 10) {
+ 		map->raid_level = 1;
+ 		map->num_domains = info->raid_disks / 2;
+ 	} else if (info->level == 1)
+ 		map->num_domains = info->raid_disks;
+ 	else
+ 		map->num_domains = 1;
+
+ 	/* info->size is only int so use the 'size' parameter instead */
+ 	num_data_stripes = (size * 2) / info_to_blocks_per_strip(info);
+ 	num_data_stripes /= map->num_domains;
+ 	set_num_data_stripes(map, num_data_stripes);
+
+ 	map->num_members = info->raid_disks;
+ 	for (i = 0; i < map->num_members; i++) {
+ 		/* initialized in add_to_super */
+ 		set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
+ 	}
+ 	mpb->num_raid_devs++;
+
+ 	/* prepend the new device to the container's device list */
+ 	dv->dev = dev;
+ 	dv->index = super->current_vol;
+ 	dv->next = super->devlist;
+ 	super->devlist = dv;
+
+ 	imsm_update_version_info(super);
+
+ 	return 1;
+ }
+
+ /* Entry point used when creating an array.
+  *
+  * With a superblock already attached to 'st' the request is forwarded to
+  * init_super_imsm_volume(). Otherwise a fresh, zeroed mpb is allocated for
+  * new drives; add_to_super is then expected for each component, followed by
+  * write_init_super.
+  *
+  * A NULL 'info' means "zero the superblock": the empty mpb is attached to
+  * st->sb and 0 is returned.
+  *
+  * Returns 1 on success, 0 otherwise.
+  */
+ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
+ 			   unsigned long long size, char *name,
+ 			   char *homehost, int *uuid,
+ 			   unsigned long long data_offset)
+ {
+ 	struct intel_super *super;
+ 	struct imsm_super *mpb;
+ 	size_t mpb_size;
+ 	char *sig;
+
+ 	if (data_offset != INVALID_SECTORS) {
+ 		pr_err("data-offset not supported by imsm\n");
+ 		return 0;
+ 	}
+
+ 	/* a container is already loaded: create a volume inside it */
+ 	if (st->sb)
+ 		return init_super_imsm_volume(st, info, size, name, homehost,
+ 					      uuid, data_offset);
+
+ 	mpb_size = info ? disks_to_mpb_size(info->nr_disks) : 512;
+
+ 	super = alloc_super();
+ 	if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
+ 		free(super);
+ 		super = NULL;
+ 	}
+ 	if (super == NULL) {
+ 		pr_err("%s could not allocate superblock\n", __func__);
+ 		return 0;
+ 	}
+
+ 	if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) {
+ 		pr_err("%s could not allocate migr_rec buffer\n", __func__);
+ 		free(super->buf);
+ 		free(super);
+ 		return 0;
+ 	}
+
+ 	memset(super->buf, 0, mpb_size);
+ 	mpb = super->buf;
+ 	mpb->mpb_size = __cpu_to_le32(mpb_size);
+ 	st->sb = super;
+
+ 	/* zeroing superblock */
+ 	if (!info)
+ 		return 0;
+
+ 	mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
+
+ 	/* signature prefix immediately followed by the version string */
+ 	sig = (char *) mpb->sig;
+ 	strcpy(sig, MPB_SIGNATURE);
+ 	strcpy(sig + strlen(MPB_SIGNATURE), MPB_VERSION_RAID0);
+
+ 	return 1;
+ }
+
+#ifndef MDASSEMBLE
+ /* Attach one container disk to the volume currently being created
+  * (super->current_vol).
+  *
+  * The disk is looked up in super->disks either by the raid_disk number
+  * (fd == -1) or by major/minor. Its index is recorded in the volume's map
+  * slot dk->raid_disk and it is marked CONFIGURED_DISK.
+  *
+  * Returns 0 on success and 1 on any error (spare requested, disk not found
+  * in the container, disk already used by this volume, mpb lookup failure).
+  */
+ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
+ int fd, char *devname)
+ {
+ struct intel_super *super = st->sb;
+ struct imsm_super *mpb = super->anchor;
+ struct imsm_disk *_disk;
+ struct imsm_dev *dev;
+ struct imsm_map *map;
+ struct dl *dl, *df;
+ int slot;
+
+ dev = get_imsm_dev(super, super->current_vol);
+ map = get_imsm_map(dev, MAP_0);
+
+ /* only in-sync members may be added; spares are rejected */
+ if (! (dk->state & (1<<MD_DISK_SYNC))) {
+ pr_err("%s: Cannot add spare devices to IMSM volume\n",
+ devname);
+ return 1;
+ }
+
+ if (fd == -1) {
+ /* we're doing autolayout so grab the pre-marked (in
+ * validate_geometry) raid_disk
+ */
+ for (dl = super->disks; dl; dl = dl->next)
+ if (dl->raiddisk == dk->raid_disk)
+ break;
+ } else {
+ /* explicit device: match on major/minor */
+ for (dl = super->disks; dl ; dl = dl->next)
+ if (dl->major == dk->major &&
+ dl->minor == dk->minor)
+ break;
+ }
+
+ if (!dl) {
+ pr_err("%s is not a member of the same container\n", devname);
+ return 1;
+ }
+
+ /* add a pristine spare to the metadata */
+ if (dl->index < 0) {
+ dl->index = super->anchor->num_disks;
+ super->anchor->num_disks++;
+ }
+ /* Check the device has not already been added */
+ slot = get_imsm_disk_slot(map, dl->index);
+ if (slot >= 0 &&
+ (get_imsm_ord_tbl_ent(dev, slot, MAP_X) & IMSM_ORD_REBUILD) == 0) {
+ pr_err("%s has been included in this array twice\n",
+ devname);
+ return 1;
+ }
+ /* storing the bare index clears the IMSM_ORD_REBUILD placeholder that
+ * init_super_imsm_volume() put in this slot
+ */
+ set_imsm_ord_tbl_ent(map, dk->raid_disk, dl->index);
+ dl->disk.status = CONFIGURED_DISK;
+
+ /* update size of 'missing' disks to be at least as large as the
+ * largest active member (we only have dummy missing disks when
+ * creating the first volume)
+ */
+ if (super->current_vol == 0) {
+ for (df = super->missing; df; df = df->next) {
+ if (total_blocks(&dl->disk) > total_blocks(&df->disk))
+ set_total_blocks(&df->disk, total_blocks(&dl->disk));
+ _disk = __get_imsm_disk(mpb, df->index);
+ *_disk = df->disk;
+ }
+ }
+
+ /* refresh unset/failed slots to point to valid 'missing' entries */
+ /* each missing disk claims the first slot still flagged
+ * IMSM_ORD_REBUILD (note the 'break' at the end of the inner loop)
+ */
+ for (df = super->missing; df; df = df->next)
+ for (slot = 0; slot < mpb->num_disks; slot++) {
+ __u32 ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
+
+ if ((ord & IMSM_ORD_REBUILD) == 0)
+ continue;
+ set_imsm_ord_tbl_ent(map, slot, df->index | IMSM_ORD_REBUILD);
+ /* during a general migration mirror the change into the
+ * second map when it references the same disk index
+ */
+ if (is_gen_migration(dev)) {
+ struct imsm_map *map2 = get_imsm_map(dev,
+ MAP_1);
+ int slot2 = get_imsm_disk_slot(map2, df->index);
+ if ((slot2 < map2->num_members) &&
+ (slot2 >= 0)) {
+ __u32 ord2 = get_imsm_ord_tbl_ent(dev,
+ slot2,
+ MAP_1);
+ if ((unsigned)df->index ==
+ ord_to_idx(ord2))
+ set_imsm_ord_tbl_ent(map2,
+ slot2,
+ df->index |
+ IMSM_ORD_REBUILD);
+ }
+ }
+ dprintf("set slot:%d to missing disk:%d\n", slot, df->index);
+ break;
+ }
+
+ /* if we are creating the first raid device update the family number */
+ if (super->current_vol == 0) {
+ __u32 sum;
+ struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
+
+ _disk = __get_imsm_disk(mpb, dl->index);
+ if (!_dev || !_disk) {
+ pr_err("BUG mpb setup error\n");
+ return 1;
+ }
+ /* copy the in-memory device and disk into the mpb before the
+ * checksum is computed; the family number is a random value
+ * plus that checksum
+ */
+ *_dev = *dev;
+ *_disk = dl->disk;
+ sum = random32();
+ sum += __gen_imsm_checksum(mpb);
+ mpb->family_num = __cpu_to_le32(sum);
+ mpb->orig_family_num = mpb->family_num;
+ }
+ super->current_disk = dl;
+ return 0;
+ }
+
+ /* mark_spare()
+  * Turns a disk entry into a spare (status SPARE_DISK, index -1) and tries
+  * to restore its serial number, which a takeover operation may have
+  * altered when it marked the disk as failed.
+  * returns:
+  *  -1 : critical error (no disk given)
+  *   0 : disk is marked as spare but serial is not set
+  *   1 : success
+  */
+ int mark_spare(struct dl *disk)
+ {
+ 	__u8 fresh_serial[MAX_RAID_SERIAL_LEN];
+ 	int serial_restored;
+
+ 	if (disk == NULL)
+ 		return -1;
+
+ 	serial_restored = imsm_read_serial(disk->fd, NULL, fresh_serial) == 0;
+ 	if (serial_restored) {
+ 		/* Takeover marks the disk as failed and appends ':0' to
+ 		 * the serial before it becomes a spare; put the real
+ 		 * serial back in both copies.
+ 		 */
+ 		serialcpy(disk->serial, fresh_serial);
+ 		serialcpy(disk->disk.serial, fresh_serial);
+ 	}
+
+ 	disk->disk.status = SPARE_DISK;
+ 	disk->index = -1;
+
+ 	return serial_restored ? 1 : 0;
+ }
+
+ /* Add a disk to the container, or hand over to add_to_super_imsm_volume()
+  * when a volume is currently selected (super->current_vol >= 0).
+  *
+  * For a container addition a new 'struct dl' is built from the device's
+  * rdev, serial, size and scsi id, marked as a spare, and queued either on
+  * super->disk_mgmt_list (when st->update_tail is set) or directly on
+  * super->disks.
+  *
+  * Returns 0 on success and 1 on failure. A failure to read the serial
+  * number aborts the process, as before.
+  */
+ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
+ 			     int fd, char *devname,
+ 			     unsigned long long data_offset)
+ {
+ 	struct intel_super *super = st->sb;
+ 	struct dl *dd;
+ 	unsigned long long size;
+ 	__u32 id;
+ 	int rv;
+ 	struct stat stb;
+
+ 	/* If we are on an RAID enabled platform check that the disk is
+ 	 * attached to the raid controller.
+ 	 * We do not need to test disks attachment for container based additions,
+ 	 * they shall be already tested when container was created/assembled.
+ 	 */
+ 	rv = find_intel_hba_capability(fd, super, devname);
+ 	/* no orom/efi or non-intel hba of the disk */
+ 	if (rv != 0) {
+ 		dprintf("capability: %p fd: %d ret: %d\n",
+ 			super->orom, fd, rv);
+ 		return 1;
+ 	}
+
+ 	if (super->current_vol >= 0)
+ 		return add_to_super_imsm_volume(st, dk, fd, devname);
+
+ 	/* stb is only meaningful when fstat() succeeds */
+ 	if (fstat(fd, &stb) != 0) {
+ 		pr_err("cannot stat %s: %s\n",
+ 		       devname ? devname : "device", strerror(errno));
+ 		return 1;
+ 	}
+
+ 	dd = xcalloc(1, sizeof(*dd));
+ 	dd->major = major(stb.st_rdev);
+ 	dd->minor = minor(stb.st_rdev);
+ 	dd->devname = devname ? xstrdup(devname) : NULL;
+ 	dd->fd = fd;
+ 	dd->e = NULL;
+ 	dd->action = DISK_ADD;
+ 	rv = imsm_read_serial(fd, devname, dd->serial);
+ 	if (rv) {
+ 		pr_err("failed to retrieve scsi serial, aborting\n");
+ 		free(dd);
+ 		abort();
+ 	}
+
+ 	/* get_dev_size() returns 0 on failure and then leaves 'size' unset */
+ 	if (!get_dev_size(fd, NULL, &size)) {
+ 		pr_err("cannot determine size of %s\n",
+ 		       devname ? devname : "device");
+ 		free(dd->devname);
+ 		free(dd);
+ 		return 1;
+ 	}
+ 	size /= 512;
+ 	serialcpy(dd->disk.serial, dd->serial);
+ 	set_total_blocks(&dd->disk, size);
+ 	if (__le32_to_cpu(dd->disk.total_blocks_hi) > 0) {
+ 		struct imsm_super *mpb = super->anchor;
+ 		mpb->attributes |= MPB_ATTRIB_2TB_DISK;
+ 	}
+ 	mark_spare(dd);
+ 	if (sysfs_disk_to_scsi_id(fd, &id) == 0)
+ 		dd->disk.scsi_id = __cpu_to_le32(id);
+ 	else
+ 		dd->disk.scsi_id = __cpu_to_le32(0);
+
+ 	if (st->update_tail) {
+ 		/* queue the addition for later processing */
+ 		dd->next = super->disk_mgmt_list;
+ 		super->disk_mgmt_list = dd;
+ 	} else {
+ 		dd->next = super->disks;
+ 		super->disks = dd;
+ 		super->updates_pending++;
+ 	}
+
+ 	return 0;
+ }
+
+ /* Queue the removal of the disk identified by dk->major / dk->minor.
+  *
+  * Only usable when st->update_tail is set; otherwise an error is printed
+  * and 1 is returned. On success a DISK_REMOVE entry is pushed on
+  * super->disk_mgmt_list and 0 is returned.
+  */
+ static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
+ {
+ 	struct intel_super *super = st->sb;
+ 	struct dl *entry;
+
+ 	/* remove from super works only in mdmon - for communication
+ 	 * manager - monitor. Check if communication memory buffer
+ 	 * is prepared.
+ 	 */
+ 	if (!st->update_tail) {
+ 		pr_err("%s shall be used in mdmon context only"
+ 		       "(line %d).\n", __func__, __LINE__);
+ 		return 1;
+ 	}
+
+ 	entry = xcalloc(1, sizeof(*entry));
+ 	entry->fd = -1;
+ 	entry->major = dk->major;
+ 	entry->minor = dk->minor;
+ 	mark_spare(entry);
+ 	entry->action = DISK_REMOVE;
+
+ 	/* push onto the head of the pending management list */
+ 	entry->next = super->disk_mgmt_list;
+ 	super->disk_mgmt_list = entry;
+
+ 	return 0;
+ }
+
+ /* forward declaration; used by the superblock writers below */
+ static int store_imsm_mpb(int fd, struct imsm_super *mpb);
+
+ /* 512-byte, 512-byte-aligned static buffer viewed either as raw bytes or as
+ * an imsm_super; write_super_imsm_spares() builds each spare's record in it
+ */
+ static union {
+ char buf[512];
+ struct imsm_super anchor;
+ } spare_record __attribute__ ((aligned(512)));
+
+ /* Write a stand-alone metadata record to every spare disk (index == -1).
+  *
+  * Spare records have their own family number and do not have any defined
+  * raid devices. The record is assembled in the static 'spare_record'
+  * buffer. When 'doclose' is set each disk's fd is closed after writing.
+  *
+  * Returns 0 on success, 1 as soon as one write fails.
+  */
+ static int write_super_imsm_spares(struct intel_super *super, int doclose)
+ {
+ 	struct imsm_super *mpb = super->anchor;
+ 	struct imsm_super *spare = &spare_record.anchor;
+ 	struct dl *d;
+
+ 	/* fields shared by every spare record */
+ 	spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super));
+ 	spare->generation_num = __cpu_to_le32(1UL);
+ 	spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
+ 	spare->num_disks = 1;
+ 	spare->num_raid_devs = 0;
+ 	spare->cache_size = mpb->cache_size;
+ 	spare->pwr_cycle_count = __cpu_to_le32(1);
+
+ 	snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
+ 		 MPB_SIGNATURE MPB_VERSION_RAID0);
+
+ 	for (d = super->disks; d; d = d->next) {
+ 		__u32 sum;
+
+ 		if (d->index != -1)
+ 			continue;
+
+ 		spare->disk[0] = d->disk;
+ 		/* NOTE(review): attributes is only reset before the loop,
+ 		 * so this bit stays set for later spares - confirm intended.
+ 		 */
+ 		if (__le32_to_cpu(d->disk.total_blocks_hi) > 0)
+ 			spare->attributes |= MPB_ATTRIB_2TB_DISK;
+
+ 		/* the family number is derived from a first checksum pass;
+ 		 * the stored checksum is computed afterwards
+ 		 */
+ 		sum = __gen_imsm_checksum(spare);
+ 		spare->family_num = __cpu_to_le32(sum);
+ 		spare->orig_family_num = 0;
+ 		sum = __gen_imsm_checksum(spare);
+ 		spare->check_sum = __cpu_to_le32(sum);
+
+ 		if (store_imsm_mpb(d->fd, spare)) {
+ 			pr_err("%s: failed for device %d:%d %s\n",
+ 			       __func__, d->major, d->minor, strerror(errno));
+ 			return 1;
+ 		}
+
+ 		if (doclose) {
+ 			close(d->fd);
+ 			d->fd = -1;
+ 		}
+ 	}
+
+ 	return 0;
+ }
+
+ /* Serialise the in-memory container state into the mpb and write it to
+  * every active (index >= 0, not failed) member disk.
+  *
+  * The generation number is bumped, disk and raid-device tables are copied
+  * into the mpb, mpb_size and the checksum are recomputed. Unless a general
+  * migration is in progress (or mdmon asked for a cleanup) the migration
+  * record buffer is zeroed and written to each disk as well. Spares are
+  * handled afterwards by write_super_imsm_spares().
+  *
+  * When 'doclose' is set the disk fds are closed after writing.
+  *
+  * Returns 0, or the result of write_super_imsm_spares() when spares exist.
+  * Failures on individual member disks are reported but do not change the
+  * return value.
+  */
+ static int write_super_imsm(struct supertype *st, int doclose)
+ {
+ 	struct intel_super *super = st->sb;
+ 	struct imsm_super *mpb = super->anchor;
+ 	struct dl *d;
+ 	__u32 generation;
+ 	__u32 sum;
+ 	int spares = 0;
+ 	int i;
+ 	/* imsm_super already contains one imsm_disk; start without it */
+ 	__u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
+ 	int num_disks = 0;
+ 	int clear_migration_record = 1;
+
+ 	/* 'generation' is incremented everytime the metadata is written */
+ 	generation = __le32_to_cpu(mpb->generation_num);
+ 	generation++;
+ 	mpb->generation_num = __cpu_to_le32(generation);
+
+ 	/* fix up cases where previous mdadm releases failed to set
+ 	 * orig_family_num
+ 	 */
+ 	if (mpb->orig_family_num == 0)
+ 		mpb->orig_family_num = mpb->family_num;
+
+ 	for (d = super->disks; d; d = d->next) {
+ 		if (d->index == -1)
+ 			spares++;
+ 		else {
+ 			mpb->disk[d->index] = d->disk;
+ 			num_disks++;
+ 		}
+ 	}
+ 	for (d = super->missing; d; d = d->next) {
+ 		mpb->disk[d->index] = d->disk;
+ 		num_disks++;
+ 	}
+ 	mpb->num_disks = num_disks;
+ 	mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
+
+ 	for (i = 0; i < mpb->num_raid_devs; i++) {
+ 		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
+ 		struct imsm_dev *dev2 = get_imsm_dev(super, i);
+ 		if (dev && dev2) {
+ 			imsm_copy_dev(dev, dev2);
+ 			mpb_size += sizeof_imsm_dev(dev, 0);
+ 		}
+ 		/* dev2 may be NULL (see the guard above); never hand a NULL
+ 		 * device to is_gen_migration()
+ 		 */
+ 		if (dev2 && is_gen_migration(dev2))
+ 			clear_migration_record = 0;
+ 	}
+ 	mpb_size += __le32_to_cpu(mpb->bbm_log_size);
+ 	mpb->mpb_size = __cpu_to_le32(mpb_size);
+
+ 	/* recalculate checksum */
+ 	sum = __gen_imsm_checksum(mpb);
+ 	mpb->check_sum = __cpu_to_le32(sum);
+
+ 	if (super->clean_migration_record_by_mdmon) {
+ 		clear_migration_record = 1;
+ 		super->clean_migration_record_by_mdmon = 0;
+ 	}
+ 	if (clear_migration_record)
+ 		memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SIZE);
+
+ 	/* write the mpb for disks that compose raid devices */
+ 	for (d = super->disks; d ; d = d->next) {
+ 		if (d->index < 0 || is_failed(&d->disk))
+ 			continue;
+
+ 		if (clear_migration_record) {
+ 			unsigned long long dsize;
+
+ 			/* get_dev_size() returns 0 on failure and leaves
+ 			 * dsize unset; skip the record write in that case
+ 			 * instead of seeking to an undefined offset
+ 			 */
+ 			if (get_dev_size(d->fd, NULL, &dsize) &&
+ 			    lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) {
+ 				if (write(d->fd, super->migr_rec_buf,
+ 					  MIGR_REC_BUF_SIZE) != MIGR_REC_BUF_SIZE)
+ 					perror("Write migr_rec failed");
+ 			}
+ 		}
+
+ 		if (store_imsm_mpb(d->fd, mpb))
+ 			fprintf(stderr,
+ 				"%s: failed for device %d:%d (fd: %d)%s\n",
+ 				__func__, d->major, d->minor,
+ 				d->fd, strerror(errno));
+
+ 		if (doclose) {
+ 			close(d->fd);
+ 			d->fd = -1;
+ 		}
+ 	}
+
+ 	if (spares)
+ 		return write_super_imsm_spares(super, doclose);
+
+ 	return 0;
+ }
+
+ /* Build an update_create_array metadata update for volume 'dev_idx' and
+  * append it to the update queue of 'st'.
+  *
+  * The update carries a copy of the imsm_dev followed by one disk_info
+  * (serial number) per map member. Always returns 0.
+  */
+ static int create_array(struct supertype *st, int dev_idx)
+ {
+ 	struct intel_super *super = st->sb;
+ 	struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
+ 	struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ 	struct imsm_update_create_array *update;
+ 	struct disk_info *serials;
+ 	size_t update_len;
+ 	int member;
+
+ 	/* the update embeds an imsm_dev; swap its nominal size for the real
+ 	 * size of this device and add room for the per-member disk_info
+ 	 */
+ 	update_len = sizeof(*update) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
+ 		     sizeof(*serials) * map->num_members;
+ 	update = xmalloc(update_len);
+ 	update->type = update_create_array;
+ 	update->dev_idx = dev_idx;
+ 	imsm_copy_dev(&update->dev, dev);
+
+ 	serials = get_disk_info(update);
+ 	for (member = 0; member < map->num_members; member++) {
+ 		int disk_idx = get_imsm_disk_idx(dev, member, MAP_X);
+ 		struct imsm_disk *disk = get_imsm_disk(super, disk_idx);
+
+ 		/* fall back to the list of missing disks */
+ 		if (!disk)
+ 			disk = get_imsm_missing(super, disk_idx);
+ 		serialcpy(serials[member].serial, disk->serial);
+ 	}
+
+ 	append_metadata_update(st, update, update_len);
+
+ 	return 0;
+ }
+
+ /* Queue an update_add_remove_disk metadata update when there are pending
+  * entries on super->disk_mgmt_list; otherwise do nothing.
+  * Always returns 0.
+  */
+ static int mgmt_disk(struct supertype *st)
+ {
+ 	struct intel_super *super = st->sb;
+
+ 	if (super->disk_mgmt_list) {
+ 		struct imsm_update_add_remove_disk *update;
+ 		size_t update_len = sizeof(*update);
+
+ 		update = xmalloc(update_len);
+ 		update->type = update_add_remove_disk;
+ 		append_metadata_update(st, update, update_len);
+ 	}
+
+ 	return 0;
+ }
+
+ /* Finish a create/add operation.
+  *
+  * super->current_vol is reset to -1 so that 'st' refers to the container
+  * again. With st->update_tail set the change is queued as a metadata
+  * update (disk management when no volume was selected, array creation
+  * otherwise). Without it, Kill() is called on every disk's devname and the
+  * metadata is then written with the fds being closed.
+  */
+ static int write_init_super_imsm(struct supertype *st)
+ {
+ 	struct intel_super *super = st->sb;
+ 	const int finished_vol = super->current_vol;
+ 	struct dl *disk;
+
+ 	/* we are done with current_vol reset it to point st at the container */
+ 	super->current_vol = -1;
+
+ 	if (st->update_tail) {
+ 		/* queue the recently created array / added disk as a
+ 		 * metadata update. In the mgmt (add/remove) disk case we are
+ 		 * running in mdmon context, so fd's are not closed.
+ 		 */
+ 		if (finished_vol < 0)
+ 			return mgmt_disk(st);
+ 		return create_array(st, finished_vol);
+ 	}
+
+ 	for (disk = super->disks; disk; disk = disk->next)
+ 		Kill(disk->devname, NULL, 0, -1, 1);
+
+ 	return write_super_imsm(st, 1);
+ }
+#endif
+
+ /* Write the container's mpb to 'fd'.
+  * Returns 1 when no superblock/anchor is loaded or when built with
+  * MDASSEMBLE; otherwise the result of store_imsm_mpb().
+  */
+ static int store_super_imsm(struct supertype *st, int fd)
+ {
+ 	struct intel_super *super = st->sb;
+ 	struct imsm_super *mpb;
+
+ 	if (!super)
+ 		return 1;
+
+ 	mpb = super->anchor;
+ 	if (!mpb)
+ 		return 1;
+
+ #ifndef MDASSEMBLE
+ 	return store_imsm_mpb(fd, mpb);
+ #else
+ 	return 1;
+ #endif
+ }
+
+ /* Return the bbm_log_size field of the mpb, converted from little-endian. */
+ static int imsm_bbm_log_size(struct imsm_super *mpb)
+ {
+ 	__u32 log_size = __le32_to_cpu(mpb->bbm_log_size);
+
+ 	return log_size;
+ }
+
+#ifndef MDASSEMBLE
+ /* Check whether 'dev' can be used to build an IMSM container.
+  *
+  * Only LEVEL_CONTAINER requests are handled (anything else returns 0).
+  * With no device given the geometry is accepted as-is. Otherwise the
+  * device is opened exclusively, its size is read, and the platform
+  * capabilities are consulted: the number of raid disks must not exceed
+  * orom->tds and, without IMSM_OROM_ATTR_2TB_DISK, the device must not need
+  * more than 32 bits worth of 512-byte sectors.
+  *
+  * On success *freesize receives avail_size_imsm() for the device and 1 is
+  * returned; 0 is returned on any failure.
+  */
+ static int validate_geometry_imsm_container(struct supertype *st, int level,
+ 					    int layout, int raiddisks, int chunk,
+ 					    unsigned long long size,
+ 					    unsigned long long data_offset,
+ 					    char *dev,
+ 					    unsigned long long *freesize,
+ 					    int verbose)
+ {
+ 	int fd;
+ 	unsigned long long ldsize;
+ 	struct intel_super *super=NULL;
+ 	int rv = 0;
+
+ 	if (level != LEVEL_CONTAINER)
+ 		return 0;
+ 	if (!dev)
+ 		return 1;
+
+ 	fd = open(dev, O_RDONLY|O_EXCL, 0);
+ 	if (fd < 0) {
+ 		if (verbose > 0)
+ 			pr_err("imsm: Cannot open %s: %s\n",
+ 			       dev, strerror(errno));
+ 		return 0;
+ 	}
+ 	if (!get_dev_size(fd, dev, &ldsize)) {
+ 		close(fd);
+ 		return 0;
+ 	}
+
+ 	/* capabilities retrieve could be possible
+ 	 * note that there is no fd for the disks in array.
+ 	 */
+ 	super = alloc_super();
+ 	if (!super) {
+ 		/* other callers treat a NULL alloc_super() result as
+ 		 * possible, so do not dereference it below
+ 		 */
+ 		if (verbose > 0)
+ 			pr_err("imsm: could not allocate superblock\n");
+ 		close(fd);
+ 		return 0;
+ 	}
+ 	rv = find_intel_hba_capability(fd, super, verbose > 0 ? dev : NULL);
+ 	if (rv != 0) {
+ #if DEBUG
+ 		char str[256];
+ 		fd2devname(fd, str);
+ 		dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
+ 			fd, str, super->orom, rv, raiddisks);
+ #endif
+ 		/* no orom/efi or non-intel hba of the disk */
+ 		close(fd);
+ 		free_imsm(super);
+ 		return 0;
+ 	}
+ 	close(fd);
+ 	if (super->orom) {
+ 		if (raiddisks > super->orom->tds) {
+ 			if (verbose)
+ 				pr_err("%d exceeds maximum number of"
+ 				       " platform supported disks: %d\n",
+ 				       raiddisks, super->orom->tds);
+ 			free_imsm(super);
+ 			return 0;
+ 		}
+ 		/* ldsize >> 9 is the size in 512-byte sectors */
+ 		if ((super->orom->attr & IMSM_OROM_ATTR_2TB_DISK) == 0 &&
+ 		    (ldsize >> 9) >> 32 > 0) {
+ 			if (verbose)
+ 				pr_err("%s exceeds maximum platform supported size\n", dev);
+ 			free_imsm(super);
+ 			return 0;
+ 		}
+ 	}
+
+ 	*freesize = avail_size_imsm(st, ldsize >> 9, data_offset);
+ 	free_imsm(super);
+
+ 	return 1;
+ }
+
+ /* Starting from extent e[*idx], merge every following extent whose start
+  * lies inside the growing [start, end] range and return the merged length.
+  *
+  * *idx is advanced past the first extent; when an extent starting beyond
+  * the merged range is met, *idx is set to that extent and scanning stops.
+  * Returns 0 when the first extent is empty or when a zero-sized extent is
+  * found inside the merged range.
+  */
+ static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
+ {
+ 	const unsigned long long base = e[*idx].start;
+ 	unsigned long long limit = base + e[*idx].size;
+ 	int next;
+
+ 	if (limit == base)
+ 		return 0;
+
+ 	*idx += 1;
+ 	next = *idx;
+ 	while (next < num_extents) {
+ 		const struct extent *cur = &e[next];
+
+ 		if (cur->start > limit) {
+ 			/* first extent outside the merged range */
+ 			*idx = next;
+ 			break;
+ 		}
+ 		if (cur->start >= base) {
+ 			/* overlapping or adjacent: extend the range */
+ 			if (cur->size == 0)
+ 				return 0;
+ 			if (cur->start + cur->size > limit)
+ 				limit = cur->start + cur->size;
+ 		}
+ 		next++;
+ 	}
+
+ 	return limit - base;
+ }
+
+ /* Combine the extents of all container disks and find the largest gap.
+ *
+ * 'sum_extents' is the total number of extents over all disks that have an
+ * extent list. On success super->create_offset is set to the start of the
+ * chosen gap (plus the reserved area, when the gap is not the first one)
+ * and the usable size of the gap is returned. Returns 0 when no usable gap
+ * exists.
+ */
+ static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
+ {
+ /* build a composite disk with all known extents and generate a new
+ * 'maxsize' given the "all disks in an array must share a common start
+ * offset" constraint
+ */
+ struct extent *e = xcalloc(sum_extents, sizeof(*e));
+ struct dl *dl;
+ int i, j;
+ int start_extent;
+ unsigned long long pos;
+ unsigned long long start = 0;
+ unsigned long long maxsize;
+ unsigned long reserve;
+
+ /* coalesce and sort all extents. also, check to see if we need to
+ * reserve space between member arrays
+ */
+ j = 0;
+ for (dl = super->disks; dl; dl = dl->next) {
+ if (!dl->e)
+ continue;
+ for (i = 0; i < dl->extent_cnt; i++)
+ e[j++] = dl->e[i];
+ }
+ qsort(e, sum_extents, sizeof(*e), cmp_extent);
+
+ /* merge extents */
+ /* find_size() advances 'i'; merged results are compacted to the front
+ * of the array and the loop stops at the first zero-sized result
+ */
+ i = 0;
+ j = 0;
+ while (i < sum_extents) {
+ e[j].start = e[i].start;
+ e[j].size = find_size(e, &i, sum_extents);
+ j++;
+ if (e[j-1].size == 0)
+ break;
+ }
+
+ /* walk the merged extents and remember the largest gap preceding one of
+ * them; a zero-sized extent ends the walk
+ * NOTE(review): this relies on a zero-sized extent being present in the
+ * merged list - confirm against how dl->e is built
+ */
+ pos = 0;
+ maxsize = 0;
+ start_extent = 0;
+ i = 0;
+ do {
+ unsigned long long esize;
+
+ esize = e[i].start - pos;
+ if (esize >= maxsize) {
+ maxsize = esize;
+ start = pos;
+ start_extent = i;
+ }
+ pos = e[i].start + e[i].size;
+ i++;
+ } while (e[i-1].size);
+ free(e);
+
+ if (maxsize == 0)
+ return 0;
+
+ /* FIXME assumes volume at offset 0 is the first volume in a
+ * container
+ */
+ if (start_extent > 0)
+ reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
+ else
+ reserve = 0;
+
+ if (maxsize < reserve)
+ return 0;
+
+ /* create_offset is first set to all-ones so that the comparison below
+ * checks 'start + reserve' against the largest value the field can hold
+ * NOTE(review): if create_offset is as wide as unsigned long long this
+ * test can never be true - confirm the field's type
+ */
+ super->create_offset = ~((unsigned long long) 0);
+ if (start + reserve > super->create_offset)
+ return 0; /* start overflows create_offset */
+ super->create_offset = start + reserve;
+
+ return maxsize - reserve;
+ }
+
+ /* Tell whether 'level' with 'raiddisks' members can be created.
+  *
+  * Negative levels and levels 4 and 6 are never supported. Without option
+  * ROM information every other level is accepted; with it, the level must be
+  * advertised by the ROM and the disk count must fit that level.
+  *
+  * Returns non-zero when supported, 0 otherwise.
+  */
+ static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
+ {
+ 	if (level < 0 || level == 6 || level == 4)
+ 		return 0;
+
+ 	/* not on an Intel RAID platform so anything goes */
+ 	if (!orom)
+ 		return 1;
+
+ 	/* if we have an orom prevent invalid raid levels */
+ 	switch (level) {
+ 	case 0:
+ 		return imsm_orom_has_raid0(orom);
+ 	case 1:
+ 		if (raiddisks > 2)
+ 			return imsm_orom_has_raid1e(orom);
+ 		return imsm_orom_has_raid1(orom) && raiddisks == 2;
+ 	case 5:
+ 		return imsm_orom_has_raid5(orom) && raiddisks > 2;
+ 	case 10:
+ 		return imsm_orom_has_raid10(orom) && raiddisks == 4;
+ 	default:
+ 		return 0;
+ 	}
+ }
+
+ /* Count the active volumes that live in containers of metadata format
+ * 'name' and whose member disk is attached to 'hba'.
+ *
+ * For each matching container, when *devlist is non-NULL and the container
+ * holds fewer than 'dpa' active volumes, the container device itself is
+ * prepended to *devlist with 'found' set to its active volume count.
+ *
+ * Returns the total number of active volumes found.
+ */
+ static int
+ active_arrays_by_format(char *name, char* hba, struct md_list **devlist,
+ int dpa, int verbose)
+ {
+ struct mdstat_ent *mdstat = mdstat_read(0, 0);
+ struct mdstat_ent *memb = NULL;
+ int count = 0;
+ int num = 0;
+ struct md_list *dv = NULL;
+ int found;
+
+ for (memb = mdstat ; memb ; memb = memb->next) {
+ /* keep only "external:<name>" entries that are containers (not
+ * subarrays) and that list members
+ */
+ if (memb->metadata_version &&
+ (strncmp(memb->metadata_version, "external:", 9) == 0) &&
+ (strcmp(&memb->metadata_version[9], name) == 0) &&
+ !is_subarray(memb->metadata_version+9) &&
+ memb->members) {
+ struct dev_member *dev = memb->members;
+ int fd = -1;
+ /* open the first member device that can be opened */
+ while(dev && (fd < 0)) {
+ char *path = xmalloc(strlen(dev->name) + strlen("/dev/") + 1);
+ num = sprintf(path, "%s%s", "/dev/", dev->name);
+ if (num > 0)
+ fd = open(path, O_RDONLY, 0);
+ if ((num <= 0) || (fd < 0)) {
+ pr_vrb(": Cannot open %s: %s\n",
+ dev->name, strerror(errno));
+ }
+ free(path);
+ dev = dev->next;
+ }
+ found = 0;
+ if ((fd >= 0) && disk_attached_to_hba(fd, hba)) {
+ struct mdstat_ent *vol;
+ /* count active arrays that are members of this container */
+ for (vol = mdstat ; vol ; vol = vol->next) {
+ if ((vol->active > 0) &&
+ vol->metadata_version &&
+ is_container_member(vol, memb->dev)) {
+ found++;
+ count++;
+ }
+ }
+ if (*devlist && (found < dpa)) {
+ dv = xcalloc(1, sizeof(*dv));
+ dv->devname = xmalloc(strlen(memb->dev) + strlen("/dev/") + 1);
+ sprintf(dv->devname, "%s%s", "/dev/", memb->dev);
+ dv->found = found;
+ dv->used = 0;
+ dv->next = *devlist;
+ *devlist = dv;
+ }
+ }
+ if (fd >= 0)
+ close(fd);
+ }
+ }
+ free_mdstat(mdstat);
+ return count;
+ }
+
+ #ifdef DEBUG_LOOP
+ /* Debug helper: build a device list naming /dev/loop0 .. /dev/loop11.
+  * Entries are prepended, so the list is returned in descending order.
+  */
+ static struct md_list*
+ get_loop_devices(void)
+ {
+ 	struct md_list *head = NULL;
+ 	int n = 0;
+
+ 	while (n < 12) {
+ 		struct md_list *node = xcalloc(1, sizeof(*node));
+
+ 		node->devname = xmalloc(40);
+ 		sprintf(node->devname, "/dev/loop%d", n);
+ 		node->next = head;
+ 		head = node;
+ 		n++;
+ 	}
+
+ 	return head;
+ }
+ #endif
+
+ /* Build a list of the block devices attached to the HBA at 'hba_path'.
+  *
+  * /sys/dev/block is scanned for "major:minor" entries; each entry whose
+  * device path is attached to the HBA and that can be opened is added to
+  * the head of the returned list with its device name.
+  *
+  * Returns the list (caller frees each devname and node), or NULL when no
+  * device qualifies or /sys/dev/block cannot be opened.
+  */
+ static struct md_list*
+ get_devices(const char *hba_path)
+ {
+ 	struct md_list *devlist = NULL;
+ 	struct dirent *ent;
+ 	DIR *dir;
+
+ #if DEBUG_LOOP
+ 	devlist = get_loop_devices();
+ 	return devlist;
+ #endif
+ 	/* scroll through /sys/dev/block looking for devices attached to
+ 	 * this hba
+ 	 */
+ 	dir = opendir("/sys/dev/block");
+ 	if (!dir)
+ 		return NULL;	/* nothing to scan, and nothing to close */
+
+ 	while ((ent = readdir(dir)) != NULL) {
+ 		struct md_list *dv;
+ 		int fd;
+ 		char buf[1024];
+ 		int major, minor;
+ 		char *path;
+ 		int attached;
+
+ 		if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
+ 			continue;
+ 		path = devt_to_devpath(makedev(major, minor));
+ 		if (!path)
+ 			continue;
+ 		attached = path_attached_to_hba(path, hba_path);
+ 		free(path);
+ 		if (!attached)
+ 			continue;
+
+ 		fd = dev_open(ent->d_name, O_RDONLY);
+ 		if (fd < 0) {
+ 			pr_err("cannot open device: %s\n",
+ 			       ent->d_name);
+ 			continue;
+ 		}
+ 		/* NOTE(review): the result of fd2devname() is not checked,
+ 		 * as before - confirm it always fills 'buf'.
+ 		 */
+ 		fd2devname(fd, buf);
+ 		close(fd);
+
+ 		dv = xcalloc(1, sizeof(*dv));
+ 		dv->devname = xstrdup(buf);
+ 		dv->next = devlist;
+ 		devlist = dv;
+ 	}
+ 	closedir(dir);
+
+ 	return devlist;
+ }
+
+ /* Examine the devices in 'devlist', group those whose IMSM metadata matches
+ * into one set, and count the volumes that set would provide.
+ *
+ * Meaning of tmpdev->used as written by this function:
+ *   1 - selected for the set examined in this call
+ *   2 - unusable (cannot open, not a block device, no/foreign metadata)
+ *   3 - floating spare (all-zero uuid)
+ *   4 - was selected (1) in this call; set on exit
+ * Devices with used > 1 are skipped on entry.
+ *
+ * *found is set to 1 when at least one device was selected. 'homehost' is
+ * not referenced in this function.
+ *
+ * Returns the number of volumes counted for the selected set, reduced by
+ * the 'found' value recorded on selected devices.
+ */
+ static int
+ count_volumes_list(struct md_list *devlist, char *homehost,
+ int verbose, int *found)
+ {
+ struct md_list *tmpdev;
+ int count = 0;
+ struct supertype *st = NULL;
+
+ /* first walk the list of devices to find a consistent set
+ * that match the criterea, if that is possible.
+ * We flag the ones we like with 'used'.
+ */
+ *found = 0;
+ st = match_metadata_desc_imsm("imsm");
+ if (st == NULL) {
+ pr_vrb(": cannot allocate memory for imsm supertype\n");
+ return 0;
+ }
+
+ for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
+ char *devname = tmpdev->devname;
+ struct stat stb;
+ struct supertype *tst;
+ int dfd;
+ if (tmpdev->used > 1)
+ continue;
+ tst = dup_super(st);
+ if (tst == NULL) {
+ pr_vrb(": cannot allocate memory for imsm supertype\n");
+ goto err_1;
+ }
+ tmpdev->container = 0;
+ dfd = dev_open(devname, O_RDONLY|O_EXCL);
+ if (dfd < 0) {
+ dprintf(": cannot open device %s: %s\n",
+ devname, strerror(errno));
+ tmpdev->used = 2;
+ } else if (fstat(dfd, &stb)< 0) {
+ /* Impossible! */
+ dprintf(": fstat failed for %s: %s\n",
+ devname, strerror(errno));
+ tmpdev->used = 2;
+ } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+ dprintf(": %s is not a block device.\n",
+ devname);
+ tmpdev->used = 2;
+ } else if (must_be_container(dfd)) {
+ /* the device is itself a container: load it as such */
+ struct supertype *cst;
+ cst = super_by_fd(dfd, NULL);
+ if (cst == NULL) {
+ dprintf(": cannot recognize container type %s\n",
+ devname);
+ tmpdev->used = 2;
+ /* NOTE(review): this compares tst (a copy of st) with st rather
+ * than cst with st - confirm which was intended
+ */
+ } else if (tst->ss != st->ss) {
+ dprintf(": non-imsm container - ignore it: %s\n",
+ devname);
+ tmpdev->used = 2;
+ } else if (!tst->ss->load_container ||
+ tst->ss->load_container(tst, dfd, NULL))
+ tmpdev->used = 2;
+ else {
+ tmpdev->container = 1;
+ }
+ if (cst)
+ cst->ss->free_super(cst);
+ } else {
+ /* plain block device: try to load a superblock from it */
+ tmpdev->st_rdev = stb.st_rdev;
+ if (tst->ss->load_super(tst,dfd, NULL)) {
+ dprintf(": no RAID superblock on %s\n",
+ devname);
+ tmpdev->used = 2;
+ } else if (tst->ss->compare_super == NULL) {
+ dprintf(": Cannot assemble %s metadata on %s\n",
+ tst->ss->name, devname);
+ tmpdev->used = 2;
+ }
+ }
+ if (dfd >= 0)
+ close(dfd);
+ if (tmpdev->used == 2 || tmpdev->used == 4) {
+ /* Ignore unrecognised devices during auto-assembly */
+ goto loop;
+ }
+ else {
+ struct mdinfo info;
+ tst->ss->getinfo_super(tst, &info, NULL);
+
+ if (st->minor_version == -1)
+ st->minor_version = tst->minor_version;
+
+ if (memcmp(info.uuid, uuid_zero,
+ sizeof(int[4])) == 0) {
+ /* this is a floating spare. It cannot define
+ * an array unless there are no more arrays of
+ * this type to be found. It can be included
+ * in an array of this type though.
+ */
+ tmpdev->used = 3;
+ goto loop;
+ }
+
+ if (st->ss != tst->ss ||
+ st->minor_version != tst->minor_version ||
+ st->ss->compare_super(st, tst) != 0) {
+ /* Some mismatch. If exactly one array matches this host,
+ * we can resolve on that one.
+ * Or, if we are auto assembling, we just ignore the second
+ * for now.
+ */
+ dprintf(": superblock on %s doesn't match others - assembly aborted\n",
+ devname);
+ goto loop;
+ }
+ tmpdev->used = 1;
+ *found = 1;
+ dprintf("found: devname: %s\n", devname);
+ }
+ loop:
+ /* release the per-device superblock before moving on
+ * NOTE(review): only free_super() is called on tst; whether the
+ * supertype allocated by dup_super() is released cannot be seen here
+ */
+ if (tst)
+ tst->ss->free_super(tst);
+ }
+ if (*found != 0) {
+ /* load the selected set as a whole and count its volumes */
+ int err;
+ if ((err = load_super_imsm_all(st, -1, &st->sb, NULL, devlist, 0)) == 0) {
+ struct mdinfo *iter, *head = st->ss->container_content(st, NULL);
+ for (iter = head; iter; iter = iter->next) {
+ dprintf("content->text_version: %s vol\n",
+ iter->text_version);
+ if (iter->array.state & (1<<MD_SB_BLOCK_VOLUME)) {
+ /* do not assemble arrays with unsupported
+ configurations */
+ dprintf(": Cannot activate member %s.\n",
+ iter->text_version);
+ } else
+ count++;
+ }
+ sysfs_free(head);
+
+ } else {
+ dprintf(" no valid super block on device list: err: %d %p\n",
+ err, st->sb);
+ }
+ } else {
+ dprintf(" no more devices to examin\n");
+ }
+
+ /* subtract the 'found' value recorded on each selected device (never
+ * going below zero) and retire the selected devices by marking them 4
+ */
+ for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
+ if ((tmpdev->used == 1) && (tmpdev->found)) {
+ if (count) {
+ if (count < tmpdev->found)
+ count = 0;
+ else
+ count -= tmpdev->found;
+ }
+ }
+ if (tmpdev->used == 1)
+ tmpdev->used = 4;
+ }
+ err_1:
+ if (st)
+ st->ss->free_super(st);
+ return count;
+ }
+
+ /* Count the IMSM volumes reachable through the HBA at 'hba': the active
+  * arrays reported by active_arrays_by_format() plus those found by calling
+  * count_volumes_list() repeatedly until it selects no more devices.
+  *
+  * Returns 0 when the HBA has no devices.
+  */
+ static int
+ count_volumes(char *hba, int dpa, int verbose)
+ {
+ 	struct md_list *devlist = get_devices(hba);
+ 	struct md_list *node;
+ 	int total;
+ 	int found;
+
+ 	/* if no intel devices return zero volumes */
+ 	if (!devlist)
+ 		return 0;
+
+ 	total = active_arrays_by_format("imsm", hba, &devlist, dpa, verbose);
+ 	dprintf(" path: %s active arrays: %d\n", hba, total);
+ 	if (!devlist)
+ 		return 0;
+
+ 	for (;;) {
+ 		found = 0;
+ 		total += count_volumes_list(devlist, NULL, verbose, &found);
+ 		dprintf("found %d count: %d\n", found, total);
+ 		if (!found)
+ 			break;
+ 	}
+
+ 	dprintf("path: %s total number of volumes: %d\n", hba, total);
+
+ 	/* release the device list */
+ 	while ((node = devlist) != NULL) {
+ 		devlist = node->next;
+ 		free(node->devname);
+ 		free(node);
+ 	}
+
+ 	return total;
+ }
+
+/* Default chunk size for a new volume: up to 512 if the platform supports
+ * it, otherwise the platform maximum; 128 if no platform was detected
+ * (orom == NULL).
+ */
+static int imsm_default_chunk(const struct imsm_orom *orom)
+{
+	int shift = 0;
+	int chunk;
+
+	if (orom)
+		shift = fls(orom->sss);
+	shift = max(7, shift);
+
+	chunk = 1 << shift;
+	return min(512, chunk);
+}
+
+/* Check the requested geometry against the platform (option-rom) limits
+ * and the imsm metadata defaults.
+ *
+ * A chunk of 0 or UnSet is replaced in *chunk by imsm_default_chunk().
+ * Returns 1 when every check passes and 0 otherwise; the reason is
+ * reported through pr_vrb().
+ */
+static int
+validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
+			    int raiddisks, int *chunk, unsigned long long size, int verbose)
+{
+	/* disks per array */
+	if (super->orom && super->orom->dpa < raiddisks) {
+		pr_vrb(": platform supports a maximum of %d disks per array\n",
+		       super->orom->dpa);
+		return 0;
+	}
+
+	/* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
+	if (!is_raid_level_supported(super->orom, level, raiddisks)) {
+		pr_vrb(": platform does not support raid%d with %d disk%s\n",
+		       level, raiddisks, raiddisks > 1 ? "s" : "");
+		return 0;
+	}
+
+	/* fill in the default chunk before it is checked */
+	if (*chunk == UnSet || *chunk == 0)
+		*chunk = imsm_default_chunk(super->orom);
+
+	if (super->orom && !imsm_orom_has_chunk(super->orom, *chunk)) {
+		pr_vrb(": platform does not support a chunk size of: %d\n", *chunk);
+		return 0;
+	}
+
+	/* each raid level has exactly one accepted layout */
+	if (imsm_level_to_layout(level) != layout) {
+		switch (level) {
+		case 5:
+			pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
+			break;
+		case 10:
+			pr_vrb(": imsm raid 10 only supports the n2 layout\n");
+			break;
+		default:
+			pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
+			       layout, level);
+			break;
+		}
+		return 0;
+	}
+
+	/* without the 2TB attribute the array size must fit in 32 bits */
+	if (super->orom && (super->orom->attr & IMSM_OROM_ATTR_2TB) == 0) {
+		if ((calc_array_size(level, raiddisks, layout, *chunk, size) >> 32) > 0) {
+			pr_vrb(": platform does not support a volume size over 2TB\n");
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
+ * FIX ME add ahci details
+ *
+ * Check whether a volume with the requested geometry can be created in
+ * the container already loaded in st->sb.  Returns 1 when the request is
+ * acceptable and 0 otherwise (including when st->sb is NULL).
+ *
+ * With dev == NULL only a general test is done: at least 'raiddisks'
+ * disks must pass the free-extent scan below.
+ * With dev != NULL the device must be a block device listed in
+ * super->disks; its extent list is stored in dl->e and *freesize is set
+ * to the value returned by merge_extents().
+ * *chunk may be updated by validate_geometry_imsm_orom(); 'data_offset'
+ * is not used in this function.
+ */
+static int validate_geometry_imsm_volume(struct supertype *st, int level,
+ int layout, int raiddisks, int *chunk,
+ unsigned long long size,
+ unsigned long long data_offset,
+ char *dev,
+ unsigned long long *freesize,
+ int verbose)
+{
+ struct stat stb;
+ struct intel_super *super = st->sb;
+ struct imsm_super *mpb;
+ struct dl *dl;
+ unsigned long long pos = 0;
+ unsigned long long maxsize;
+ struct extent *e;
+ int i;
+
+ /* We must have the container info already read in. */
+ if (!super)
+ return 0;
+
+ mpb = super->anchor;
+
+ if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, size, verbose)) {
+ pr_err("RAID gemetry validation failed. "
+ "Cannot proceed with the action(s).\n");
+ return 0;
+ }
+ if (!dev) {
+ /* General test: make sure there is space for
+ * 'raiddisks' device extents of size 'size' at a given
+ * offset
+ */
+ unsigned long long minsize = size;
+ unsigned long long start_offset = MaxSector;
+ int dcnt = 0;
+ if (minsize == 0)
+ minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; /* no size requested: use this minimum */
+ for (dl = super->disks; dl ; dl = dl->next) {
+ int found = 0;
+
+ pos = 0;
+ i = 0;
+ e = get_extents(super, dl);
+ if (!e) continue; /* no extent list for this disk */
+ do {
+ unsigned long long esize;
+ esize = e[i].start - pos; /* gap in front of extent i */
+ if (esize >= minsize)
+ found = 1;
+ if (found && start_offset == MaxSector) {
+ start_offset = pos; /* first usable gap fixes the common offset */
+ break;
+ } else if (found && pos != start_offset) {
+ found = 0; /* usable gap, but not at the common offset */
+ break;
+ }
+ pos = e[i].start + e[i].size;
+ i++;
+ } while (e[i-1].size); /* a zero-size entry ends the list */
+ if (found)
+ dcnt++;
+ free(e);
+ }
+ if (dcnt < raiddisks) {
+ if (verbose)
+ pr_err("imsm: Not enough "
+ "devices with space for this array "
+ "(%d < %d)\n",
+ dcnt, raiddisks);
+ return 0;
+ }
+ return 1;
+ }
+
+ /* This device must be a member of the set */
+ if (stat(dev, &stb) < 0)
+ return 0;
+ if ((S_IFMT & stb.st_mode) != S_IFBLK) /* only block devices qualify */
+ return 0;
+ for (dl = super->disks ; dl ; dl = dl->next) {
+ if (dl->major == (int)major(stb.st_rdev) &&
+ dl->minor == (int)minor(stb.st_rdev))
+ break;
+ }
+ if (!dl) {
+ if (verbose)
+ pr_err("%s is not in the "
+ "same imsm set\n", dev);
+ return 0;
+ } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) {
+ /* If a volume is present then the current creation attempt
+ * cannot incorporate new spares because the orom may not
+ * understand this configuration (all member disks must be
+ * members of each array in the container).
+ */
+ pr_err("%s is a spare and a volume"
+ " is already defined for this container\n", dev);
+ pr_err("The option-rom requires all member"
+ " disks to be a member of all volumes\n");
+ return 0;
+ } else if (super->orom && mpb->num_raid_devs > 0 &&
+ mpb->num_disks != raiddisks) {
+ pr_err("The option-rom requires all member"
+ " disks to be a member of all volumes\n");
+ return 0;
+ }
+
+ /* retrieve the largest free space block
+ * ('pos' is still 0 here: only the !dev branch above changes it,
+ * and that branch always returns)
+ */
+ e = get_extents(super, dl);
+ maxsize = 0;
+ i = 0;
+ if (e) {
+ do {
+ unsigned long long esize;
+
+ esize = e[i].start - pos;
+ if (esize >= maxsize)
+ maxsize = esize;
+ pos = e[i].start + e[i].size;
+ i++;
+ } while (e[i-1].size);
+ dl->e = e; /* NOTE(review): presumably consumed by merge_extents() below - confirm */
+ dl->extent_cnt = i;
+ } else {
+ if (verbose)
+ pr_err("unable to determine free space for: %s\n",
+ dev);
+ return 0;
+ }
+ if (maxsize < size) {
+ if (verbose)
+ pr_err("%s not enough space (%llu < %llu)\n",
+ dev, maxsize, size);
+ return 0;
+ }
+
+ /* count total number of extents for merge */
+ i = 0;
+ for (dl = super->disks; dl; dl = dl->next)
+ if (dl->e)
+ i += dl->extent_cnt;
+
+ maxsize = merge_extents(super, i);
+
+ if (!check_env("IMSM_NO_PLATFORM") && /* NOTE(review): presumably tests an environment variable - confirm */
+ mpb->num_raid_devs > 0 && size && size != maxsize) {
+ pr_err("attempting to create a second "
+ "volume with size less then remaining space. "
+ "Aborting...\n");
+ return 0;
+ }
+
+ if (maxsize < size || maxsize == 0) {
+ if (verbose) {
+ if (maxsize == 0)
+ pr_err("no free space"
+ " left on device. Aborting...\n");
+ else
+ pr_err("not enough space"
+ " to create volume of given size"
+ " (%llu < %llu). Aborting...\n",
+ maxsize, size);
+ }
+ return 0;
+ }
+
+ *freesize = maxsize; /* NOTE(review): freesize is dereferenced without a NULL check */
+
+ if (super->orom) {
+ int count = count_volumes(super->hba->path,
+ super->orom->dpa, verbose);
+ if (super->orom->vphba <= count) { /* existing volumes already reach the platform limit */
+ pr_vrb(": platform does not support more than %d raid volumes.\n",
+ super->orom->vphba);
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static int imsm_get_free_size(struct supertype *st, int raiddisks,
+ unsigned long long size, int chunk,
+ unsigned long long *freesize)
+{
+ struct intel_super *super = st->sb;
+ struct imsm_super *mpb = super->anchor;
+ struct dl *dl;
+ int i;
+ int extent_cnt;
+ struct extent *e;
+ unsigned long long maxsize;
+ unsigned long long minsize;
+ int cnt;
+ int used;
+
+ /* find the largest common start free region of the possible disks
+ *
+ * Returns 1 and stores the usable size in *freesize on success,
+ * 0 (after printing an error) otherwise.  When 'size' is 0 the
+ * value from merge_extents() is used, rounded down to a multiple
+ * of 2 * chunk when chunk is non-zero.
+ * Side effects: every dl->raiddisk is reset to -1 first, extent
+ * lists are stored in dl->e / dl->extent_cnt, and on success the
+ * disks that have dl->e set are numbered consecutively in
+ * dl->raiddisk.
+ */
+ used = 0;
+ extent_cnt = 0;
+ cnt = 0;
+ for (dl = super->disks; dl; dl = dl->next) {
+ dl->raiddisk = -1;
+
+ if (dl->index >= 0)
+ used++; /* disk has a non-negative index */
+
+ /* don't activate new spares if we are orom constrained
+ * and there is already a volume active in the container
+ */
+ if (super->orom && dl->index < 0 && mpb->num_raid_devs)
+ continue;
+
+ e = get_extents(super, dl);
+ if (!e)
+ continue;
+ for (i = 1; e[i-1].size; i++) /* count entries up to and including the zero-size one */
+ ;
+ dl->e = e;
+ dl->extent_cnt = i;
+ extent_cnt += i;
+ cnt++;
+ }
+
+ maxsize = merge_extents(super, extent_cnt);
+ minsize = size;
+ if (size == 0)
+ /* chunk is in K */
+ minsize = chunk * 2;
+
+ if (cnt < raiddisks ||
+ (super->orom && used && used != raiddisks) ||
+ maxsize < minsize ||
+ maxsize == 0) {
+ pr_err("not enough devices with space to create array.\n");
+ return 0; /* No enough free spaces large enough */
+ }
+
+ if (size == 0) {
+ size = maxsize;
+ if (chunk) {
+ size /= 2 * chunk; /* round down to a whole number of 2*chunk units */
+ size *= 2 * chunk;
+ }
+ maxsize = size; /* keeps the size != maxsize test below from firing for size 0 requests */
+ }
+ if (!check_env("IMSM_NO_PLATFORM") &&
+ mpb->num_raid_devs > 0 && size && size != maxsize) {
+ pr_err("attempting to create a second "
+ "volume with size less then remaining space. "
+ "Aborting...\n");
+ return 0;
+ }
+ cnt = 0;
+ for (dl = super->disks; dl; dl = dl->next)
+ if (dl->e)
+ dl->raiddisk = cnt++;
+
+ *freesize = size;
+
+ dprintf("imsm: imsm_get_free_size() returns : %llu\n", size);
+
+ return 1;
+}
+
+/* Ask imsm_get_free_size() whether the request fits and, when it does,
+ * number the disks that carry an extent list (dl->e) consecutively in
+ * dl->raiddisk.  Returns 1 on success, 0 when no space was found.
+ */
+static int reserve_space(struct supertype *st, int raiddisks,
+			 unsigned long long size, int chunk,
+			 unsigned long long *freesize)
+{
+	struct intel_super *super = st->sb;
+	struct dl *disk;
+	int next_slot = 0;
+
+	if (!imsm_get_free_size(st, raiddisks, size, chunk, freesize))
+		return 0;
+
+	for (disk = super->disks; disk; disk = disk->next) {
+		if (!disk->e)
+			continue;
+		disk->raiddisk = next_slot++;
+	}
+
+	return 1;
+}
+
+static int validate_geometry_imsm(struct supertype *st, int level, int layout,
+ int raiddisks, int *chunk, unsigned long long size,
+ unsigned long long data_offset,
+ char *dev, unsigned long long *freesize,
+ int verbose)
+{
+ int fd, cfd;
+ struct mdinfo *sra;
+ int is_member = 0;
+
+ /* load capability
+ * if given unused devices create a container
+ * if given devices in a container create a member volume
+ *
+ * Dispatch, in order:
+ * - level == LEVEL_CONTAINER: validate_geometry_imsm_container()
+ * - dev == NULL: with st->sb set, check the platform limits and
+ *   the volume count and, when freesize is given, return the
+ *   result of reserve_space(); otherwise return 1
+ * - dev given and st->sb set: validate_geometry_imsm_volume()
+ * - dev given, st->sb not set: the device has to be busy and held
+ *   by an 'imsm' container, which is then loaded into st->sb
+ * Returns 1 on success and 0 on failure; the last case returns -1
+ * when the container was loaded but the volume check failed.
+ */
+ if (level == LEVEL_CONTAINER) {
+ /* Must be a fresh device to add to a container */
+ return validate_geometry_imsm_container(st, level, layout,
+ raiddisks,
+ *chunk,
+ size, data_offset,
+ dev, freesize,
+ verbose);
+ }
+
+ if (!dev) {
+ if (st->sb) {
+ struct intel_super *super = st->sb;
+ if (!validate_geometry_imsm_orom(st->sb, level, layout,
+ raiddisks, chunk, size,
+ verbose))
+ return 0;
+ /* we are being asked to automatically layout a
+ * new volume based on the current contents of
+ * the container. If the parameters can be
+ * satisfied reserve_space will record the disks,
+ * start offset, and size of the volume to be
+ * created. add_to_super and getinfo_super
+ * detect when autolayout is in progress.
+ */
+ /* assuming that freesize is always given when array is
+ created */
+ if (super->orom && freesize) {
+ int count;
+ count = count_volumes(super->hba->path,
+ super->orom->dpa, verbose);
+ if (super->orom->vphba <= count) {
+ pr_vrb(": platform does not support more"
+ " than %d raid volumes.\n",
+ super->orom->vphba);
+ return 0;
+ }
+ }
+ if (freesize)
+ return reserve_space(st, raiddisks, size,
+ *chunk, freesize);
+ }
+ return 1;
+ }
+ if (st->sb) {
+ /* creating in a given container */
+ return validate_geometry_imsm_volume(st, level, layout,
+ raiddisks, chunk, size,
+ data_offset,
+ dev, freesize, verbose);
+ }
+
+ /* This device needs to be a device in an 'imsm' container */
+ fd = open(dev, O_RDONLY|O_EXCL, 0);
+ if (fd >= 0) { /* exclusive open worked, so nothing (no container) holds the device */
+ if (verbose)
+ pr_err("Cannot create this array on device %s\n",
+ dev);
+ close(fd);
+ return 0;
+ }
+ if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) { /* only EBUSY is followed by a plain re-open */
+ if (verbose)
+ pr_err("Cannot open %s: %s\n",
+ dev, strerror(errno));
+ return 0;
+ }
+ /* Well, it is in use by someone, maybe an 'imsm' container. */
+ cfd = open_container(fd);
+ close(fd);
+ if (cfd < 0) {
+ if (verbose)
+ pr_err("Cannot use %s: It is busy\n",
+ dev);
+ return 0;
+ }
+ sra = sysfs_read(cfd, NULL, GET_VERSION);
+ if (sra && sra->array.major_version == -1 &&
+ strcmp(sra->text_version, "imsm") == 0)
+ is_member = 1;
+ sysfs_free(sra);
+ if (is_member) {
+ /* This is a member of a imsm container. Load the container
+ * and try to create a volume
+ */
+ struct intel_super *super;
+
+ if (load_super_imsm_all(st, cfd, (void **) &super, NULL, NULL, 1) == 0) {
+ st->sb = super;
+ strcpy(st->container_devnm, fd2devnm(cfd)); /* NOTE(review): fd2devnm() result is used unchecked */
+ close(cfd);
+ return validate_geometry_imsm_volume(st, level, layout,
+ raiddisks, chunk,
+ size, data_offset, dev,
+ freesize, 1)
+ ? 1 : -1; /* -1, not 0, when the loaded container rejects the volume */
+ }
+ }
+
+ if (verbose)
+ pr_err("failed container membership check\n");
+
+ close(cfd);
+ return 0;
+}
+
+/* Fill in the imsm defaults for any of level, layout and chunk that the
+ * caller left unset.  Each pointer may be NULL, in which case that value
+ * is skipped.
+ *
+ *  - *level  == UnSet      -> LEVEL_CONTAINER
+ *  - *layout == UnSet      -> imsm_level_to_layout(*level) (needs 'level')
+ *  - *chunk  == UnSet or 0 -> imsm_default_chunk()
+ *
+ * st->sb may be NULL when no container has been loaded yet; in that case
+ * the chunk default is computed as if no platform was detected, instead
+ * of dereferencing a NULL superblock.
+ */
+static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
+{
+	struct intel_super *super = st->sb;
+
+	if (level && *level == UnSet)
+		*level = LEVEL_CONTAINER;
+
+	if (level && layout && *layout == UnSet)
+		*layout = imsm_level_to_layout(*level);
+
+	if (chunk && (*chunk == UnSet || *chunk == 0))
+		*chunk = imsm_default_chunk(super ? super->orom : NULL);
+}
+
+static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
+
+static int kill_subarray_imsm(struct supertype *st)
+{
+ /* remove the subarray currently referenced by ->current_vol
+ *
+ * Returns 0 on success and 2 when no subarray is selected
+ * (current_vol < 0) or when the removal is refused.  When
+ * st->update_tail is set the removal is only queued as an
+ * update_kill_array metadata update; otherwise the entry is
+ * unlinked from super->devlist and the remaining indexes are
+ * renumbered.  super->current_vol is set to -1 in every case
+ * that gets past the first check.
+ */
+ __u8 i;
+ struct intel_dev **dp;
+ struct intel_super *super = st->sb;
+ __u8 current_vol = super->current_vol; /* narrowed copy; the sign test below uses the original field */
+ struct imsm_super *mpb = super->anchor;
+
+ if (super->current_vol < 0)
+ return 2;
+ super->current_vol = -1; /* invalidate subarray cursor */
+
+ /* block deletions that would change the uuid of active subarrays
+ *
+ * FIXME when immutable ids are available, but note that we'll
+ * also need to fixup the invalidated/active subarray indexes in
+ * mdstat
+ */
+ for (i = 0; i < mpb->num_raid_devs; i++) {
+ char subarray[4]; /* room for "255" plus the terminator */
+
+ if (i < current_vol)
+ continue; /* only the target and higher-numbered subarrays are checked */
+ sprintf(subarray, "%u", i);
+ if (is_subarray_active(subarray, st->devnm)) {
+ pr_err("deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
+ current_vol, i);
+
+ return 2;
+ }
+ }
+
+ if (st->update_tail) {
+ struct imsm_update_kill_array *u = xmalloc(sizeof(*u));
+
+ u->type = update_kill_array;
+ u->dev_idx = current_vol;
+ append_metadata_update(st, u, sizeof(*u));
+
+ return 0;
+ }
+
+ for (dp = &super->devlist; *dp;)
+ if ((*dp)->index == current_vol) {
+ *dp = (*dp)->next; /* NOTE(review): the unlinked entry is not freed here - confirm ownership */
+ } else {
+ handle_missing(super, (*dp)->dev);
+ if ((*dp)->index > current_vol)
+ (*dp)->index--; /* close the gap left by the removed subarray */
+ dp = &(*dp)->next;
+ }
+
+ /* no more raid devices, all active components are now spares,
+ * but of course failed are still failed
+ */
+ if (--mpb->num_raid_devs == 0) {
+ struct dl *d;
+
+ for (d = super->disks; d; d = d->next)
+ if (d->index > -2)
+ mark_spare(d);
+ }
+
+ super->updates_pending++;
+
+ return 0;
+}
+
+/* Apply 'update' to the subarray whose index is given, as a decimal
+ * string, in 'subarray'.  Only the "name" update is supported: the volume
+ * is renamed to ident->name.
+ *
+ * Returns 0 on success and 2 on any failure: unsupported update, active
+ * subarray, name rejected by check_name(), or a subarray string that is
+ * not a valid index below mpb->num_raid_devs.
+ *
+ * When st->update_tail is set the rename is queued as an
+ * update_rename_array metadata update; otherwise the metadata is changed
+ * directly and super->updates_pending is bumped.
+ */
+static int update_subarray_imsm(struct supertype *st, char *subarray,
+				char *update, struct mddev_ident *ident)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb = super->anchor;
+
+	if (strcmp(update, "name") == 0) {
+		char *name = ident->name;
+		char *ep;
+		/* Keep the strtoul() result unsigned: stored in an int, "-1"
+		 * or a very large value turned negative and slipped past the
+		 * upper-bound check below.
+		 */
+		unsigned long vol;
+
+		if (is_subarray_active(subarray, st->devnm)) {
+			pr_err("Unable to update name of active subarray\n");
+			return 2;
+		}
+
+		if (!check_name(super, name, 0))
+			return 2;
+
+		vol = strtoul(subarray, &ep, 10);
+		if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
+			return 2;
+
+		if (st->update_tail) {
+			struct imsm_update_rename_array *u = xmalloc(sizeof(*u));
+
+			u->type = update_rename_array;
+			u->dev_idx = vol;
+			snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
+			append_metadata_update(st, u, sizeof(*u));
+		} else {
+			struct imsm_dev *dev;
+			int i;
+
+			dev = get_imsm_dev(super, vol);
+			snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
+			for (i = 0; i < mpb->num_raid_devs; i++) {
+				dev = get_imsm_dev(super, i);
+				handle_missing(super, dev);
+			}
+			super->updates_pending++;
+		}
+	} else
+		return 2;
+
+	return 0;
+}
+#endif /* MDASSEMBLE */
+
+/* Return 1 when 'dev' is non-NULL, has a migration in progress and that
+ * migration is of type MIGR_GEN_MIGR; return 0 otherwise.
+ */
+static int is_gen_migration(struct imsm_dev *dev)
+{
+	if (dev && dev->vol.migr_state && migr_type(dev) == MIGR_GEN_MIGR)
+		return 1;
+
+	return 0;
+}
+
+/* Return 1 when 'dev' has a MIGR_REBUILD migration in progress and its
+ * MAP_1 map is in the degraded state; return 0 otherwise.
+ */
+static int is_rebuilding(struct imsm_dev *dev)
+{
+	struct imsm_map *prev_map;
+
+	if (dev->vol.migr_state && migr_type(dev) == MIGR_REBUILD) {
+		prev_map = get_imsm_map(dev, MAP_1);
+		return prev_map->map_state == IMSM_T_STATE_DEGRADED ? 1 : 0;
+	}
+
+	return 0;
+}
+
+#ifndef MDASSEMBLE
+/* Return 1 when 'dev' has a MIGR_INIT migration in progress and its MAP_1
+ * map is still in the uninitialized state; return 0 otherwise.
+ */
+static int is_initializing(struct imsm_dev *dev)
+{
+	struct imsm_map *prev_map;
+
+	if (!dev->vol.migr_state || migr_type(dev) != MIGR_INIT)
+		return 0;
+
+	prev_map = get_imsm_map(dev, MAP_1);
+
+	return prev_map->map_state == IMSM_T_STATE_UNINITIALIZED ? 1 : 0;
+}
+#endif
+
+/* For a volume that is rebuilding (see is_rebuilding()), set the
+ * recovery_start of the single out-of-sync member in array->devs to the
+ * current migration checkpoint, expressed in blocks.
+ *
+ * Nothing is changed when the volume is not rebuilding, when more than
+ * one member has recovery_start == 0 (dual rebuild) or when no such
+ * member is found.
+ */
+static void update_recovery_start(struct intel_super *super,
+				  struct imsm_dev *dev,
+				  struct mdinfo *array)
+{
+	struct mdinfo *rebuild = NULL;
+	struct mdinfo *d;
+	__u32 units;
+
+	if (!is_rebuilding(dev))
+		return;
+
+	/* Find the rebuild target, but punt on the dual rebuild case */
+	for (d = array->devs; d; d = d->next)
+		if (d->recovery_start == 0) {
+			if (rebuild)
+				return;
+			rebuild = d;
+		}
+
+	if (!rebuild) {
+		/* (?) none of the disks are marked with
+		 * IMSM_ORD_REBUILD, so assume they are missing and the
+		 * disk_ord_tbl was not correctly updated
+		 */
+		dprintf("%s: failed to locate out-of-sync disk\n", __func__);
+		return;
+	}
+
+	units = __le32_to_cpu(dev->vol.curr_migr_unit);
+	/* Widen before multiplying: a product of two 32-bit values would
+	 * wrap once the checkpoint lies beyond 2^32 blocks.
+	 */
+	rebuild->recovery_start =
+		(unsigned long long)units * blocks_per_migr_unit(super, dev);
+}
+
+#ifndef MDASSEMBLE
+static int recover_backup_imsm(struct supertype *st, struct mdinfo *info);
+#endif
+
+static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
+{
+	/* Given a container loaded by load_super_imsm_all,
+	 * extract information about all the arrays into
+	 * an mdinfo tree.
+	 * If 'subarray' is given, just extract info about that array.
+	 *
+	 * For each imsm_dev create an mdinfo, fill it in,
+	 * then look for matching devices in super->disks
+	 * and create appropriate device mdinfo.
+	 *
+	 * Returns the head of the list of arrays, linked through ->next,
+	 * or NULL when no array was published.  super->current_vol is
+	 * left pointing at the last volume that was examined.
+	 */
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb = super->anchor;
+	struct mdinfo *rest = NULL;
+	unsigned int i;
+	int sb_errors = 0;
+	struct dl *d;
+	int spare_disks = 0;
+
+	/* do not assemble arrays when not all attributes are supported */
+	if (imsm_check_attributes(mpb->attributes) == 0) {
+		sb_errors = 1;
+		pr_err("Unsupported attributes in IMSM metadata."
+		       "Arrays activation is blocked.\n");
+	}
+
+	/* check for bad blocks */
+	if (imsm_bbm_log_size(super->anchor)) {
+		pr_err("BBM log found in IMSM metadata."
+		       "Arrays activation is blocked.\n");
+		sb_errors = 1;
+	}
+
+	/* count spare devices, not used in maps
+	 */
+	for (d = super->disks; d; d = d->next)
+		if (d->index == -1)
+			spare_disks++;
+
+	for (i = 0; i < mpb->num_raid_devs; i++) {
+		struct imsm_dev *dev;
+		struct imsm_map *map;
+		struct imsm_map *map2;
+		struct mdinfo *this;
+		int slot;
+#ifndef MDASSEMBLE
+		int chunk;
+#endif
+		char *ep;
+
+		if (subarray &&
+		    (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
+			continue;
+
+		dev = get_imsm_dev(super, i);
+		map = get_imsm_map(dev, MAP_0);
+		map2 = get_imsm_map(dev, MAP_1);
+
+		/* do not publish arrays that are in the middle of an
+		 * unsupported migration
+		 */
+		if (dev->vol.migr_state &&
+		    (migr_type(dev) == MIGR_STATE_CHANGE)) {
+			pr_err("cannot assemble volume '%.16s':"
+			       " unsupported migration in progress\n",
+			       dev->volume);
+			continue;
+		}
+		/* do not publish arrays that are not support by controller's
+		 * OROM/EFI
+		 */
+
+		this = xmalloc(sizeof(*this));
+
+		super->current_vol = i;
+		getinfo_super_imsm_volume(st, this, NULL);
+		this->next = rest;
+#ifndef MDASSEMBLE
+		chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
+		/* mdadm does not support all metadata features- set the bit in all arrays state */
+		if (!validate_geometry_imsm_orom(super,
+						 get_imsm_raid_level(map), /* RAID level */
+						 imsm_level_to_layout(get_imsm_raid_level(map)),
+						 map->num_members, /* raid disks */
+						 &chunk, join_u32(dev->size_low, dev->size_high),
+						 1 /* verbose */)) {
+			/* dev->volume is a fixed-size field that is printed
+			 * with a bounded width above, so bound it here too
+			 * instead of relying on a terminating NUL.
+			 */
+			pr_err("IMSM RAID geometry validation"
+			       " failed. Array %.16s activation is blocked.\n",
+			       dev->volume);
+			this->array.state |=
+				(1<<MD_SB_BLOCK_CONTAINER_RESHAPE) |
+				(1<<MD_SB_BLOCK_VOLUME);
+		}
+#endif
+
+		/* if array has bad blocks, set suitable bit in all arrays state */
+		if (sb_errors)
+			this->array.state |=
+				(1<<MD_SB_BLOCK_CONTAINER_RESHAPE) |
+				(1<<MD_SB_BLOCK_VOLUME);
+
+		for (slot = 0 ; slot < map->num_members; slot++) {
+			unsigned long long recovery_start;
+			struct mdinfo *info_d;
+			struct dl *dl;
+			int idx;
+			int skip;
+			__u32 ord;
+
+			skip = 0;
+			idx = get_imsm_disk_idx(dev, slot, MAP_0);
+			ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
+			/* locate the container disk that backs this slot */
+			for (dl = super->disks; dl ; dl = dl->next)
+				if (dl->index == idx)
+					break;
+
+			recovery_start = MaxSector;
+			if (dl == NULL)
+				skip = 1;
+			if (dl && is_failed(&dl->disk))
+				skip = 1;
+			if (ord & IMSM_ORD_REBUILD)
+				recovery_start = 0;
+
+			/*
+			 * if we skip some disks the array will be assembled degraded;
+			 * reset resync start to avoid a dirty-degraded
+			 * situation when performing the initial sync
+			 *
+			 * FIXME handle dirty degraded
+			 */
+			if ((skip || recovery_start == 0) && !dev->vol.dirty)
+				this->resync_start = MaxSector;
+			if (skip)
+				continue;
+
+			info_d = xcalloc(1, sizeof(*info_d));
+			info_d->next = this->devs;
+			this->devs = info_d;
+
+			info_d->disk.number = dl->index;
+			info_d->disk.major = dl->major;
+			info_d->disk.minor = dl->minor;
+			info_d->disk.raid_disk = slot;
+			info_d->recovery_start = recovery_start;
+			if (map2) {
+				if (slot < map2->num_members)
+					info_d->disk.state = (1 << MD_DISK_ACTIVE);
+				else
+					this->array.spare_disks++;
+			} else {
+				if (slot < map->num_members)
+					info_d->disk.state = (1 << MD_DISK_ACTIVE);
+				else
+					this->array.spare_disks++;
+			}
+			if (info_d->recovery_start == MaxSector)
+				this->array.working_disks++;
+
+			info_d->events = __le32_to_cpu(mpb->generation_num);
+			info_d->data_offset = pba_of_lba0(map);
+			info_d->component_size = blocks_per_member(map);
+		}
+		/* now that the disk list is up-to-date fixup recovery_start */
+		update_recovery_start(super, dev, this);
+		this->array.spare_disks += spare_disks;
+
+#ifndef MDASSEMBLE
+		/* check for reshape */
+		if (this->reshape_active == 1)
+			recover_backup_imsm(st, this);
+#endif
+		rest = this;
+	}
+
+	return rest;
+}
+
+/* Work out the map state that corresponds to 'failed' failed disks for
+ * the map of 'dev' selected by 'look_in_map'.
+ *
+ * With no failures the result is IMSM_T_STATE_UNINITIALIZED when the map
+ * is in that state and IMSM_T_STATE_NORMAL otherwise.  With failures the
+ * result depends on the raid level:
+ *   0  - always failed
+ *   1  - degraded while fewer than num_members disks have failed
+ *   10 - failed as soon as both halves of one mirror pair are out of sync
+ *   5  - degraded for a single failure, failed for more
+ * Any other level returns the map's current state.
+ */
+static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
+				int failed, int look_in_map)
+{
+	struct imsm_map *map;
+
+	map = get_imsm_map(dev, look_in_map);
+
+	if (!failed)
+		return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
+			IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;
+
+	switch (get_imsm_raid_level(map)) {
+	case 0:
+		return IMSM_T_STATE_FAILED;
+	case 1:
+		if (failed < map->num_members)
+			return IMSM_T_STATE_DEGRADED;
+		return IMSM_T_STATE_FAILED;
+	case 10:
+	{
+		/**
+		 * check to see if any mirrors have failed, otherwise we
+		 * are degraded. Even numbered slots are mirrored on
+		 * slot+1
+		 */
+		int i;
+		/* Always reassigned on slot 0 before it is read.  A real
+		 * initial value replaces the old "insync = insync"
+		 * self-initialisation, which read an indeterminate value
+		 * just to silence a compiler warning.
+		 */
+		int insync = 0;
+
+		for (i = 0; i < map->num_members; i++) {
+			__u32 ord = get_imsm_ord_tbl_ent(dev, i, MAP_X);
+			int idx = ord_to_idx(ord);
+			struct imsm_disk *disk;
+
+			/* reset the potential in-sync count on even-numbered
+			 * slots. num_copies is always 2 for imsm raid10
+			 */
+			if ((i & 1) == 0)
+				insync = 2;
+
+			disk = get_imsm_disk(super, idx);
+			if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
+				insync--;
+
+			/* no in-sync disks left in this mirror the
+			 * array has failed
+			 */
+			if (insync == 0)
+				return IMSM_T_STATE_FAILED;
+		}
+
+		return IMSM_T_STATE_DEGRADED;
+	}
+	case 5:
+		if (failed < 2)
+			return IMSM_T_STATE_DEGRADED;
+		return IMSM_T_STATE_FAILED;
+	default:
+		break;
+	}
+
+	return map->map_state;
+}
+
+static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
+ int look_in_map)
+{
+ int i;
+ int failed = 0;
+ struct imsm_disk *disk;
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ struct imsm_map *prev = get_imsm_map(dev, MAP_1);
+ struct imsm_map *map_for_loop;
+ __u32 ord;
+ int idx;
+ int idx_1;
+
+ /* at the beginning of migration we set IMSM_ORD_REBUILD on
+ * disks that are being rebuilt. New failures are recorded to
+ * map[0]. So we look through all the disks we started with and
+ * see if any failures are still present, or if any new ones
+ * have arrived
+ *
+ * Returns the number of failed slots of 'dev'.  'look_in_map'
+ * selects MAP_0, MAP_1 or MAP_X (both maps).  A slot counts when
+ * get_imsm_disk() finds no disk for it, when is_failed() is true
+ * or when IMSM_ORD_REBUILD is set in its ord entry.  The loop runs
+ * over the map with more members.
+ */
+ map_for_loop = map;
+ if (prev && (map->num_members < prev->num_members))
+ map_for_loop = prev;
+
+ for (i = 0; i < map_for_loop->num_members; i++) {
+ idx_1 = -255; /* placeholder: no disk index was read from the previous map */
+ /* when MAP_X is passed both maps failures are counted
+ */
+ if (prev &&
+ ((look_in_map == MAP_1) || (look_in_map == MAP_X)) &&
+ (i < prev->num_members)) {
+ ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
+ idx_1 = ord_to_idx(ord);
+
+ disk = get_imsm_disk(super, idx_1);
+ if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
+ failed++;
+ }
+ if (((look_in_map == MAP_0) || (look_in_map == MAP_X)) &&
+ (i < map->num_members)) {
+ ord = __le32_to_cpu(map->disk_ord_tbl[i]);
+ idx = ord_to_idx(ord);
+
+ if (idx != idx_1) { /* same disk in both maps was already examined above */
+ disk = get_imsm_disk(super, idx);
+ if (!disk || is_failed(disk) ||
+ ord & IMSM_ORD_REBUILD)
+ failed++;
+ }
+ }
+ }
+
+ return failed;
+}
+
+#ifndef MDASSEMBLE
+/* Record in 'a' which container member the new active array refers to.
+ *
+ * 'inst' is the subarray index as a decimal string.  Returns 0 after
+ * storing the index in a->info.container_member, or -ENODEV when the
+ * index is negative or not below mpb->num_raid_devs.
+ */
+static int imsm_open_new(struct supertype *c, struct active_array *a,
+			 char *inst)
+{
+	struct intel_super *super = c->sb;
+	struct imsm_super *mpb = super->anchor;
+	/* parse once; the original converted the string three times */
+	int member = atoi(inst);
+
+	/* a negative index used to pass the upper-bound-only check */
+	if (member < 0 || member >= mpb->num_raid_devs) {
+		pr_err("%s: subarry index %d, out of range\n",
+		       __func__, member);
+		return -ENODEV;
+	}
+
+	dprintf("imsm: open_new %s\n", inst);
+	a->info.container_member = member;
+	return 0;
+}
+
+/* Return 1 when 'dev' is resyncing, 0 otherwise.
+ *
+ * A volume without a migration in progress is never resyncing.
+ * MIGR_INIT and MIGR_REPAIR always count, MIGR_GEN_MIGR never does.
+ * For the remaining migration types the answer is 1 only when the MAP_1
+ * map is in the normal state and the raw migr_type field is not
+ * MIGR_GEN_MIGR.
+ */
+static int is_resyncing(struct imsm_dev *dev)
+{
+	struct imsm_map *prev_map;
+
+	if (!dev->vol.migr_state)
+		return 0;
+
+	switch (migr_type(dev)) {
+	case MIGR_INIT:
+	case MIGR_REPAIR:
+		return 1;
+	case MIGR_GEN_MIGR:
+		return 0;
+	default:
+		break;
+	}
+
+	prev_map = get_imsm_map(dev, MAP_1);
+
+	if (prev_map->map_state != IMSM_T_STATE_NORMAL)
+		return 0;
+	if (dev->vol.migr_type == MIGR_GEN_MIGR)
+		return 0;
+
+	return 1;
+}
+
+/* return true if we recorded new information
+ *
+ * Mark the disk with index 'idx' as failed in 'dev'.  Returns 0 without
+ * changing anything when the disk has no slot in map[0], or when it is
+ * already failed and its ord entry already carries IMSM_ORD_REBUILD.
+ * Otherwise ":0" is appended to the disk serial, FAILED_DISK is set,
+ * the ord entry is flagged in map[0] (and in the second map while a
+ * migration is in progress), failed_disk_num is recorded if it was 0xff,
+ * and 1 is returned.
+ */
+static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
+{
+ __u32 ord;
+ int slot;
+ struct imsm_map *map;
+ char buf[MAX_RAID_SERIAL_LEN+3]; /* serial + ":0" + terminator */
+ unsigned int len, shift = 0;
+
+ /* new failures are always set in map[0] */
+ map = get_imsm_map(dev, MAP_0);
+
+ slot = get_imsm_disk_slot(map, idx);
+ if (slot < 0)
+ return 0;
+
+ ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
+ if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
+ return 0;
+
+ memcpy(buf, disk->serial, MAX_RAID_SERIAL_LEN);
+ buf[MAX_RAID_SERIAL_LEN] = '\000';
+ strcat(buf, ":0");
+ if ((len = strlen(buf)) >= MAX_RAID_SERIAL_LEN)
+ shift = len - MAX_RAID_SERIAL_LEN + 1; /* drop leading characters so the tail, including ":0", fits */
+ strncpy((char *)disk->serial, &buf[shift], MAX_RAID_SERIAL_LEN);
+
+ disk->status |= FAILED_DISK;
+ set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
+ /* mark failures in second map if second map exists and this disk
+ * in this slot.
+ * This is valid for migration, initialization and rebuild
+ */
+ if (dev->vol.migr_state) {
+ struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
+ int slot2 = get_imsm_disk_slot(map2, idx);
+
+ if ((slot2 < map2->num_members) &&
+ (slot2 >= 0))
+ set_imsm_ord_tbl_ent(map2, slot2,
+ idx | IMSM_ORD_REBUILD);
+ }
+ if (map->failed_disk_num == 0xff) /* only record the slot when none was recorded yet */
+ map->failed_disk_num = slot;
+ return 1;
+}
+
+/* Mark 'disk' as failed via mark_failure() and, unless its scsi_id is
+ * already all ones, set the scsi_id to all ones and shift the serial
+ * left by one byte.
+ */
+static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
+{
+	mark_failure(dev, disk, idx);
+
+	if (disk->scsi_id != __cpu_to_le32(~(__u32)0)) {
+		disk->scsi_id = __cpu_to_le32(~(__u32)0);
+		memmove(disk->serial, disk->serial + 1, MAX_RAID_SERIAL_LEN - 1);
+	}
+}
+
+/* Record every disk on super->missing as failed/missing in 'dev'.
+ *
+ * Does nothing when the missing list is empty.  Otherwise, unless a
+ * general migration is in progress, an initialization or rebuild is
+ * ended first when map[0] contains failed disks; then each missing disk
+ * is passed to mark_missing() and super->updates_pending is bumped.
+ */
+static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
+{
+	struct dl *gone = super->missing;
+
+	if (!gone)
+		return;
+
+	/* When orom adds replacement for missing disk it does
+	 * not remove entry of missing disk, but just updates map with
+	 * new added disk. So it is not enough just to test if there is
+	 * any missing disk, we have to look if there are any failed disks
+	 * in map to stop migration */
+
+	dprintf("imsm: mark missing\n");
+	/* end process for initialization and rebuild only
+	 */
+	if (!is_gen_migration(dev)) {
+		int failed = imsm_count_failed(super, dev, MAP_0);
+		__u8 map_state = imsm_check_degraded(super, dev, failed, MAP_0);
+
+		if (failed)
+			end_migration(dev, super, map_state);
+	}
+
+	while (gone) {
+		mark_missing(dev, &gone->disk, gone->index);
+		gone = gone->next;
+	}
+	super->updates_pending++;
+}
+
+/* Store a new array size in dev->size_low / dev->size_high and return it.
+ *
+ * When new_size is positive it is used as the size (online volume size
+ * change); otherwise the size is blocks_per_member() of map[0] times the
+ * number of data members (size change caused by added disks).  The value
+ * is rounded down using SECT_PER_MB_SHIFT before it is stored.
+ * When imsm_num_data_members() reports 0 nothing is changed and the size
+ * currently recorded in the metadata is returned.
+ */
+static unsigned long long imsm_set_array_size(struct imsm_dev *dev,
+					      long long new_size)
+{
+	unsigned long long blocks;
+	int data_disks = imsm_num_data_members(dev, MAP_0);
+
+	if (data_disks == 0) {
+		/* problem case: hand back the current value untouched */
+		blocks = __le32_to_cpu(dev->size_high);
+		blocks <<= 32;
+		return blocks + __le32_to_cpu(dev->size_low);
+	}
+
+	if (new_size > 0)
+		blocks = new_size;
+	else
+		blocks = blocks_per_member(get_imsm_map(dev, MAP_0)) * data_disks;
+
+	/* round array size down to closest MB */
+	blocks >>= SECT_PER_MB_SHIFT;
+	blocks <<= SECT_PER_MB_SHIFT;
+
+	dev->size_low = __cpu_to_le32((__u32)blocks);
+	dev->size_high = __cpu_to_le32((__u32)(blocks >> 32));
+
+	return blocks;
+}
+
+static void imsm_set_disk(struct active_array *a, int n, int state);
+
+/* if no device has a migr_state, but some device has a
+ * different number of members than the previous device, start
+ * changing the number of devices in this device to match
+ * previous.
+ *
+ * Returns without doing anything as soon as a device with a migration in
+ * progress is met.  When a reshape is started, map[0] is grown to the
+ * new member count, a copy with the old count is kept in the second map,
+ * the array size is recomputed and super->updates_pending is bumped.
+ */
+static void imsm_progress_container_reshape(struct intel_super *super)
+{
+	struct imsm_super *mpb = super->anchor;
+	int prev_disks = -1;
+	int i;
+
+	for (i = 0; i < mpb->num_raid_devs; i++) {
+		struct imsm_dev *dev = get_imsm_dev(super, i);
+		struct imsm_map *map = get_imsm_map(dev, MAP_0);
+		struct imsm_map *map2;
+		int prev_num_members;
+		int copy_map_size;
+		int slot;
+
+		if (dev->vol.migr_state)
+			return;
+
+		if (prev_disks == -1)
+			prev_disks = map->num_members;
+		if (prev_disks == map->num_members)
+			continue;
+
+		/* OK, this array needs to enter reshape mode.
+		 * i.e it needs a migr_state
+		 */
+
+		/* size of the map before num_members is changed */
+		copy_map_size = sizeof_imsm_map(map);
+		prev_num_members = map->num_members;
+		map->num_members = prev_disks;
+		dev->vol.migr_state = 1;
+		dev->vol.curr_migr_unit = 0;
+		set_migr_type(dev, MIGR_GEN_MIGR);
+		/* Own index for the slot loop: the original reused the
+		 * volume index 'i' here and so clobbered the outer loop.
+		 */
+		for (slot = prev_num_members; slot < map->num_members; slot++)
+			set_imsm_ord_tbl_ent(map, slot, slot);
+		map2 = get_imsm_map(dev, MAP_1);
+		/* Copy the current map */
+		memcpy(map2, map, copy_map_size);
+		map2->num_members = prev_num_members;
+
+		imsm_set_array_size(dev, -1);
+		super->clean_migration_record_by_mdmon = 1;
+		super->updates_pending++;
+
+		/* One reshape is started per call: with the clobbered index
+		 * the original loop resumed past the new member count, and
+		 * this function itself refuses to do anything while a
+		 * migration is in progress.
+		 */
+		break;
+	}
+}
+
+/* Handle dirty -> clean transitions, resync and reshape. Degraded and rebuild
+ * states are handled in imsm_set_disk() with one exception, when a
+ * resync is stopped due to a new failure this routine will set the
+ * 'degraded' state for the array.
+ */
+static int imsm_set_array_state(struct active_array *a, int consistent)
+{
+ int inst = a->info.container_member;
+ struct intel_super *super = a->container->sb;
+ struct imsm_dev *dev = get_imsm_dev(super, inst);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ int failed = imsm_count_failed(super, dev, MAP_0);
+ __u8 map_state = imsm_check_degraded(super, dev, failed, MAP_0);
+ __u32 blocks_per_unit;
+
+ if (dev->vol.migr_state &&
+ dev->vol.migr_type == MIGR_GEN_MIGR) {
+ /* array state change is blocked due to reshape action
+ * We might need to
+ * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
+ * - finish the reshape (if last_checkpoint is big and action != reshape)
+ * - update curr_migr_unit
+ */
+ if (a->curr_action == reshape) {
+ /* still reshaping, maybe update curr_migr_unit */
+ goto mark_checkpoint;
+ } else {
+ if (a->last_checkpoint == 0 && a->prev_action == reshape) {
+ /* for some reason we aborted the reshape.
+ *
+ * disable automatic metadata rollback
+ * user action is required to recover process
+ */
+ if (0) {
+ struct imsm_map *map2 =
+ get_imsm_map(dev, MAP_1);
+ dev->vol.migr_state = 0;
+ set_migr_type(dev, 0);
+ dev->vol.curr_migr_unit = 0;
+ memcpy(map, map2,
+ sizeof_imsm_map(map2));
+ super->updates_pending++;
+ }
+ }
+ if (a->last_checkpoint >= a->info.component_size) {
+ unsigned long long array_blocks;
+ int used_disks;
+ struct mdinfo *mdi;
+
+ used_disks = imsm_num_data_members(dev, MAP_0);
+ if (used_disks > 0) {
+ array_blocks =
+ blocks_per_member(map) *
+ used_disks;
+ /* round array size down to closest MB
+ */
+ array_blocks = (array_blocks
+ >> SECT_PER_MB_SHIFT)
+ << SECT_PER_MB_SHIFT;
+ a->info.custom_array_size = array_blocks;
+ /* encourage manager to update array
+ * size
+ */
+
+ a->check_reshape = 1;
+ }
+ /* finalize online capacity expansion/reshape */
+ for (mdi = a->info.devs; mdi; mdi = mdi->next)
+ imsm_set_disk(a,
+ mdi->disk.raid_disk,
+ mdi->curr_state);
+
+ imsm_progress_container_reshape(super);
+ }
+ }
+ }
+
+ /* before we activate this array handle any missing disks */
+ if (consistent == 2)
+ handle_missing(super, dev);
+
+ if (consistent == 2 &&
+ (!is_resync_complete(&a->info) ||
+ map_state != IMSM_T_STATE_NORMAL ||
+ dev->vol.migr_state))
+ consistent = 0;
+
+ if (is_resync_complete(&a->info)) {
+ /* complete intialization / resync,
+ * recovery and interrupted recovery is completed in
+ * ->set_disk
+ */
+ if (is_resyncing(dev)) {
+ dprintf("imsm: mark resync done\n");
+ end_migration(dev, super, map_state);
+ super->updates_pending++;
+ a->last_checkpoint = 0;
+ }
+ } else if ((!is_resyncing(dev) && !failed) &&
+ (imsm_reshape_blocks_arrays_changes(super) == 0)) {
+ /* mark the start of the init process if nothing is failed */
+ dprintf("imsm: mark resync start\n");
+ if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
+ migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_INIT);
+ else
+ migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_REPAIR);
+ super->updates_pending++;
+ }
+
+mark_checkpoint:
+ /* skip checkpointing for general migration,
+ * it is controlled in mdadm
+ */
+ if (is_gen_migration(dev))
+ goto skip_mark_checkpoint;
+
+ /* check if we can update curr_migr_unit from resync_start, recovery_start */
+ blocks_per_unit = blocks_per_migr_unit(super, dev);
+ if (blocks_per_unit) {
+ __u32 units32;
+ __u64 units;
+
+ units = a->last_checkpoint / blocks_per_unit;
+ units32 = units;
+
+ /* check that we did not overflow 32-bits, and that
+ * curr_migr_unit needs updating
+ */
+ if (units32 == units &&
+ units32 != 0 &&
+ __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
+ dprintf("imsm: mark checkpoint (%u)\n", units32);
+ dev->vol.curr_migr_unit = __cpu_to_le32(units32);
+ super->updates_pending++;
+ }
+ }
+
+skip_mark_checkpoint:
+ /* mark dirty / clean */
+ if (dev->vol.dirty != !consistent) {
+ dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
+ if (consistent)
+ dev->vol.dirty = 0;
+ else
+ dev->vol.dirty = 1;
+ super->updates_pending++;
+ }
+
+ return consistent;
+}
+
+/* Apply a state change reported for the disk in slot 'n' of array 'a'.
+ *
+ * a: array the disk belongs to.
+ * n: slot number; negative values are silently ignored, values outside
+ *    0..num_members-1 are reported with pr_err() and ignored.
+ * state: DS_* flags; DS_FAULTY and DS_INSYNC are examined here.
+ *
+ * After the disk itself has been handled the map state is recomputed with
+ * imsm_check_degraded() and the resulting normal/degraded/failed
+ * transition is recorded. Each change is followed by
+ * super->updates_pending++.
+ */
+static void imsm_set_disk(struct active_array *a, int n, int state)
+{
+	int inst = a->info.container_member;
+	struct intel_super *super = a->container->sb;
+	struct imsm_dev *dev = get_imsm_dev(super, inst);
+	struct imsm_map *map = get_imsm_map(dev, MAP_0);
+	struct imsm_disk *disk;
+	struct mdinfo *mdi;
+	int recovery_not_finished = 0;
+	int failed;
+	__u32 ord;
+	__u8 map_state;
+
+	if (n < 0)
+		return;
+
+	/* Valid slots are 0..num_members-1. Do not go on to index the
+	 * disk order table with a slot that is out of range.
+	 */
+	if (n >= map->num_members) {
+		pr_err("imsm: set_disk %d out of range 0..%d\n",
+		       n, map->num_members - 1);
+		return;
+	}
+
+	dprintf("imsm: set_disk %d:%x\n", n, state);
+
+	ord = get_imsm_ord_tbl_ent(dev, n, MAP_0);
+	disk = get_imsm_disk(super, ord_to_idx(ord));
+
+	/* check for new failures */
+	if (state & DS_FAULTY) {
+		if (mark_failure(dev, disk, ord_to_idx(ord)))
+			super->updates_pending++;
+	}
+
+	/* check if in_sync */
+	if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
+		struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
+
+		/* drop the rebuild flag from this slot in the second map */
+		set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
+		super->updates_pending++;
+	}
+
+	failed = imsm_count_failed(super, dev, MAP_0);
+	map_state = imsm_check_degraded(super, dev, failed, MAP_0);
+
+	/* check if recovery complete, newly degraded, or failed */
+	dprintf("imsm: Detected transition to state ");
+	switch (map_state) {
+	case IMSM_T_STATE_NORMAL: /* transition to normal state */
+		dprintf("normal: ");
+		if (is_rebuilding(dev)) {
+			dprintf("while rebuilding");
+			/* check if recovery is really finished */
+			for (mdi = a->info.devs; mdi ; mdi = mdi->next)
+				if (mdi->recovery_start != MaxSector) {
+					recovery_not_finished = 1;
+					break;
+				}
+			if (recovery_not_finished) {
+				/* mdi still points at the device that has
+				 * not finished recovering
+				 */
+				dprintf("\nimsm: Rebuild has not finished yet, "
+					"state not changed");
+				if (a->last_checkpoint < mdi->recovery_start) {
+					a->last_checkpoint = mdi->recovery_start;
+					super->updates_pending++;
+				}
+				break;
+			}
+			end_migration(dev, super, map_state);
+			/* re-read the first map after end_migration() */
+			map = get_imsm_map(dev, MAP_0);
+			map->failed_disk_num = ~0;
+			super->updates_pending++;
+			a->last_checkpoint = 0;
+			break;
+		}
+		if (is_gen_migration(dev)) {
+			dprintf("while general migration");
+			if (a->last_checkpoint >= a->info.component_size)
+				end_migration(dev, super, map_state);
+			else
+				map->map_state = map_state;
+			map = get_imsm_map(dev, MAP_0);
+			map->failed_disk_num = ~0;
+			super->updates_pending++;
+			break;
+		}
+		break;
+	case IMSM_T_STATE_DEGRADED: /* transition to degraded state */
+		dprintf("degraded: ");
+		if ((map->map_state != map_state) &&
+		    !dev->vol.migr_state) {
+			dprintf("mark degraded");
+			map->map_state = map_state;
+			super->updates_pending++;
+			a->last_checkpoint = 0;
+			break;
+		}
+		if (is_rebuilding(dev)) {
+			dprintf("while rebuilding.");
+			if (map->map_state != map_state) {
+				dprintf(" Map state change");
+				end_migration(dev, super, map_state);
+				super->updates_pending++;
+			}
+			break;
+		}
+		if (is_gen_migration(dev)) {
+			dprintf("while general migration");
+			if (a->last_checkpoint >= a->info.component_size)
+				end_migration(dev, super, map_state);
+			else {
+				map->map_state = map_state;
+				manage_second_map(super, dev);
+			}
+			super->updates_pending++;
+			break;
+		}
+		if (is_initializing(dev)) {
+			dprintf("while initialization.");
+			map->map_state = map_state;
+			super->updates_pending++;
+			break;
+		}
+		break;
+	case IMSM_T_STATE_FAILED: /* transition to failed state */
+		dprintf("failed: ");
+		if (is_gen_migration(dev)) {
+			dprintf("while general migration");
+			map->map_state = map_state;
+			super->updates_pending++;
+			break;
+		}
+		if (map->map_state != map_state) {
+			dprintf("mark failed");
+			end_migration(dev, super, map_state);
+			super->updates_pending++;
+			a->last_checkpoint = 0;
+			break;
+		}
+		break;
+	default:
+		dprintf("state %i\n", map_state);
+	}
+	dprintf("\n");
+
+}
+
+/* Write the metadata block 'mpb' to the end of the device open on 'fd'.
+ *
+ * The first 512 bytes (the anchor) go to the second to last sector of the
+ * device; when the mpb is larger than 512 bytes the remainder is written
+ * to the sectors immediately preceding the anchor.
+ *
+ * Returns 0 on success and 1 on any seek/write failure or when the device
+ * is too small to hold the metadata.
+ */
+static int store_imsm_mpb(int fd, struct imsm_super *mpb)
+{
+	/* byte pointer: arithmetic on 'void *' is a GNU extension */
+	const char *buf = (const char *)mpb;
+	__u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
+	/* Start from 0 so that a get_dev_size() failure is caught by the
+	 * size check below (assumes get_dev_size() leaves *sizep untouched
+	 * on failure - TODO confirm).
+	 */
+	unsigned long long dsize = 0;
+	unsigned long long sectors = 0;
+
+	get_dev_size(fd, NULL, &dsize);
+
+	if (mpb_size > 512)
+		/* -1 to account for anchor */
+		sectors = mpb_sectors(mpb) - 1;
+
+	/* refuse to compute a wrapped-around offset on a short device */
+	if (dsize < 512 * (2 + sectors))
+		return 1;
+
+	if (mpb_size > 512) {
+		/* write the extended mpb to the sectors preceding the anchor */
+		if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
+			return 1;
+
+		if ((unsigned long long)write(fd, buf + 512, 512 * sectors)
+		    != 512 * sectors)
+			return 1;
+	}
+
+	/* first block is stored on second to last sector of the disk */
+	if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
+		return 1;
+
+	if (write(fd, buf, 512) != 512)
+		return 1;
+
+	return 0;
+}
+
+/* Flush the container metadata with write_super_imsm() if any update is
+ * pending, then reset the pending counter. Does nothing otherwise.
+ */
+static void imsm_sync_metadata(struct supertype *container)
+{
+	struct intel_super *sb = container->sb;
+
+	dprintf("sync metadata: %d\n", sb->updates_pending);
+	if (sb->updates_pending) {
+		write_super_imsm(container, 0);
+		sb->updates_pending = 0;
+	}
+}
+
+/* Look up the disk that the metadata records for slot 'idx' of array 'a'.
+ *
+ * Returns the matching entry of super->disks, or NULL when no entry has
+ * that index or the disk is marked failed.
+ */
+static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
+{
+	struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
+	int wanted = get_imsm_disk_idx(dev, idx, MAP_X);
+	struct dl *cand = super->disks;
+
+	/* walk the disk list until the recorded index is found */
+	while (cand && cand->index != wanted)
+		cand = cand->next;
+
+	if (!cand || is_failed(&cand->disk))
+		return NULL;
+
+	dprintf("%s: found %x:%x\n", __func__, cand->major, cand->minor);
+	return cand;
+}
+
+/* Find a disk in the container that can take over 'slot' of array 'a'.
+ *
+ * super: container metadata.
+ * slot: slot of the array that needs a replacement.
+ * a: the array being repaired; disks that are already open members of it
+ *    are skipped.
+ * activate_new: when zero, pure spares (index -1) are skipped so that only
+ *    partially assimilated disks are considered.
+ * additional_test_list: disks already chosen by the caller; skipped too.
+ *
+ * A candidate must have free space at the position of every volume in the
+ * container that it is not already a member of.
+ *
+ * Returns the first suitable disk, or NULL if there is none.
+ */
+static struct dl *imsm_add_spare(struct intel_super *super, int slot,
+				 struct active_array *a, int activate_new,
+				 struct mdinfo *additional_test_list)
+{
+	struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
+	int idx = get_imsm_disk_idx(dev, slot, MAP_X);
+	struct imsm_super *mpb = super->anchor;
+	struct imsm_map *map;
+	unsigned long long pos;
+	struct mdinfo *d;
+	struct extent *ex;
+	int i, j;
+	int found;
+	/* Kept as 64-bit values: the results of pba_of_lba0() and
+	 * blocks_per_member() are used as unsigned long long elsewhere in
+	 * this file, and 32-bit locals would truncate them on large members.
+	 */
+	unsigned long long array_start = 0;
+	unsigned long long array_end = 0;
+	struct dl *dl;
+	struct mdinfo *test_list;
+
+	for (dl = super->disks; dl; dl = dl->next) {
+		/* If in this array, skip */
+		for (d = a->info.devs ; d ; d = d->next)
+			if (d->state_fd >= 0 &&
+			    d->disk.major == dl->major &&
+			    d->disk.minor == dl->minor) {
+				dprintf("%x:%x already in array\n",
+					dl->major, dl->minor);
+				break;
+			}
+		if (d)
+			continue;
+		test_list = additional_test_list;
+		while (test_list) {
+			if (test_list->disk.major == dl->major &&
+			    test_list->disk.minor == dl->minor) {
+				dprintf("%x:%x already in additional test list\n",
+					dl->major, dl->minor);
+				break;
+			}
+			test_list = test_list->next;
+		}
+		if (test_list)
+			continue;
+
+		/* skip in use or failed drives */
+		if (is_failed(&dl->disk) || idx == dl->index ||
+		    dl->index == -2) {
+			dprintf("%x:%x status (failed: %d index: %d)\n",
+				dl->major, dl->minor, is_failed(&dl->disk), idx);
+			continue;
+		}
+
+		/* skip pure spares when we are looking for partially
+		 * assimilated drives
+		 */
+		if (dl->index == -1 && !activate_new)
+			continue;
+
+		/* Does this unused device have the requisite free space?
+		 * It needs to be able to cover all member volumes
+		 */
+		ex = get_extents(super, dl);
+		if (!ex) {
+			dprintf("cannot get extents\n");
+			continue;
+		}
+		for (i = 0; i < mpb->num_raid_devs; i++) {
+			dev = get_imsm_dev(super, i);
+			map = get_imsm_map(dev, MAP_0);
+
+			/* check if this disk is already a member of
+			 * this array
+			 */
+			if (get_imsm_disk_slot(map, dl->index) >= 0)
+				continue;
+
+			found = 0;
+			j = 0;
+			pos = 0;
+			array_start = pba_of_lba0(map);
+			array_end = array_start +
+				    blocks_per_member(map) - 1;
+
+			/* walk the used extents; the list ends with an
+			 * entry whose size is zero
+			 */
+			do {
+				/* check that we can start at pba_of_lba0 with
+				 * blocks_per_member of space
+				 */
+				if (array_start >= pos && array_end < ex[j].start) {
+					found = 1;
+					break;
+				}
+				pos = ex[j].start + ex[j].size;
+				j++;
+			} while (ex[j-1].size);
+
+			if (!found)
+				break;
+		}
+
+		free(ex);
+		if (i < mpb->num_raid_devs) {
+			dprintf("%x:%x does not have %llu to %llu available\n",
+				dl->major, dl->minor, array_start, array_end);
+			/* No room */
+			continue;
+		}
+		return dl;
+	}
+
+	/* list exhausted: dl is NULL here */
+	return dl;
+}
+
+/* Decide whether volume 'dev_idx' of container 'cont' permits a rebuild of
+ * another volume.
+ *
+ * Returns 0 only when the volume evaluates to IMSM_T_STATE_FAILED for the
+ * given 'failed' count and one of its member disks is marked failed but is
+ * not flagged DISK_REMOVE. Returns 1 in every other case, including when
+ * the volume or its map cannot be found.
+ */
+static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
+{
+	struct imsm_dev *dev2 = get_imsm_dev(cont->sb, dev_idx);
+	struct imsm_map *map;
+	__u8 state;
+	int slot;
+
+	if (!dev2)
+		return 1;
+
+	state = imsm_check_degraded(cont->sb, dev2, failed, MAP_0);
+	if (state != IMSM_T_STATE_FAILED)
+		return 1;
+
+	map = get_imsm_map(dev2, MAP_0);
+	if (!map)
+		return 1;
+
+	for (slot = 0; slot < map->num_members; slot++) {
+		/* Check if failed disks are deleted from the intel disk
+		 * list or are marked to be deleted
+		 */
+		int idx = get_imsm_disk_idx(dev2, slot, MAP_X);
+		struct dl *idisk = get_imsm_dl_disk(cont->sb, idx);
+
+		if (!idisk)
+			continue;
+		/* Do not rebuild the array if failed disks from a failed
+		 * sub-array are not removed from the container.
+		 */
+		if (is_failed(&idisk->disk) && idisk->action != DISK_REMOVE)
+			return 0;
+	}
+
+	return 1;
+}
+
+/* Pick replacement disks for the non-working slots of array 'a'.
+ *
+ * a: the array to repair.
+ * updates: head of a metadata_update list; when at least one disk is
+ * chosen a new update record is pushed onto the front of this list.
+ *
+ * Returns a newly allocated list of mdinfo entries (one per chosen disk,
+ * linked through ->next), or NULL when nothing can or may be done.
+ */
+static struct mdinfo *imsm_activate_spare(struct active_array *a,
+ struct metadata_update **updates)
+{
+ /**
+ * Find a device with unused free space and use it to replace a
+ * failed/vacant region in an array. We replace failed regions one
+ * array at a time. The result is that a new spare disk will be added
+ * to the first failed array and after the monitor has finished
+ * propagating failures the remainder will be consumed.
+ *
+ * FIXME add a capability for mdmon to request spares from another
+ * container.
+ */
+
+ struct intel_super *super = a->container->sb;
+ int inst = a->info.container_member;
+ struct imsm_dev *dev = get_imsm_dev(super, inst);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ int failed = a->info.array.raid_disks;
+ struct mdinfo *rv = NULL;
+ struct mdinfo *d;
+ struct mdinfo *di;
+ struct metadata_update *mu;
+ struct dl *dl;
+ struct imsm_update_activate_spare *u;
+ int num_spares = 0;
+ int i;
+ int allowed;
+
+ /* 'failed' starts at raid_disks and is reduced by one for every
+ * device that still has an open state_fd
+ */
+ for (d = a->info.devs ; d ; d = d->next) {
+ if ((d->curr_state & DS_FAULTY) &&
+ d->state_fd >= 0)
+ /* wait for Removal to happen */
+ return NULL;
+ if (d->state_fd >= 0)
+ failed--;
+ }
+
+ dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
+ inst, failed, a->info.array.raid_disks, a->info.array.level);
+
+ if (imsm_reshape_blocks_arrays_changes(super))
+ return NULL;
+
+ /* Cannot activate another spare if rebuild is in progress already
+ */
+ if (is_rebuilding(dev)) {
+ dprintf("imsm: No spare activation allowed. "
+ "Rebuild in progress already.\n");
+ return NULL;
+ }
+
+ if (a->info.array.level == 4)
+ /* No repair for an array that went through takeover,
+ * imsm doesn't support raid4
+ */
+ return NULL;
+
+ if (imsm_check_degraded(super, dev, failed, MAP_0) !=
+ IMSM_T_STATE_DEGRADED)
+ return NULL;
+
+ /*
+ * If there are any failed disks check state of the other volume.
+ * Block rebuild if the other one is failed until failed disks
+ * are removed from container.
+ */
+ if (failed) {
+ dprintf("found failed disks in %.*s, check if there another"
+ "failed sub-array.\n",
+ MAX_RAID_SERIAL_LEN, dev->volume);
+ /* check if states of the other volumes allow for rebuild */
+ for (i = 0; i < super->anchor->num_raid_devs; i++) {
+ if (i != inst) {
+ allowed = imsm_rebuild_allowed(a->container,
+ i, failed);
+ if (!allowed)
+ return NULL;
+ }
+ }
+ }
+
+ /* For each slot, if it is not working, find a spare */
+ for (i = 0; i < a->info.array.raid_disks; i++) {
+ for (d = a->info.devs ; d ; d = d->next)
+ if (d->disk.raid_disk == i)
+ break;
+ dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
+ if (d && (d->state_fd >= 0))
+ continue;
+
+ /*
+ * OK, this device needs recovery. Try to re-add the
+ * previous occupant of this slot, if this fails see if
+ * we can continue the assimilation of a spare that was
+ * partially assimilated, finally try to activate a new
+ * spare.
+ */
+ dl = imsm_readd(super, i, a);
+ if (!dl)
+ dl = imsm_add_spare(super, i, a, 0, rv);
+ if (!dl)
+ dl = imsm_add_spare(super, i, a, 1, rv);
+ if (!dl)
+ continue;
+
+ /* found a usable disk with enough space */
+ di = xcalloc(1, sizeof(*di));
+
+ /* dl->index will be -1 in the case we are activating a
+ * pristine spare. imsm_process_update() will create a
+ * new index in this case. Once a disk is found to be
+ * failed in all member arrays it is kicked from the
+ * metadata
+ */
+ di->disk.number = dl->index;
+
+ /* (ab)use di->devs to store a pointer to the device
+ * we chose
+ */
+ di->devs = (struct mdinfo *) dl;
+
+ di->disk.raid_disk = i;
+ di->disk.major = dl->major;
+ di->disk.minor = dl->minor;
+ di->disk.state = 0;
+ di->recovery_start = 0;
+ di->data_offset = pba_of_lba0(map);
+ di->component_size = a->info.component_size;
+ di->container_member = inst;
+ super->random = random32();
+ /* push onto the front of the result list */
+ di->next = rv;
+ rv = di;
+ num_spares++;
+ dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
+ i, di->data_offset);
+ }
+
+ if (!rv)
+ /* No spares found */
+ return rv;
+ /* Now 'rv' has a list of devices to return.
+ * Create a metadata_update record to update the
+ * disk_ord_tbl for the array
+ */
+ mu = xmalloc(sizeof(*mu));
+ mu->buf = xcalloc(num_spares,
+ sizeof(struct imsm_update_activate_spare));
+ mu->space = NULL;
+ mu->space_list = NULL;
+ mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
+ mu->next = *updates;
+ u = (struct imsm_update_activate_spare *) mu->buf;
+
+ /* one record per chosen disk, stored back to back in mu->buf and
+ * chained through ->next
+ */
+ for (di = rv ; di ; di = di->next) {
+ u->type = update_activate_spare;
+ u->dl = (struct dl *) di->devs;
+ /* the borrowed di->devs pointer is handed over and cleared */
+ di->devs = NULL;
+ u->slot = di->disk.raid_disk;
+ u->array = inst;
+ u->next = u + 1;
+ u++;
+ }
+ /* terminate the chain at the last record */
+ (u-1)->next = NULL;
+ *updates = mu;
+
+ return rv;
+}
+
+/* Report whether existing volume 'idx' shares a disk with the volume
+ * described by create-array update 'u'. Disks are compared by serial
+ * number with serialcmp().
+ *
+ * Returns 1 if any serial matches, 0 otherwise.
+ */
+static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
+{
+	struct imsm_dev *dev = get_imsm_dev(super, idx);
+	struct imsm_map *map = get_imsm_map(dev, MAP_0);
+	struct imsm_map *new_map = get_imsm_map(&u->dev, MAP_0);
+	struct disk_info *inf = get_disk_info(u);
+	int cur;
+	int cand;
+
+	for (cur = 0; cur < map->num_members; cur++) {
+		struct imsm_disk *disk =
+			get_imsm_disk(super, get_imsm_disk_idx(dev, cur, MAP_X));
+
+		for (cand = 0; cand < new_map->num_members; cand++) {
+			if (serialcmp(disk->serial, inf[cand].serial) == 0)
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/* Return the entry of super->disks whose device numbers equal
+ * major:minor, or NULL when there is none.
+ */
+static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
+{
+	struct dl *cur = super->disks;
+
+	while (cur) {
+		if (cur->major == major && cur->minor == minor)
+			break;
+		cur = cur->next;
+	}
+	return cur;
+}
+
+/* Unlink the first entry of super->disks matching major:minor and release
+ * it with __free_imsm_disk(). Always returns 0, whether or not a matching
+ * entry was found.
+ */
+static int remove_disk_super(struct intel_super *super, int major, int minor)
+{
+	struct dl **link;
+
+	/* 'link' is the pointer that currently refers to the examined entry */
+	for (link = &super->disks; *link; link = &(*link)->next) {
+		struct dl *victim = *link;
+
+		if (victim->major != major || victim->minor != minor)
+			continue;
+
+		/* remove */
+		*link = victim->next;
+		victim->next = NULL;
+		__free_imsm_disk(victim);
+		dprintf("%s: removed %x:%x\n",
+			__func__, major, minor);
+		break;
+	}
+	return 0;
+}
+
+/* Forward declaration: imsm_delete() is called from
+ * apply_update_activate_spare(); its definition is not in this part of
+ * the file.
+ */
+static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);
+
+/* Drain super->disk_mgmt_list, applying each queued request.
+ *
+ * DISK_ADD: the request entry itself is moved to the front of
+ * super->disks.
+ * DISK_REMOVE: the matching entry of super->disks is flagged DISK_REMOVE
+ * and, if it is a spare (index -1), removed from the list; the request
+ * entry is then released.
+ *
+ * Returns 1 if at least one disk was added, 0 otherwise.
+ */
+static int add_remove_disk_update(struct intel_super *super)
+{
+	int check_degraded = 0;
+	struct dl *req;
+	struct dl *known;
+
+	/* add/remove some spares to/from the metadata/container */
+	while ((req = super->disk_mgmt_list) != NULL) {
+		/* detach the request from the queue */
+		super->disk_mgmt_list = req->next;
+		req->next = NULL;
+
+		if (req->action == DISK_ADD) {
+			req->next = super->disks;
+			super->disks = req;
+			check_degraded = 1;
+			dprintf("%s: added %x:%x\n",
+				__func__, req->major,
+				req->minor);
+			continue;
+		}
+
+		if (req->action != DISK_REMOVE)
+			continue;
+
+		dprintf("Disk remove action processed: %x.%x\n",
+			req->major, req->minor);
+		known = get_disk_super(super, req->major, req->minor);
+		if (known) {
+			/* store action status */
+			known->action = DISK_REMOVE;
+			/* remove spare disks only */
+			if (known->index == -1)
+				remove_disk_super(super,
+						  req->major,
+						  req->minor);
+		}
+		/* release the request structure */
+		__free_imsm_disk(req);
+	}
+	return check_degraded;
+}
+
+/* Apply a reshape-migration update (level and/or chunk size change) to the
+ * volume selected by u->subdev.
+ *
+ * u: the update record.
+ * super: container metadata.
+ * space_list: list of pre-allocated buffers; one is consumed for the new
+ *    device structure and the old structure is handed back through it.
+ *
+ * Returns 1 when the update was applied, 0 on any error (bad subdev, no
+ * buffer, migration already in progress, or the new disk not found).
+ */
+static int apply_reshape_migration_update(struct imsm_update_reshape_migration *u,
+					  struct intel_super *super,
+					  void ***space_list)
+{
+	struct intel_dev *id;
+	void **tofree = NULL;
+	int ret_val = 0;
+
+	dprintf("apply_reshape_migration_update()\n");
+	if ((u->subdev < 0) ||
+	    (u->subdev > 1)) {
+		dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
+		return ret_val;
+	}
+	if ((space_list == NULL) || (*space_list == NULL)) {
+		dprintf("imsm: Error: Memory is not allocated\n");
+		return ret_val;
+	}
+
+	for (id = super->devlist ; id; id = id->next) {
+		if (id->index == (unsigned)u->subdev) {
+			struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
+			struct imsm_map *map;
+			struct imsm_dev *new_dev =
+				(struct imsm_dev *)*space_list;
+			struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
+			int to_state;
+			struct dl *new_disk;
+
+			if (new_dev == NULL)
+				return ret_val;
+			/* pop the buffer off the space list */
+			*space_list = **space_list;
+			memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0));
+			map = get_imsm_map(new_dev, MAP_0);
+			if (migr_map) {
+				dprintf("imsm: Error: migration in progress");
+				return ret_val;
+			}
+
+			to_state = map->map_state;
+			if ((u->new_level == 5) && (map->raid_level == 0)) {
+				map->num_members++;
+				/* this should not happen */
+				if (u->new_disks[0] < 0) {
+					map->failed_disk_num =
+						map->num_members - 1;
+					to_state = IMSM_T_STATE_DEGRADED;
+				} else
+					to_state = IMSM_T_STATE_NORMAL;
+			}
+			migrate(new_dev, super, to_state, MIGR_GEN_MIGR);
+			if (u->new_level > -1)
+				map->raid_level = u->new_level;
+			migr_map = get_imsm_map(new_dev, MAP_1);
+			if ((u->new_level == 5) &&
+			    (migr_map->raid_level == 0)) {
+				int ord = map->num_members - 1;
+				migr_map->num_members--;
+				if (u->new_disks[0] < 0)
+					ord |= IMSM_ORD_REBUILD;
+				set_imsm_ord_tbl_ent(map,
+						     map->num_members - 1,
+						     ord);
+			}
+			id->dev = new_dev;
+			tofree = (void **)dev;
+
+			/* update chunk size
+			 */
+			if (u->new_chunksize > 0)
+				map->blocks_per_strip =
+					__cpu_to_le16(u->new_chunksize * 2);
+
+			/* add disk
+			 */
+			if ((u->new_level != 5) ||
+			    (migr_map->raid_level != 0) ||
+			    (migr_map->raid_level == map->raid_level))
+				goto skip_disk_add;
+
+			if (u->new_disks[0] < 0)
+				goto error_disk_add;
+
+			/* use passed spare
+			 */
+			new_disk = get_disk_super(super,
+						  major(u->new_disks[0]),
+						  minor(u->new_disks[0]));
+			/* check before the debug print dereferences it */
+			if (new_disk == NULL)
+				goto error_disk_add;
+			dprintf("imsm: new disk for reshape is: %i:%i "
+				"(%p, index = %i)\n",
+				major(u->new_disks[0]),
+				minor(u->new_disks[0]),
+				new_disk, new_disk->index);
+
+			new_disk->index = map->num_members - 1;
+			/* slot to fill in autolayout
+			 */
+			new_disk->raiddisk = new_disk->index;
+			new_disk->disk.status |= CONFIGURED_DISK;
+			new_disk->disk.status &= ~SPARE_DISK;
+
+skip_disk_add:
+			/* chain the old device structure onto the space list */
+			*tofree = *space_list;
+			/* calculate new size
+			 */
+			imsm_set_array_size(new_dev, -1);
+
+			ret_val = 1;
+		}
+	}
+
+	if (tofree)
+		*space_list = tofree;
+	return ret_val;
+
+error_disk_add:
+	dprintf("Error: imsm: Cannot find disk.\n");
+	return ret_val;
+}
+
+/* Apply a size-change update to the volume selected by u->subdev.
+ *
+ * The new per-member size is u->new_size divided by the number of data
+ * members; it is stored in the first map and the array size is set to
+ * u->new_size.
+ *
+ * Returns 1 when the update was applied, 0 when the subdev is invalid or
+ * not found, or when the volume reports no data members.
+ */
+static int apply_size_change_update(struct imsm_update_size_change *u,
+				    struct intel_super *super)
+{
+	struct intel_dev *id;
+	int ret_val = 0;
+
+	dprintf("apply_size_change_update()\n");
+	if ((u->subdev < 0) ||
+	    (u->subdev > 1)) {
+		dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
+		return ret_val;
+	}
+
+	for (id = super->devlist ; id; id = id->next) {
+		if (id->index == (unsigned)u->subdev) {
+			struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
+			struct imsm_map *map = get_imsm_map(dev, MAP_0);
+			int used_disks = imsm_num_data_members(dev, MAP_0);
+			unsigned long long blocks_per_member;
+
+			/* never divide by a zero or negative member count */
+			if (used_disks <= 0)
+				break;
+
+			/* calculate new size
+			 */
+			blocks_per_member = u->new_size / used_disks;
+			dprintf("imsm: apply_size_change_update(size: %llu, "
+				"blocks per member: %llu)\n",
+				u->new_size, blocks_per_member);
+			set_blocks_per_member(map, blocks_per_member);
+			imsm_set_array_size(dev, u->new_size);
+
+			ret_val = 1;
+			break;
+		}
+	}
+
+	return ret_val;
+}
+
+/* Apply a chain of activate-spare update records to the metadata.
+ *
+ * u: first record of the chain (linked through ->next); the volume is
+ *    taken from the first record's 'array' field.
+ * super: container metadata.
+ * active_array: list of active arrays, used to decide whether the
+ *    replaced disk (the "victim") is still referenced anywhere.
+ *
+ * For every record the chosen disk is configured, written into the slot
+ * in both maps (flagged for rebuild in the second map), the family number
+ * is changed, and the victim is deleted once no active array uses it.
+ *
+ * Returns 1 on success, 0 when a slot has no current disk index or the
+ * record names a disk that is not on super->disks.
+ */
+static int apply_update_activate_spare(struct imsm_update_activate_spare *u,
+				       struct intel_super *super,
+				       struct active_array *active_array)
+{
+	struct imsm_super *mpb = super->anchor;
+	struct imsm_dev *dev = get_imsm_dev(super, u->array);
+	struct imsm_map *map = get_imsm_map(dev, MAP_0);
+	struct imsm_map *migr_map;
+	struct active_array *a;
+	struct imsm_disk *disk;
+	__u8 to_state;
+	struct dl *dl;
+	unsigned int found;
+	int failed;
+	int victim;
+	int i;
+	int second_map_created = 0;
+
+	for (; u; u = u->next) {
+		victim = get_imsm_disk_idx(dev, u->slot, MAP_X);
+
+		if (victim < 0)
+			return 0;
+
+		/* make sure the requested disk is really on our list */
+		for (dl = super->disks; dl; dl = dl->next)
+			if (dl == u->dl)
+				break;
+
+		if (!dl) {
+			pr_err("error: imsm_activate_spare passed "
+			       "an unknown disk (index: %d)\n",
+			       u->dl->index);
+			return 0;
+		}
+
+		/* count failures (excluding rebuilds and the victim)
+		 * to determine map[0] state
+		 */
+		failed = 0;
+		for (i = 0; i < map->num_members; i++) {
+			if (i == u->slot)
+				continue;
+			disk = get_imsm_disk(super,
+					     get_imsm_disk_idx(dev, i, MAP_X));
+			if (!disk || is_failed(disk))
+				failed++;
+		}
+
+		/* adding a pristine spare, assign a new index */
+		if (dl->index < 0) {
+			dl->index = super->anchor->num_disks;
+			super->anchor->num_disks++;
+		}
+		disk = &dl->disk;
+		disk->status |= CONFIGURED_DISK;
+		disk->status &= ~SPARE_DISK;
+
+		/* mark rebuild */
+		to_state = imsm_check_degraded(super, dev, failed, MAP_0);
+		if (!second_map_created) {
+			second_map_created = 1;
+			map->map_state = IMSM_T_STATE_DEGRADED;
+			migrate(dev, super, to_state, MIGR_REBUILD);
+		} else
+			map->map_state = to_state;
+		migr_map = get_imsm_map(dev, MAP_1);
+		set_imsm_ord_tbl_ent(map, u->slot, dl->index);
+		set_imsm_ord_tbl_ent(migr_map, u->slot,
+				     dl->index | IMSM_ORD_REBUILD);
+
+		/* update the family_num to mark a new container
+		 * generation, being careful to record the existing
+		 * family_num in orig_family_num to clean up after
+		 * earlier mdadm versions that neglected to set it.
+		 */
+		if (mpb->orig_family_num == 0)
+			mpb->orig_family_num = mpb->family_num;
+		mpb->family_num += super->random;
+
+		/* count arrays using the victim in the metadata.
+		 * Use loop-local pointers here: 'dev' and 'map' must keep
+		 * referring to the volume being repaired for the next
+		 * record in the chain.
+		 */
+		found = 0;
+		for (a = active_array; a ; a = a->next) {
+			struct imsm_dev *a_dev =
+				get_imsm_dev(super, a->info.container_member);
+			struct imsm_map *a_map = get_imsm_map(a_dev, MAP_0);
+
+			if (get_imsm_disk_slot(a_map, victim) >= 0)
+				found++;
+		}
+
+		/* delete the victim if it is no longer being
+		 * utilized anywhere
+		 */
+		if (!found) {
+			struct dl **dlp;
+
+			/* We know that 'manager' isn't touching anything,
+			 * so it is safe to delete
+			 */
+			for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
+				if ((*dlp)->index == victim)
+					break;
+
+			/* victim may be on the missing list */
+			if (!*dlp)
+				for (dlp = &super->missing; *dlp;
+				     dlp = &(*dlp)->next)
+					if ((*dlp)->index == victim)
+						break;
+			imsm_delete(super, dlp, victim);
+		}
+	}
+
+	return 1;
+}
+
+/* Apply a container reshape update that grows the number of raid disks.
+ *
+ * u: the update record; u->new_disks[] holds the device numbers of the
+ *    disks to add (new_raid_disks - old_raid_disks of them).
+ * super: container metadata.
+ * space_list: list of pre-allocated buffers, one per volume; the old
+ *    device structures are handed back through it.
+ *
+ * The added disks are turned into configured members, then every volume
+ * gets a fresh device structure. Only the first volume is switched to
+ * general migration with the enlarged map; the others are copied as is.
+ *
+ * Returns 1 on success, 0 when a new disk is not found or already carries
+ * an index below u->old_raid_disks.
+ */
+static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
+						struct intel_super *super,
+						void ***space_list)
+{
+	struct dl *new_disk;
+	struct intel_dev *id;
+	int i;
+	int delta_disks = u->new_raid_disks - u->old_raid_disks;
+	int disk_count = u->old_raid_disks;
+	void **tofree = NULL;
+	int devices_to_reshape = 1;
+	struct imsm_super *mpb = super->anchor;
+	int ret_val = 0;
+	unsigned int dev_id;
+
+	dprintf("imsm: apply_reshape_container_disks_update()\n");
+
+	/* enable spares to use in array */
+	for (i = 0; i < delta_disks; i++) {
+		new_disk = get_disk_super(super,
+					  major(u->new_disks[i]),
+					  minor(u->new_disks[i]));
+		/* check before the debug print dereferences it */
+		if (new_disk == NULL)
+			goto update_reshape_exit;
+		dprintf("imsm: new disk for reshape is: %i:%i "
+			"(%p, index = %i)\n",
+			major(u->new_disks[i]), minor(u->new_disks[i]),
+			new_disk, new_disk->index);
+		if ((new_disk->index >= 0) &&
+		    (new_disk->index < u->old_raid_disks))
+			goto update_reshape_exit;
+		new_disk->index = disk_count++;
+		/* slot to fill in autolayout
+		 */
+		new_disk->raiddisk = new_disk->index;
+		new_disk->disk.status |=
+			CONFIGURED_DISK;
+		new_disk->disk.status &= ~SPARE_DISK;
+	}
+
+	dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
+		mpb->num_raid_devs);
+	/* manage changes in volume
+	 */
+	for (dev_id = 0; dev_id < mpb->num_raid_devs; dev_id++) {
+		void **sp = *space_list;
+		struct imsm_dev *newdev;
+		struct imsm_map *newmap, *oldmap;
+
+		for (id = super->devlist ; id; id = id->next) {
+			if (id->index == dev_id)
+				break;
+		}
+		if (id == NULL)
+			break;
+		if (!sp)
+			continue;
+		/* pop one buffer off the space list for the new device */
+		*space_list = *sp;
+		newdev = (void*)sp;
+		/* Copy the dev, but not (all of) the map */
+		memcpy(newdev, id->dev, sizeof(*newdev));
+		oldmap = get_imsm_map(id->dev, MAP_0);
+		newmap = get_imsm_map(newdev, MAP_0);
+		/* Copy the current map */
+		memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
+		/* update one device only
+		 */
+		if (devices_to_reshape) {
+			dprintf("imsm: modifying subdev: %i\n",
+				id->index);
+			devices_to_reshape--;
+			newdev->vol.migr_state = 1;
+			newdev->vol.curr_migr_unit = 0;
+			set_migr_type(newdev, MIGR_GEN_MIGR);
+			newmap->num_members = u->new_raid_disks;
+			for (i = 0; i < delta_disks; i++) {
+				set_imsm_ord_tbl_ent(newmap,
+						     u->old_raid_disks + i,
+						     u->old_raid_disks + i);
+			}
+			/* New map is correct, now need to save old map
+			 */
+			newmap = get_imsm_map(newdev, MAP_1);
+			memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
+
+			imsm_set_array_size(newdev, -1);
+		}
+
+		/* swap in the new device and chain the old one for freeing */
+		sp = (void **)id->dev;
+		id->dev = newdev;
+		*sp = tofree;
+		tofree = sp;
+
+		/* Clear migration record */
+		memset(super->migr_rec, 0, sizeof(struct migr_record));
+	}
+	if (tofree)
+		*space_list = tofree;
+	ret_val = 1;
+
+update_reshape_exit:
+
+	return ret_val;
+}
+
+/* Apply a takeover update (RAID10 <-> RAID0 conversion) to the volume
+ * whose devlist index equals u->subarray.
+ *
+ * u: the update record; u->direction selects R10_TO_R0 or R0_TO_R10.
+ * super: container metadata.
+ * space_list: list of pre-allocated buffers, consumed in the R0_TO_R10
+ * direction for the placeholder disks and the new device structure.
+ *
+ * Returns 1 when the update was applied, 0 when the volume is not found,
+ * the failed-disk count does not allow R10_TO_R0, or no buffer is left for
+ * the new device structure.
+ */
+static int apply_takeover_update(struct imsm_update_takeover *u,
+ struct intel_super *super,
+ void ***space_list)
+{
+ struct imsm_dev *dev = NULL;
+ struct intel_dev *dv;
+ struct imsm_dev *dev_new;
+ struct imsm_map *map;
+ struct dl *dm, *du;
+ int i;
+
+ /* on a match 'dv' is left pointing at the devlist entry */
+ for (dv = super->devlist; dv; dv = dv->next)
+ if (dv->index == (unsigned int)u->subarray) {
+ dev = dv->dev;
+ break;
+ }
+
+ if (dev == NULL)
+ return 0;
+
+ map = get_imsm_map(dev, MAP_0);
+
+ if (u->direction == R10_TO_R0) {
+ /* Number of failed disks must be half of initial disk number */
+ if (imsm_count_failed(super, dev, MAP_0) !=
+ (map->num_members / 2))
+ return 0;
+
+ /* iterate through devices to mark removed disks as spare */
+ for (dm = super->disks; dm; dm = dm->next) {
+ if (dm->disk.status & FAILED_DISK) {
+ int idx = dm->index;
+ /* update indexes on the disk list */
+/* FIXME this loop-with-the-loop looks wrong, I'm not convinced
+ the index values will end up being correct.... NB */
+ for (du = super->disks; du; du = du->next)
+ if (du->index > idx)
+ du->index--;
+ /* mark as spare disk */
+ mark_spare(dm);
+ }
+ }
+ /* update map */
+ map->num_members = map->num_members / 2;
+ map->map_state = IMSM_T_STATE_NORMAL;
+ map->num_domains = 1;
+ map->raid_level = 0;
+ map->failed_disk_num = -1;
+ }
+
+ if (u->direction == R0_TO_R10) {
+ void **space;
+ /* update slots in current disk list */
+ for (dm = super->disks; dm; dm = dm->next) {
+ if (dm->index >= 0)
+ dm->index *= 2;
+ }
+ /* create new *missing* disks */
+ for (i = 0; i < map->num_members; i++) {
+ /* take one buffer off the space list for the placeholder */
+ space = *space_list;
+ if (!space)
+ continue;
+ *space_list = *space;
+ du = (void *)space;
+ memcpy(du, super->disks, sizeof(*du));
+ du->fd = -1;
+ du->minor = 0;
+ du->major = 0;
+ /* placeholders take the odd indexes */
+ du->index = (i * 2) + 1;
+ sprintf((char *)du->disk.serial,
+ " MISSING_%d", du->index);
+ sprintf((char *)du->serial,
+ "MISSING_%d", du->index);
+ du->next = super->missing;
+ super->missing = du;
+ }
+ /* create new dev and map */
+ space = *space_list;
+ if (!space)
+ return 0;
+ *space_list = *space;
+ dev_new = (void *)space;
+ memcpy(dev_new, dev, sizeof(*dev));
+ /* update new map */
+ map = get_imsm_map(dev_new, MAP_0);
+ map->num_members = map->num_members * 2;
+ map->map_state = IMSM_T_STATE_DEGRADED;
+ map->num_domains = 2;
+ map->raid_level = 1;
+ /* replace dev<->dev_new */
+ dv->dev = dev_new;
+ }
+ /* update disk order table */
+ for (du = super->disks; du; du = du->next)
+ if (du->index >= 0)
+ set_imsm_ord_tbl_ent(map, du->index, du->index);
+ for (du = super->missing; du; du = du->next)
+ if (du->index >= 0) {
+ set_imsm_ord_tbl_ent(map, du->index, du->index);
+ mark_missing(dv->dev, &du->disk, du->index);
+ }
+
+ return 1;
+}
+
+/**
+ * imsm_process_update() - apply one queued metadata update to the in-memory
+ * IMSM superblock
+ * @st: supertype whose ->sb is the intel_super being modified; ->arrays is
+ *	consulted for the active-array checks
+ * @update: update envelope.  update->buf starts with the
+ *	enum imsm_update_type that selects the record layout;
+ *	update->space and update->space_list carry the memory set aside by
+ *	imsm_prepare_update()
+ *
+ * A larger metadata buffer staged in super->next_buf is switched in before
+ * the update is applied.  Every update that changes the metadata increments
+ * super->updates_pending.  Nothing is returned: an update that cannot be
+ * applied is simply dropped.
+ */
+static void imsm_process_update(struct supertype *st,
+				struct metadata_update *update)
+{
+	/**
+	 * crack open the metadata_update envelope to find the update record
+	 * update can be one of:
+	 *    update_reshape_container_disks - all the arrays in the container
+	 *      are being reshaped to have more devices.  We need to mark
+	 *      the arrays for general migration and convert selected spares
+	 *      into active devices.
+	 *    update_activate_spare - a spare device has replaced a failed
+	 *      device in an array, update the disk_ord_tbl.  If this disk is
+	 *      present in all member arrays then also clear the SPARE_DISK
+	 *      flag
+	 *    update_create_array
+	 *    update_kill_array
+	 *    update_rename_array
+	 *    update_add_remove_disk
+	 */
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb;
+	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
+
+	/* update requires a larger buf but the allocation failed */
+	if (super->next_len && !super->next_buf) {
+		super->next_len = 0;
+		return;
+	}
+
+	/* switch to the larger buffer staged by imsm_prepare_update() */
+	if (super->next_buf) {
+		memcpy(super->next_buf, super->buf, super->len);
+		free(super->buf);
+		super->len = super->next_len;
+		super->buf = super->next_buf;
+
+		super->next_len = 0;
+		super->next_buf = NULL;
+	}
+
+	/* read the anchor only now: super->buf may just have been replaced */
+	mpb = super->anchor;
+
+	switch (type) {
+	case update_general_migration_checkpoint: {
+		struct intel_dev *id;
+		struct imsm_update_general_migration_checkpoint *u =
+							(void *)update->buf;
+
+		dprintf("imsm: process_update() "
+			"for update_general_migration_checkpoint called\n");
+
+		/* find device under general migration */
+		for (id = super->devlist ; id; id = id->next) {
+			if (is_gen_migration(id->dev)) {
+				id->dev->vol.curr_migr_unit =
+					__cpu_to_le32(u->curr_migr_unit);
+				super->updates_pending++;
+			}
+		}
+		break;
+	}
+	case update_takeover: {
+		struct imsm_update_takeover *u = (void *)update->buf;
+		if (apply_takeover_update(u, super, &update->space_list)) {
+			imsm_update_version_info(super);
+			super->updates_pending++;
+		}
+		break;
+	}
+
+	case update_reshape_container_disks: {
+		struct imsm_update_reshape *u = (void *)update->buf;
+		if (apply_reshape_container_disks_update(
+			    u, super, &update->space_list))
+			super->updates_pending++;
+		break;
+	}
+	case update_reshape_migration: {
+		struct imsm_update_reshape_migration *u = (void *)update->buf;
+		if (apply_reshape_migration_update(
+			    u, super, &update->space_list))
+			super->updates_pending++;
+		break;
+	}
+	case update_size_change: {
+		struct imsm_update_size_change *u = (void *)update->buf;
+		if (apply_size_change_update(u, super))
+			super->updates_pending++;
+		break;
+	}
+	case update_activate_spare: {
+		struct imsm_update_activate_spare *u = (void *) update->buf;
+		if (apply_update_activate_spare(u, super, st->arrays))
+			super->updates_pending++;
+		break;
+	}
+	case update_create_array: {
+		/* someone wants to create a new array, we need to be aware of
+		 * a few races/collisions:
+		 * 1/ 'Create' called by two separate instances of mdadm
+		 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
+		 *     devices that have since been assimilated via
+		 *     activate_spare.
+		 * In the event this update can not be carried out mdadm will
+		 * (FIX ME) notice that its update did not take hold.
+		 */
+		struct imsm_update_create_array *u = (void *) update->buf;
+		struct intel_dev *dv;
+		struct imsm_dev *dev;
+		struct imsm_map *map, *new_map;
+		unsigned long long start, end;
+		unsigned long long new_start, new_end;
+		int i;
+		struct disk_info *inf;
+		struct dl *dl;
+
+		/* handle racing creates: first come first serve */
+		if (u->dev_idx < mpb->num_raid_devs) {
+			dprintf("%s: subarray %d already defined\n",
+				__func__, u->dev_idx);
+			goto create_error;
+		}
+
+		/* check update is next in sequence */
+		if (u->dev_idx != mpb->num_raid_devs) {
+			dprintf("%s: can not create array %d expected index %d\n",
+				__func__, u->dev_idx, mpb->num_raid_devs);
+			goto create_error;
+		}
+
+		new_map = get_imsm_map(&u->dev, MAP_0);
+		new_start = pba_of_lba0(new_map);
+		new_end = new_start + blocks_per_member(new_map);
+		inf = get_disk_info(u);
+
+		/* handle activate_spare versus create race:
+		 * check to make sure that overlapping arrays do not include
+		 * overlapping disks
+		 */
+		for (i = 0; i < mpb->num_raid_devs; i++) {
+			dev = get_imsm_dev(super, i);
+			map = get_imsm_map(dev, MAP_0);
+			start = pba_of_lba0(map);
+			end = start + blocks_per_member(map);
+			if ((new_start >= start && new_start <= end) ||
+			    (start >= new_start && start <= new_end))
+				/* overlap */;
+			else
+				continue;
+
+			if (disks_overlap(super, i, u)) {
+				dprintf("%s: arrays overlap\n", __func__);
+				goto create_error;
+			}
+		}
+
+		/* check that prepare update was successful */
+		if (!update->space) {
+			dprintf("%s: prepare update failed\n", __func__);
+			goto create_error;
+		}
+
+		/* check that all disks are still active before committing
+		 * changes.  FIXME: could we instead handle this by creating a
+		 * degraded array?  That's probably not what the user expects,
+		 * so better to drop this update on the floor.
+		 */
+		for (i = 0; i < new_map->num_members; i++) {
+			dl = serial_to_dl(inf[i].serial, super);
+			if (!dl) {
+				dprintf("%s: disk disappeared\n", __func__);
+				goto create_error;
+			}
+		}
+
+		super->updates_pending++;
+
+		/* convert spares to members and fixup ord_tbl */
+		for (i = 0; i < new_map->num_members; i++) {
+			dl = serial_to_dl(inf[i].serial, super);
+			if (dl->index == -1) {
+				dl->index = mpb->num_disks;
+				mpb->num_disks++;
+				dl->disk.status |= CONFIGURED_DISK;
+				dl->disk.status &= ~SPARE_DISK;
+			}
+			set_imsm_ord_tbl_ent(new_map, i, dl->index);
+		}
+
+		/* take ownership of the pre-allocated devlist entry and
+		 * link it at the head of super->devlist
+		 */
+		dv = update->space;
+		dev = dv->dev;
+		update->space = NULL;
+		imsm_copy_dev(dev, &u->dev);
+		dv->index = u->dev_idx;
+		dv->next = super->devlist;
+		super->devlist = dv;
+		mpb->num_raid_devs++;
+
+		imsm_update_version_info(super);
+		break;
+ create_error:
+		/* mdmon knows how to release update->space, but not
+		 * ((struct intel_dev *) update->space)->dev
+		 */
+		if (update->space) {
+			dv = update->space;
+			free(dv->dev);
+		}
+		break;
+	}
+	case update_kill_array: {
+		struct imsm_update_kill_array *u = (void *) update->buf;
+		int victim = u->dev_idx;
+		struct active_array *a;
+		struct intel_dev **dp;
+		struct imsm_dev *dev;
+
+		/* sanity check that we are not affecting the uuid of
+		 * active arrays, or deleting an active array
+		 *
+		 * FIXME when immutable ids are available, but note that
+		 * we'll also need to fixup the invalidated/active
+		 * subarray indexes in mdstat
+		 */
+		for (a = st->arrays; a; a = a->next)
+			if (a->info.container_member >= victim)
+				break;
+		/* by definition if mdmon is running at least one array
+		 * is active in the container, so checking
+		 * mpb->num_raid_devs is just extra paranoia
+		 */
+		dev = get_imsm_dev(super, victim);
+		if (a || !dev || mpb->num_raid_devs == 1) {
+			dprintf("failed to delete subarray-%d\n", victim);
+			break;
+		}
+
+		/* unlink the entry named by the update record and renumber
+		 * everything after it.  The match must be on 'victim':
+		 * nothing on this path sets super->current_vol to the array
+		 * being deleted, so comparing against it could leave the
+		 * victim linked while num_raid_devs is still decremented.
+		 */
+		for (dp = &super->devlist; *dp;)
+			if ((*dp)->index == (unsigned)victim) {
+				*dp = (*dp)->next;
+			} else {
+				if ((*dp)->index > (unsigned)victim)
+					(*dp)->index--;
+				dp = &(*dp)->next;
+			}
+		mpb->num_raid_devs--;
+		super->updates_pending++;
+		break;
+	}
+	case update_rename_array: {
+		struct imsm_update_rename_array *u = (void *) update->buf;
+		char name[MAX_RAID_SERIAL_LEN+1];
+		int target = u->dev_idx;
+		struct active_array *a;
+		struct imsm_dev *dev;
+
+		/* sanity check that we are not affecting the uuid of
+		 * an active array
+		 */
+		snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
+		name[MAX_RAID_SERIAL_LEN] = '\0';
+		for (a = st->arrays; a; a = a->next)
+			if (a->info.container_member == target)
+				break;
+		dev = get_imsm_dev(super, u->dev_idx);
+		if (a || !dev || !check_name(super, name, 1)) {
+			dprintf("failed to rename subarray-%d\n", target);
+			break;
+		}
+
+		snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
+		super->updates_pending++;
+		break;
+	}
+	case update_add_remove_disk: {
+		/* we may be able to repair some arrays if disks are
+		 * being added, check the status of add_remove_disk
+		 * if discs has been added.
+		 */
+		if (add_remove_disk_update(super)) {
+			struct active_array *a;
+
+			super->updates_pending++;
+			for (a = st->arrays; a; a = a->next)
+				a->check_degraded = 1;
+		}
+		break;
+	}
+	default:
+		pr_err("error: unsuported process update type:"
+			"(type: %d)\n",	type);
+	}
+}
+
+static struct mdinfo *get_spares_for_grow(struct supertype *st);
+
+static int imsm_prepare_update(struct supertype *st,
+ struct metadata_update *update)
+{
+ /**
+ * Allocate space to hold new disk entries, raid-device entries or a new
+ * mpb if necessary. The manager synchronously waits for updates to
+ * complete in the monitor, so new mpb buffers allocated here can be
+ * integrated by the monitor thread without worrying about live pointers
+ * in the manager thread.
+ *
+ * The record type is read from the start of update->buf. Per-type
+ * allocations are chained onto update->space_list (each chunk's first
+ * pointer-sized word links to the next chunk) or stored in
+ * update->space. 'len' collects the extra metadata bytes the update
+ * may need; it is compared against the current buffer size at the
+ * bottom of the function.
+ *
+ * Returns 0 when update->len is too short for the record type or the
+ * type is unknown (this call has allocated nothing in that case),
+ * 1 otherwise - including when the larger metadata buffer could not
+ * be allocated, which is reported through super->next_len being set
+ * while super->next_buf is NULL.
+ */
+ enum imsm_update_type type;
+ struct intel_super *super = st->sb;
+ struct imsm_super *mpb = super->anchor;
+ size_t buf_len;
+ size_t len = 0;
+
+ if (update->len < (int)sizeof(type))
+ return 0;
+
+ type = *(enum imsm_update_type *) update->buf;
+
+ switch (type) {
+ case update_general_migration_checkpoint:
+ if (update->len < (int)sizeof(struct imsm_update_general_migration_checkpoint))
+ return 0;
+ dprintf("imsm: prepare_update() "
+ "for update_general_migration_checkpoint called\n");
+ break;
+ case update_takeover: {
+ struct imsm_update_takeover *u = (void *)update->buf;
+ if (update->len < (int)sizeof(*u))
+ return 0;
+ if (u->direction == R0_TO_R10) {
+ void **tail = (void **)&update->space_list;
+ struct imsm_dev *dev = get_imsm_dev(super, u->subarray);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ int num_members = map->num_members;
+ void *space;
+ int size, i;
+ /* allocate memory for added disks
+ * (one struct dl per current member)
+ */
+ for (i = 0; i < num_members; i++) {
+ size = sizeof(struct dl);
+ space = xmalloc(size);
+ *tail = space;
+ tail = space;
+ *tail = NULL;
+ }
+ /* allocate memory for new device
+ * NOTE(review): sized from super->devlist->dev, i.e. the
+ * head of the device list, not from 'dev' looked up via
+ * u->subarray - confirm this is intended.
+ */
+ size = sizeof_imsm_dev(super->devlist->dev, 0) +
+ (num_members * sizeof(__u32));
+ space = xmalloc(size);
+ *tail = space;
+ tail = space;
+ *tail = NULL;
+ len = disks_to_mpb_size(num_members * 2);
+ }
+
+ break;
+ }
+ case update_reshape_container_disks: {
+ /* Every raid device in the container is about to
+ * gain some more devices, and we will enter a
+ * reconfiguration.
+ * So each 'imsm_map' will be bigger, and the imsm_vol
+ * will now hold 2 of them.
+ * Thus we need new 'struct imsm_dev' allocations sized
+ * as sizeof_imsm_dev but with more devices in both maps.
+ */
+ struct imsm_update_reshape *u = (void *)update->buf;
+ struct intel_dev *dl;
+ void **space_tail = (void**)&update->space_list;
+
+ if (update->len < (int)sizeof(*u))
+ return 0;
+
+ dprintf("imsm: imsm_prepare_update() for update_reshape\n");
+
+ for (dl = super->devlist; dl; dl = dl->next) {
+ int size = sizeof_imsm_dev(dl->dev, 1);
+ void *s;
+ if (u->new_raid_disks > u->old_raid_disks)
+ size += sizeof(__u32)*2*
+ (u->new_raid_disks - u->old_raid_disks);
+ s = xmalloc(size);
+ *space_tail = s;
+ space_tail = s;
+ *space_tail = NULL;
+ }
+
+ len = disks_to_mpb_size(u->new_raid_disks);
+ dprintf("New anchor length is %llu\n", (unsigned long long)len);
+ break;
+ }
+ case update_reshape_migration: {
+ /* for migration level 0->5 we need to add disks
+ * so the same as for container operation we will copy
+ * device to the bigger location.
+ * in memory prepared device and new disk area are prepared
+ * for usage in process update
+ */
+ struct imsm_update_reshape_migration *u = (void *)update->buf;
+ struct intel_dev *id;
+ void **space_tail = (void **)&update->space_list;
+ int size;
+ void *s;
+ int current_level = -1;
+
+ if (update->len < (int)sizeof(*u))
+ return 0;
+
+ dprintf("imsm: imsm_prepare_update() for update_reshape\n");
+
+ /* add space for bigger array in update
+ * (only the device whose index matches u->subdev)
+ */
+ for (id = super->devlist; id; id = id->next) {
+ if (id->index == (unsigned)u->subdev) {
+ size = sizeof_imsm_dev(id->dev, 1);
+ if (u->new_raid_disks > u->old_raid_disks)
+ size += sizeof(__u32)*2*
+ (u->new_raid_disks - u->old_raid_disks);
+ s = xmalloc(size);
+ *space_tail = s;
+ space_tail = s;
+ *space_tail = NULL;
+ break;
+ }
+ }
+ if (update->space_list == NULL)
+ break;
+
+ /* add space for disk in update
+ */
+ size = sizeof(struct dl);
+ s = xmalloc(size);
+ *space_tail = s;
+ space_tail = s;
+ *space_tail = NULL;
+
+ /* add spare device to update
+ *
+ * When the target level is 5 and differs from the current
+ * level, the first spare reported by get_spares_for_grow() is
+ * written into u->new_disks[0] and given index
+ * u->old_raid_disks.
+ * NOTE(review): the get_disk_super() result is dereferenced
+ * without a NULL check - confirm it cannot fail for a device
+ * that get_spares_for_grow() just returned.
+ * NOTE(review): the trailing 'dev = dev->next' below is a dead
+ * store; only one spare is ever consumed here.
+ */
+ for (id = super->devlist ; id; id = id->next)
+ if (id->index == (unsigned)u->subdev) {
+ struct imsm_dev *dev;
+ struct imsm_map *map;
+
+ dev = get_imsm_dev(super, u->subdev);
+ map = get_imsm_map(dev, MAP_0);
+ current_level = map->raid_level;
+ break;
+ }
+ if ((u->new_level == 5) && (u->new_level != current_level)) {
+ struct mdinfo *spares;
+
+ spares = get_spares_for_grow(st);
+ if (spares) {
+ struct dl *dl;
+ struct mdinfo *dev;
+
+ dev = spares->devs;
+ if (dev) {
+ u->new_disks[0] =
+ makedev(dev->disk.major,
+ dev->disk.minor);
+ dl = get_disk_super(super,
+ dev->disk.major,
+ dev->disk.minor);
+ dl->index = u->old_raid_disks;
+ dev = dev->next;
+ }
+ sysfs_free(spares);
+ }
+ }
+ len = disks_to_mpb_size(u->new_raid_disks);
+ dprintf("New anchor length is %llu\n", (unsigned long long)len);
+ break;
+ }
+ case update_size_change: {
+ if (update->len < (int)sizeof(struct imsm_update_size_change))
+ return 0;
+ break;
+ }
+ case update_activate_spare: {
+ if (update->len < (int)sizeof(struct imsm_update_activate_spare))
+ return 0;
+ break;
+ }
+ case update_create_array: {
+ struct imsm_update_create_array *u = (void *) update->buf;
+ struct intel_dev *dv;
+ struct imsm_dev *dev = &u->dev;
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ struct dl *dl;
+ struct disk_info *inf;
+ int i;
+ int activate = 0;
+
+ if (update->len < (int)sizeof(*u))
+ return 0;
+
+ inf = get_disk_info(u);
+ len = sizeof_imsm_dev(dev, 1);
+ /* allocate a new super->devlist entry
+ * It is handed over through update->space;
+ * imsm_process_update() either links it into super->devlist
+ * or frees dv->dev on its error path.
+ */
+ dv = xmalloc(sizeof(*dv));
+ dv->dev = xmalloc(len);
+ update->space = dv;
+
+ /* count how many spares will be converted to members */
+ for (i = 0; i < map->num_members; i++) {
+ dl = serial_to_dl(inf[i].serial, super);
+ if (!dl) {
+ /* hmm maybe it failed?, nothing we can do about
+ * it here
+ */
+ continue;
+ }
+ if (count_memberships(dl, super) == 0)
+ activate++;
+ }
+ len += activate * sizeof(struct imsm_disk);
+ break;
+ }
+ case update_kill_array: {
+ if (update->len < (int)sizeof(struct imsm_update_kill_array))
+ return 0;
+ break;
+ }
+ case update_rename_array: {
+ if (update->len < (int)sizeof(struct imsm_update_rename_array))
+ return 0;
+ break;
+ }
+ case update_add_remove_disk:
+ /* no update->len needed */
+ break;
+ default:
+ return 0;
+ }
+
+ /* check if we need a larger metadata buffer
+ * (measure against a buffer already staged by an earlier prepare)
+ */
+ if (super->next_buf)
+ buf_len = super->next_len;
+ else
+ buf_len = super->len;
+
+ if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
+ /* ok we need a larger buf than what is currently allocated
+ * if this allocation fails process_update will notice that
+ * ->next_len is set and ->next_buf is NULL
+ */
+ buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
+ if (super->next_buf)
+ free(super->next_buf);
+
+ super->next_len = buf_len;
+ if (posix_memalign(&super->next_buf, 512, buf_len) == 0)
+ memset(super->next_buf, 0, buf_len);
+ else
+ super->next_buf = NULL;
+ }
+ return 1;
+}
+
+/* must be called while manager is quiesced
+ *
+ * Remove disk slot 'index' from the metadata:
+ * - every disk on super->disks and super->missing whose index is above
+ *   'index' moves down by one,
+ * - every ord table entry that refers to a disk above 'index' is decremented
+ *   to match,
+ * - mpb->num_disks is decremented and super->updates_pending is bumped,
+ * - if *dlp is non-NULL that list node is unlinked (*dlp advances to its
+ *   successor) and released with __free_imsm_disk().
+ */
+static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index)
+{
+ struct imsm_super *mpb = super->anchor;
+ struct dl *iter;
+ struct imsm_dev *dev;
+ struct imsm_map *map;
+ int i, j, num_members;
+ __u32 ord;
+
+ dprintf("%s: deleting device[%d] from imsm_super\n",
+ __func__, index);
+
+ /* shift all indexes down one */
+ for (iter = super->disks; iter; iter = iter->next)
+ if (iter->index > (int)index)
+ iter->index--;
+ for (iter = super->missing; iter; iter = iter->next)
+ if (iter->index > (int)index)
+ iter->index--;
+
+ for (i = 0; i < mpb->num_raid_devs; i++) {
+ dev = get_imsm_dev(super, i);
+ map = get_imsm_map(dev, MAP_0);
+ num_members = map->num_members;
+ for (j = 0; j < num_members; j++) {
+ /* update ord entries being careful not to propagate
+ * ord-flags to the first map
+ *
+ * MAP_0 therefore receives ord_to_idx(ord - 1), while
+ * MAP_1 (when present) receives the raw ord - 1.
+ */
+ ord = get_imsm_ord_tbl_ent(dev, j, MAP_X);
+
+ if (ord_to_idx(ord) <= index)
+ continue;
+
+ map = get_imsm_map(dev, MAP_0);
+ set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
+ map = get_imsm_map(dev, MAP_1);
+ if (map)
+ set_imsm_ord_tbl_ent(map, j, ord - 1);
+ }
+ }
+
+ mpb->num_disks--;
+ super->updates_pending++;
+ if (*dlp) {
+ struct dl *dl = *dlp;
+
+ *dlp = (*dlp)->next;
+ __free_imsm_disk(dl);
+ }
+}
+#endif /* MDASSEMBLE */
+
+/* Close every descriptor that is still open (>= 0) in the first new_disks
+ * slots of targets[] and mark each closed slot with -1.  A NULL table is
+ * accepted and ignored.
+ */
+static void close_targets(int *targets, int new_disks)
+{
+	int slot = 0;
+
+	if (targets == NULL)
+		return;
+
+	while (slot < new_disks) {
+		int *fd = &targets[slot++];
+
+		if (*fd < 0)
+			continue;
+		close(*fd);
+		*fd = -1;
+	}
+}
+
+/* Scan disk slots 0..pairs-1 and report whether some slot i and its
+ * neighbour i + 1 are both absent according to get_imsm_disk().
+ * Returns 1 as soon as such a pair is found, 0 otherwise.
+ */
+static int imsm_adjacent_disks_missing(struct intel_super *super, int pairs)
+{
+	int slot;
+
+	for (slot = 0; slot < pairs; slot++) {
+		int absent = 0;
+
+		absent += (get_imsm_disk(super, slot) == NULL);
+		absent += (get_imsm_disk(super, slot + 1) == NULL);
+		if (absent == 2)
+			return 1;
+	}
+	return 0;
+}
+
+/* Number of member disks an array of the given level may lose.
+ *   level 5      : 1
+ *   level 6      : 2
+ *   level 1 / 10 : raid_disks / 2, or 0 when two adjacent disk slots are
+ *                  both missing; the scan is repeated when the device has a
+ *                  second map
+ *   anything else: 0
+ *
+ * NOTE(review): the maps are fetched but the scan itself only looks at
+ * get_imsm_disk(super, i) - confirm that checking slots i and i + 1 is the
+ * intended mirror-pair test.
+ */
+static int imsm_get_allowed_degradation(int level, int raid_disks,
+					struct intel_super *super,
+					struct imsm_dev *dev)
+{
+	struct imsm_map *map;
+	int tolerated;
+
+	if (level == 5)
+		return 1;
+	if (level == 6)
+		return 2;
+	if (level != 1 && level != 10)
+		return 0;
+
+	tolerated = raid_disks/2;
+
+	/* first map */
+	map = get_imsm_map(dev, MAP_0);
+	if (imsm_adjacent_disks_missing(super, tolerated))
+		return 0;
+
+	/* without a second map the answer is already known */
+	map = get_imsm_map(dev, MAP_1);
+	if (map == NULL)
+		return tolerated;
+
+	/* second map */
+	if (imsm_adjacent_disks_missing(super, tolerated))
+		return 0;
+
+	return tolerated;
+}
+
+/*******************************************************************************
+ * Function:	open_backup_targets
+ * Description:	Opens (O_RDWR) every usable member device listed in info->devs
+ *		and stores the descriptor in raid_fds[] at the device's
+ *		raid_disk position.  Faulty devices and devices whose
+ *		raid_disk is outside 0..raid_disks-1 are skipped.
+ * Parameters:
+ *	info		: general array info
+ *	raid_disks	: number of disks, i.e. number of raid_fds[] slots
+ *	raid_fds	: table of device's file descriptors; every slot is
+ *			  reset to -1 before anything is opened
+ *	super		: intel super for raid10 degradation check
+ *	dev		: intel device for raid10 degradation check
+ * Returns:
+ *	 0 : success
+ *	-2 : more devices are unopened than the level tolerates; everything
+ *	     opened so far has been closed again
+ ******************************************************************************/
+int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds,
+			struct intel_super *super, struct imsm_dev *dev)
+{
+	struct mdinfo *member;
+	int slot;
+	int opened = 0;
+	int unopened;
+
+	for (slot = 0; slot < raid_disks; slot++)
+		raid_fds[slot] = -1;
+
+	for (member = info->devs; member != NULL; member = member->next) {
+		if (member->disk.state & (1<<MD_DISK_FAULTY)) {
+			dprintf("disk is faulty!!\n");
+			continue;
+		}
+
+		slot = member->disk.raid_disk;
+		if (slot < 0 || slot >= raid_disks)
+			continue;
+
+		raid_fds[slot] = dev_open(map_dev(member->disk.major,
+						  member->disk.minor, 1),
+					  O_RDWR);
+		if (raid_fds[slot] >= 0)
+			opened++;
+		else
+			pr_err("cannot open component\n");
+	}
+
+	/* the array may run short of at most this many members */
+	unopened = raid_disks - opened;
+	if (unopened <= imsm_get_allowed_degradation(info->new_level,
+						     raid_disks,
+						     super, dev))
+		return 0;
+
+	pr_err("Not enough disks can be opened.\n");
+	close_targets(raid_fds, raid_disks);
+	return -2;
+}
+
+/*******************************************************************************
+ * Function:	validate_container_imsm
+ * Description: This routine validates container after assemble,
+ *		eg. if devices in container are under the same controller.
+ *		The HBA is picked by matching the sysfs path of the first
+ *		device (info) against the Intel devices found; every
+ *		following device (info->next ...) must be attached to it.
+ *		The check is skipped when IMSM_NO_PLATFORM is set in the
+ *		environment, and when no HBA could be determined for the
+ *		first device.
+ *
+ * Parameters:
+ *	info	: linked list with info about devices used in array
+ * Returns:
+ *	1	: HBA mismatch
+ *	0	: Success
+ ******************************************************************************/
+int validate_container_imsm(struct mdinfo *info)
+{
+	struct sys_dev *idev;
+	struct mdinfo *dev;
+	char *hba_path = NULL;
+	char *dev_path;
+
+	if (check_env("IMSM_NO_PLATFORM"))
+		return 0;
+
+	dev_path = devt_to_devpath(makedev(info->disk.major,
+					   info->disk.minor));
+
+	/* devt_to_devpath() can fail and return NULL; strstr() must not be
+	 * handed a NULL haystack, so in that case no HBA is selected
+	 */
+	for (idev = find_intel_devices(); idev; idev = idev->next) {
+		if (dev_path && strstr(dev_path, idev->path)) {
+			hba_path = idev->path;
+			break;
+		}
+	}
+	free(dev_path);
+
+	if (!hba_path)
+		return 0;
+
+	for (dev = info->next; dev; dev = dev->next) {
+		if (!devt_attached_to_hba(makedev(dev->disk.major,
+						  dev->disk.minor), hba_path)) {
+			pr_err("WARNING - IMSM container assembled with disks under different HBAs!\n"
+				" This operation is not supported and can lead to data loss.\n");
+			return 1;
+		}
+	}
+	return 0;
+}
+#ifndef MDASSEMBLE
+/*******************************************************************************
+ * Function:	init_migr_record_imsm
+ * Description:	Function inits imsm migration record (super->migr_rec) for a
+ *		general migration and writes it out with write_imsm_migr_rec()
+ * Parameters:
+ *	st	: supertype; st->sb is the imsm internal array info
+ *	dev	: device under migration
+ *	info	: general array info to find the smallest device
+ * Returns:
+ *	none
+ ******************************************************************************/
+void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
+			   struct mdinfo *info)
+{
+	struct intel_super *super = st->sb;
+	struct migr_record *migr_rec = super->migr_rec;
+	int new_data_disks;
+	unsigned long long dsize, dev_sectors;
+	long long unsigned min_dev_sectors = -1LLU;
+	struct mdinfo *sd;
+	char nm[30];
+	int fd;
+	struct imsm_map *map_dest = get_imsm_map(dev, MAP_0);
+	struct imsm_map *map_src = get_imsm_map(dev, MAP_1);
+	unsigned long long num_migr_units;
+	unsigned long long array_blocks;
+
+	memset(migr_rec, 0, sizeof(struct migr_record));
+	migr_rec->family_num = __cpu_to_le32(super->anchor->family_num);
+
+	/* only ascending reshape supported now */
+	migr_rec->ascending_migr = __cpu_to_le32(1);
+
+	/* round GEN_MIGR_AREA_SIZE down to a whole number of the larger of
+	 * the two strip sizes; the field holds a host-order value until it
+	 * is converted a few lines below
+	 */
+	migr_rec->dest_depth_per_unit = GEN_MIGR_AREA_SIZE /
+		max(map_dest->blocks_per_strip, map_src->blocks_per_strip);
+	migr_rec->dest_depth_per_unit *=
+		max(map_dest->blocks_per_strip, map_src->blocks_per_strip);
+	new_data_disks = imsm_num_data_members(dev, MAP_0);
+	migr_rec->blocks_per_unit =
+		__cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks);
+	migr_rec->dest_depth_per_unit =
+		__cpu_to_le32(migr_rec->dest_depth_per_unit);
+	array_blocks = info->component_size * new_data_disks;
+	num_migr_units =
+		array_blocks / __le32_to_cpu(migr_rec->blocks_per_unit);
+
+	/* a partial trailing unit still counts as a unit */
+	if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit))
+		num_migr_units++;
+	migr_rec->num_migr_units = __cpu_to_le32(num_migr_units);
+
+	migr_rec->post_migr_vol_cap = dev->size_low;
+	migr_rec->post_migr_vol_cap_hi = dev->size_high;
+
+	/* Find the smallest dev */
+	for (sd = info->devs ; sd ; sd = sd->next) {
+		sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
+		fd = dev_open(nm, O_RDONLY);
+		if (fd < 0)
+			continue;
+		/* start from 0 so that a failed size query cannot leave an
+		 * indeterminate value behind; a device whose size could not
+		 * be read takes no part in the minimum
+		 */
+		dsize = 0;
+		get_dev_size(fd, NULL, &dsize);
+		close(fd);
+		if (dsize == 0)
+			continue;
+		dev_sectors = dsize / 512;
+		if (dev_sectors < min_dev_sectors)
+			min_dev_sectors = dev_sectors;
+	}
+	/* NOTE(review): if no device could be sized, min_dev_sectors is still
+	 * -1LLU here and the checkpoint area address is meaningless - confirm
+	 * callers guarantee at least one readable member.
+	 */
+	migr_rec->ckpt_area_pba = __cpu_to_le32(min_dev_sectors -
+					RAID_DISK_RESERVED_BLOCKS_IMSM_HI);
+
+	write_imsm_migr_rec(st);
+}
+
+/*******************************************************************************
+ * Function:	save_backup_imsm
+ * Description:	Function saves critical data stripes to Migration Copy Area
+ *		and updates the current migration unit status.
+ *		Use restore_stripes() to form a destination stripe,
+ *		and to write it to the Copy Area.
+ * Parameters:
+ *	st	: supertype information
+ *	dev	: imsm device that backup is saved for
+ *	info	: general array info
+ *	buf	: input buffer
+ *	length	: length of data to backup (blocks_per_unit)
+ * Returns:
+ *	 0 : success
+ *	-1 : fail
+ ******************************************************************************/
+int save_backup_imsm(struct supertype *st,
+		     struct imsm_dev *dev,
+		     struct mdinfo *info,
+		     void *buf,
+		     int length)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_map *map_dest = get_imsm_map(dev, MAP_0);
+	int new_disks = map_dest->num_members;
+	int data_disks = imsm_num_data_members(dev, MAP_0);
+	unsigned long long start = info->reshape_progress * 512;
+	unsigned long long *target_offsets;
+	int *targets;
+	int rv = -1;
+	int d;
+
+	targets = xmalloc(new_disks * sizeof(int));
+	target_offsets = xcalloc(new_disks, sizeof(unsigned long long));
+
+	for (d = 0; d < new_disks; d++) {
+		unsigned long long copy_area = (unsigned long long)
+			__le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512;
+
+		targets[d] = -1;
+		/* move back copy area address, it will be moved forward
+		 * in restore_stripes() using start input variable
+		 */
+		target_offsets[d] = copy_area - start/data_disks;
+	}
+
+	if (open_backup_targets(info, new_disks, targets, super, dev) == 0) {
+		int dest_layout = imsm_level_to_layout(map_dest->raid_level);
+		int dest_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512;
+
+		if (restore_stripes(targets, /* list of dest devices */
+				    target_offsets, /* migration record offsets */
+				    new_disks,
+				    dest_chunk,
+				    map_dest->raid_level,
+				    dest_layout,
+				    -1,    /* source backup file descriptor */
+				    0,     /* input buf offset
+					    * always 0 buf is already offseted */
+				    start,
+				    length,
+				    buf) == 0)
+			rv = 0;
+		else
+			pr_err("Error restoring stripes\n");
+	}
+
+	/* common exit: release descriptors and both tables */
+	close_targets(targets, new_disks);
+	free(targets);
+	free(target_offsets);
+
+	return rv;
+}
+
+/*******************************************************************************
+ * Function:	save_checkpoint_imsm
+ * Description:	Function called for current unit status update
+ *		in the migration record. It writes it to disk.
+ * Parameters:
+ *	st	: supertype; st->sb is the imsm internal array info
+ *	info	: general array info (reshape_progress is the position saved)
+ *	state	: value stored in the record's rec_status field
+ * Returns:
+ *	0: success
+ *	1: failure (record could not be read or written)
+ *	2: failure, means no valid migration record
+ *	   / no general migration in progress /
+ ******************************************************************************/
+int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
+{
+	struct intel_super *super = st->sb;
+	struct migr_record *rec;
+	unsigned long long unit_blocks;
+	unsigned long long unit;
+
+	if (load_imsm_migr_rec(super, info) != 0) {
+		dprintf("imsm: ERROR: Cannot read migration record "
+			"for checkpoint save.\n");
+		return 1;
+	}
+	rec = super->migr_rec;
+
+	unit_blocks = __le32_to_cpu(rec->blocks_per_unit);
+	if (!unit_blocks) {
+		dprintf("imsm: no migration in progress.\n");
+		return 2;
+	}
+
+	/* a position that is not aligned to the copy area counts as one
+	 * more unit; this can happen only when the reshape finishes
+	 */
+	unit = info->reshape_progress / unit_blocks;
+	if (info->reshape_progress % unit_blocks != 0)
+		unit += 1;
+
+	rec->curr_migr_unit = __cpu_to_le32(unit);
+	rec->rec_status = __cpu_to_le32(state);
+	rec->dest_1st_member_lba =
+		__cpu_to_le32(unit * __le32_to_cpu(rec->dest_depth_per_unit));
+
+	if (write_imsm_migr_rec(st) >= 0)
+		return 0;
+
+	dprintf("imsm: Cannot write migration record "
+		"outside backup area\n");
+	return 1;
+}
+
+/*******************************************************************************
+ * Function:	recover_backup_imsm
+ * Description:	Function recovers critical data from the Migration Copy Area
+ *		while assembling an array: on every member that can be opened,
+ *		one migration unit is copied from the checkpoint area back to
+ *		its destination, then the checkpoint is saved as
+ *		UNIT_SRC_NORMAL.
+ * Parameters:
+ *	st	: supertype; st->sb is the imsm internal array info
+ *	info	: general array info
+ * Returns:
+ *	0 : success (or there is no data to recover)
+ *	1 : fail
+ ******************************************************************************/
+int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
+{
+	struct intel_super *super = st->sb;
+	struct migr_record *migr_rec = super->migr_rec;
+	struct imsm_map *map_dest = NULL;
+	struct intel_dev *id = NULL;
+	unsigned long long read_offset;
+	unsigned long long write_offset;
+	unsigned unit_len;
+	int *targets = NULL;
+	int new_disks, i, err;
+	char *buf = NULL;
+	int retval = 1;
+	unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit);
+	unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
+	char buffer[20];
+	int skipped_disks = 0;
+
+	err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20);
+	if (err < 1)
+		return 1;
+
+	/* recover data only during assemblation */
+	if (strncmp(buffer, "inactive", 8) != 0)
+		return 0;
+	/* no data to recover */
+	if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
+		return 0;
+	if (curr_migr_unit >= num_migr_units)
+		return 1;
+
+	/* find device during reshape */
+	for (id = super->devlist; id; id = id->next)
+		if (is_gen_migration(id->dev))
+			break;
+	if (id == NULL)
+		return 1;
+
+	map_dest = get_imsm_map(id->dev, MAP_0);
+	new_disks = map_dest->num_members;
+
+	/* byte offsets of the copy area and of the unit's destination */
+	read_offset = (unsigned long long)
+			__le32_to_cpu(migr_rec->ckpt_area_pba) * 512;
+
+	write_offset = ((unsigned long long)
+			__le32_to_cpu(migr_rec->dest_1st_member_lba) +
+			pba_of_lba0(map_dest)) * 512;
+
+	unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
+	if (posix_memalign((void **)&buf, 512, unit_len) != 0)
+		goto abort;
+	targets = xcalloc(new_disks, sizeof(int));
+
+	if (open_backup_targets(info, new_disks, targets, super, id->dev)) {
+		pr_err("Cannot open some devices belonging to array.\n");
+		goto abort;
+	}
+
+	/* a member that is missing or fails any step is counted, not fatal */
+	for (i = 0; i < new_disks; i++) {
+		if (targets[i] < 0) {
+			skipped_disks++;
+			continue;
+		}
+		if (lseek64(targets[i], read_offset, SEEK_SET) < 0) {
+			pr_err("Cannot seek to block: %s\n",
+			       strerror(errno));
+			skipped_disks++;
+			continue;
+		}
+		if ((unsigned)read(targets[i], buf, unit_len) != unit_len) {
+			pr_err("Cannot read copy area block: %s\n",
+			       strerror(errno));
+			skipped_disks++;
+			continue;
+		}
+		if (lseek64(targets[i], write_offset, SEEK_SET) < 0) {
+			pr_err("Cannot seek to block: %s\n",
+			       strerror(errno));
+			skipped_disks++;
+			continue;
+		}
+		if ((unsigned)write(targets[i], buf, unit_len) != unit_len) {
+			pr_err("Cannot restore block: %s\n",
+			       strerror(errno));
+			skipped_disks++;
+			continue;
+		}
+	}
+
+	if (skipped_disks > imsm_get_allowed_degradation(info->new_level,
+							 new_disks,
+							 super,
+							 id->dev)) {
+		pr_err("Cannot restore data from backup."
+		       " Too many failed disks\n");
+		goto abort;
+	}
+
+	if (save_checkpoint_imsm(st, info, UNIT_SRC_NORMAL)) {
+		/* ignore error == 2, this can mean end of reshape here
+		 */
+		dprintf("imsm: Cannot write checkpoint to "
+			"migration record (UNIT_SRC_NORMAL) during restart\n");
+	} else
+		retval = 0;
+
+abort:
+	/* unopened slots hold -1 (set by open_backup_targets()), so they
+	 * must be told apart with a '>= 0' test rather than a truth test;
+	 * close_targets() does exactly that and tolerates targets == NULL
+	 */
+	close_targets(targets, new_disks);
+	free(targets);
+	free(buf);
+	return retval;
+}
+
+static char disk_by_path[] = "/dev/disk/by-path/";
+
+/* Name the driver domain of the controller a disk hangs off.
+ * 'path' is a name relative to /dev/disk/by-path/.
+ * Returns "isci" for a SAS HBA, "ahci" for a SATA HBA, "unknown" when the
+ * HBA cannot be determined or is of another type, and NULL when the
+ * by-path entry cannot be stat()ed.
+ */
+static const char *imsm_get_disk_controller_domain(const char *path)
+{
+	char disk_path[PATH_MAX];
+	struct stat st;
+	struct sys_dev *hba;
+	char *devpath;
+	const char *domain;
+
+	strcpy(disk_path, disk_by_path);
+	strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
+	if (stat(disk_path, &st) != 0)
+		return NULL;
+
+	devpath = devt_to_devpath(st.st_rdev);
+	if (devpath == NULL)
+		return "unknown";
+
+	hba = find_disk_attached_hba(-1, devpath);
+	if (hba == NULL)
+		domain = "unknown";
+	else if (hba->type == SYS_DEV_SAS)
+		domain = "isci";
+	else if (hba->type == SYS_DEV_SATA)
+		domain = "ahci";
+	else
+		domain = "unknown";
+
+	dprintf("path: %s hba: %s attached: %s\n",
+		devpath, (hba) ? hba->path : "NULL", domain);
+	free(devpath);
+
+	return domain;
+}
+
+/* Look up the md device name of member array number 'subdev' of 'container'
+ * through mdstat_by_subdev().
+ * Returns a pointer to a static buffer (overwritten by the next successful
+ * call) or NULL when no matching mdstat entry exists.
+ */
+static char *imsm_find_array_devnm_by_subdev(int subdev, char *container)
+{
+	static char devnm[32];
+	char member[20];
+	struct mdstat_ent *ent;
+
+	sprintf(member, "%d", subdev);
+	ent = mdstat_by_subdev(member, container);
+	if (ent != NULL) {
+		strcpy(devnm, ent->devnm);
+		free_mdstat(ent);
+		return devnm;
+	}
+
+	return NULL;
+}
+
+static int imsm_reshape_is_allowed_on_container(struct supertype *st,
+ struct geo_params *geo,
+ int *old_raid_disks,
+ int direction)
+{
+ /* currently we only support increasing the number of devices
+ * for a container. This increases the number of device for each
+ * member array. They must all be RAID0 or RAID5.
+ *
+ * Returns 1 when the request is acceptable, 0 otherwise.
+ * The request is refused outright when geo asks for anything besides
+ * a raid_disks change (size, level, layout or chunksize set, or
+ * raid_disks left UnSet) or when direction is
+ * ROLLBACK_METADATA_CHANGES. Otherwise every member reported by
+ * container_content_imsm() is walked; the first failing check stops
+ * the walk, and the answer is 1 only if the walk reached the end of
+ * the list and at least one member was accepted.
+ * *old_raid_disks is used both as input (a non-zero value must match
+ * the array's raid_disks) and as output (it is overwritten with the
+ * raid_disks value that was checked).
+ *
+ * NOTE(review): inside the loop the level, component_size/chunk_size
+ * and raid_disks checks read info-> (the head of the list) while the
+ * raid_disks comparison, platform check and devnm lookup read
+ * member-> - confirm whether the info-> uses were meant to be
+ * member->.
+ */
+ int ret_val = 0;
+ struct mdinfo *info, *member;
+ int devices_that_can_grow = 0;
+
+ dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
+ "st->devnm = (%s)\n", st->devnm);
+
+ if (geo->size > 0 ||
+ geo->level != UnSet ||
+ geo->layout != UnSet ||
+ geo->chunksize != 0 ||
+ geo->raid_disks == UnSet) {
+ dprintf("imsm: Container operation is allowed for "
+ "raid disks number change only.\n");
+ return ret_val;
+ }
+
+ if (direction == ROLLBACK_METADATA_CHANGES) {
+ dprintf("imsm: Metadata changes rollback is not supported for "
+ "container operation.\n");
+ return ret_val;
+ }
+
+ info = container_content_imsm(st, NULL);
+ for (member = info; member; member = member->next) {
+ char *result;
+
+ dprintf("imsm: checking device_num: %i\n",
+ member->container_member);
+
+ if (geo->raid_disks <= member->array.raid_disks) {
+ /* we work on container for Online Capacity Expansion
+ * only so raid_disks has to grow
+ */
+ dprintf("imsm: for container operation raid disks "
+ "increase is required\n");
+ break;
+ }
+
+ if ((info->array.level != 0) &&
+ (info->array.level != 5)) {
+ /* we cannot use this container with other raid level
+ */
+ dprintf("imsm: for container operation wrong"
+ " raid level (%i) detected\n",
+ info->array.level);
+ break;
+ } else {
+ /* check for platform support
+ * for this raid level configuration
+ */
+ struct intel_super *super = st->sb;
+ if (!is_raid_level_supported(super->orom,
+ member->array.level,
+ geo->raid_disks)) {
+ dprintf("platform does not support raid%d with"
+ " %d disk%s\n",
+ info->array.level,
+ geo->raid_disks,
+ geo->raid_disks > 1 ? "s" : "");
+ break;
+ }
+ /* check if component size is aligned to chunk size
+ * (chunk_size is divided by 512 before the modulo)
+ */
+ if (info->component_size %
+ (info->array.chunk_size/512)) {
+ dprintf("Component size is not aligned to "
+ "chunk size\n");
+ break;
+ }
+ }
+
+ if (*old_raid_disks &&
+ info->array.raid_disks != *old_raid_disks)
+ break;
+ *old_raid_disks = info->array.raid_disks;
+
+ /* All raid5 and raid0 volumes in container
+ * have to be ready for Online Capacity Expansion
+ * so they need to be assembled. We have already
+ * checked that no recovery etc is happening.
+ */
+ result = imsm_find_array_devnm_by_subdev(member->container_member,
+ st->container_devnm);
+ if (result == NULL) {
+ dprintf("imsm: cannot find array\n");
+ break;
+ }
+ devices_that_can_grow++;
+ }
+ sysfs_free(info);
+ if (!member && devices_that_can_grow)
+ ret_val = 1;
+
+ if (ret_val)
+ dprintf("\tContainer operation allowed\n");
+ else
+ dprintf("\tError: %i\n", ret_val);
+
+ return ret_val;
+}
+
+/* Function:	get_spares_for_grow
+ * Description:	Asks container_choose_spares() for the spare devices of the
+ *		container, using the value of min_acceptable_spare_size_imsm()
+ *		as the minimum size argument.
+ * Parameters:	st - pointer to the supertype structure
+ * Returns:	the result of container_choose_spares(): a list of spare
+ *		devices (mdinfo structure); callers in this file treat NULL
+ *		as failure and release the list with sysfs_free()
+ */
+static struct mdinfo *get_spares_for_grow(struct supertype *st)
+{
+	return container_choose_spares(st, min_acceptable_spare_size_imsm(st),
+				       NULL, NULL, NULL, 0);
+}
+
+/******************************************************************************
+ * function: imsm_create_metadata_update_for_reshape
+ * Function creates update for whole IMSM container.
+ *
+ * The update carries the old and new number of raid disks and the device
+ * numbers (makedev) of the spares that will be added.  Each chosen spare
+ * gets the next free index in the anchor's disk table (dl->index and
+ * mpb->num_disks are modified here).
+ *
+ * Parameters:
+ *	st		: supertype structure of the container
+ *	geo		: requested geometry (raid_disks, dev_name are used)
+ *	old_raid_disks	: current number of raid disks
+ *	updatep		: receives the allocated update on success; the caller
+ *			  owns it
+ * Returns:
+ *	size of the update in bytes on success, 0 on failure (*updatep is
+ *	not written in that case)
+ ******************************************************************************/
+static int imsm_create_metadata_update_for_reshape(
+	struct supertype *st,
+	struct geo_params *geo,
+	int old_raid_disks,
+	struct imsm_update_reshape **updatep)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb = super->anchor;
+	int update_memory_size = 0;
+	struct imsm_update_reshape *u = NULL;
+	struct mdinfo *spares = NULL;
+	int i;
+	int delta_disks = 0;
+	struct mdinfo *dev;
+
+	dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
+		geo->raid_disks);
+
+	delta_disks = geo->raid_disks - old_raid_disks;
+
+	/* size of all update data without anchor */
+	update_memory_size = sizeof(struct imsm_update_reshape);
+
+	/* now add space for spare disks that we need to add;
+	 * the structure itself already provides room for one entry
+	 */
+	update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
+
+	u = xcalloc(1, update_memory_size);
+	u->type = update_reshape_container_disks;
+	u->old_raid_disks = old_raid_disks;
+	u->new_raid_disks = geo->raid_disks;
+
+	/* now get spare disks list
+	 */
+	spares = get_spares_for_grow(st);
+
+	if (spares == NULL
+	    || delta_disks > spares->array.spare_disks) {
+		pr_err("imsm: ERROR: Cannot get spare devices "
+		       "for %s.\n", geo->dev_name);
+		/* any value different from delta_disks marks the failure */
+		i = -1;
+		goto abort;
+	}
+
+	/* we have got spares
+	 * update disk list in imsm_disk list table in anchor
+	 */
+	dprintf("imsm: %i spares are available.\n\n",
+		spares->array.spare_disks);
+
+	dev = spares->devs;
+	for (i = 0; i < delta_disks; i++) {
+		struct dl *dl;
+
+		if (dev == NULL)
+			break;
+		dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
+		if (dl == NULL) {
+			/* the spare cannot be matched to a disk of this
+			 * container; stop here so that the i != delta_disks
+			 * test below reports the failure instead of
+			 * dereferencing a NULL pointer
+			 */
+			dprintf("imsm: cannot find disk %d:%d in container\n",
+				dev->disk.major, dev->disk.minor);
+			break;
+		}
+		u->new_disks[i] = makedev(dev->disk.major,
+					  dev->disk.minor);
+		dl->index = mpb->num_disks;
+		mpb->num_disks++;
+		dev = dev->next;
+	}
+
+abort:
+	/* free spares
+	 */
+	sysfs_free(spares);
+
+	dprintf("imsm: reshape update preparation :");
+	if (i == delta_disks) {
+		dprintf(" OK\n");
+		*updatep = u;
+		return update_memory_size;
+	}
+	free(u);
+	dprintf(" Error\n");
+
+	return 0;
+}
+
+/******************************************************************************
+ * function: imsm_create_metadata_update_for_size_change()
+ * Builds the metadata update that changes the size of one IMSM array.
+ *
+ * The update addresses the currently selected volume (super->current_vol)
+ * and carries geo->size as the new size.
+ *
+ * Returns the size of the allocated update in bytes; the update itself is
+ * stored in *updatep and is owned by the caller.
+ ******************************************************************************/
+static int imsm_create_metadata_update_for_size_change(
+				struct supertype *st,
+				struct geo_params *geo,
+				struct imsm_update_size_change **updatep)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_update_size_change *update;
+	/* size of all update data without anchor */
+	int len = sizeof(struct imsm_update_size_change);
+
+	dprintf("imsm_create_metadata_update_for_size_change(enter)"
+		" New size = %llu\n", geo->size);
+
+	update = xcalloc(1, len);
+	update->new_size = geo->size;
+	update->subdev = super->current_vol;
+	update->type = update_size_change;
+
+	dprintf("imsm: reshape update preparation : OK\n");
+	*updatep = update;
+
+	return len;
+}
+
+/******************************************************************************
+ * function: imsm_create_metadata_update_for_migration()
+ * Creates update for IMSM array.
+ *
+ * The update addresses the currently selected volume (super->current_vol)
+ * and carries the requested level, layout and number of raid disks.  For a
+ * RAID0 -> RAID5 migration one more raid disk is requested and at least one
+ * spare has to be available.
+ *
+ * Returns the size of the allocated update in bytes (update stored in
+ * *updatep, owned by the caller), or 0 when no spare is available for a
+ * RAID0 -> RAID5 migration (*updatep is not written in that case).
+ ******************************************************************************/
+static int imsm_create_metadata_update_for_migration(
+					struct supertype *st,
+					struct geo_params *geo,
+					struct imsm_update_reshape_migration **updatep)
+{
+	struct intel_super *super = st->sb;
+	int update_memory_size = 0;
+	struct imsm_update_reshape_migration *u = NULL;
+	struct imsm_dev *dev;
+	int previous_level = -1;
+
+	dprintf("imsm_create_metadata_update_for_migration(enter)"
+		" New Level = %i\n", geo->level);
+
+	/* size of all update data without anchor */
+	update_memory_size = sizeof(struct imsm_update_reshape_migration);
+
+	u = xcalloc(1, update_memory_size);
+	u->type = update_reshape_migration;
+	u->subdev = super->current_vol;
+	u->new_level = geo->level;
+	u->new_layout = geo->layout;
+	u->new_raid_disks = u->old_raid_disks = geo->raid_disks;
+	/* -1 marks "no disk to add" / "chunk size unchanged" */
+	u->new_disks[0] = -1;
+	u->new_chunksize = -1;
+
+	dev = get_imsm_dev(super, u->subdev);
+	if (dev) {
+		struct imsm_map *map;
+
+		map = get_imsm_map(dev, MAP_0);
+		if (map) {
+			/* blocks_per_strip counts 512-byte blocks, so
+			 * halving it gives KiB
+			 */
+			int current_chunk_size =
+				__le16_to_cpu(map->blocks_per_strip) / 2;
+
+			/* NOTE(review): new_chunksize is stored as
+			 * geo->chunksize / 1024 while the comparison uses
+			 * geo->chunksize unscaled against a KiB value -
+			 * looks like a unit mismatch; confirm against the
+			 * consumer of this update before changing it.
+			 */
+			if (geo->chunksize != current_chunk_size) {
+				u->new_chunksize = geo->chunksize / 1024;
+				dprintf("imsm: "
+					"chunk size change from %i to %i\n",
+					current_chunk_size, u->new_chunksize);
+			}
+			previous_level = map->raid_level;
+		}
+	}
+	if ((geo->level == 5) && (previous_level == 0)) {
+		struct mdinfo *spares = NULL;
+
+		/* the parity disk is added */
+		u->new_raid_disks++;
+		spares = get_spares_for_grow(st);
+		if ((spares == NULL) || (spares->array.spare_disks < 1)) {
+			free(u);
+			sysfs_free(spares);
+			dprintf("error: cannot get spare device "
+				"for requested migration\n");
+			return 0;
+		}
+		/* only the availability matters here */
+		sysfs_free(spares);
+	}
+	dprintf("imsm: reshape update preparation : OK\n");
+	*updatep = u;
+
+	return update_memory_size;
+}
+
+/* Apply the update stored in @buf (@len bytes) to the metadata of @st
+ * in this process.
+ *
+ * A temporary metadata_update descriptor is run through
+ * imsm_prepare_update(); when that returns non-zero the update is processed
+ * with imsm_process_update().  Whatever is still linked on the descriptor's
+ * space_list afterwards is freed.  @buf itself is not freed here.
+ */
+static void imsm_update_metadata_locally(struct supertype *st,
+					 void *buf, int len)
+{
+	struct metadata_update mu;
+	void **entry;
+
+	mu.next = NULL;
+	mu.space_list = NULL;
+	mu.space = NULL;
+	mu.len = len;
+	mu.buf = buf;
+
+	if (imsm_prepare_update(st, &mu))
+		imsm_process_update(st, &mu);
+
+	/* the first word of every list element points to the next element */
+	for (entry = mu.space_list; entry; entry = mu.space_list) {
+		mu.space_list = *entry;
+		free(entry);
+	}
+}
+
+/***************************************************************************
+* Function:	imsm_analyze_change
+* Description:	Function analyze change for single volume
+*		and validate if transition is supported.
+*		Fields of @geo that were not requested (level, layout,
+*		chunksize) are filled in from the current volume; for a
+*		size change geo->size is replaced by the rounded array size
+*		and geo->raid_disks by the number of members.
+* Parameters:	Geometry parameters, supertype structure,
+*		metadata change direction (apply/rollback)
+* Returns:	Operation type code (CH_TAKEOVER, CH_MIGRATION,
+*		CH_ARRAY_SIZE) on success, -1 if fail
+****************************************************************************/
+enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
+					   struct geo_params *geo,
+					   int direction)
+{
+	struct mdinfo info;
+	int change = -1;
+	int check_devs = 0;
+	int chunk;
+	/* number of added/removed disks in operation result */
+	int devNumChange = 0;
+	/* imsm compatible layout value for array geometry verification */
+	int imsm_layout = -1;
+	int data_disks;
+	struct imsm_dev *dev;
+	struct intel_super *super;
+	unsigned long long current_size;
+	unsigned long long free_size;
+	unsigned long long max_size;
+	int rv;
+
+	getinfo_super_imsm_volume(st, &info, NULL);
+
+	/* level change: takeover or migration */
+	if ((geo->level != info.array.level) &&
+	    (geo->level >= 0) &&
+	    (geo->level != UnSet)) {
+		switch (info.array.level) {
+		case 0:
+			if (geo->level == 5) {
+				change = CH_MIGRATION;
+				if (geo->layout != ALGORITHM_LEFT_ASYMMETRIC) {
+					pr_err("Error. Requested Layout "
+					       "not supported (left-asymmetric layout "
+					       "is supported only)!\n");
+					change = -1;
+					goto analyse_change_exit;
+				}
+				imsm_layout = geo->layout;
+				check_devs = 1;
+				devNumChange = 1; /* parity disk added */
+			} else if (geo->level == 10) {
+				change = CH_TAKEOVER;
+				check_devs = 1;
+				devNumChange = 2; /* two mirrors added */
+				imsm_layout = 0x102; /* imsm supported layout */
+			}
+			break;
+		case 1:
+		case 10:
+			if (geo->level == 0) {
+				change = CH_TAKEOVER;
+				check_devs = 1;
+				devNumChange = -(geo->raid_disks/2);
+				imsm_layout = 0; /* imsm raid0 layout */
+			}
+			break;
+		}
+		if (change == -1) {
+			pr_err("Error. Level Migration from %d to %d "
+			       "not supported!\n",
+			       info.array.level, geo->level);
+			goto analyse_change_exit;
+		}
+	} else
+		geo->level = info.array.level;
+
+	/* layout change */
+	if ((geo->layout != info.array.layout)
+	    && ((geo->layout != UnSet) && (geo->layout != -1))) {
+		change = CH_MIGRATION;
+		if ((info.array.layout == 0)
+		    && (info.array.level == 5)
+		    && (geo->layout == 5)) {
+			/* reshape 5 -> 4 */
+		} else if ((info.array.layout == 5)
+			   && (info.array.level == 5)
+			   && (geo->layout == 0)) {
+			/* reshape 4 -> 5 */
+			geo->layout = 0;
+			geo->level = 5;
+		} else {
+			pr_err("Error. Layout Migration from %d to %d "
+			       "not supported!\n",
+			       info.array.layout, geo->layout);
+			change = -1;
+			goto analyse_change_exit;
+		}
+	} else {
+		geo->layout = info.array.layout;
+		if (imsm_layout == -1)
+			imsm_layout = info.array.layout;
+	}
+
+	/* chunk size change */
+	if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
+	    && (geo->chunksize != info.array.chunk_size))
+		change = CH_MIGRATION;
+	else
+		geo->chunksize = info.array.chunk_size;
+
+	chunk = geo->chunksize / 1024;
+
+	super = st->sb;
+	dev = get_imsm_dev(super, super->current_vol);
+	if (dev == NULL) {
+		/* nothing below can work without the volume description */
+		pr_err("Error. Cannot find volume %i in metadata.\n",
+		       super->current_vol);
+		change = -1;
+		goto analyse_change_exit;
+	}
+	data_disks = imsm_num_data_members(dev , MAP_0);
+	if (data_disks <= 0) {
+		/* data_disks is used as a divisor below */
+		pr_err("Error. Cannot determine number of data disks.\n");
+		change = -1;
+		goto analyse_change_exit;
+	}
+	/* compute current size per disk member
+	 */
+	current_size = info.custom_array_size / data_disks;
+
+	if ((geo->size > 0) && (geo->size != MAX_SIZE)) {
+		/* align component size
+		 */
+		geo->size = imsm_component_size_aligment_check(
+				    get_imsm_raid_level(dev->vol.map),
+				    chunk * 1024,
+				    geo->size * 2);
+		if (geo->size == 0) {
+			pr_err("Error. Size expansion is " \
+			       "supported only (current size is %llu, " \
+			       "requested size /rounded/ is 0).\n",
+			       current_size);
+			/* do not hand back a change code selected by the
+			 * level/layout/chunk analysis above: the request as
+			 * a whole is invalid
+			 */
+			change = -1;
+			goto analyse_change_exit;
+		}
+	}
+
+	/* size change */
+	if ((current_size != geo->size) && (geo->size > 0)) {
+		if (change != -1) {
+			pr_err("Error. Size change should be the only "
+			       "one at a time.\n");
+			change = -1;
+			goto analyse_change_exit;
+		}
+		/* change is -1 from here until CH_ARRAY_SIZE is selected,
+		 * so the error exits below report failure
+		 */
+		if ((super->current_vol + 1) != super->anchor->num_raid_devs) {
+			pr_err("Error. The last volume in container "
+			       "can be expanded only (%i/%s).\n",
+			       super->current_vol, st->devnm);
+			goto analyse_change_exit;
+		}
+		/* check the maximum available size
+		 */
+		rv = imsm_get_free_size(st, dev->vol.map->num_members,
+					0, chunk, &free_size);
+		if (rv == 0)
+			/* Cannot find maximum available space
+			 */
+			max_size = 0;
+		else {
+			max_size = free_size + current_size;
+			/* align component size
+			 */
+			max_size = imsm_component_size_aligment_check(
+					get_imsm_raid_level(dev->vol.map),
+					chunk * 1024,
+					max_size);
+		}
+		if (geo->size == MAX_SIZE) {
+			/* requested size change to the maximum available size
+			 */
+			if (max_size == 0) {
+				pr_err("Error. Cannot find "
+				       "maximum available space.\n");
+				change = -1;
+				goto analyse_change_exit;
+			} else
+				geo->size = max_size;
+		}
+
+		/* for rollback the size is accepted as it is */
+		if (direction != ROLLBACK_METADATA_CHANGES) {
+			/* round size due to metadata compatibility
+			 */
+			geo->size = (geo->size >> SECT_PER_MB_SHIFT)
+				    << SECT_PER_MB_SHIFT;
+			dprintf("Prepare update for size change to %llu\n",
+				geo->size );
+			if (current_size >= geo->size) {
+				pr_err("Error. Size expansion is "
+				       "supported only (current size is %llu, "
+				       "requested size /rounded/ is %llu).\n",
+				       current_size, geo->size);
+				goto analyse_change_exit;
+			}
+			if (max_size && geo->size > max_size) {
+				pr_err("Error. Requested size is larger "
+				       "than maximum available size (maximum "
+				       "available size is %llu, "
+				       "requested size /rounded/ is %llu).\n",
+				       max_size, geo->size);
+				goto analyse_change_exit;
+			}
+		}
+		geo->size *= data_disks;
+		geo->raid_disks = dev->vol.map->num_members;
+		change = CH_ARRAY_SIZE;
+	}
+
+	/* verify the resulting geometry */
+	if (!validate_geometry_imsm(st,
+				    geo->level,
+				    imsm_layout,
+				    geo->raid_disks + devNumChange,
+				    &chunk,
+				    geo->size, INVALID_SECTORS,
+				    0, 0, 1))
+		change = -1;
+
+	if (check_devs) {
+		struct imsm_super *mpb = super->anchor;
+
+		if (mpb->num_raid_devs > 1) {
+			pr_err("Error. Cannot perform operation on %s"
+			       "- for this operation it MUST be single "
+			       "array in container\n",
+			       geo->dev_name);
+			change = -1;
+		}
+	}
+
+analyse_change_exit:
+	if ((direction == ROLLBACK_METADATA_CHANGES) &&
+	    ((change == CH_MIGRATION) || (change == CH_TAKEOVER))) {
+		dprintf("imsm: Metadata changes rollback is not supported for "
+			"migration and takeover operations.\n");
+		change = -1;
+	}
+	return change;
+}
+
+/* Prepare and apply the takeover update for the currently selected volume
+ * (super->current_vol).
+ *
+ * geo->level selects the direction: 0 means RAID10 -> RAID0, 10 means
+ * RAID0 -> RAID10.  The update is applied to the local metadata and then
+ * either passed to append_metadata_update() (when st->update_tail is set)
+ * or freed.
+ *
+ * Returns 0 when the update was applied, 1 when geo->level is not a
+ * supported takeover target.
+ */
+int imsm_takeover(struct supertype *st, struct geo_params *geo)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_update_takeover *u;
+
+	/* xmalloc() does not clear the memory, so an unexpected level
+	 * would send an update with an undefined direction
+	 */
+	if (geo->level != 0 && geo->level != 10) {
+		dprintf("imsm: takeover to level %i is not supported\n",
+			geo->level);
+		return 1;
+	}
+
+	u = xmalloc(sizeof(struct imsm_update_takeover));
+
+	u->type = update_takeover;
+	u->subarray = super->current_vol;
+
+	if (geo->level == 0)
+		/* 10->0 transition */
+		u->direction = R10_TO_R0;
+	else
+		/* 0->10 transition */
+		u->direction = R0_TO_R10;
+
+	/* update metadata locally */
+	imsm_update_metadata_locally(st, u,
+				     sizeof(struct imsm_update_takeover));
+	/* and possibly remotely */
+	if (st->update_tail)
+		append_metadata_update(st, u,
+				       sizeof(struct imsm_update_takeover));
+	else
+		free(u);
+
+	return 0;
+}
+
+/* Apply the prepared update @u (@len bytes) to the local metadata and then
+ * either pass it to append_metadata_update() (when st->update_tail is set)
+ * or free it.  The caller must not touch @u afterwards.
+ */
+static void imsm_apply_prepared_update(struct supertype *st, void *u, int len)
+{
+	/* update metadata locally */
+	imsm_update_metadata_locally(st, u, len);
+	/* and possibly remotely */
+	if (st->update_tail)
+		append_metadata_update(st, u, len);
+	else
+		free(u);
+}
+
+/* reshape_super method of the imsm superswitch.
+ *
+ * Collects the request into a geo_params structure (a set @delta_disks is
+ * added to @raid_disks) and dispatches it:
+ *   - when st->devnm names the container itself, only growing the number of
+ *     disks of all member arrays is supported;
+ *   - otherwise the volume whose md device name matches st->devnm is
+ *     selected (super->current_vol is updated) and imsm_analyze_change()
+ *     decides between takeover, migration and size change.
+ *
+ * @backup and @verbose are not used by this function.
+ *
+ * Returns 0 when a metadata update was prepared and applied, 1 otherwise
+ * (including when experimental() returns 0).
+ */
+static int imsm_reshape_super(struct supertype *st, unsigned long long size,
+			      int level,
+			      int layout, int chunksize, int raid_disks,
+			      int delta_disks, char *backup, char *dev,
+			      int direction, int verbose)
+{
+	int ret_val = 1;
+	struct geo_params geo;
+
+	dprintf("imsm: reshape_super called.\n");
+
+	memset(&geo, 0, sizeof(struct geo_params));
+
+	geo.dev_name = dev;
+	/* bounded copy, always NUL-terminated */
+	snprintf(geo.devnm, sizeof(geo.devnm), "%s", st->devnm);
+	geo.size = size;
+	geo.level = level;
+	geo.layout = layout;
+	geo.chunksize = chunksize;
+	geo.raid_disks = raid_disks;
+	if (delta_disks != UnSet)
+		geo.raid_disks += delta_disks;
+
+	dprintf("\tfor level : %i\n", geo.level);
+	dprintf("\tfor raid_disks : %i\n", geo.raid_disks);
+
+	if (experimental() == 0)
+		return ret_val;
+
+	if (strcmp(st->container_devnm, st->devnm) == 0) {
+		/* On container level we can only increase number of devices. */
+		int old_raid_disks = 0;
+
+		dprintf("imsm: info: Container operation\n");
+
+		if (imsm_reshape_is_allowed_on_container(
+			    st, &geo, &old_raid_disks, direction)) {
+			struct imsm_update_reshape *u = NULL;
+			int len;
+
+			len = imsm_create_metadata_update_for_reshape(
+				st, &geo, old_raid_disks, &u);
+
+			if (len <= 0) {
+				dprintf("imsm: Cannot prepare update\n");
+				goto exit_imsm_reshape_super;
+			}
+
+			ret_val = 0;
+			imsm_apply_prepared_update(st, u, len);
+		} else {
+			pr_err("(imsm) Operation "
+			       "is not allowed on this container\n");
+		}
+	} else {
+		/* On volume level we support following operations
+		 * - takeover: raid10 -> raid0; raid0 -> raid10
+		 * - chunk size migration
+		 * - migration: raid5 -> raid0; raid0 -> raid5
+		 */
+		struct intel_super *super = st->sb;
+		struct intel_dev *dev = super->devlist;
+		int change;
+
+		dprintf("imsm: info: Volume operation\n");
+		/* find requested device */
+		while (dev) {
+			char *devnm =
+				imsm_find_array_devnm_by_subdev(
+					dev->index, st->container_devnm);
+			if (devnm && strcmp(devnm, geo.devnm) == 0)
+				break;
+			dev = dev->next;
+		}
+		if (dev == NULL) {
+			pr_err("Cannot find %s (%s) subarray\n",
+			       geo.dev_name, geo.devnm);
+			goto exit_imsm_reshape_super;
+		}
+		super->current_vol = dev->index;
+		change = imsm_analyze_change(st, &geo, direction);
+		switch (change) {
+		case CH_TAKEOVER:
+			ret_val = imsm_takeover(st, &geo);
+			break;
+		case CH_MIGRATION: {
+			struct imsm_update_reshape_migration *u = NULL;
+			int len =
+				imsm_create_metadata_update_for_migration(
+					st, &geo, &u);
+			if (len < 1) {
+				dprintf("imsm: "
+					"Cannot prepare update\n");
+				break;
+			}
+			ret_val = 0;
+			imsm_apply_prepared_update(st, u, len);
+		}
+		break;
+		case CH_ARRAY_SIZE: {
+			struct imsm_update_size_change *u = NULL;
+			int len =
+				imsm_create_metadata_update_for_size_change(
+					st, &geo, &u);
+			if (len < 1) {
+				dprintf("imsm: "
+					"Cannot prepare update\n");
+				break;
+			}
+			ret_val = 0;
+			imsm_apply_prepared_update(st, u, len);
+		}
+		break;
+		default:
+			ret_val = 1;
+		}
+	}
+
+exit_imsm_reshape_super:
+	dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
+	return ret_val;
+}
+
+/*******************************************************************************
+ * Function:	wait_for_reshape_imsm
+ * Description:	Function writes new sync_max value and waits until
+ *		reshape process reach new position
+ * Parameters:
+ *	sra	: general array info (reshape_progress is the target)
+ *	ndata	: number of disks in new array's layout
+ * Returns:
+ *	 0 : the position was reached, the sync action is no longer a
+ *	     reshape, or sync_completed could not be read before waiting
+ *	     (no reshape in progress)
+ *	 1 : sync_completed cannot be opened, or cannot be read while waiting
+ *	-1 : fail (invalid ndata, requested position is behind the completed
+ *	     one, or sync_max cannot be written)
+ ******************************************************************************/
+int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
+{
+	int fd;
+	unsigned long long completed;
+	/* to_complete : new sync_max position */
+	unsigned long long to_complete;
+	unsigned long long position_to_set;
+
+	if (ndata <= 0) {
+		/* ndata is used as a divisor below */
+		dprintf("imsm: wait_for_reshape_imsm() "
+			"wrong number of data disks (%i)\n", ndata);
+		return -1;
+	}
+	to_complete = sra->reshape_progress;
+	position_to_set = to_complete / ndata;
+
+	fd = sysfs_get_fd(sra, NULL, "sync_completed");
+	if (fd < 0) {
+		dprintf("imsm: wait_for_reshape_imsm() "
+			"cannot open reshape_position\n");
+		return 1;
+	}
+
+	if (sysfs_fd_get_ll(fd, &completed) < 0) {
+		dprintf("imsm: wait_for_reshape_imsm() "
+			"cannot read reshape_position (no reshape in progres)\n");
+		close(fd);
+		return 0;
+	}
+
+	if (completed > position_to_set) {
+		dprintf("imsm: wait_for_reshape_imsm() "
+			"wrong next position to set %llu (%llu)\n",
+			to_complete, position_to_set);
+		close(fd);
+		return -1;
+	}
+	dprintf("Position set: %llu\n", position_to_set);
+	if (sysfs_set_num(sra, NULL, "sync_max",
+			  position_to_set) != 0) {
+		dprintf("imsm: wait_for_reshape_imsm() "
+			"cannot set reshape position to %llu\n",
+			position_to_set);
+		close(fd);
+		return -1;
+	}
+
+	do {
+		char action[20];
+
+		sysfs_wait(fd, NULL);
+		/* stop waiting as soon as the array does something else
+		 * than a reshape
+		 */
+		if (sysfs_get_str(sra, NULL, "sync_action",
+				  action, 20) > 0 &&
+		    strncmp(action, "reshape", 7) != 0)
+			break;
+		if (sysfs_fd_get_ll(fd, &completed) < 0) {
+			dprintf("imsm: wait_for_reshape_imsm() "
+				"cannot read reshape_position (in loop)\n");
+			close(fd);
+			return 1;
+		}
+	} while (completed < position_to_set);
+	close(fd);
+	return 0;
+
+}
+
+/*******************************************************************************
+ * Function:	check_degradation_change
+ * Description:	Check that array hasn't become failed.
+ *		When the "degraded" attribute cannot be read or differs from
+ *		the previous level, every in-sync member is re-checked through
+ *		its "state" attribute.  A member that is faulty or no longer
+ *		in_sync is marked MD_DISK_FAULTY and its source descriptor is
+ *		closed and set to -1.
+ * Parameters:
+ *	info	: for sysfs access
+ *	sources	: source disks descriptors
+ *	degraded: previous degradation level
+ * Returns:
+ *	degradation level
+ * NOTE(review): after a re-check the returned value counts only the members
+ *	found dead during this call (members already marked faulty are
+ *	skipped) - confirm this is what callers expect.
+ ******************************************************************************/
+int check_degradation_change(struct mdinfo *info,
+			     int *sources,
+			     int degraded)
+{
+	unsigned long long new_degraded;
+	int rv;
+
+	rv = sysfs_get_ll(info, NULL, "degraded", &new_degraded);
+	if ((rv == -1) || (new_degraded != (unsigned long long)degraded)) {
+		/* check each device to ensure it is still working */
+		struct mdinfo *sd;
+
+		new_degraded = 0;
+		for (sd = info->devs ; sd ; sd = sd->next) {
+			/* large enough for a state string made of several
+			 * comma separated flags; with a too small buffer a
+			 * failed read would make a healthy member look dead
+			 */
+			char sbuf[100];
+			int raid_disk;
+
+			if (sd->disk.state & (1<<MD_DISK_FAULTY))
+				continue;
+			if (!(sd->disk.state & (1<<MD_DISK_SYNC)))
+				continue;
+
+			if (sysfs_get_str(info,
+					  sd, "state", sbuf, sizeof(sbuf)) >= 0 &&
+			    strstr(sbuf, "faulty") == NULL &&
+			    strstr(sbuf, "in_sync") != NULL)
+				continue;
+
+			/* this device is dead */
+			sd->disk.state = (1<<MD_DISK_FAULTY);
+			raid_disk = sd->disk.raid_disk;
+			if (raid_disk >= 0 && sources[raid_disk] >= 0) {
+				close(sources[raid_disk]);
+				sources[raid_disk] = -1;
+			}
+			new_degraded++;
+		}
+	}
+
+	return new_degraded;
+}
+
+/*******************************************************************************
+ * Function:	imsm_manage_reshape
+ * Description:	Function finds array under reshape and it manages reshape
+ *		process. It creates stripes backups (if required) and sets
+ *		checkpoints.
+ * Parameters:
+ *	afd		: Backup handle (native) - not used
+ *	sra		: general array info
+ *	reshape		: reshape parameters - not used
+ *	st		: supertype structure
+ *	backup_blocks	: size of critical section [blocks] - not used here
+ *	fds		: table of source device descriptor
+ *	offsets		: start of array (offset per devices)
+ *	dests		: not used
+ *	destfd		: table of destination device descriptor - not used here
+ *	destoffsets	: table of destination offsets (per device) - not used
+ *			  here
+ * Returns:
+ *	1 : success, reshape is done
+ *	0 : fail
+ ******************************************************************************/
+static int imsm_manage_reshape(
+	int afd, struct mdinfo *sra, struct reshape *reshape,
+	struct supertype *st, unsigned long backup_blocks,
+	int *fds, unsigned long long *offsets,
+	int dests, int *destfd, unsigned long long *destoffsets)
+{
+	int ret_val = 0;
+	struct intel_super *super = st->sb;
+	struct intel_dev *dv = NULL;
+	struct imsm_dev *dev = NULL;
+	struct imsm_map *map_src;
+	int migr_vol_qan = 0;
+	int ndata, odata; /* [bytes] */
+	int chunk; /* [bytes] */
+	struct migr_record *migr_rec;
+	char *buf = NULL;
+	unsigned int buf_size; /* [bytes] */
+	unsigned long long max_position; /* array size [bytes] */
+	unsigned long long next_step; /* [blocks]/[bytes] */
+	unsigned long long old_data_stripe_length;
+	unsigned long long start_src; /* [bytes] */
+	unsigned long long start; /* [bytes] */
+	unsigned long long start_buf_shift; /* [bytes] */
+	int degraded = 0;
+	int source_layout = 0;
+
+	/* without sra there is nothing to abort: the common exit path
+	 * hands sra to abort_reshape()
+	 */
+	if (!sra)
+		return ret_val;
+
+	if (!fds || !offsets)
+		goto abort;
+
+	/* Find volume during the reshape */
+	for (dv = super->devlist; dv; dv = dv->next) {
+		if (dv->dev->vol.migr_type == MIGR_GEN_MIGR
+		    && dv->dev->vol.migr_state == 1) {
+			dev = dv->dev;
+			migr_vol_qan++;
+		}
+	}
+	/* Only one volume can migrate at the same time */
+	if (migr_vol_qan != 1) {
+		pr_err(": %s", migr_vol_qan ?
+			"Number of migrating volumes greater than 1\n" :
+			"There is no volume during migrationg\n");
+		goto abort;
+	}
+
+	map_src = get_imsm_map(dev, MAP_1);
+	if (map_src == NULL)
+		goto abort;
+
+	ndata = imsm_num_data_members(dev, MAP_0);
+	odata = imsm_num_data_members(dev, MAP_1);
+
+	chunk = __le16_to_cpu(map_src->blocks_per_strip) * 512;
+	if (ndata <= 0 || odata <= 0 || chunk <= 0) {
+		/* all three values are used as divisors below */
+		dprintf("imsm: wrong geometry for reshape "
+			"(ndata = %i, odata = %i, chunk = %i)\n",
+			ndata, odata, chunk);
+		goto abort;
+	}
+	old_data_stripe_length = odata * chunk;
+
+	migr_rec = super->migr_rec;
+
+	/* initialize migration record for start condition */
+	if (sra->reshape_progress == 0)
+		init_migr_record_imsm(st, dev, sra);
+	else {
+		if (__le32_to_cpu(migr_rec->rec_status) != UNIT_SRC_NORMAL) {
+			dprintf("imsm: cannot restart migration when data "
+				"are present in copy area.\n");
+			goto abort;
+		}
+		/* Save checkpoint to update migration record for current
+		 * reshape position (in md). It can be farther than current
+		 * reshape position in metadata.
+		 */
+		if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) {
+			/* ignore error == 2, this can mean end of reshape here
+			 */
+			dprintf("imsm: Cannot write checkpoint to "
+				"migration record (UNIT_SRC_NORMAL, "
+				"initial save)\n");
+			goto abort;
+		}
+	}
+
+	/* size for data */
+	buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512;
+	/* extend buffer size for parity disk */
+	buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
+	/* add space for stripe alignment */
+	buf_size += old_data_stripe_length;
+	if (posix_memalign((void **)&buf, 4096, buf_size)) {
+		dprintf("imsm: Cannot allocate checpoint buffer\n");
+		goto abort;
+	}
+
+	max_position = sra->component_size * ndata;
+	source_layout = imsm_level_to_layout(map_src->raid_level);
+
+	while (__le32_to_cpu(migr_rec->curr_migr_unit) <
+	       __le32_to_cpu(migr_rec->num_migr_units)) {
+		/* current reshape position [blocks] */
+		unsigned long long current_position =
+			__le32_to_cpu(migr_rec->blocks_per_unit)
+			* __le32_to_cpu(migr_rec->curr_migr_unit);
+		unsigned long long border;
+
+		/* Check that array hasn't become failed.
+		 */
+		degraded = check_degradation_change(sra, fds, degraded);
+		if (degraded > 1) {
+			dprintf("imsm: Abort reshape due to degradation"
+				" level (%i)\n", degraded);
+			goto abort;
+		}
+
+		next_step = __le32_to_cpu(migr_rec->blocks_per_unit);
+
+		if ((current_position + next_step) > max_position)
+			next_step = max_position - current_position;
+
+		start = current_position * 512;
+
+		/* align reading start to old geometry */
+		start_buf_shift = start % old_data_stripe_length;
+		start_src = start - start_buf_shift;
+
+		border = (start_src / odata) - (start / ndata);
+		border /= 512;
+		if (border <= __le32_to_cpu(migr_rec->dest_depth_per_unit)) {
+			/* save critical stripes to buf
+			 * start     - start address of current unit
+			 *             to backup [bytes]
+			 * start_src - start address of current unit
+			 *             to backup aligned to source array
+			 *             [bytes]
+			 */
+			unsigned long long next_step_filler = 0;
+			unsigned long long copy_length = next_step * 512;
+
+			/* align copy area length to stripe in old geometry */
+			next_step_filler = ((copy_length + start_buf_shift)
+					    % old_data_stripe_length);
+			if (next_step_filler)
+				next_step_filler = (old_data_stripe_length
+						    - next_step_filler);
+			dprintf("save_stripes() parameters: start = %llu,"
+				"\tstart_src = %llu,\tnext_step*512 = %llu,"
+				"\tstart_in_buf_shift = %llu,"
+				"\tnext_step_filler = %llu\n",
+				start, start_src, copy_length,
+				start_buf_shift, next_step_filler);
+
+			if (save_stripes(fds, offsets, map_src->num_members,
+					 chunk, map_src->raid_level,
+					 source_layout, 0, NULL, start_src,
+					 copy_length +
+					 next_step_filler + start_buf_shift,
+					 buf)) {
+				dprintf("imsm: Cannot save stripes"
+					" to buffer\n");
+				goto abort;
+			}
+			/* Convert data to destination format and store it
+			 * in backup general migration area
+			 */
+			if (save_backup_imsm(st, dev, sra,
+				buf + start_buf_shift, copy_length)) {
+				dprintf("imsm: Cannot save stripes to "
+					"target devices\n");
+				goto abort;
+			}
+			if (save_checkpoint_imsm(st, sra,
+						 UNIT_SRC_IN_CP_AREA)) {
+				dprintf("imsm: Cannot write checkpoint to "
+					"migration record (UNIT_SRC_IN_CP_AREA)\n");
+				goto abort;
+			}
+		} else if (next_step) {
+			/* set next step to use whole border area;
+			 * a zero step cannot be used as a divisor and
+			 * scaling it would leave it at zero anyway
+			 */
+			border /= next_step;
+			if (border > 1)
+				next_step *= border;
+		}
+		/* When data backed up, checkpoint stored,
+		 * kick the kernel to reshape unit of data
+		 */
+		next_step = next_step + sra->reshape_progress;
+		/* limit next step to array max position */
+		if (next_step > max_position)
+			next_step = max_position;
+		sysfs_set_num(sra, NULL, "suspend_lo", sra->reshape_progress);
+		sysfs_set_num(sra, NULL, "suspend_hi", next_step);
+		sra->reshape_progress = next_step;
+
+		/* wait until reshape finish */
+		if (wait_for_reshape_imsm(sra, ndata) < 0) {
+			dprintf("wait_for_reshape_imsm returned error!\n");
+			goto abort;
+		}
+		if (sigterm)
+			goto abort;
+
+		if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) {
+			/* ignore error == 2, this can mean end of reshape here
+			 */
+			dprintf("imsm: Cannot write checkpoint to "
+				"migration record (UNIT_SRC_NORMAL)\n");
+			goto abort;
+		}
+
+	}
+
+	/* return '1' if done */
+	ret_val = 1;
+abort:
+	/* common exit, taken on success as well */
+	free(buf);
+	abort_reshape(sra);
+
+	return ret_val;
+}
+
+#endif /* MDASSEMBLE */
+
+/* Method table of the "imsm" metadata handler (.name = "imsm", marked as
+ * external metadata with .external = 1).  Every member is bound to the
+ * corresponding *_imsm implementation.  The entries inside the
+ * "#ifndef MDASSEMBLE" sections are left out when MDASSEMBLE is defined.
+ */
+struct superswitch super_imsm = {
+#ifndef MDASSEMBLE
+ .examine_super = examine_super_imsm,
+ .brief_examine_super = brief_examine_super_imsm,
+ .brief_examine_subarrays = brief_examine_subarrays_imsm,
+ .export_examine_super = export_examine_super_imsm,
+ .detail_super = detail_super_imsm,
+ .brief_detail_super = brief_detail_super_imsm,
+ .write_init_super = write_init_super_imsm,
+ .validate_geometry = validate_geometry_imsm,
+ .add_to_super = add_to_super_imsm,
+ .remove_from_super = remove_from_super_imsm,
+ .detail_platform = detail_platform_imsm,
+ .export_detail_platform = export_detail_platform_imsm,
+ .kill_subarray = kill_subarray_imsm,
+ .update_subarray = update_subarray_imsm,
+ .load_container = load_container_imsm,
+ .default_geometry = default_geometry_imsm,
+ .get_disk_controller_domain = imsm_get_disk_controller_domain,
+ .reshape_super = imsm_reshape_super,
+ .manage_reshape = imsm_manage_reshape,
+ .recover_backup = recover_backup_imsm,
+ .copy_metadata = copy_metadata_imsm,
+#endif
+ .match_home = match_home_imsm,
+ .uuid_from_super= uuid_from_super_imsm,
+ .getinfo_super = getinfo_super_imsm,
+ .getinfo_super_disks = getinfo_super_disks_imsm,
+ .update_super = update_super_imsm,
+
+ .avail_size = avail_size_imsm,
+ .min_acceptable_spare_size = min_acceptable_spare_size_imsm,
+
+ .compare_super = compare_super_imsm,
+
+ .load_super = load_super_imsm,
+ .init_super = init_super_imsm,
+ .store_super = store_super_imsm,
+ .free_super = free_super_imsm,
+ .match_metadata_desc = match_metadata_desc_imsm,
+ .container_content = container_content_imsm,
+ .validate_container = validate_container_imsm,
+
+ .external = 1,
+ .name = "imsm",
+
+#ifndef MDASSEMBLE
+/* for mdmon */
+ .open_new = imsm_open_new,
+ .set_array_state= imsm_set_array_state,
+ .set_disk = imsm_set_disk,
+ .sync_metadata = imsm_sync_metadata,
+ .activate_spare = imsm_activate_spare,
+ .process_update = imsm_process_update,
+ .prepare_update = imsm_prepare_update,
+#endif /* MDASSEMBLE */
+};
diff --git a/super-mbr.c b/super-mbr.c
new file mode 100644
index 00000000..0fcac7c3
--- /dev/null
+++ b/super-mbr.c
@@ -0,0 +1,206 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2010 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neil@brown.name>
+ *
+ */
+
+/*
+ * 'mbr' is a pseudo metadata type for devices which have a
+ * partition table in the Master Boot Record (mbr) also known
+ * as a dos partition table.
+ *
+ * Obviously arrays cannot be created or assembled for this type.
+ * It is used to allow a new bare device to have a partition table
+ * added so the member partitions can then be included in other
+ * arrays as relevant.
+ *
+ * The meaningful operations are:
+ * examine_super, but not brief_examine_super or export_examine
+ * load_super
+ * store_super
+ */
+
+#include "mdadm.h"
+#include "part.h"
+
+static void free_mbr(struct supertype *st)
+{
+	/* Detach the loaded record from 'st' and release it. */
+	void *record = st->sb;
+
+	st->sb = NULL;
+	free(record);
+}
+
+#ifndef MDASSEMBLE
+
+static void examine_mbr(struct supertype *st, char *homehost)
+{
+	/* Print the signature word followed by one line for every
+	 * partition slot that has a non-zero sector count.
+	 * 'homehost' is not used.
+	 */
+	struct MBR *record = st->sb;
+	int slot;
+
+	printf(" MBR Magic : %04x\n", record->magic);
+	for (slot = 0; slot < MBR_PARTITIONS; slot++) {
+		unsigned long nsect, start;
+
+		if (!record->parts[slot].blocks_num)
+			continue;
+		nsect = (unsigned long)__le32_to_cpu(record->parts[slot].blocks_num);
+		start = (unsigned long)__le32_to_cpu(record->parts[slot].first_sect_lba);
+		printf("Partition[%d] : %12lu sectors at %12lu (type %02x)\n",
+		       slot, nsect, start,
+		       record->parts[slot].part_type);
+	}
+}
+
+#endif /*MDASSEMBLE */
+
+static int load_super_mbr(struct supertype *st, int fd, char *devname)
+{
+	/* Try to read an mbr from the start of 'fd' into st->sb.
+	 * Any record already held by 'st' is released first.
+	 * Errors are only reported when 'devname' is given
+	 * (allocation failure is always reported).
+	 * Return
+	 *  0 on success
+	 *  1 cannot get record, or the record has no MBR signature
+	 */
+	struct MBR *super;
+
+	free_mbr(st);
+
+	if (posix_memalign((void**)&super, 512, 512) != 0) {
+		pr_err("%s could not allocate superblock\n",
+			__func__);
+		return 1;
+	}
+
+	/* If the seek fails, read() would return whatever sits at the
+	 * current file offset, so treat that as a read failure too.
+	 */
+	if (lseek(fd, 0, 0) < 0 ||
+	    read(fd, super, sizeof(*super)) != sizeof(*super)) {
+		if (devname)
+			pr_err("Cannot read partition table on %s\n",
+				devname);
+		free(super);
+		return 1;
+	}
+
+	if (super->magic != MBR_SIGNATURE_MAGIC) {
+		if (devname)
+			pr_err("No partition table found on %s\n",
+				devname);
+		free(super);
+		return 1;
+	}
+
+	st->sb = super;
+
+	/* Only claim the supertype if the caller had not chosen one */
+	if (st->ss == NULL) {
+		st->ss = &mbr;
+		st->minor_version = 0;
+		st->max_devs = 1;
+		st->info = NULL;
+	}
+	return 0;
+}
+
+static int store_mbr(struct supertype *st, int fd)
+{
+	/* Write the record in st->sb to the start of 'fd', keeping the
+	 * 'pad' area that is currently on the device.
+	 * Return
+	 *  0 on success
+	 *  1 cannot allocate or cannot read the existing record
+	 *  3 cannot seek back to the start for writing
+	 *  4 write failed
+	 */
+	struct MBR *old, *super;
+
+	if (posix_memalign((void**)&old, 512, 512) != 0) {
+		pr_err("%s could not allocate superblock\n",
+			__func__);
+		return 1;
+	}
+
+	if (lseek(fd, 0, 0) < 0 ||
+	    read(fd, old, sizeof(*old)) != sizeof(*old)) {
+		free(old);
+		return 1;
+	}
+
+	super = st->sb;
+	memcpy(super->pad, old->pad, sizeof(super->pad));
+	free(old);
+	/* Never write the record anywhere but offset 0 */
+	if (lseek(fd, 0, 0) < 0)
+		return 3;
+	if (write(fd, super, sizeof(*super)) != sizeof(*super))
+		return 4;
+	fsync(fd);
+	/* Best effort: the result of the BLKRRPART request is ignored */
+	ioctl(fd, BLKRRPART, 0);
+	return 0;
+}
+
+static void getinfo_mbr(struct supertype *st, struct mdinfo *info, char *map)
+{
+	/* Fill 'info' for an mbr record: array and disk details are
+	 * zeroed, both names are "mbr", and component_size is the
+	 * highest end sector (start + length) of any non-empty
+	 * partition.  'map' is not used.
+	 */
+	struct MBR *sb = st->sb;
+	int i;
+
+	memset(&info->array, 0, sizeof(info->array));
+	memset(&info->disk, 0, sizeof(info->disk));
+	strcpy(info->text_version, "mbr");
+	strcpy(info->name, "mbr");
+	info->component_size = 0;
+
+	for (i = 0; i < MBR_PARTITIONS ; i++)
+		if (sb->parts[i].blocks_num) {
+			/* Two 32bit values can sum to more than 32 bits,
+			 * and 'unsigned long' is only 32 bits wide on
+			 * some hosts, so do the addition in 64 bits.
+			 */
+			unsigned long long last =
+				(unsigned long long)__le32_to_cpu(sb->parts[i].blocks_num)
+				+ (unsigned long long)__le32_to_cpu(sb->parts[i].first_sect_lba);
+			if (last > info->component_size)
+				info->component_size = last;
+		}
+
+}
+
+static struct supertype *match_metadata_desc(char *arg)
+{
+	/* Return a freshly allocated supertype bound to 'mbr' when
+	 * 'arg' is exactly "mbr", otherwise NULL.
+	 */
+	struct supertype *found = NULL;
+
+	if (strcmp(arg, "mbr") == 0) {
+		found = xmalloc(sizeof(*found));
+		found->sb = NULL;
+		found->info = NULL;
+		found->ss = &mbr;
+		found->max_devs = 1;
+		found->minor_version = 0;
+	}
+	return found;
+}
+
+#ifndef MDASSEMBLE
+static int validate_geometry(struct supertype *st, int level,
+			     int layout, int raiddisks,
+			     int *chunk, unsigned long long size,
+			     unsigned long long data_offset,
+			     char *subdev, unsigned long long *freesize,
+			     int verbose)
+{
+	/* No geometry is ever accepted for mbr: report and return 0
+	 * without looking at any of the arguments.
+	 */
+	const int rejected = 0;
+
+	pr_err("mbr metadata cannot be used this way\n");
+	return rejected;
+}
+#endif
+
+/* Method table for the "mbr" pseudo metadata type.  Only the handlers
+ * assigned below are provided; the examine and validate_geometry
+ * entries are left out of MDASSEMBLE builds.
+ */
+struct superswitch mbr = {
+#ifndef MDASSEMBLE
+ .examine_super = examine_mbr,
+ .validate_geometry = validate_geometry, /* always reports an error and returns 0 */
+#endif
+ .match_metadata_desc = match_metadata_desc,
+ .load_super = load_super_mbr,
+ .store_super = store_mbr,
+ .getinfo_super = getinfo_mbr,
+ .free_super = free_mbr,
+ .name = "mbr",
+};
diff --git a/super0.c b/super0.c
new file mode 100644
index 00000000..1c203927
--- /dev/null
+++ b/super0.c
@@ -0,0 +1,1331 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#define HAVE_STDINT_H 1
+#include "mdadm.h"
+#include "sha1.h"
+/*
+ * All handling for the 0.90.0 version superblock is in
+ * this file.
+ * This includes:
+ * - finding, loading, and writing the superblock.
+ * - initialising a new superblock
+ * - printing the superblock for --examine
+ * - printing part of the superblock for --detail
+ * .. other stuff
+ */
+
+static unsigned long calc_sb0_csum(mdp_super_t *super)
+{
+	/* The checksum is taken over MD_SB_BYTES with the sb_csum
+	 * field zeroed; the stored value is put back before returning.
+	 */
+	const unsigned long stored = super->sb_csum;
+	unsigned long computed;
+
+	super->sb_csum = 0;
+	computed = calc_csum(super, MD_SB_BYTES);
+	super->sb_csum = stored;
+
+	return computed;
+}
+
+static void super0_swap_endian(struct mdp_superblock_s *sb)
+{
+	/* super0 superblocks are host-endian, so it is sometimes
+	 * useful to flip the endianness.  (Almost) everything is a
+	 * u32, so every 4-byte group is byte-reversed in place.
+	 * The hi/lo halves of the two event counters then have to be
+	 * exchanged as well.
+	 */
+	char *bytes = (char *)sb;
+	__u32 held;
+	int off;
+
+	for (off = 0; off < MD_SB_BYTES; off += 4) {
+		char outer = bytes[off];
+		char inner = bytes[off+1];
+
+		bytes[off] = bytes[off+3];
+		bytes[off+1] = bytes[off+2];
+		bytes[off+2] = inner;
+		bytes[off+3] = outer;
+	}
+
+	held = sb->events_hi;
+	sb->events_hi = sb->events_lo;
+	sb->events_lo = held;
+
+	held = sb->cp_events_hi;
+	sb->cp_events_hi = sb->cp_events_lo;
+	sb->cp_events_lo = held;
+}
+
+#ifndef MDASSEMBLE
+
+static void examine_super0(struct supertype *st, char *homehost)
+{
+	/* Print a full description of the 0.90 superblock in st->sb.
+	 * If 'homehost' is given and the first 8 bytes of its sha1
+	 * match set_uuid2/set_uuid3, the UUID is flagged as local.
+	 *
+	 * The superblock may be damaged (this is called even when the
+	 * checksum is wrong), so values taken from it are not trusted
+	 * as divisors or as bounds for the disks[] table.
+	 */
+	mdp_super_t *sb = st->sb;
+	time_t atime;
+	int d;
+	int ndisks;
+	int delta_extra = 0;
+	char *c;
+
+	printf(" Magic : %08x\n", sb->md_magic);
+	printf(" Version : %d.%02d.%02d\n", sb->major_version, sb->minor_version,
+	       sb->patch_version);
+	if (sb->minor_version >= 90) {
+		printf(" UUID : %08x:%08x:%08x:%08x", sb->set_uuid0, sb->set_uuid1,
+		       sb->set_uuid2, sb->set_uuid3);
+		if (homehost) {
+			char buf[20];
+			void *hash = sha1_buffer(homehost,
+						 strlen(homehost),
+						 buf);
+			if (memcmp(&sb->set_uuid2, hash, 8)==0)
+				printf(" (local to host %s)", homehost);
+		}
+		printf("\n");
+	} else
+		printf(" UUID : %08x\n", sb->set_uuid0);
+
+	if (sb->not_persistent)
+		printf(" Eedk : not persistent\n");
+
+	atime = sb->ctime;
+	printf(" Creation Time : %.24s\n", ctime(&atime));
+	c=map_num(pers, sb->level);
+	printf(" Raid Level : %s\n", c?c:"-unknown-");
+	if ((int)sb->level > 0) {
+		int ddsks = 0, ddsks_denom = 1;
+		printf(" Used Dev Size : %d%s\n", sb->size,
+		       human_size((long long)sb->size<<10));
+		switch(sb->level) {
+		case 1: ddsks=1;break;
+		case 4:
+		case 5: ddsks = sb->raid_disks-1; break;
+		case 6: ddsks = sb->raid_disks-2; break;
+		case 10: ddsks = sb->raid_disks;
+			ddsks_denom = (sb->layout&255) * ((sb->layout>>8)&255);
+		}
+		/* a raid10 layout with a zero near or far count would
+		 * make ddsks_denom zero; skip the line rather than
+		 * divide by it.
+		 */
+		if (ddsks && ddsks_denom) {
+			long long asize = sb->size;
+			asize = (asize << 10) * ddsks / ddsks_denom;
+			printf(" Array Size : %llu%s\n",
+			       asize >> 10, human_size(asize));
+		}
+	}
+	printf(" Raid Devices : %d\n", sb->raid_disks);
+	printf(" Total Devices : %d\n", sb->nr_disks);
+	printf("Preferred Minor : %d\n", sb->md_minor);
+	printf("\n");
+	if (sb->minor_version > 90 && (sb->reshape_position+1) != 0) {
+		printf(" Reshape pos'n : %llu%s\n", (unsigned long long)sb->reshape_position/2, human_size((long long)sb->reshape_position<<9));
+		if (sb->delta_disks) {
+			printf(" Delta Devices : %d", sb->delta_disks);
+			printf(" (%d->%d)\n", sb->raid_disks-sb->delta_disks, sb->raid_disks);
+			if (((int)sb->delta_disks) < 0)
+				delta_extra = - sb->delta_disks;
+		}
+		if (sb->new_level != sb->level) {
+			c = map_num(pers, sb->new_level);
+			printf(" New Level : %s\n", c?c:"-unknown-");
+		}
+		if (sb->new_layout != sb->layout) {
+			if (sb->level == 5) {
+				c = map_num(r5layout, sb->new_layout);
+				printf(" New Layout : %s\n", c?c:"-unknown-");
+			}
+			if (sb->level == 6) {
+				c = map_num(r6layout, sb->new_layout);
+				printf(" New Layout : %s\n", c?c:"-unknown-");
+			}
+			if (sb->level == 10) {
+				printf(" New Layout : near=%d, %s=%d\n",
+				       sb->new_layout&255,
+				       (sb->new_layout&0x10000)?"offset":"far",
+				       (sb->new_layout>>8)&255);
+			}
+		}
+		if (sb->new_chunk != sb->chunk_size)
+			printf(" New Chunksize : %d\n", sb->new_chunk);
+		printf("\n");
+	}
+	atime = sb->utime;
+	printf(" Update Time : %.24s\n", ctime(&atime));
+	printf(" State : %s\n",
+	       (sb->state&(1<<MD_SB_CLEAN))?"clean":"active");
+	if (sb->state & (1<<MD_SB_BITMAP_PRESENT))
+		printf("Internal Bitmap : present\n");
+	printf(" Active Devices : %d\n", sb->active_disks);
+	printf("Working Devices : %d\n", sb->working_disks);
+	printf(" Failed Devices : %d\n", sb->failed_disks);
+	printf(" Spare Devices : %d\n", sb->spare_disks);
+	if (calc_sb0_csum(sb) == sb->sb_csum)
+		printf(" Checksum : %x - correct\n", sb->sb_csum);
+	else
+		printf(" Checksum : %x - expected %lx\n", sb->sb_csum, calc_sb0_csum(sb));
+	printf(" Events : %llu\n",
+	       ((unsigned long long)sb->events_hi << 32)
+	       + sb->events_lo);
+	printf("\n");
+	if (sb->level == 5) {
+		c = map_num(r5layout, sb->layout);
+		printf(" Layout : %s\n", c?c:"-unknown-");
+	}
+	if (sb->level == 6) {
+		c = map_num(r6layout, sb->layout);
+		printf(" Layout : %s\n", c?c:"-unknown-");
+	}
+	if (sb->level == 10) {
+		printf(" Layout :");
+		print_r10_layout(sb->layout);
+		printf("\n");
+	}
+	switch(sb->level) {
+	case 0:
+	case 4:
+	case 5:
+	case 6:
+	case 10:
+		printf(" Chunk Size : %dK\n", sb->chunk_size/1024);
+		break;
+	case -1:
+		printf(" Rounding : %dK\n", sb->chunk_size/1024);
+		break;
+	default: break;
+	}
+	printf("\n");
+	printf(" Number Major Minor RaidDevice State\n");
+	/* The counters come from the superblock; never let them take
+	 * us past the end of the disks[] table.
+	 */
+	ndisks = (signed int)(sb->raid_disks+delta_extra + sb->spare_disks);
+	if (ndisks > MD_SB_DISKS)
+		ndisks = MD_SB_DISKS;
+	/* d == -1 is the entry for the device being examined */
+	for (d= -1; d<ndisks; d++) {
+		mdp_disk_t *dp;
+		char *dv;
+		char nb[5];
+		int wonly;
+		if (d>=0) dp = &sb->disks[d];
+		else dp = &sb->this_disk;
+		snprintf(nb, sizeof(nb), "%4d", d);
+		printf("%4s %5d %5d %5d %5d ", d < 0 ? "this" : nb,
+		       dp->number, dp->major, dp->minor, dp->raid_disk);
+		/* write-mostly is reported separately and cleared in the
+		 * in-memory copy so that it does not hide a "spare".
+		 */
+		wonly = dp->state & (1<<MD_DISK_WRITEMOSTLY);
+		dp->state &= ~(1<<MD_DISK_WRITEMOSTLY);
+		if (dp->state & (1<<MD_DISK_FAULTY)) printf(" faulty");
+		if (dp->state & (1<<MD_DISK_ACTIVE)) printf(" active");
+		if (dp->state & (1<<MD_DISK_SYNC)) printf(" sync");
+		if (dp->state & (1<<MD_DISK_REMOVED)) printf(" removed");
+		if (wonly) printf(" write-mostly");
+		if (dp->state == 0) printf(" spare");
+		if ((dv=map_dev(dp->major, dp->minor, 0)))
+			printf(" %s", dv);
+		printf("\n");
+		if (d == -1) printf("\n");
+	}
+}
+
+static void brief_examine_super0(struct supertype *st, int verbose)
+{
+	/* Emit a one-line "ARRAY" description: device name, optionally
+	 * level and device count when 'verbose', then the UUID.
+	 */
+	mdp_super_t *sb = st->sb;
+	char devname[20];
+	char *level = map_num(pers, sb->level);
+
+	sprintf(devname, "/dev/md%d", sb->md_minor);
+
+	printf("ARRAY %s", devname);
+	if (verbose)
+		printf(" level=%s num-devices=%d",
+		       level ? level : "-unknown-", sb->raid_disks);
+
+	if (sb->minor_version < 90)
+		printf(" UUID=%08x", sb->set_uuid0);
+	else
+		printf(" UUID=%08x:%08x:%08x:%08x", sb->set_uuid0, sb->set_uuid1,
+		       sb->set_uuid2, sb->set_uuid3);
+	printf("\n");
+}
+
+static void export_examine_super0(struct supertype *st)
+{
+	/* Print the superblock in st->sb as KEY=value lines:
+	 * MD_LEVEL, MD_DEVICES, MD_UUID, MD_UPDATE_TIME, MD_EVENTS.
+	 */
+	mdp_super_t *sb = st->sb;
+	char *c = map_num(pers, sb->level);
+
+	/* map_num() yields NULL for an unknown level */
+	printf("MD_LEVEL=%s\n", c?c:"-unknown-");
+	printf("MD_DEVICES=%d\n", sb->raid_disks);
+	if (sb->minor_version >= 90)
+		printf("MD_UUID=%08x:%08x:%08x:%08x\n",
+		       sb->set_uuid0, sb->set_uuid1,
+		       sb->set_uuid2, sb->set_uuid3);
+	else
+		printf("MD_UUID=%08x\n", sb->set_uuid0);
+	/* The update time is 'utime' (ctime is the creation time), and
+	 * 0.90 fields are host-endian so no le64 conversion applies.
+	 */
+	printf("MD_UPDATE_TIME=%llu\n",
+	       (unsigned long long)sb->utime);
+	printf("MD_EVENTS=%llu\n",
+	       ((unsigned long long)sb->events_hi << 32)
+	       + sb->events_lo);
+}
+
+static int copy_metadata0(struct supertype *st, int from, int to)
+{
+	/* Read 64K from the 0.90 superblock offset of 'from' and, if
+	 * it looks a little like a 0.90 superblock (magic, major
+	 * version and checksum all agree), write it to the same
+	 * offset of 'to'.
+	 * Returns 0 when the copy was written, 1 otherwise.
+	 */
+	const int bufsize = 64*1024;
+	unsigned long long dsize, offset;
+	mdp_super_t *super;
+	void *buf;
+	int rv = 1;
+
+	if (posix_memalign(&buf, 4096, bufsize) != 0)
+		return 1;
+
+	if (!get_dev_size(from, NULL, &dsize) ||
+	    dsize < MD_RESERVED_SECTORS*512)
+		goto out;
+
+	offset = MD_NEW_SIZE_SECTORS(dsize>>9);
+	offset *= 512;
+
+	if (lseek64(from, offset, 0) < 0LL ||
+	    read(from, buf, bufsize) != bufsize)
+		goto out;
+
+	if (lseek64(to, offset, 0) < 0LL)
+		goto out;
+
+	super = buf;
+	if (super->md_magic == MD_SB_MAGIC &&
+	    super->major_version == 0 &&
+	    calc_sb0_csum(super) == super->sb_csum &&
+	    write(to, buf, bufsize) == bufsize)
+		rv = 0;
+out:
+	free(buf);
+	return rv;
+}
+
+static void detail_super0(struct supertype *st, char *homehost)
+{
+	/* Print the UUID (flagged as local when the sha1 of 'homehost'
+	 * matches its second half) and the event counter halves.
+	 */
+	mdp_super_t *sb = st->sb;
+	char digest[20];
+
+	printf(" UUID : ");
+	if (sb->minor_version < 90)
+		printf("%08x", sb->set_uuid0);
+	else
+		printf("%08x:%08x:%08x:%08x", sb->set_uuid0, sb->set_uuid1,
+		       sb->set_uuid2, sb->set_uuid3);
+
+	if (homehost &&
+	    memcmp(&sb->set_uuid2,
+		   sha1_buffer(homehost, strlen(homehost), digest),
+		   8) == 0)
+		printf(" (local to host %s)", homehost);
+
+	printf("\n Events : %d.%d\n\n", sb->events_hi, sb->events_lo);
+}
+
+static void brief_detail_super0(struct supertype *st)
+{
+	/* Print " UUID=" followed by one word for pre-0.90 minor
+	 * versions, or all four words otherwise.  No newline.
+	 */
+	mdp_super_t *sb = st->sb;
+
+	printf(" UUID=");
+	if (sb->minor_version < 90) {
+		printf("%08x", sb->set_uuid0);
+		return;
+	}
+	printf("%08x:%08x:%08x:%08x", sb->set_uuid0, sb->set_uuid1,
+	       sb->set_uuid2, sb->set_uuid3);
+}
+#endif
+
+static int match_home0(struct supertype *st, char *homehost)
+{
+	/* Return 1 when the first 8 bytes of sha1(homehost) equal the
+	 * 8 bytes starting at set_uuid2, 0 otherwise (including when no
+	 * homehost is given).
+	 */
+	mdp_super_t *sb = st->sb;
+	char digest[20];
+
+	if (homehost) {
+		char *hash = sha1_buffer(homehost,
+					 strlen(homehost),
+					 digest);
+		return (memcmp(&sb->set_uuid2, hash, 8)==0);
+	}
+	return 0;
+}
+
+static void uuid_from_super0(struct supertype *st, int uuid[4])
+{
+	/* Copy the set uuid into uuid[]; minor versions below 90 only
+	 * carry the first word, so the other three are zeroed.
+	 */
+	mdp_super_t *super = st->sb;
+	int has_full_uuid = (super->minor_version >= 90);
+
+	uuid[0] = super->set_uuid0;
+	uuid[1] = has_full_uuid ? super->set_uuid1 : 0;
+	uuid[2] = has_full_uuid ? super->set_uuid2 : 0;
+	uuid[3] = has_full_uuid ? super->set_uuid3 : 0;
+}
+
+static void getinfo_super0(struct supertype *st, struct mdinfo *info, char *map)
+{
+	/* Fill 'info' from the 0.90 superblock in st->sb.
+	 * When 'map' is given, map[i] is set to 1 or 0 for each disk
+	 * slot i according to whether that slot counts as working;
+	 * only slots below the value info->array.raid_disks had on
+	 * entry are written.
+	 */
+	mdp_super_t *sb = st->sb;
+	/* must be read before 'info' is wiped below */
+	int map_disks = info->array.raid_disks;
+	int reshaping;
+	int in_sync = 0;
+	int slot;
+
+	memset(info, 0, sizeof(*info));
+
+	info->array.major_version = sb->major_version;
+	info->array.minor_version = sb->minor_version;
+	info->array.patch_version = sb->patch_version;
+	info->array.level = sb->level;
+	info->array.layout = sb->layout;
+	info->array.chunk_size = sb->chunk_size;
+	info->array.raid_disks = sb->raid_disks;
+	info->array.md_minor = sb->md_minor;
+	info->array.ctime = sb->ctime;
+	info->array.utime = sb->utime;
+	info->array.state = sb->state;
+	info->component_size = sb->size*2;
+
+	if (sb->state & (1<<MD_SB_BITMAP_PRESENT))
+		info->bitmap_offset = 8;
+
+	info->disk.number = sb->this_disk.number;
+	info->disk.major = sb->this_disk.major;
+	info->disk.minor = sb->this_disk.minor;
+	info->disk.raid_disk = sb->this_disk.raid_disk;
+	info->disk.state = sb->this_disk.state;
+
+	info->events = md_event(sb);
+	info->data_offset = 0;
+	info->safe_mode_delay = 200;
+
+	sprintf(info->text_version, "0.%d", sb->minor_version);
+	sprintf(info->name, "%d", sb->md_minor);
+
+	uuid_from_super0(st, info->uuid);
+
+	info->recovery_start = MaxSector;
+
+	reshaping = sb->minor_version > 90 && (sb->reshape_position+1) != 0;
+	info->reshape_active = reshaping;
+	if (reshaping) {
+		info->reshape_progress = sb->reshape_position;
+		info->new_level = sb->new_level;
+		info->delta_disks = sb->delta_disks;
+		info->new_layout = sb->new_layout;
+		info->new_chunk = sb->new_chunk;
+		if (info->delta_disks < 0)
+			info->array.raid_disks -= info->delta_disks;
+	}
+	info->recovery_blocked = info->reshape_active;
+
+	/* working_disks is calculated rather than read directly */
+	for (slot = 0; slot < MD_SB_DISKS; slot++) {
+		mdp_disk_t *dk = &sb->disks[slot];
+		int ok = (dk->state & (1<<MD_DISK_SYNC)) &&
+			 (dk->raid_disk < (unsigned)info->array.raid_disks) &&
+			 (dk->state & (1<<MD_DISK_ACTIVE)) &&
+			 !(dk->state & (1<<MD_DISK_FAULTY));
+
+		if (ok)
+			in_sync++;
+		if (map && slot < map_disks)
+			map[slot] = ok;
+	}
+	info->array.working_disks = in_sync;
+}
+
+static struct mdinfo *container_content0(struct supertype *st, char *subarray)
+{
+	/* 0.90 has no subarrays: asking for one yields NULL, otherwise
+	 * a newly allocated mdinfo filled by getinfo_super0().
+	 */
+	struct mdinfo *content = NULL;
+
+	if (!subarray) {
+		content = xmalloc(sizeof(*content));
+		getinfo_super0(st, content, NULL);
+	}
+	return content;
+}
+
+/* Apply the update named by 'update' to the 0.90 superblock in st->sb.
+ * Most updates take their new values from 'info'.
+ * 'devname' is only used in messages; 'verbose' gates some of them.
+ * 'uuid_set' and 'homehost' matter for the "uuid"/"homehost" updates.
+ * Returns:
+ *  1 "force-one", "force-array" or "assemble" changed something
+ *  0 otherwise on success
+ * -1 'update' was not recognised
+ * -2 "metadata" or "revert-reshape" was refused
+ * The checksum is recomputed before returning, except on the early
+ * "revert-reshape" alignment failure.
+ */
+static int update_super0(struct supertype *st, struct mdinfo *info,
+ char *update,
+ char *devname, int verbose,
+ int uuid_set, char *homehost)
+{
+ /* NOTE: for 'assemble' and 'force' we need to return non-zero
+ * if any change was made. For others, the return value is
+ * ignored.
+ */
+ int rv = 0;
+ int uuid[4];
+ mdp_super_t *sb = st->sb;
+
+ if (strcmp(update, "homehost") == 0 &&
+ homehost) {
+ /* note that 'homehost' is special as it is really
+ * a "uuid" update.
+ */
+ uuid_set = 0;
+ update = "uuid";
+ info->uuid[0] = sb->set_uuid0;
+ info->uuid[1] = sb->set_uuid1;
+ }
+
+ if (strcmp(update, "sparc2.2")==0 ) {
+ /* 2.2 sparc put the events in the wrong place
+ * So we copy the tail of the superblock
+ * up 4 bytes before continuing
+ */
+ __u32 *sb32 = (__u32*)sb;
+ /* NOTE(review): source and destination overlap here, which
+ * memcpy() does not guarantee to handle - confirm whether
+ * memmove() is needed.
+ */
+ memcpy(sb32+MD_SB_GENERIC_CONSTANT_WORDS+7,
+ sb32+MD_SB_GENERIC_CONSTANT_WORDS+7+1,
+ (MD_SB_WORDS - (MD_SB_GENERIC_CONSTANT_WORDS+7+1))*4);
+ if (verbose >= 0)
+ pr_err("adjusting superblock of %s for 2.2/sparc compatibility.\n",
+ devname);
+ } else if (strcmp(update, "super-minor") ==0) {
+ sb->md_minor = info->array.md_minor;
+ if (verbose > 0)
+ pr_err("updating superblock of %s with minor number %d\n",
+ devname, info->array.md_minor);
+ } else if (strcmp(update, "summaries") == 0) {
+ unsigned int i;
+ /* set nr_disks, active_disks, working_disks,
+ * failed_disks, spare_disks based on disks[]
+ * array in superblock.
+ * Also make sure extra slots aren't 'failed'
+ */
+ sb->nr_disks = sb->active_disks =
+ sb->working_disks = sb->failed_disks =
+ sb->spare_disks = 0;
+ for (i=0; i < MD_SB_DISKS ; i++)
+ if (sb->disks[i].major ||
+ sb->disks[i].minor) {
+ int state = sb->disks[i].state;
+ if (state & (1<<MD_DISK_REMOVED))
+ continue;
+ sb->nr_disks++;
+ if (state & (1<<MD_DISK_ACTIVE))
+ sb->active_disks++;
+ if (state & (1<<MD_DISK_FAULTY))
+ sb->failed_disks++;
+ else
+ sb->working_disks++;
+ if (state == 0)
+ sb->spare_disks++;
+ } else if (i >= sb->raid_disks && sb->disks[i].number == 0)
+ sb->disks[i].state = 0;
+ } else if (strcmp(update, "force-one")==0) {
+ /* Not enough devices for a working array, so
+ * bring this one up-to-date.
+ */
+ __u32 ehi = sb->events_hi, elo = sb->events_lo;
+ sb->events_hi = (info->events>>32) & 0xFFFFFFFF;
+ sb->events_lo = (info->events) & 0xFFFFFFFF;
+ if (sb->events_hi != ehi ||
+ sb->events_lo != elo)
+ rv = 1;
+ } else if (strcmp(update, "force-array")==0) {
+ /* degraded array and 'force' requested, so
+ * maybe need to mark it 'clean'
+ */
+ if ((sb->level == 5 || sb->level == 4 || sb->level == 6) &&
+ (sb->state & (1 << MD_SB_CLEAN)) == 0) {
+ /* need to force clean */
+ sb->state |= (1 << MD_SB_CLEAN);
+ rv = 1;
+ }
+ } else if (strcmp(update, "assemble")==0) {
+ /* Bring this device's disks[] state, and the reshape
+ * position, in line with what 'info' reports.
+ */
+ int d = info->disk.number;
+ int wonly = sb->disks[d].state & (1<<MD_DISK_WRITEMOSTLY);
+ int mask = (1<<MD_DISK_WRITEMOSTLY);
+ int add = 0;
+ if (sb->minor_version >= 91)
+ /* During reshape we don't insist on everything
+ * being marked 'sync'
+ */
+ add = (1<<MD_DISK_SYNC);
+ if (((sb->disks[d].state & ~mask) | add)
+ != (unsigned)info->disk.state) {
+ sb->disks[d].state = info->disk.state | wonly;
+ rv = 1;
+ }
+ /* growing (delta_disks >= 0): only ever move the position back */
+ if (info->reshape_active &&
+ sb->minor_version > 90 && (sb->reshape_position+1) != 0 &&
+ info->delta_disks >= 0 &&
+ info->reshape_progress < sb->reshape_position) {
+ sb->reshape_position = info->reshape_progress;
+ rv = 1;
+ }
+ /* shrinking (delta_disks < 0): only ever move the position forward */
+ if (info->reshape_active &&
+ sb->minor_version > 90 && (sb->reshape_position+1) != 0 &&
+ info->delta_disks < 0 &&
+ info->reshape_progress > sb->reshape_position) {
+ sb->reshape_position = info->reshape_progress;
+ rv = 1;
+ }
+ } else if (strcmp(update, "linear-grow-new") == 0) {
+ /* Rewrite the slot for info->disk and make it this_disk */
+ memset(&sb->disks[info->disk.number], 0, sizeof(sb->disks[0]));
+ sb->disks[info->disk.number].number = info->disk.number;
+ sb->disks[info->disk.number].major = info->disk.major;
+ sb->disks[info->disk.number].minor = info->disk.minor;
+ sb->disks[info->disk.number].raid_disk = info->disk.raid_disk;
+ sb->disks[info->disk.number].state = info->disk.state;
+ sb->this_disk = sb->disks[info->disk.number];
+ } else if (strcmp(update, "linear-grow-update") == 0) {
+ /* As above but this_disk is left alone and the array
+ * counters are refreshed from 'info'.
+ */
+ sb->raid_disks = info->array.raid_disks;
+ sb->nr_disks = info->array.nr_disks;
+ sb->active_disks = info->array.active_disks;
+ sb->working_disks = info->array.working_disks;
+ memset(&sb->disks[info->disk.number], 0, sizeof(sb->disks[0]));
+ sb->disks[info->disk.number].number = info->disk.number;
+ sb->disks[info->disk.number].major = info->disk.major;
+ sb->disks[info->disk.number].minor = info->disk.minor;
+ sb->disks[info->disk.number].raid_disk = info->disk.raid_disk;
+ sb->disks[info->disk.number].state = info->disk.state;
+ } else if (strcmp(update, "resync") == 0) {
+ /* make sure resync happens */
+ sb->state &= ~(1<<MD_SB_CLEAN);
+ sb->recovery_cp = 0;
+ } else if (strcmp(update, "uuid") == 0) {
+ if (!uuid_set && homehost) {
+ /* second half of the uuid becomes the homehost hash */
+ char buf[20];
+ char *hash = sha1_buffer(homehost,
+ strlen(homehost),
+ buf);
+ memcpy(info->uuid+2, hash, 8);
+ }
+ sb->set_uuid0 = info->uuid[0];
+ sb->set_uuid1 = info->uuid[1];
+ sb->set_uuid2 = info->uuid[2];
+ sb->set_uuid3 = info->uuid[3];
+ if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) {
+ /* the bitmap superblock sits right after 'sb' in memory
+ * and carries its own copy of the uuid
+ */
+ struct bitmap_super_s *bm;
+ bm = (struct bitmap_super_s*)(sb+1);
+ uuid_from_super0(st, uuid);
+ memcpy(bm->uuid, uuid, 16);
+ }
+ } else if (strcmp(update, "metadata") == 0) {
+ /* Create some v1.0 metadata to match ours but make the
+ * ctime bigger. Also update info->array.*_version.
+ * We need to arrange that store_super writes out
+ * the v1.0 metadata.
+ * Not permitted for unclean array, or array with
+ * bitmap.
+ */
+ if (info->bitmap_offset) {
+ pr_err("Cannot update metadata when bitmap is present\n");
+ rv = -2;
+ } else if (info->array.state != 1) {
+ pr_err("Cannot update metadata on unclean array\n");
+ rv = -2;
+ } else {
+ info->array.major_version = 1;
+ info->array.minor_version = 0;
+ uuid_from_super0(st, info->uuid);
+ st->other = super1_make_v0(st, info, st->sb);
+ }
+ } else if (strcmp(update, "revert-reshape") == 0) {
+ rv = -2;
+ if (sb->minor_version <= 90)
+ pr_err("No active reshape to revert on %s\n",
+ devname);
+ else if (sb->delta_disks == 0)
+ pr_err("%s: Can only revert reshape which changes number of devices\n",
+ devname);
+ else {
+ int tmp;
+ int parity = sb->level == 6 ? 2 : 1;
+ rv = 0;
+
+ /* NOTE(review): the divisor below is zero when
+ * new_chunk < 512 or when raid_disks - delta_disks
+ * equals parity - confirm the superblock is
+ * validated before this point.
+ */
+ if (sb->level >= 4 && sb->level <= 6 &&
+ sb->reshape_position % (
+ sb->new_chunk/512 *
+ (sb->raid_disks - sb->delta_disks - parity))) {
+ pr_err("Reshape position is not suitably aligned.\n");
+ pr_err("Try normal assembly and stop again\n");
+ /* returns without refreshing sb_csum; nothing was changed */
+ return -2;
+ }
+ /* swap the old and new geometry so the reshape runs backwards */
+ sb->raid_disks -= sb->delta_disks;
+ sb->delta_disks = -sb->delta_disks;
+
+ tmp = sb->new_layout;
+ sb->new_layout = sb->layout;
+ sb->layout = tmp;
+
+ tmp = sb->new_chunk;
+ sb->new_chunk = sb->chunk_size;
+ sb->chunk_size = tmp;
+ }
+ } else if (strcmp(update, "no-bitmap") == 0) {
+ sb->state &= ~(1<<MD_SB_BITMAP_PRESENT);
+ } else if (strcmp(update, "_reshape_progress")==0)
+ sb->reshape_position = info->reshape_progress;
+ /* NOTE(review): the two updates below change the MD_DISK_WRITEMOSTLY
+ * bit in sb->state, whereas the rest of this function treats that
+ * bit as part of a per-disk state (sb->disks[d].state) - confirm
+ * which field is intended.
+ */
+ else if (strcmp(update, "writemostly")==0)
+ sb->state |= (1<<MD_DISK_WRITEMOSTLY);
+ else if (strcmp(update, "readwrite")==0)
+ sb->state &= ~(1<<MD_DISK_WRITEMOSTLY);
+ else
+ rv = -1;
+
+ sb->sb_csum = calc_sb0_csum(sb);
+ return rv;
+}
+
+/*
+ * For version-0 superblock, the homehost is 'stored' in the
+ * uuid. 8 bytes for a hash of the host leaving 8 bytes
+ * of random material.
+ * We use the first 8 bytes (64bits) of the sha1 of the
+ * host name
+ */
+
+static int init_super0(struct supertype *st, mdu_array_info_t *info,
+		       unsigned long long size, char *ignored_name, char *homehost,
+		       int *uuid, unsigned long long data_offset)
+{
+	/* Allocate a zeroed 0.90 superblock (with room for a bitmap
+	 * superblock behind it), attach it to st->sb and fill it in
+	 * from 'info', 'size' and 'uuid'.
+	 * Without a 'uuid' one is generated, and its second half is
+	 * replaced by the sha1 of 'homehost' when that is given.
+	 * Returns 1 when the superblock is fully initialised and 0 in
+	 * every other case - including info == NULL, where only the
+	 * zeroed block is wanted.
+	 */
+	mdp_super_t *sb;
+	int spares;
+
+	if (data_offset != INVALID_SECTORS) {
+		pr_err("data-offset not support for 0.90\n");
+		return 0;
+	}
+
+	if (posix_memalign((void**)&sb, 4096,
+			   MD_SB_BYTES + ROUND_UP(sizeof(bitmap_super_t), 4096)) != 0) {
+		pr_err("%s could not allocate superblock\n", __func__);
+		return 0;
+	}
+	memset(sb, 0, MD_SB_BYTES + sizeof(bitmap_super_t));
+	st->sb = sb;
+
+	/* zeroing the superblock was all that was asked for */
+	if (!info)
+		return 0;
+
+	spares = info->working_disks - info->active_disks;
+	if (info->raid_disks + spares > MD_SB_DISKS) {
+		pr_err("too many devices requested: %d+%d > %d\n",
+			info->raid_disks , spares, MD_SB_DISKS);
+		return 0;
+	}
+
+	sb->md_magic = MD_SB_MAGIC;
+	sb->major_version = 0;
+	sb->minor_version = 90;
+	sb->patch_version = 0;
+	sb->gvalid_words = 0; /* ignored */
+	sb->ctime = time(0);
+	sb->level = info->level;
+	sb->size = size;
+	/* refuse sizes that do not survive the narrowing into sb->size */
+	if ((unsigned long long)sb->size != size)
+		return 0;
+
+	sb->nr_disks = info->nr_disks;
+	sb->raid_disks = info->raid_disks;
+	sb->md_minor = info->md_minor;
+	sb->not_persistent = 0;
+
+	sb->utime = sb->ctime;
+	sb->state = info->state;
+	sb->active_disks = info->active_disks;
+	sb->working_disks = info->working_disks;
+	sb->failed_disks = info->failed_disks;
+	sb->spare_disks = info->spare_disks;
+	sb->events_hi = 0;
+	sb->events_lo = 1;
+
+	sb->layout = info->layout;
+	sb->chunk_size = info->chunk_size;
+
+	if (uuid) {
+		sb->set_uuid0 = uuid[0];
+		sb->set_uuid1 = uuid[1];
+		sb->set_uuid2 = uuid[2];
+		sb->set_uuid3 = uuid[3];
+	} else {
+		int rfd = open("/dev/urandom", O_RDONLY);
+		int have_rand = (rfd >= 0);
+
+		/* fall back to random() for whatever urandom did not supply */
+		if (!have_rand || read(rfd, &sb->set_uuid0, 4) != 4)
+			sb->set_uuid0 = random();
+		if (!have_rand || read(rfd, &sb->set_uuid1, 12) != 12) {
+			sb->set_uuid1 = random();
+			sb->set_uuid2 = random();
+			sb->set_uuid3 = random();
+		}
+		if (have_rand)
+			close(rfd);
+
+		if (homehost) {
+			char buf[20];
+			char *hash = sha1_buffer(homehost,
+						 strlen(homehost),
+						 buf);
+			memcpy(&sb->set_uuid2, hash, 8);
+		}
+	}
+
+	return 1;
+}
+
+/* One entry per device queued by add_to_super0(); the list hangs off
+ * st->info and is walked by write_init_super0().
+ */
+struct devinfo {
+ int fd; /* where the superblock gets written; entries with -1 are skipped */
+ char *devname; /* passed to Kill() and used in error messages */
+ mdu_disk_info_t disk; /* copy of the disk info given to add_to_super0() */
+ struct devinfo *next; /* next queued device, NULL at the tail */
+};
+
+#ifndef MDASSEMBLE
+/* Add a device to the superblock being created */
+static int add_to_super0(struct supertype *st, mdu_disk_info_t *dinfo,
+			 int fd, char *devname, unsigned long long data_offset)
+{
+	/* Record 'dinfo' in its disks[] slot (keeping only the ACTIVE
+	 * and SYNC state bits), make it this_disk, refresh the
+	 * checksum, and append the device to the list on st->info.
+	 * Always returns 0.
+	 */
+	mdp_super_t *sb = st->sb;
+	mdp_disk_t *slot = &sb->disks[dinfo->number];
+	struct devinfo *entry;
+	struct devinfo **tail;
+
+	slot->number = dinfo->number;
+	slot->major = dinfo->major;
+	slot->minor = dinfo->minor;
+	slot->raid_disk = dinfo->raid_disk;
+	slot->state = dinfo->state & ((1<<MD_DISK_ACTIVE) |
+				      (1<<MD_DISK_SYNC));
+
+	sb->this_disk = *slot;
+	sb->sb_csum = calc_sb0_csum(sb);
+
+	entry = xmalloc(sizeof(struct devinfo));
+	entry->fd = fd;
+	entry->devname = devname;
+	entry->disk = *dinfo;
+	entry->next = NULL;
+
+	/* find the end of the list so devices stay in the order added */
+	for (tail = (struct devinfo **)&st->info; *tail; tail = &(*tail)->next)
+		;
+	*tail = entry;
+
+	return 0;
+}
+#endif
+
+static int store_super0(struct supertype *st, int fd)
+{
+	/* Write the superblock held by 'st' to 'fd'.
+	 * If st->other is set it is written instead (1024 bytes at
+	 * the v1.0 location) and then freed.
+	 * Returns
+	 *  0 on success
+	 *  1 device size unavailable
+	 *  2 device too small
+	 *  3 seek failed
+	 *  4 superblock write failed
+	 *  5 bitmap superblock write failed
+	 */
+	mdp_super_t *super = st->sb;
+	unsigned long long dsize;
+	unsigned long long offset;
+
+	if (!get_dev_size(fd, NULL, &dsize))
+		return 1;
+
+	if (dsize < MD_RESERVED_SECTORS*512)
+		return 2;
+
+	if (st->other) {
+		/* Writing out v1.0 metadata for --update=metadata */
+		int ret;
+
+		offset = dsize/512 - 8*2;
+		offset &= ~(4*2-1);
+		offset *= 512;
+		if (lseek64(fd, offset, 0) >= 0LL) {
+			if (write(fd, st->other, 1024) == 1024) {
+				fsync(fd);
+				ret = 0;
+			} else
+				ret = 4;
+		} else
+			ret = 3;
+		free(st->other);
+		st->other = NULL;
+		return ret;
+	}
+
+	offset = MD_NEW_SIZE_SECTORS(dsize>>9);
+	offset *= 512;
+
+	if (lseek64(fd, offset, 0)< 0LL)
+		return 3;
+
+	if (write(fd, super, sizeof(*super)) != sizeof(*super))
+		return 4;
+
+	if (super->state & (1<<MD_SB_BITMAP_PRESENT)) {
+		/* the bitmap superblock follows 'super' in memory and
+		 * is only written when it carries the bitmap magic
+		 */
+		struct bitmap_super_s *bm = (struct bitmap_super_s*)(super+1);
+
+		if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC &&
+		    write(fd, bm, ROUND_UP(sizeof(*bm),4096)) !=
+		    ROUND_UP(sizeof(*bm),4096))
+			return 5;
+	}
+
+	fsync(fd);
+	return 0;
+}
+
+#ifndef MDASSEMBLE
+static int write_init_super0(struct supertype *st)
+{
+	/* Write the new superblock to every queued device that is not
+	 * faulty and has an open fd, stopping at the first failure.
+	 * Returns 0, or the non-zero result of the failing write.
+	 */
+	mdp_super_t *sb = st->sb;
+	struct devinfo *di = st->info;
+	int rv = 0;
+
+	for (; di != NULL && rv == 0; di = di->next) {
+		int faulty = di->disk.state & (1 << MD_DISK_FAULTY);
+
+		if (faulty || di->fd == -1)
+			continue;
+
+		/* repeat for as long as Kill() reports success */
+		while (Kill(di->devname, NULL, 0, -1, 1) == 0)
+			;
+
+		sb->disks[di->disk.number].state &= ~(1<<MD_DISK_FAULTY);
+
+		sb->this_disk = sb->disks[di->disk.number];
+		sb->sb_csum = calc_sb0_csum(sb);
+
+		rv = store_super0(st, di->fd);
+		if (rv == 0 && (sb->state & (1<<MD_SB_BITMAP_PRESENT)))
+			rv = st->ss->write_bitmap(st, di->fd);
+
+		if (rv)
+			pr_err("failed to write superblock to %s\n",
+			       di->devname);
+	}
+	return rv;
+}
+#endif
+
+static int compare_super0(struct supertype *st, struct supertype *tst)
+{
+	/*
+	 * Compare the superblock of 'tst' against the one held by 'st'.
+	 * return:
+	 *  0 same, or first was empty, and second was copied
+	 *  1 second had wrong number (or the copy could not be allocated)
+	 *  2 wrong uuid
+	 *  3 wrong other info
+	 */
+	mdp_super_t *mine = st->sb;
+	mdp_super_t *theirs = tst->sb;
+	int my_uuid[4], their_uuid[4];
+
+	if (theirs->md_magic != MD_SB_MAGIC)
+		return 1;
+
+	if (mine == NULL) {
+		/* nothing to compare against yet: adopt a copy */
+		if (posix_memalign((void**)&mine, 4096,
+				   MD_SB_BYTES +
+				   ROUND_UP(sizeof(struct bitmap_super_s), 4096)) != 0) {
+			pr_err("%s could not allocate superblock\n", __func__);
+			return 1;
+		}
+		memcpy(mine, theirs, MD_SB_BYTES + sizeof(struct bitmap_super_s));
+		st->sb = mine;
+		return 0;
+	}
+
+	uuid_from_super0(st, my_uuid);
+	uuid_from_super0(tst, their_uuid);
+	if (!same_uuid(my_uuid, their_uuid, 0))
+		return 2;
+
+	if (mine->major_version == theirs->major_version &&
+	    mine->minor_version == theirs->minor_version &&
+	    mine->patch_version == theirs->patch_version &&
+	    mine->gvalid_words == theirs->gvalid_words &&
+	    mine->ctime == theirs->ctime &&
+	    mine->level == theirs->level &&
+	    mine->size == theirs->size &&
+	    mine->raid_disks == theirs->raid_disks)
+		return 0;
+
+	return 3;
+}
+
+static void free_super0(struct supertype *st);
+
+/*
+ * Read the 0.90 superblock from 'fd' into a newly allocated buffer and
+ * attach it to 'st' (any superblock previously held by 'st' is freed
+ * first).  If the superblock claims an internal bitmap, the bitmap
+ * superblock stored right after it is read and checked too; when it does
+ * not look valid the MD_SB_BITMAP_PRESENT bit is cleared in the in-memory
+ * copy.
+ *
+ * 'devname' is only used for error messages and may be NULL to keep the
+ * device-specific messages quiet.
+ *
+ * Returns:
+ *   0 on success
+ *   1 if the superblock cannot be obtained (device too small, seek, read
+ *     or allocation failure)
+ *   2 if the data read is not a meaningful 0.90 superblock
+ */
+static int load_super0(struct supertype *st, int fd, char *devname)
+{
+	unsigned long long dsize;	/* device size in bytes */
+	unsigned long long offset;
+	mdp_super_t *super;
+	int uuid[4];
+	struct bitmap_super_s *bsb;
+
+	free_super0(st);
+
+	if (!get_dev_size(fd, devname, &dsize))
+		return 1;
+
+	if (dsize < MD_RESERVED_SECTORS*512) {
+		/* dsize is in bytes; convert so the number matches the
+		 * "sectors" unit named in the message.
+		 */
+		if (devname)
+			pr_err("%s is too small for md: size is %llu sectors.\n",
+			       devname, dsize >> 9);
+		return 1;
+	}
+	st->devsize = dsize;
+
+	/* The superblock location is computed in sectors, then scaled to bytes */
+	offset = MD_NEW_SIZE_SECTORS(dsize>>9);
+
+	offset *= 512;
+
+	if (lseek64(fd, offset, 0)< 0LL) {
+		if (devname)
+			pr_err("Cannot seek to superblock on %s: %s\n",
+			       devname, strerror(errno));
+		return 1;
+	}
+
+	/* One buffer holds the md superblock followed by the bitmap superblock */
+	if (posix_memalign((void**)&super, 4096,
+			   MD_SB_BYTES +
+			   ROUND_UP(sizeof(bitmap_super_t), 4096)) != 0) {
+		pr_err("%s could not allocate superblock\n", __func__);
+		return 1;
+	}
+
+	if (read(fd, super, sizeof(*super)) != MD_SB_BYTES) {
+		if (devname)
+			pr_err("Cannot read superblock on %s\n",
+			       devname);
+		free(super);
+		return 1;
+	}
+
+	/* minor_version 9 is the flag for a byte-swapped superblock */
+	if (st->ss && st->minor_version == 9)
+		super0_swap_endian(super);
+
+	if (super->md_magic != MD_SB_MAGIC) {
+		if (devname)
+			pr_err("No super block found on %s (Expected magic %08x, got %08x)\n",
+			       devname, MD_SB_MAGIC, super->md_magic);
+		free(super);
+		return 2;
+	}
+
+	if (super->major_version != 0) {
+		if (devname)
+			pr_err("Cannot interpret superblock on %s - version is %d\n",
+			       devname, super->major_version);
+		free(super);
+		return 2;
+	}
+	st->sb = super;
+
+	if (st->ss == NULL) {
+		st->ss = &super0;
+		st->minor_version = super->minor_version;
+		st->max_devs = MD_SB_DISKS;
+		st->info = NULL;
+	}
+
+	/* Now check on the bitmap superblock */
+	if ((super->state & (1<<MD_SB_BITMAP_PRESENT)) == 0)
+		return 0;
+	/* Read the bitmap superblock and make sure it looks
+	 * valid.  If it doesn't clear the bit.  An --assemble --force
+	 * should get that written out.
+	 */
+	if (read(fd, super+1, ROUND_UP(sizeof(struct bitmap_super_s),4096))
+	    != ROUND_UP(sizeof(struct bitmap_super_s),4096))
+		goto no_bitmap;
+
+	uuid_from_super0(st, uuid);
+	bsb = (struct bitmap_super_s *)(super+1);
+	if (__le32_to_cpu(bsb->magic) != BITMAP_MAGIC ||
+	    memcmp(bsb->uuid, uuid, 16) != 0)
+		goto no_bitmap;
+	return 0;
+
+ no_bitmap:
+	super->state &= ~(1<<MD_SB_BITMAP_PRESENT);
+
+	return 0;
+}
+
+/*
+ * Translate a metadata description string into a supertype for the 0.90
+ * format.  Accepted spellings are "0", "0.90", "" (no metadata), "0.91"
+ * (reshape in progress), "0.swap"/"0.9" (byte-swapped) and, when
+ * DEFAULT_OLD_METADATA is defined, "default".  A leading run of extra
+ * zeros ("00.90") is tolerated.
+ *
+ * Returns a newly allocated supertype, or NULL if 'arg' names something else.
+ */
+static struct supertype *match_metadata_desc0(char *arg)
+{
+	struct supertype *st = xcalloc(1, sizeof(*st));
+
+	st->container_devnm[0] = 0;
+	st->ss = &super0;
+	st->info = NULL;
+	st->max_devs = MD_SB_DISKS;
+	st->sb = NULL;
+
+	/* we sometimes get 00.90 */
+	while (arg[0] == '0' && arg[1] == '0')
+		arg++;
+
+	st->minor_version = 90;
+	if (!strcmp(arg, "0") || !strcmp(arg, "0.90"))
+		return st;
+#ifdef DEFAULT_OLD_METADATA /* ifndef in super1.c */
+	if (!strcmp(arg, "default"))
+		return st;
+#endif /* DEFAULT_OLD_METADATA */
+	if (!strcmp(arg, ""))	/* no metadata - i.e. non_persistent */
+		return st;
+
+	st->minor_version = 91; /* reshape in progress */
+	if (!strcmp(arg, "0.91")) /* For dup_super support */
+		return st;
+
+	st->minor_version = 9; /* flag for 'byte-swapped' */
+	if (!strcmp(arg, "0.swap") || !strcmp(arg, "0.9")) /* For dup_super support */
+		return st;
+
+	free(st);
+	return NULL;
+}
+
+/*
+ * Report how much of a device of 'devsize' is usable for data with 0.90
+ * metadata.  Returns 0 when an explicit data offset is requested (anything
+ * other than 0 or INVALID_SECTORS) or when the device is smaller than
+ * MD_RESERVED_SECTORS.
+ */
+static __u64 avail_size0(struct supertype *st, __u64 devsize,
+			 unsigned long long data_offset)
+{
+	int offset_ok = (data_offset == 0 || data_offset == INVALID_SECTORS);
+
+	if (!offset_ok)
+		return 0ULL;
+	if (devsize < MD_RESERVED_SECTORS)
+		return 0ULL;
+
+	return MD_NEW_SIZE_SECTORS(devsize);
+}
+
+/*
+ * Set up an internal bitmap superblock in the buffer that follows the
+ * 0.90 superblock in st->sb, and flag the bitmap as present.
+ *
+ * The bitmap comes immediately after the superblock and must be 60K in
+ * size at most.  'size' is in sectors, the chunk size is in bytes.
+ *
+ * *chunkp may be UnSet, in which case a chunk size is chosen here and
+ * written back; otherwise it must be at least the smallest chunk size
+ * that lets the whole bitmap fit.
+ *
+ * Returns 1 on success, 0 if the requested chunk size is too small.
+ */
+static int add_internal_bitmap0(struct supertype *st, int *chunkp,
+				int delay, int write_behind,
+				unsigned long long size, int may_change,
+				int major)
+{
+	unsigned long long max_bits = (60*1024 - sizeof(bitmap_super_t))*8;
+	unsigned long long min_chunk = 4096; /* sub-page chunks don't work yet.. */
+	unsigned long long bits = (size * 512) / min_chunk + 1;
+	int chunk = *chunkp;
+	mdp_super_t *sb = st->sb;
+	bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MD_SB_BYTES);
+	int uuid[4];
+
+	/* Double the chunk size until the bit count fits in the space */
+	for (; bits > max_bits; bits = (bits+1)/2)
+		min_chunk *= 2;
+
+	if (chunk != UnSet) {
+		if ((unsigned long long)chunk < min_chunk)
+			return 0; /* chunk size too small */
+	} else {
+		/* A chunk size less than a few Megabytes gives poor
+		 * performance without increasing resync noticeably
+		 */
+		chunk = min_chunk;
+		if (chunk < 64*1024*1024)
+			chunk = 64*1024*1024;
+	}
+
+	sb->state |= (1<<MD_SB_BITMAP_PRESENT);
+
+	memset(bms, 0, sizeof(*bms));
+	bms->magic = __cpu_to_le32(BITMAP_MAGIC);
+	bms->version = __cpu_to_le32(major);
+	uuid_from_super0(st, uuid);
+	memcpy(bms->uuid, uuid, 16);
+	bms->chunksize = __cpu_to_le32(chunk);
+	bms->daemon_sleep = __cpu_to_le32(delay);
+	bms->sync_size = __cpu_to_le64(size);
+	bms->write_behind = __cpu_to_le32(write_behind);
+
+	*chunkp = chunk;
+	return 1;
+}
+
+/*
+ * Position 'fd' at the internal bitmap, which sits MD_SB_BYTES past the
+ * start of the 0.90 superblock.  Does nothing if the device size cannot
+ * be determined or the device is too small to hold a superblock.
+ */
+static void locate_bitmap0(struct supertype *st, int fd)
+{
+	unsigned long long dev_bytes;
+	unsigned long long pos;
+
+	if (!get_dev_size(fd, NULL, &dev_bytes))
+		return;
+	if (dev_bytes < MD_RESERVED_SECTORS*512)
+		return;
+
+	/* superblock start in sectors, scaled to bytes, then skip over it */
+	pos = MD_NEW_SIZE_SECTORS(dev_bytes>>9);
+	pos = pos * 512 + MD_SB_BYTES;
+
+	lseek64(fd, pos, 0);
+}
+
+/*
+ * Write the internal bitmap for a 0.90 array to 'fd': the bitmap
+ * superblock kept after st->sb, followed by all-ones bitmap bytes, 60K
+ * in total, starting 4096 bytes past the start of the md superblock.
+ *
+ * Returns 0 on success, non-zero on failure (1 device size unknown,
+ * -1 device too small, 3 seek failed, -ENOMEM no buffer, -2 short write).
+ */
+static int write_bitmap0(struct supertype *st, int fd)
+{
+	unsigned long long dev_bytes;
+	unsigned long long sb_start;
+	mdp_super_t *sb = st->sb;
+	int remaining, n;
+	void *page;
+
+	if (!get_dev_size(fd, NULL, &dev_bytes))
+		return 1;
+
+	if (dev_bytes < MD_RESERVED_SECTORS*512)
+		return -1;
+
+	sb_start = MD_NEW_SIZE_SECTORS(dev_bytes>>9);
+	sb_start *= 512;
+
+	if (lseek64(fd, sb_start + 4096, 0)< 0LL)
+		return 3;
+
+	if (posix_memalign(&page, 4096, 4096))
+		return -ENOMEM;
+
+	/* First page: bitmap superblock, padded out with set bits */
+	memset(page, 0xff, 4096);
+	memcpy(page, ((char*)sb)+MD_SB_BYTES, sizeof(bitmap_super_t));
+
+	for (remaining = 60*1024; remaining > 0; remaining -= n) {
+		n = remaining > 4096 ? 4096 : remaining;
+		n = write(fd, page, n);
+		if (n <= 0)
+			break;
+		/* every later page is nothing but set bits */
+		memset(page, 0xff, 4096);
+	}
+	fsync(fd);
+
+	free(page);
+	return remaining ? -2 : 0;
+}
+
+/*
+ * Release everything 'st' holds for a 0.90 array: the superblock buffer
+ * and the list of per-device records, closing any descriptor a record
+ * still has open.
+ */
+static void free_super0(struct supertype *st)
+{
+	struct devinfo *di;
+
+	free(st->sb);
+
+	while ((di = st->info) != NULL) {
+		st->info = di->next;
+		if (di->fd >= 0)
+			close(di->fd);
+		free(di);
+	}
+
+	st->sb = NULL;
+}
+
+#ifndef MDASSEMBLE
+/*
+ * Decide whether an array with the given geometry can be described by
+ * 0.90 metadata and, when 'subdev' is given, whether that device is big
+ * enough, reporting its usable size through *freesize.
+ *
+ * *chunk is replaced by DEFAULT_CHUNK when it is UnSet.
+ * Messages are only printed when 'verbose' is non-zero.
+ *
+ * Returns 1 if the geometry is acceptable, 0 otherwise.
+ */
+static int validate_geometry0(struct supertype *st, int level,
+			      int layout, int raiddisks,
+			      int *chunk, unsigned long long size,
+			      unsigned long long data_offset,
+			      char *subdev, unsigned long long *freesize,
+			      int verbose)
+{
+	unsigned long long dev_bytes;
+	int fd;
+	/* prior to linux 3.1, a bug limits usable device size to 2TB.
+	 * It was introduced in 2.6.29, but we won't worry about that detail
+	 */
+	unsigned int tbmax = (get_linux_version() < 3001000) ? 2 : 4;
+
+	if (level == LEVEL_CONTAINER) {
+		if (verbose)
+			pr_err("0.90 metadata does not support containers\n");
+		return 0;
+	}
+	if (raiddisks > MD_SB_DISKS) {
+		if (verbose)
+			pr_err("0.90 metadata supports at most %d devices per array\n",
+			       MD_SB_DISKS);
+		return 0;
+	}
+	/* 'size' is compared against tbmax terabytes expressed in sectors */
+	if (size >= tbmax * 2ULL*1024*1024*1024) {
+		if (verbose)
+			pr_err("0.90 metadata supports at most %d terabytes per device\n",
+			       tbmax);
+		return 0;
+	}
+	if (*chunk == UnSet)
+		*chunk = DEFAULT_CHUNK;
+
+	/* Without a device to inspect the geometry alone is enough */
+	if (subdev == NULL)
+		return 1;
+
+	fd = open(subdev, O_RDONLY|O_EXCL, 0);
+	if (fd < 0) {
+		if (verbose)
+			pr_err("super0.90 cannot open %s: %s\n",
+			       subdev, strerror(errno));
+		return 0;
+	}
+
+	if (!get_dev_size(fd, subdev, &dev_bytes)) {
+		close(fd);
+		return 0;
+	}
+	close(fd);
+
+	if (dev_bytes < MD_RESERVED_SECTORS * 512)
+		return 0;
+
+	*freesize = MD_NEW_SIZE_SECTORS(dev_bytes >> 9);
+	return 1;
+}
+#endif /* MDASSEMBLE */
+
+/*
+ * Method table for the version 0.90 metadata format.
+ * The reporting and array-creation handlers are left out when
+ * MDASSEMBLE is defined.
+ */
+struct superswitch super0 = {
+#ifndef MDASSEMBLE
+ .examine_super = examine_super0,
+ .brief_examine_super = brief_examine_super0,
+ .export_examine_super = export_examine_super0,
+ .detail_super = detail_super0,
+ .brief_detail_super = brief_detail_super0,
+ .write_init_super = write_init_super0,
+ .validate_geometry = validate_geometry0,
+ .add_to_super = add_to_super0,
+ .copy_metadata = copy_metadata0,
+#endif
+ .match_home = match_home0,
+ .uuid_from_super = uuid_from_super0,
+ .getinfo_super = getinfo_super0,
+ .container_content = container_content0,
+ .update_super = update_super0,
+ .init_super = init_super0,
+ .store_super = store_super0,
+ .compare_super = compare_super0,
+ .load_super = load_super0,
+ .match_metadata_desc = match_metadata_desc0,
+ .avail_size = avail_size0,
+ .add_internal_bitmap = add_internal_bitmap0,
+ .locate_bitmap = locate_bitmap0,
+ .write_bitmap = write_bitmap0,
+ .free_super = free_super0,
+ .name = "0.90",
+};
diff --git a/super1.c b/super1.c
new file mode 100644
index 00000000..727a08a1
--- /dev/null
+++ b/super1.c
@@ -0,0 +1,2425 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include <stddef.h>
+#include "mdadm.h"
+/*
+ * The version-1 superblock :
+ * All numeric fields are little-endian.
+ *
+ * total size: 256 bytes plus 2 per device.
+ * 1K allows 384 devices.
+ */
+struct mdp_superblock_1 {
+ /* constant array information - 128 bytes */
+ __u32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */
+ __u32 major_version; /* 1 */
+ __u32 feature_map; /* 0 for now */
+ __u32 pad0; /* always set to 0 when writing */
+
+ __u8 set_uuid[16]; /* user-space generated. */
+ char set_name[32]; /* set and interpreted by user-space */
+
+ __u64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/
+ __u32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */
+ __u32 layout; /* only for raid5 currently */
+ __u64 size; /* used size of component devices, in 512byte sectors */
+
+ __u32 chunksize; /* in 512byte sectors */
+ __u32 raid_disks;
+ __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts
+ * NOTE: signed, so bitmap can be before superblock
+ * only meaningful if feature_map[0] is set.
+ */
+
+ /* These are only valid with feature bit '4' */
+ __u32 new_level; /* new level we are reshaping to */
+ __u64 reshape_position; /* next address in array-space for reshape */
+ __u32 delta_disks; /* change in number of raid_disks */
+ __u32 new_layout; /* new layout */
+ __u32 new_chunk; /* new chunk size (sectors) */
+ __u32 new_offset; /* signed number to add to data_offset in new
+ * layout. 0 == no-change. This can be
+ * different on each device in the array.
+ */
+
+ /* constant this-device information - 64 bytes */
+ __u64 data_offset; /* sector start of data, often 0 */
+ __u64 data_size; /* sectors in this device that can be used for data */
+ __u64 super_offset; /* sector start of this superblock */
+ __u64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */
+ __u32 dev_number; /* permanent identifier of this device - not role in raid */
+ __u32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */
+ __u8 device_uuid[16]; /* user-space settable, ignored by kernel */
+ __u8 devflags; /* per-device flags. Only one defined...*/
+#define WriteMostly1 1 /* mask for writemostly flag in above */
+ /* bad block log. If there are any bad blocks the feature flag is set.
+ * if offset and size are non-zero, that space is reserved and available.
+ */
+ __u8 bblog_shift; /* shift from sectors to block size for badblocklist */
+ __u16 bblog_size; /* number of sectors reserved for badblocklist */
+ __u32 bblog_offset; /* sector offset from superblock to bblog, signed */
+
+ /* array state information - 64 bytes */
+ __u64 utime; /* 40 bits second, 24 bits microseconds */
+ __u64 events; /* incremented when superblock updated */
+ __u64 resync_offset; /* data before this offset (from data_offset) known to be in sync */
+ __u32 sb_csum; /* checksum up to dev_roles[max_dev] */
+ __u32 max_dev; /* size of dev_roles[] array to consider */
+ __u8 pad3[64-32]; /* set to 0 when writing */
+
+ /* device state information. Indexed by dev_number.
+ * 2 bytes per device
+ * Note there are no per-device state flags. State information is rolled
+ * into the 'roles' value. If a device is spare or faulty, then it doesn't
+ * have a meaningful role.
+ */
+ __u16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */
+};
+
+/* Space, in bytes, set aside for the superblock including dev_roles[] */
+#define MAX_SB_SIZE 4096
+/* bitmap super size is 256, but we round up to a sector for alignment */
+#define BM_SUPER_SIZE 512
+/* Number of 2-byte dev_roles[] entries that fit in MAX_SB_SIZE after the fixed header */
+#define MAX_DEVS ((int)(MAX_SB_SIZE - sizeof(struct mdp_superblock_1)) / 2)
+/* Total buffer: superblock, then bitmap superblock, then struct misc_dev_info */
+#define SUPER1_SIZE (MAX_SB_SIZE + BM_SUPER_SIZE \
+ + sizeof(struct misc_dev_info))
+
+/*
+ * Extra per-device information kept in memory after the bitmap superblock
+ * (getinfo_super1 locates it at MAX_SB_SIZE + BM_SUPER_SIZE from st->sb).
+ */
+struct misc_dev_info {
+ __u64 device_size;
+};
+
+/* feature_map bits */
+#define MD_FEATURE_BITMAP_OFFSET 1
+#define MD_FEATURE_RECOVERY_OFFSET 2 /* recovery_offset is present and
+ * must be honoured
+ */
+#define MD_FEATURE_RESHAPE_ACTIVE 4
+#define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */
+#define MD_FEATURE_REPLACEMENT 16 /* This device is replacing an
+ * active device with same 'role'.
+ * 'recovery_offset' is also set.
+ */
+#define MD_FEATURE_RESHAPE_BACKWARDS 32 /* Reshape doesn't change number
+ * of devices, but is going
+ * backwards anyway.
+ */
+#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */
+#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
+ |MD_FEATURE_RECOVERY_OFFSET \
+ |MD_FEATURE_RESHAPE_ACTIVE \
+ |MD_FEATURE_BAD_BLOCKS \
+ |MD_FEATURE_REPLACEMENT \
+ |MD_FEATURE_RESHAPE_BACKWARDS \
+ |MD_FEATURE_NEW_OFFSET \
+ )
+
+/*
+ * Compute the checksum of a version-1 superblock, covering the fixed
+ * header plus max_dev entries of dev_roles[].  The stored sb_csum field
+ * is treated as zero during the sum and restored afterwards.
+ *
+ * The result is returned in little-endian form so it can be compared
+ * directly with, or stored into, sb->sb_csum.
+ */
+static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb)
+{
+	unsigned int saved_csum, folded;
+	unsigned long long sum = 0;
+	int remaining = sizeof(*sb) + __le32_to_cpu(sb->max_dev)*2;
+	unsigned int *word = (unsigned int*)sb;
+
+	/* make sure I can count... */
+	if (offsetof(struct mdp_superblock_1,data_offset) != 128 ||
+	    offsetof(struct mdp_superblock_1, utime) != 192 ||
+	    sizeof(struct mdp_superblock_1) != 256) {
+		fprintf(stderr, "WARNING - superblock isn't sized correctly\n");
+	}
+
+	saved_csum = sb->sb_csum;
+	sb->sb_csum = 0;
+
+	while (remaining >= 4) {
+		sum += __le32_to_cpu(*word);
+		word++;
+		remaining -= 4;
+	}
+
+	/* an odd number of device roles leaves one 16-bit value over */
+	if (remaining == 2)
+		sum += __le16_to_cpu(*(unsigned short*) word);
+
+	/* fold the carries above 32 bits back into the low word */
+	folded = (sum & 0xffffffff) + (sum >> 32);
+
+	sb->sb_csum = saved_csum;
+	return __cpu_to_le32(folded);
+}
+
+/*
+ * Information related to file descriptor used for aligned reads/writes.
+ * Cache the block size.
+ */
+struct align_fd {
+ int fd; /* descriptor the aligned reads/writes are issued on */
+ int blk_sz; /* block size from BLKSSZGET, or 512 if that ioctl fails */
+};
+
+/*
+ * Prepare 'afd' for aligned I/O on 'fd': remember the descriptor and
+ * cache the block size reported by BLKSSZGET, falling back to 512 when
+ * the ioctl fails.
+ */
+static void init_afd(struct align_fd *afd, int fd)
+{
+	int rc;
+
+	afd->fd = fd;
+
+	rc = ioctl(afd->fd, BLKSSZGET, &afd->blk_sz);
+	if (rc != 0)
+		afd->blk_sz = 512;
+}
+
+static char abuf[4096+4096];
+/*
+ * Aligned read.
+ * On devices with a 4K sector size we have to read whole sectors, so
+ * the data is read into the page-aligned part of 'abuf' and the
+ * requested 'len' bytes are then copied to 'buf'.  The file position
+ * ends up 'len' bytes past where it started.
+ *
+ * Returns the number of bytes copied (at most 'len'), the read() result
+ * when that is <= 0, or -1 if the block size is unusable or 'len'
+ * exceeds 4096.
+ */
+static int aread(struct align_fd *afd, void *buf, int len)
+{
+	int bsize = afd->blk_sz;
+	int iosize = 0;
+	char *bounce;
+	int got;
+
+	if (!bsize || bsize > 4096 || len > 4096) {
+		if (!bsize)
+			fprintf(stderr, "WARNING - aread() called with invalid block size\n");
+		return -1;
+	}
+	bounce = ROUND_UP_PTR((char *)abuf, 4096);
+
+	/* round the request up to a whole number of blocks */
+	while (iosize < len)
+		iosize += bsize;
+
+	got = read(afd->fd, bounce, iosize);
+	if (got <= 0)
+		return got;
+
+	/* step back over whatever was read beyond 'len' */
+	lseek(afd->fd, len - got, 1);
+	if (got > len)
+		got = len;
+	memcpy(buf, bounce, got);
+	return got;
+}
+
+/*
+ * Aligned write.
+ * On devices with a 4K sector size we have to write whole sectors.  When
+ * 'len' is not a whole number of blocks the existing blocks are read
+ * first so the bytes beyond 'len' are written back unchanged.
+ * The address must be sector-aligned.
+ *
+ * Returns 'len' on success, the failing read()/write() result when that
+ * is <= 0, or -1 if the block size is unusable or 'len' exceeds 4096.
+ */
+static int awrite(struct align_fd *afd, void *buf, int len)
+{
+	int bsize = afd->blk_sz;
+	int iosize = 0;
+	char *bounce;
+	int done;
+
+	if (!bsize || bsize > 4096 || len > 4096) {
+		if (!bsize)
+			fprintf(stderr, "WARNING - awrite() called with invalid block size\n");
+		return -1;
+	}
+	bounce = ROUND_UP_PTR((char *)abuf, 4096);
+
+	/* round the request up to a whole number of blocks */
+	while (iosize < len)
+		iosize += bsize;
+
+	if (len != iosize) {
+		/* pre-read, then rewind so the write lands on the same blocks */
+		done = read(afd->fd, bounce, iosize);
+		if (done <= 0)
+			return done;
+		lseek(afd->fd, -done, 1);
+	}
+
+	memcpy(bounce, buf, len);
+	done = write(afd->fd, bounce, iosize);
+	if (done <= 0)
+		return done;
+
+	/* leave the file position exactly 'len' bytes further on */
+	lseek(afd->fd, len - done, 1);
+	return len;
+}
+
+#ifndef MDASSEMBLE
+/*
+ * Print a human-readable description of the version-1 superblock held in
+ * st->sb to stdout.
+ *
+ * 'homehost' may be NULL; when the array name starts with "<homehost>:"
+ * the name line is annotated as local to that host.
+ */
+static void examine_super1(struct supertype *st, char *homehost)
+{
+ struct mdp_superblock_1 *sb = st->sb;
+ time_t atime;
+ unsigned int d;
+ int role;
+ int delta_extra = 0;
+ int i;
+ char *c;
+ int l = homehost ? strlen(homehost) : 0;
+ int layout;
+ unsigned long long sb_offset;
+ struct mdinfo info;
+
+ printf(" Magic : %08x\n", __le32_to_cpu(sb->magic));
+ printf(" Version : 1");
+ /* The minor version is deduced from where the superblock sits */
+ sb_offset = __le64_to_cpu(sb->super_offset);
+ if (sb_offset <= 4)
+ printf(".1\n");
+ else if (sb_offset <= 8)
+ printf(".2\n");
+ else
+ printf(".0\n");
+ printf(" Feature Map : 0x%x\n", __le32_to_cpu(sb->feature_map));
+ printf(" Array UUID : ");
+ for (i=0; i<16; i++) {
+ if ((i&3)==0 && i != 0) printf(":");
+ printf("%02x", sb->set_uuid[i]);
+ }
+ printf("\n");
+ printf(" Name : %.32s", sb->set_name);
+ if (l > 0 && l < 32 &&
+ sb->set_name[l] == ':' &&
+ strncmp(sb->set_name, homehost, l) == 0)
+ printf(" (local to host %s)", homehost);
+ printf("\n");
+ /* only the low 40 bits of ctime are passed to ctime() */
+ atime = __le64_to_cpu(sb->ctime) & 0xFFFFFFFFFFULL;
+ printf(" Creation Time : %.24s\n", ctime(&atime));
+ c=map_num(pers, __le32_to_cpu(sb->level));
+ printf(" Raid Level : %s\n", c?c:"-unknown-");
+ printf(" Raid Devices : %d\n", __le32_to_cpu(sb->raid_disks));
+ printf("\n");
+ printf(" Avail Dev Size : %llu%s\n",
+ (unsigned long long)__le64_to_cpu(sb->data_size),
+ human_size(__le64_to_cpu(sb->data_size)<<9));
+ if (__le32_to_cpu(sb->level) > 0) {
+ /* ddsks/ddsks_denom is the number of data-bearing devices */
+ int ddsks = 0, ddsks_denom = 1;
+ switch(__le32_to_cpu(sb->level)) {
+ case 1: ddsks=1;break;
+ case 4:
+ case 5: ddsks = __le32_to_cpu(sb->raid_disks)-1; break;
+ case 6: ddsks = __le32_to_cpu(sb->raid_disks)-2; break;
+ case 10:
+ /* NOTE(review): a layout with a zero in either low byte makes
+ * ddsks_denom 0 and the division below would fault - confirm
+ * whether such a layout can reach this point.
+ */
+ layout = __le32_to_cpu(sb->layout);
+ ddsks = __le32_to_cpu(sb->raid_disks);
+ ddsks_denom = (layout&255) * ((layout>>8)&255);
+ }
+ if (ddsks) {
+ long long asize = __le64_to_cpu(sb->size);
+ asize = (asize << 9) * ddsks / ddsks_denom;
+ printf(" Array Size : %llu%s\n",
+ asize >> 10, human_size(asize));
+ }
+ if (sb->size != sb->data_size)
+ printf(" Used Dev Size : %llu%s\n",
+ (unsigned long long)__le64_to_cpu(sb->size),
+ human_size(__le64_to_cpu(sb->size)<<9));
+ }
+ if (sb->data_offset)
+ printf(" Data Offset : %llu sectors\n",
+ (unsigned long long)__le64_to_cpu(sb->data_offset));
+ if (sb->new_offset &&
+ (__le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET)) {
+ /* new_offset is a signed delta applied to data_offset */
+ unsigned long long offset = __le64_to_cpu(sb->data_offset);
+ offset += (signed)(int32_t)__le32_to_cpu(sb->new_offset);
+ printf(" New Offset : %llu sectors\n", offset);
+ }
+ printf(" Super Offset : %llu sectors\n",
+ (unsigned long long)__le64_to_cpu(sb->super_offset));
+ if (__le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET)
+ printf("Recovery Offset : %llu sectors\n", (unsigned long long)__le64_to_cpu(sb->recovery_offset));
+
+ st->ss->getinfo_super(st, &info, NULL);
+ if (info.space_after != 1 &&
+ !(__le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
+ printf(" Unused Space : before=%llu sectors, after=%llu sectors\n",
+ info.space_before, info.space_after);
+
+ /* "clean" is only reported when resync_offset is all ones */
+ printf(" State : %s\n", (__le64_to_cpu(sb->resync_offset)+1)? "active":"clean");
+ printf(" Device UUID : ");
+ for (i=0; i<16; i++) {
+ if ((i&3)==0 && i != 0) printf(":");
+ printf("%02x", sb->device_uuid[i]);
+ }
+ printf("\n");
+ printf("\n");
+ if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
+ printf("Internal Bitmap : %ld sectors from superblock\n",
+ (long)(int32_t)__le32_to_cpu(sb->bitmap_offset));
+ }
+ if (sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)) {
+ printf(" Reshape pos'n : %llu%s\n", (unsigned long long)__le64_to_cpu(sb->reshape_position)/2,
+ human_size(__le64_to_cpu(sb->reshape_position)<<9));
+ if (__le32_to_cpu(sb->delta_disks)) {
+ printf(" Delta Devices : %d", __le32_to_cpu(sb->delta_disks));
+ printf(" (%d->%d)\n",
+ __le32_to_cpu(sb->raid_disks)-__le32_to_cpu(sb->delta_disks),
+ __le32_to_cpu(sb->raid_disks));
+ /* when shrinking, the Array State line below also covers
+ * the devices that are being removed
+ */
+ if ((int)__le32_to_cpu(sb->delta_disks) < 0)
+ delta_extra = -__le32_to_cpu(sb->delta_disks);
+ }
+ if (__le32_to_cpu(sb->new_level) != __le32_to_cpu(sb->level)) {
+ c = map_num(pers, __le32_to_cpu(sb->new_level));
+ printf(" New Level : %s\n", c?c:"-unknown-");
+ }
+ if (__le32_to_cpu(sb->new_layout) != __le32_to_cpu(sb->layout)) {
+ if (__le32_to_cpu(sb->level) == 5) {
+ c = map_num(r5layout, __le32_to_cpu(sb->new_layout));
+ printf(" New Layout : %s\n", c?c:"-unknown-");
+ }
+ if (__le32_to_cpu(sb->level) == 6) {
+ c = map_num(r6layout, __le32_to_cpu(sb->new_layout));
+ printf(" New Layout : %s\n", c?c:"-unknown-");
+ }
+ if (__le32_to_cpu(sb->level) == 10) {
+ printf(" New Layout :");
+ print_r10_layout(__le32_to_cpu(sb->new_layout));
+ printf("\n");
+ }
+ }
+ if (__le32_to_cpu(sb->new_chunk) != __le32_to_cpu(sb->chunksize))
+ printf(" New Chunksize : %dK\n", __le32_to_cpu(sb->new_chunk)/2);
+ printf("\n");
+ }
+ if (sb->devflags) {
+ printf(" Flags :");
+ if (sb->devflags & WriteMostly1)
+ printf(" write-mostly");
+ printf("\n");
+ }
+
+ atime = __le64_to_cpu(sb->utime) & 0xFFFFFFFFFFULL;
+ printf(" Update Time : %.24s\n", ctime(&atime));
+
+ if (sb->bblog_size && sb->bblog_offset) {
+ /* bblog_size is in 512-byte sectors and each entry takes 8 bytes */
+ printf(" Bad Block Log : %d entries available at offset %ld sectors",
+ __le16_to_cpu(sb->bblog_size)*512/8,
+ (long)(int32_t)__le32_to_cpu(sb->bblog_offset));
+ if (sb->feature_map &
+ __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
+ printf(" - bad blocks present.");
+ printf("\n");
+ }
+
+ if (calc_sb_1_csum(sb) == sb->sb_csum)
+ printf(" Checksum : %x - correct\n", __le32_to_cpu(sb->sb_csum));
+ else
+ printf(" Checksum : %x - expected %x\n", __le32_to_cpu(sb->sb_csum),
+ __le32_to_cpu(calc_sb_1_csum(sb)));
+ printf(" Events : %llu\n", (unsigned long long)__le64_to_cpu(sb->events));
+ printf("\n");
+ if (__le32_to_cpu(sb->level) == 5) {
+ c = map_num(r5layout, __le32_to_cpu(sb->layout));
+ printf(" Layout : %s\n", c?c:"-unknown-");
+ }
+ if (__le32_to_cpu(sb->level) == 6) {
+ c = map_num(r6layout, __le32_to_cpu(sb->layout));
+ printf(" Layout : %s\n", c?c:"-unknown-");
+ }
+ if (__le32_to_cpu(sb->level) == 10) {
+ int lo = __le32_to_cpu(sb->layout);
+ printf(" Layout :");
+ print_r10_layout(lo);
+ printf("\n");
+ }
+ /* chunksize is stored in sectors, so halving it gives kilobytes */
+ switch(__le32_to_cpu(sb->level)) {
+ case 0:
+ case 4:
+ case 5:
+ case 6:
+ case 10:
+ printf(" Chunk Size : %dK\n", __le32_to_cpu(sb->chunksize)/2);
+ break;
+ case -1:
+ printf(" Rounding : %dK\n", __le32_to_cpu(sb->chunksize)/2);
+ break;
+ default: break;
+ }
+ printf("\n");
+#if 0
+ /* This turns out to just be confusing */
+ printf(" Array Slot : %d (", __le32_to_cpu(sb->dev_number));
+ for (i= __le32_to_cpu(sb->max_dev); i> 0 ; i--)
+ if (__le16_to_cpu(sb->dev_roles[i-1]) != 0xffff)
+ break;
+ for (d=0; d < i; d++) {
+ int role = __le16_to_cpu(sb->dev_roles[d]);
+ if (d) printf(", ");
+ if (role == 0xffff) printf("empty");
+ else if(role == 0xfffe) printf("failed");
+ else printf("%d", role);
+ }
+ printf(")\n");
+#endif
+ printf(" Device Role : ");
+ /* a dev_number outside dev_roles[] is treated as a spare (0xFFFF) */
+ d = __le32_to_cpu(sb->dev_number);
+ if (d < __le32_to_cpu(sb->max_dev))
+ role = __le16_to_cpu(sb->dev_roles[d]);
+ else
+ role = 0xFFFF;
+ if (role >= 0xFFFE)
+ printf("spare\n");
+ else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_REPLACEMENT))
+ printf("Replacement device %d\n", role);
+ else
+ printf("Active device %d\n", role);
+
+ printf(" Array State : ");
+ /* One character per array slot, chosen by how many devices claim
+ * that role: 0 -> '.', 1 -> 'A', 2 -> 'R', anything else -> '?'
+ */
+ for (d=0; d<__le32_to_cpu(sb->raid_disks) + delta_extra; d++) {
+ int cnt = 0;
+ unsigned int i;
+ for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
+ unsigned int role = __le16_to_cpu(sb->dev_roles[i]);
+ if (role == d)
+ cnt++;
+ }
+ if (cnt == 2)
+ printf("R");
+ else if (cnt == 1)
+ printf("A");
+ else if (cnt == 0)
+ printf(".");
+ else
+ printf("?");
+ }
+#if 0
+ /* This is confusing too */
+ faulty = 0;
+ for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
+ int role = __le16_to_cpu(sb->dev_roles[i]);
+ if (role == 0xFFFE)
+ faulty++;
+ }
+ if (faulty) printf(" %d failed", faulty);
+#endif
+ printf(" ('A' == active, '.' == missing, 'R' == replacing)");
+ printf("\n");
+}
+
+/*
+ * Print a one-line "ARRAY ..." description of the version-1 superblock
+ * in st->sb.  With 'verbose' set the level and device count are
+ * included as well.
+ */
+static void brief_examine_super1(struct supertype *st, int verbose)
+{
+	struct mdp_superblock_1 *sb = st->sb;
+	char *level_name = map_num(pers, __le32_to_cpu(sb->level));
+	char *colon = strchr(sb->set_name, ':');
+	char *nm = NULL;
+	unsigned long long sb_offset;
+	int i;
+
+	/* The device name is the part of set_name after any "host:" prefix */
+	if (colon)
+		nm = colon + 1;
+	else if (sb->set_name[0])
+		nm = sb->set_name;
+
+	printf("ARRAY ");
+	if (nm) {
+		printf("/dev/md/");
+		print_escape(nm);
+		putchar(' ');
+	}
+	if (verbose && level_name)
+		printf(" level=%s", level_name);
+
+	/* the minor version follows from the superblock position */
+	sb_offset = __le64_to_cpu(sb->super_offset);
+	if (sb_offset > 8)
+		printf(" metadata=1.0 ");
+	else if (sb_offset > 4)
+		printf(" metadata=1.2 ");
+	else
+		printf(" metadata=1.1 ");
+
+	if (verbose)
+		printf("num-devices=%d ", __le32_to_cpu(sb->raid_disks));
+
+	printf("UUID=");
+	for (i = 0; i < 16; i++) {
+		if (i != 0 && (i & 3) == 0)
+			printf(":");
+		printf("%02x", sb->set_uuid[i]);
+	}
+	if (sb->set_name[0]) {
+		printf(" name=");
+		print_quoted(sb->set_name);
+	}
+	printf("\n");
+}
+
+/*
+ * Print the version-1 superblock in st->sb as KEY=value lines
+ * (MD_LEVEL, MD_DEVICES, MD_NAME, MD_ARRAY_SIZE, MD_UUID,
+ * MD_UPDATE_TIME, MD_DEV_UUID, MD_EVENTS).
+ */
+static void export_examine_super1(struct supertype *st)
+{
+	struct mdp_superblock_1 *sb = st->sb;
+	int i;
+	int len;
+	int layout;
+
+	printf("MD_LEVEL=%s\n", map_num(pers, __le32_to_cpu(sb->level)));
+	printf("MD_DEVICES=%d\n", __le32_to_cpu(sb->raid_disks));
+
+	/* The name ends at the first newline or NUL, or after 32 bytes */
+	len = 0;
+	while (len < 32 &&
+	       sb->set_name[len] != '\n' &&
+	       sb->set_name[len] != '\0')
+		len++;
+	if (len)
+		printf("MD_NAME=%.*s\n", len, sb->set_name);
+
+	if (__le32_to_cpu(sb->level) > 0) {
+		/* ddsks/ddsks_denom is the number of data-bearing devices */
+		int ddsks = 0, ddsks_denom = 1;
+
+		switch(__le32_to_cpu(sb->level)) {
+		case 1:
+			ddsks = 1;
+			break;
+		case 4:
+		case 5:
+			ddsks = __le32_to_cpu(sb->raid_disks)-1;
+			break;
+		case 6:
+			ddsks = __le32_to_cpu(sb->raid_disks)-2;
+			break;
+		case 10:
+			layout = __le32_to_cpu(sb->layout);
+			ddsks = __le32_to_cpu(sb->raid_disks);
+			ddsks_denom = (layout&255) * ((layout>>8)&255);
+		}
+		if (ddsks) {
+			long long asize = __le64_to_cpu(sb->size);
+			asize = (asize << 9) * ddsks / ddsks_denom;
+			printf("MD_ARRAY_SIZE=%s\n",human_size_brief(asize,JEDEC));
+		}
+	}
+
+	printf("MD_UUID=");
+	for (i = 0; i < 16; i++) {
+		if (i != 0 && (i & 3) == 0)
+			printf(":");
+		printf("%02x", sb->set_uuid[i]);
+	}
+	printf("\n");
+
+	printf("MD_UPDATE_TIME=%llu\n",
+	       __le64_to_cpu(sb->utime) & 0xFFFFFFFFFFULL);
+
+	printf("MD_DEV_UUID=");
+	for (i = 0; i < 16; i++) {
+		if (i != 0 && (i & 3) == 0)
+			printf(":");
+		printf("%02x", sb->device_uuid[i]);
+	}
+	printf("\n");
+
+	printf("MD_EVENTS=%llu\n",
+	       (unsigned long long)__le64_to_cpu(sb->events));
+}
+
+/*
+ * Copy version-1 metadata from descriptor 'from' to descriptor 'to'.
+ *
+ * Read superblock.  If it looks good, write it out.
+ * Then if a bitmap is present, copy that.
+ * And if a bad-block-list is present, copy that too.
+ *
+ * The superblock location is chosen from st->minor_version
+ * (0: near the end of the device, 1: sector 0, 2: sector 8).
+ *
+ * Returns 0 on success, 1 on any failure.
+ */
+static int copy_metadata1(struct supertype *st, int from, int to)
+{
+	void *buf;
+	unsigned long long dsize, sb_offset;
+	const int bufsize = 4*1024;
+	struct mdp_superblock_1 super, *sb;
+
+	if (posix_memalign(&buf, 4096, bufsize) != 0)
+		return 1;
+
+	if (!get_dev_size(from, NULL, &dsize))
+		goto err;
+
+	dsize >>= 9;	/* bytes -> sectors */
+	if (dsize < 24)
+		goto err;
+	switch(st->minor_version) {
+	case 0:
+		sb_offset = dsize;
+		sb_offset -= 8*2;
+		sb_offset &= ~(4*2-1);
+		break;
+	case 1:
+		sb_offset = 0;
+		break;
+	case 2:
+		sb_offset = 4*2;
+		break;
+	default:
+		goto err;
+	}
+
+	if (lseek64(from, sb_offset << 9, 0) < 0LL)
+		goto err;
+	if (read(from, buf, bufsize) != bufsize)
+		goto err;
+
+	sb = buf;
+	super = *sb; // save most of sb for when we reuse buf
+
+	if (__le32_to_cpu(super.magic) != MD_SB_MAGIC ||
+	    __le32_to_cpu(super.major_version) != 1 ||
+	    __le64_to_cpu(super.super_offset) != sb_offset ||
+	    calc_sb_1_csum(sb) != super.sb_csum)
+		goto err;
+
+	if (lseek64(to, sb_offset << 9, 0) < 0LL)
+		goto err;
+	if (write(to, buf, bufsize) != bufsize)
+		goto err;
+
+	if (super.feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
+		unsigned long long bitmap_offset = sb_offset;
+		int bytes = 4096; // just an estimate.
+		int written = 0;
+		struct align_fd afrom, ato;
+
+		init_afd(&afrom, from);
+		init_afd(&ato, to);
+
+		/* bitmap_offset is a signed sector count relative to the superblock */
+		bitmap_offset += (int32_t)__le32_to_cpu(super.bitmap_offset);
+
+		if (lseek64(from, bitmap_offset<<9, 0) < 0)
+			goto err;
+		if (lseek64(to, bitmap_offset<<9, 0) < 0)
+			goto err;
+
+		for (written = 0; written < bytes ; ) {
+			int n = bytes - written;
+			if (n > 4096)
+				n = 4096;
+			if (aread(&afrom, buf, n) != n)
+				goto err;
+			if (written == 0) {
+				/* have the header, can calculate
+				 * correct bitmap bytes */
+				bitmap_super_t *bms;
+				unsigned int chunk_sectors;
+				int bits;
+				bms = (void*)buf;
+				chunk_sectors = __le32_to_cpu(bms->chunksize)>>9;
+				/* a chunk size below one sector cannot be
+				 * valid and would divide by zero below
+				 */
+				if (chunk_sectors == 0)
+					goto err;
+				bits = __le64_to_cpu(bms->sync_size) / chunk_sectors;
+				bytes = (bits+7) >> 3;
+				bytes += sizeof(bitmap_super_t);
+				bytes = ROUND_UP(bytes, 512);
+				if (n > bytes)
+					n = bytes;
+			}
+			if (awrite(&ato, buf, n) != n)
+				goto err;
+			written += n;
+		}
+	}
+
+	/* bblog_size is a 16-bit little-endian field, so it must be
+	 * converted with the 16-bit helper to be right on big-endian hosts.
+	 */
+	if (super.bblog_size != 0 &&
+	    __le16_to_cpu(super.bblog_size) <= 100 &&
+	    super.bblog_offset != 0 &&
+	    (super.feature_map & __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))) {
+		/* There is a bad block log */
+		unsigned long long bb_offset = sb_offset;
+		int bytes = __le16_to_cpu(super.bblog_size) * 512;
+		int written = 0;
+		struct align_fd afrom, ato;
+
+		init_afd(&afrom, from);
+		init_afd(&ato, to);
+
+		/* bblog_offset is a signed sector count relative to the superblock */
+		bb_offset += (int32_t)__le32_to_cpu(super.bblog_offset);
+
+		if (lseek64(from, bb_offset<<9, 0) < 0)
+			goto err;
+		if (lseek64(to, bb_offset<<9, 0) < 0)
+			goto err;
+
+		for (written = 0; written < bytes ; ) {
+			int n = bytes - written;
+			if (n > 4096)
+				n = 4096;
+			if (aread(&afrom, buf, n) != n)
+				goto err;
+
+			if (awrite(&ato, buf, n) != n)
+				goto err;
+			written += n;
+		}
+	}
+
+	free(buf);
+	return 0;
+
+err:
+	free(buf);
+	return 1;
+}
+
+/*
+ * Print the name, UUID and event count from the version-1 superblock in
+ * st->sb.  When the name starts with "<homehost>:" it is marked as local
+ * to that host; 'homehost' may be NULL.
+ */
+static void detail_super1(struct supertype *st, char *homehost)
+{
+	struct mdp_superblock_1 *sb = st->sb;
+	int hostlen = homehost ? strlen(homehost) : 0;
+	int is_local;
+	int i;
+
+	printf(" Name : %.32s", sb->set_name);
+
+	is_local = hostlen > 0 && hostlen < 32 &&
+		   sb->set_name[hostlen] == ':' &&
+		   strncmp(sb->set_name, homehost, hostlen) == 0;
+	if (is_local)
+		printf(" (local to host %s)", homehost);
+
+	printf("\n UUID : ");
+	for (i = 0; i < 16; i++) {
+		if (i != 0 && (i & 3) == 0)
+			printf(":");
+		printf("%02x", sb->set_uuid[i]);
+	}
+	printf("\n Events : %llu\n\n", (unsigned long long)__le64_to_cpu(sb->events));
+}
+
+/*
+ * Print " name=<quoted name>" (only when a name is set) followed by
+ * " UUID=<uuid>" for the version-1 superblock in st->sb, with no
+ * trailing newline.
+ */
+static void brief_detail_super1(struct supertype *st)
+{
+	struct mdp_superblock_1 *sb = st->sb;
+	int idx;
+
+	if (sb->set_name[0] != '\0') {
+		printf(" name=");
+		print_quoted(sb->set_name);
+	}
+
+	printf(" UUID=");
+	for (idx = 0; idx < 16; idx++) {
+		if (idx != 0 && (idx & 3) == 0)
+			printf(":");
+		printf("%02x", sb->set_uuid[idx]);
+	}
+}
+
+/*
+ * Print "MD_NAME=<name>" for the version-1 superblock in st->sb.  The
+ * name stops at the first newline or NUL (32 bytes at most); nothing is
+ * printed when it is empty.
+ */
+static void export_detail_super1(struct supertype *st)
+{
+	struct mdp_superblock_1 *sb = st->sb;
+	int len = 0;
+
+	while (len < 32 &&
+	       sb->set_name[len] != '\n' &&
+	       sb->set_name[len] != '\0')
+		len++;
+
+	if (len)
+		printf("MD_NAME=%.*s\n", len, sb->set_name);
+}
+
+/*
+ * Print the bad-block list recorded for the device whose version-1
+ * superblock is in st->sb, reading the list itself from 'fd'.
+ * 'devname' is only used in the messages.
+ *
+ * Returns 0 when the list was printed or there is nothing to print
+ * (no list configured, list empty, or no memory for the buffer), and
+ * 1 when the list could not be read from the device.
+ */
+static int examine_badblocks_super1(struct supertype *st, int fd, char *devname)
+{
+	struct mdp_superblock_1 *sb = st->sb;
+	unsigned long long offset;
+	int size;
+	__u64 *bbl, *bbp;
+	int i;
+	int rv = 1;
+
+	/* bblog_size is a 16-bit little-endian field, so it must be
+	 * converted with the 16-bit helper to be right on big-endian hosts.
+	 */
+	if (!sb->bblog_size || __le16_to_cpu(sb->bblog_size) > 100
+	    || !sb->bblog_offset){
+		printf("No bad-blocks list configured on %s\n", devname);
+		return 0;
+	}
+	if ((sb->feature_map & __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
+	    == 0) {
+		printf("Bad-blocks list is empty in %s\n", devname);
+		return 0;
+	}
+
+	size = __le16_to_cpu(sb->bblog_size)* 512;
+	if (posix_memalign((void**)&bbl, 4096, size) != 0) {
+		pr_err("%s could not allocate badblocks list\n", __func__);
+		return 0;
+	}
+	/* bblog_offset is a signed sector count relative to the superblock */
+	offset = __le64_to_cpu(sb->super_offset) +
+		(int)__le32_to_cpu(sb->bblog_offset);
+	offset <<= 9;
+	if (lseek64(fd, offset, 0) < 0) {
+		pr_err("Cannot seek to bad-blocks list\n");
+		goto out;
+	}
+	if (read(fd, bbl, size) != size) {
+		pr_err("Cannot read bad-blocks list\n");
+		goto out;
+	}
+	/* 64bits per entry. 10 bits is block-count, 54 bits is block
+	 * offset. Blocks are sectors unless bblog->shift makes them bigger
+	 */
+	bbp = (__u64*)bbl;
+	printf("Bad-blocks on %s:\n", devname);
+	for (i = 0; i < size/8; i++, bbp++) {
+		__u64 bb = __le64_to_cpu(*bbp);
+		int count = bb & 0x3ff;
+		unsigned long long sector = bb >> 10;
+
+		/* an all-ones entry marks the end of the list */
+		if (bb + 1 == 0)
+			break;
+
+		sector <<= sb->bblog_shift;
+		count <<= sb->bblog_shift;
+
+		printf("%20llu for %d sectors\n", sector, count);
+	}
+	rv = 0;
+out:
+	/* the list buffer is only needed while printing */
+	free(bbl);
+	return rv;
+}
+
+#endif
+
+/*
+ * Return non-zero when the array name in st->sb starts with
+ * "<homehost>:", i.e. the array is recorded as belonging to homehost.
+ * A NULL or empty homehost, or one of 32 or more characters, never
+ * matches.
+ */
+static int match_home1(struct supertype *st, char *homehost)
+{
+	struct mdp_superblock_1 *super = st->sb;
+	int hostlen;
+
+	if (!homehost)
+		return 0;
+	hostlen = strlen(homehost);
+	if (hostlen <= 0 || hostlen >= 32)
+		return 0;
+	if (super->set_name[hostlen] != ':')
+		return 0;
+	return strncmp(super->set_name, homehost, hostlen) == 0;
+}
+
+/*
+ * Copy the 16 raw bytes of the array UUID from the superblock in
+ * st->sb into the caller's uuid[4] array, byte for byte.
+ */
+static void uuid_from_super1(struct supertype *st, int uuid[4])
+{
+	struct mdp_superblock_1 *sb = st->sb;
+
+	memcpy(uuid, sb->set_uuid, 16);
+}
+
+/*
+ * Fill *info from the v1.x superblock held in st->sb.
+ *
+ * st:   supertype whose ->sb buffer holds the superblock, followed by
+ *       the bitmap superblock at MAX_SB_SIZE and the misc_dev_info
+ *       record at MAX_SB_SIZE+BM_SUPER_SIZE.
+ * info: output.  It is zeroed first, but info->array.raid_disks is
+ *       sampled before that and taken as the number of entries in map.
+ * map:  optional array of that many bytes.  Each entry is cleared,
+ *       then set to 1 for every role below raid_disks found in
+ *       dev_roles.
+ *
+ * Besides the plain field copies this works out how much free space
+ * lies before and after the data area (space_before / space_after),
+ * the state and role of this device, and any reshape in progress.
+ */
+static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
+{
+	struct mdp_superblock_1 *sb = st->sb;
+	struct bitmap_super_s *bsb = (void*)(((char*)sb)+MAX_SB_SIZE);
+	struct misc_dev_info *misc = (void*)(((char*)sb)+MAX_SB_SIZE+BM_SUPER_SIZE);
+	int working = 0;
+	unsigned int i;
+	unsigned int role;
+	/* must be read before info is zeroed below */
+	unsigned int map_disks = info->array.raid_disks;
+	unsigned long long super_offset;
+	unsigned long long data_size;
+
+	memset(info, 0, sizeof(*info));
+	info->array.major_version = 1;
+	info->array.minor_version = st->minor_version;
+	info->array.patch_version = 0;
+	info->array.raid_disks = __le32_to_cpu(sb->raid_disks);
+	info->array.level = __le32_to_cpu(sb->level);
+	info->array.layout = __le32_to_cpu(sb->layout);
+	info->array.md_minor = -1;
+	info->array.ctime = __le64_to_cpu(sb->ctime);
+	info->array.utime = __le64_to_cpu(sb->utime);
+	info->array.chunk_size = __le32_to_cpu(sb->chunksize)*512;
+	info->array.state =
+		(__le64_to_cpu(sb->resync_offset) == MaxSector)
+		? 1 : 0;
+
+	info->data_offset = __le64_to_cpu(sb->data_offset);
+	info->component_size = __le64_to_cpu(sb->size);
+	if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET))
+		info->bitmap_offset = (int32_t)__le32_to_cpu(sb->bitmap_offset);
+
+	info->disk.major = 0;
+	info->disk.minor = 0;
+	info->disk.number = __le32_to_cpu(sb->dev_number);
+	/* a device number outside the role table is reported as faulty */
+	if (__le32_to_cpu(sb->dev_number) >= __le32_to_cpu(sb->max_dev) ||
+	    __le32_to_cpu(sb->dev_number) >= MAX_DEVS)
+		role = 0xfffe;
+	else
+		role = __le16_to_cpu(sb->dev_roles[__le32_to_cpu(sb->dev_number)]);
+
+	super_offset = __le64_to_cpu(sb->super_offset);
+	if (info->array.level <= 0)
+		data_size = __le64_to_cpu(sb->data_size);
+	else
+		data_size = __le64_to_cpu(sb->size);
+	if (info->data_offset < super_offset) {
+		/* data precedes the superblock */
+		unsigned long long end;
+		info->space_before = info->data_offset;
+		end = super_offset;
+
+		if (sb->bblog_offset && sb->bblog_size) {
+			unsigned long long bboffset = super_offset;
+			bboffset += (int32_t)__le32_to_cpu(sb->bblog_offset);
+			if (bboffset < end)
+				end = bboffset;
+		}
+
+		if (super_offset + info->bitmap_offset < end)
+			end = super_offset + info->bitmap_offset;
+
+		if (info->data_offset + data_size < end)
+			info->space_after = end - data_size - info->data_offset;
+		else
+			info->space_after = 0;
+	} else {
+		/* data follows the superblock */
+		unsigned long long earliest;
+		earliest = super_offset + (32+4)*2; /* match kernel */
+		if (info->bitmap_offset > 0) {
+			unsigned long long bmend = info->bitmap_offset;
+			unsigned long long size = __le64_to_cpu(bsb->sync_size);
+			size /= __le32_to_cpu(bsb->chunksize) >> 9;
+			size = (size + 7) >> 3;
+			size += sizeof(bitmap_super_t);
+			size = ROUND_UP(size, 4096);
+			size /= 512;
+			bmend += size;
+			/* a bitmap reaching past the default reservation
+			 * pushes back the earliest usable sector
+			 */
+			if (bmend > earliest)
+				earliest = bmend;
+		}
+		if (sb->bblog_offset && sb->bblog_size) {
+			unsigned long long bbend = super_offset;
+			bbend += (int32_t)__le32_to_cpu(sb->bblog_offset);
+			bbend += __le32_to_cpu(sb->bblog_size);
+			if (bbend > earliest)
+				earliest = bbend;
+		}
+		if (earliest < info->data_offset)
+			info->space_before = info->data_offset - earliest;
+		else
+			info->space_before = 0;
+		info->space_after = misc->device_size - data_size - info->data_offset;
+	}
+	if (info->space_before == 0 && info->space_after == 0) {
+		/* It will look like we don't support data_offset changes,
+		 * but we do - it's just that there is no room.
+		 * A change that reduced the number of devices should
+		 * still be allowed, so set the otherwise useless value of '1'
+		 */
+		info->space_after = 1;
+	}
+
+	info->disk.raid_disk = -1;
+	switch(role) {
+	case 0xFFFF:
+		info->disk.state = 0; /* spare: not active, not sync, not faulty */
+		break;
+	case 0xFFFE:
+		info->disk.state = 1; /* faulty */
+		break;
+	default:
+		info->disk.state = 6; /* active and in sync */
+		info->disk.raid_disk = role;
+	}
+	if (sb->devflags & WriteMostly1)
+		info->disk.state |= (1 << MD_DISK_WRITEMOSTLY);
+	info->events = __le64_to_cpu(sb->events);
+	sprintf(info->text_version, "1.%d", st->minor_version);
+	info->safe_mode_delay = 200;
+
+	memcpy(info->uuid, sb->set_uuid, 16);
+
+	strncpy(info->name, sb->set_name, 32);
+	info->name[32] = 0;
+
+	if ((__le32_to_cpu(sb->feature_map)&MD_FEATURE_REPLACEMENT)) {
+		info->disk.state &= ~(1 << MD_DISK_SYNC);
+		info->disk.state |= 1 << MD_DISK_REPLACEMENT;
+	}
+
+	/* recovery_offset is written as a 64-bit little-endian field
+	 * elsewhere in this file, so decode all 64 bits here
+	 */
+	if (sb->feature_map & __cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET))
+		info->recovery_start = __le64_to_cpu(sb->recovery_offset);
+	else
+		info->recovery_start = MaxSector;
+
+	if (sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)) {
+		info->reshape_active = 1;
+		if ((sb->feature_map & __cpu_to_le32(MD_FEATURE_NEW_OFFSET)) &&
+		    sb->new_offset != 0)
+			info->reshape_active |= RESHAPE_NO_BACKUP;
+		info->reshape_progress = __le64_to_cpu(sb->reshape_position);
+		info->new_level = __le32_to_cpu(sb->new_level);
+		info->delta_disks = __le32_to_cpu(sb->delta_disks);
+		info->new_layout = __le32_to_cpu(sb->new_layout);
+		info->new_chunk = __le32_to_cpu(sb->new_chunk)<<9;
+		/* while shrinking, report the larger, pre-reshape disk count */
+		if (info->delta_disks < 0)
+			info->array.raid_disks -= info->delta_disks;
+	} else
+		info->reshape_active = 0;
+
+	info->recovery_blocked = info->reshape_active;
+
+	if (map)
+		for (i=0; i<map_disks; i++)
+			map[i] = 0;
+	for (i = 0; i < __le32_to_cpu(sb->max_dev); i++) {
+		role = __le16_to_cpu(sb->dev_roles[i]);
+		if (/*role == 0xFFFF || */role < (unsigned) info->array.raid_disks) {
+			working++;
+			if (map && role < map_disks)
+				map[role] = 1;
+		}
+	}
+
+	info->array.working_disks = working;
+}
+
+/*
+ * Return a newly allocated mdinfo describing the array in st->sb.
+ * Any subarray request yields NULL.  The caller owns the returned
+ * structure.
+ */
+static struct mdinfo *container_content1(struct supertype *st, char *subarray)
+{
+	struct mdinfo *content = NULL;
+
+	if (!subarray) {
+		content = xmalloc(sizeof(*content));
+		getinfo_super1(st, content, NULL);
+	}
+	return content;
+}
+
+/*
+ * Apply the named update to the v1.x superblock in st->sb and
+ * recompute its checksum.
+ *
+ * st:       supertype holding the superblock (and trailing bitmap /
+ *           misc areas) to modify
+ * info:     source of new values (events, name, uuid, disk number,
+ *           reshape progress, ...); "homehost" and "name" may also
+ *           write to info->name, "linear-grow-new" to info->disk.number
+ * update:   one of "homehost", "force-one", "force-array", "assemble",
+ *           "linear-grow-new", "linear-grow-update", "resync", "uuid",
+ *           "no-bitmap", "bbl", "no-bbl", "name", "devicesize",
+ *           "revert-reshape", "_reshape_progress", "writemostly",
+ *           "readwrite"
+ * devname:  device name, used for messages and, for "linear-grow-new",
+ *           opened to find the device size
+ * verbose, uuid_set: not used by this function
+ * homehost: host name prefixed to the array name where it fits
+ *
+ * Returns 1 when "force-one", "force-array" or "assemble" changed
+ * something, -2 when "revert-reshape" cannot be done, -1 for an
+ * unrecognised update, and 0 otherwise.
+ */
+static int update_super1(struct supertype *st, struct mdinfo *info,
+			 char *update,
+			 char *devname, int verbose,
+			 int uuid_set, char *homehost)
+{
+	/* NOTE: for 'assemble' and 'force' we need to return non-zero
+	 * if any change was made. For others, the return value is
+	 * ignored.
+	 */
+	int rv = 0;
+	struct mdp_superblock_1 *sb = st->sb;
+
+	if (strcmp(update, "homehost") == 0 &&
+	    homehost) {
+		/* Note that 'homehost' is special as it is really
+		 * a "name" update.
+		 */
+		char *c;
+		update = "name";
+		c = strchr(sb->set_name, ':');
+		if (c)
+			strncpy(info->name, c+1, 31 - (c-sb->set_name));
+		else
+			strncpy(info->name, sb->set_name, 32);
+		info->name[32] = 0;
+	}
+
+	if (strcmp(update, "force-one")==0) {
+		/* Not enough devices for a working array,
+		 * so bring this one up-to-date
+		 */
+		if (sb->events != __cpu_to_le64(info->events))
+			rv = 1;
+		sb->events = __cpu_to_le64(info->events);
+	} else if (strcmp(update, "force-array")==0) {
+		/* Degraded array and 'force' requests to
+		 * maybe need to mark it 'clean'.
+		 */
+		switch(__le32_to_cpu(sb->level)) {
+		case 5: case 4: case 6:
+			/* need to force clean */
+			if (sb->resync_offset != MaxSector)
+				rv = 1;
+			sb->resync_offset = MaxSector;
+		}
+	} else if (strcmp(update, "assemble")==0) {
+		int d = info->disk.number;
+		int want;
+		if (info->disk.state & (1<<MD_DISK_ACTIVE))
+			want = info->disk.raid_disk;
+		else
+			want = 0xFFFF;
+		if (sb->dev_roles[d] != __cpu_to_le16(want)) {
+			sb->dev_roles[d] = __cpu_to_le16(want);
+			rv = 1;
+		}
+		/* growing (or same size): never move the position forward */
+		if (info->reshape_active &&
+		    sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE) &&
+		    info->delta_disks >= 0 &&
+		    info->reshape_progress < __le64_to_cpu(sb->reshape_position)) {
+			sb->reshape_position = __cpu_to_le64(info->reshape_progress);
+			rv = 1;
+		}
+		/* shrinking: never move the position backward */
+		if (info->reshape_active &&
+		    sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE) &&
+		    info->delta_disks < 0 &&
+		    info->reshape_progress > __le64_to_cpu(sb->reshape_position)) {
+			sb->reshape_position = __cpu_to_le64(info->reshape_progress);
+			rv = 1;
+		}
+	} else if (strcmp(update, "linear-grow-new") == 0) {
+		unsigned int i;
+		int rfd, fd;
+		unsigned int max = __le32_to_cpu(sb->max_dev);
+
+		/* reuse the first spare/faulty slot, else append one */
+		for (i=0 ; i < max ; i++)
+			if (__le16_to_cpu(sb->dev_roles[i]) >= 0xfffe)
+				break;
+		sb->dev_number = __cpu_to_le32(i);
+		info->disk.number = i;
+		/* only extend the role table when no existing slot was free */
+		if (i >= max)
+			sb->max_dev = __cpu_to_le32(max+1);
+
+		if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
+		    read(rfd, sb->device_uuid, 16) != 16) {
+			__u32 r[4] = {random(), random(), random(), random()};
+			memcpy(sb->device_uuid, r, 16);
+		}
+		if (rfd >= 0)
+			close(rfd);
+
+		sb->dev_roles[i] =
+			__cpu_to_le16(info->disk.raid_disk);
+
+		fd = open(devname, O_RDONLY);
+		if (fd >= 0) {
+			unsigned long long ds;
+			get_dev_size(fd, devname, &ds);
+			close(fd);
+			ds >>= 9;
+			if (__le64_to_cpu(sb->super_offset) <
+			    __le64_to_cpu(sb->data_offset)) {
+				sb->data_size = __cpu_to_le64(
+					ds - __le64_to_cpu(sb->data_offset));
+			} else {
+				ds -= 8*2;
+				ds &= ~(unsigned long long)(4*2-1);
+				sb->super_offset = __cpu_to_le64(ds);
+				sb->data_size = __cpu_to_le64(
+					ds - __le64_to_cpu(sb->data_offset));
+			}
+		}
+	} else if (strcmp(update, "linear-grow-update") == 0) {
+		sb->raid_disks = __cpu_to_le32(info->array.raid_disks);
+		sb->dev_roles[info->disk.number] =
+			__cpu_to_le16(info->disk.raid_disk);
+	} else if (strcmp(update, "resync") == 0) {
+		/* make sure resync happens */
+		sb->resync_offset = 0ULL;
+	} else if (strcmp(update, "uuid") == 0) {
+		copy_uuid(sb->set_uuid, info->uuid, super1.swapuuid);
+
+		if (__le32_to_cpu(sb->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
+			struct bitmap_super_s *bm;
+			bm = (struct bitmap_super_s*)(st->sb+MAX_SB_SIZE);
+			memcpy(bm->uuid, sb->set_uuid, 16);
+		}
+	} else if (strcmp(update, "no-bitmap") == 0) {
+		sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
+	} else if (strcmp(update, "bbl") == 0) {
+		/* only possible if there is room after the bitmap, or if
+		 * there is no bitmap
+		 */
+		unsigned long long sb_offset = __le64_to_cpu(sb->super_offset);
+		unsigned long long data_offset = __le64_to_cpu(sb->data_offset);
+		long bitmap_offset = (long)(int32_t)__le32_to_cpu(sb->bitmap_offset);
+		long bm_sectors = 0;
+		long space;
+
+#ifndef MDASSEMBLE
+		if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
+			struct bitmap_super_s *bsb;
+			bsb = (struct bitmap_super_s *)(((char*)sb)+MAX_SB_SIZE);
+			bm_sectors = bitmap_sectors(bsb);
+		}
+#endif
+		if (sb_offset < data_offset) {
+			/* 1.1 or 1.2. Put bbl after bitmap leaving at least 32K
+			 */
+			long bb_offset;
+			bb_offset = sb_offset + 8;
+			if (bm_sectors && bitmap_offset > 0)
+				bb_offset = bitmap_offset + bm_sectors;
+			while (bb_offset < (long)sb_offset + 8 + 32*2
+			       && bb_offset + 8+8 <= (long)data_offset)
+				/* too close to bitmap, and room to grow */
+				bb_offset += 8;
+			if (bb_offset + 8 <= (long)data_offset) {
+				sb->bblog_size = __cpu_to_le16(8);
+				sb->bblog_offset = __cpu_to_le32(bb_offset);
+			}
+		} else {
+			/* 1.0 - Put bbl just before super block */
+			if (bm_sectors && bitmap_offset < 0)
+				space = -bitmap_offset - bm_sectors;
+			else
+				space = sb_offset - data_offset -
+					__le64_to_cpu(sb->data_size);
+			if (space >= 8) {
+				sb->bblog_size = __cpu_to_le16(8);
+				sb->bblog_offset = __cpu_to_le32((unsigned)-8);
+			}
+		}
+	} else if (strcmp(update, "no-bbl") == 0) {
+		if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
+			pr_err("Cannot remove active bbl from %s\n",devname);
+		else {
+			sb->bblog_size = 0;
+			sb->bblog_shift = 0;
+			sb->bblog_offset = 0;
+		}
+	} else if (strcmp(update, "name") == 0) {
+		if (info->name[0] == 0)
+			sprintf(info->name, "%d", info->array.md_minor);
+		memset(sb->set_name, 0, sizeof(sb->set_name));
+		if (homehost &&
+		    strchr(info->name, ':') == NULL &&
+		    strlen(homehost)+1+strlen(info->name) < 32) {
+			strcpy(sb->set_name, homehost);
+			strcat(sb->set_name, ":");
+			strcat(sb->set_name, info->name);
+		} else
+			/* info->name may hold 32 characters plus a NUL,
+			 * one byte more than set_name; copy at most
+			 * sizeof-1 bytes so the zeroed field is not
+			 * overrun and stays NUL-terminated.
+			 */
+			strncpy(sb->set_name, info->name,
+				sizeof(sb->set_name) - 1);
+	} else if (strcmp(update, "devicesize") == 0 &&
+		   __le64_to_cpu(sb->super_offset) <
+		   __le64_to_cpu(sb->data_offset)) {
+		/* set data_size to device size less data_offset */
+		struct misc_dev_info *misc = (struct misc_dev_info*)
+			(st->sb + MAX_SB_SIZE + BM_SUPER_SIZE);
+		printf("Size was %llu\n", (unsigned long long)
+		       __le64_to_cpu(sb->data_size));
+		sb->data_size = __cpu_to_le64(
+			misc->device_size - __le64_to_cpu(sb->data_offset));
+		printf("Size is %llu\n", (unsigned long long)
+		       __le64_to_cpu(sb->data_size));
+	} else if (strcmp(update, "revert-reshape") == 0) {
+		rv = -2;
+		if (!(sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)))
+			pr_err("No active reshape to revert on %s\n",
+			       devname);
+		else {
+			__u32 temp;
+			unsigned long long reshape_sectors;
+			long reshape_chunk;
+			rv = 0;
+			/* reshape_position is a little messy.
+			 * Its value must be a multiple of the larger
+			 * chunk size, and of the "after" data disks.
+			 * So when reverting we need to change it to
+			 * be a multiple of the new "after" data disks,
+			 * which is the old "before".
+			 * If it isn't already a multiple of 'before',
+			 * the only thing we could do would be
+			 * copy some block around on the disks, which
+			 * is easy to get wrong.
+			 * So we reject a revert-reshape unless the
+			 * alignment is good.
+			 */
+			if (__le32_to_cpu(sb->level) >= 4 &&
+			    __le32_to_cpu(sb->level) <= 6) {
+				reshape_sectors = __le64_to_cpu(sb->reshape_position);
+				reshape_chunk = __le32_to_cpu(sb->new_chunk);
+				reshape_chunk *= __le32_to_cpu(sb->raid_disks) - __le32_to_cpu(sb->delta_disks) -
+					(__le32_to_cpu(sb->level)==6 ? 2 : 1);
+				if (reshape_sectors % reshape_chunk) {
+					pr_err("Reshape position is not suitably aligned.\n");
+					pr_err("Try normal assembly and stop again\n");
+					return -2;
+				}
+			}
+			sb->raid_disks = __cpu_to_le32(__le32_to_cpu(sb->raid_disks) -
+						       __le32_to_cpu(sb->delta_disks));
+			if (sb->delta_disks == 0)
+				sb->feature_map ^= __cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
+			else
+				sb->delta_disks = __cpu_to_le32(-__le32_to_cpu(sb->delta_disks));
+
+			/* swap current and target layout / chunk size */
+			temp = sb->new_layout;
+			sb->new_layout = sb->layout;
+			sb->layout = temp;
+
+			temp = sb->new_chunk;
+			sb->new_chunk = sb->chunksize;
+			sb->chunksize = temp;
+
+			if (sb->feature_map & __cpu_to_le32(MD_FEATURE_NEW_OFFSET)) {
+				long offset_delta = (int32_t)__le32_to_cpu(sb->new_offset);
+				sb->data_offset = __cpu_to_le64(__le64_to_cpu(sb->data_offset) + offset_delta);
+				sb->new_offset = __cpu_to_le32(-offset_delta);
+				sb->data_size = __cpu_to_le64(__le64_to_cpu(sb->data_size) - offset_delta);
+			}
+		}
+	} else if (strcmp(update, "_reshape_progress")==0)
+		sb->reshape_position = __cpu_to_le64(info->reshape_progress);
+	else if (strcmp(update, "writemostly")==0)
+		sb->devflags |= WriteMostly1;
+	else if (strcmp(update, "readwrite")==0)
+		sb->devflags &= ~WriteMostly1;
+	else
+		rv = -1;
+
+	sb->sb_csum = calc_sb_1_csum(sb);
+	return rv;
+}
+
+/*
+ * Allocate a zeroed v1.x superblock buffer, attach it to st->sb and,
+ * when info is given, fill it in for a new array.
+ *
+ * st:          supertype to receive the buffer
+ * info:        array description; NULL means only a zeroed superblock
+ *              is wanted
+ * size:        component size; stored doubled in sb->size
+ * name:        array name; NULL or empty selects the md minor number
+ * homehost:    when set, and the name has no ':' and the result fits,
+ *              the name is stored as "<homehost>:<name>"
+ * uuid:        array UUID, or NULL to generate a random one
+ * data_offset: stored in sb->data_offset
+ *
+ * Returns 1 when a superblock was initialised.  Returns 0 on
+ * allocation failure, when info is NULL, or when more devices are
+ * requested than MAX_DEVS.
+ */
+static int init_super1(struct supertype *st, mdu_array_info_t *info,
+		       unsigned long long size, char *name, char *homehost,
+		       int *uuid, unsigned long long data_offset)
+{
+	struct mdp_superblock_1 *sb;
+	int spares;
+	int rfd;
+	char defname[12]; /* room for any int, sign and NUL included */
+	int sbsize;
+
+	if (posix_memalign((void**)&sb, 4096, SUPER1_SIZE) != 0) {
+		pr_err("%s could not allocate superblock\n", __func__);
+		return 0;
+	}
+	memset(sb, 0, SUPER1_SIZE);
+
+	st->sb = sb;
+	if (info == NULL) {
+		/* zeroing superblock */
+		return 0;
+	}
+
+	spares = info->working_disks - info->active_disks;
+	if (info->raid_disks + spares > MAX_DEVS) {
+		pr_err("too many devices requested: %d+%d > %d\n",
+		       info->raid_disks , spares, MAX_DEVS);
+		return 0;
+	}
+
+	sb->magic = __cpu_to_le32(MD_SB_MAGIC);
+	sb->major_version = __cpu_to_le32(1);
+	sb->feature_map = 0;
+	sb->pad0 = 0;
+
+	if (uuid)
+		copy_uuid(sb->set_uuid, uuid, super1.swapuuid);
+	else {
+		/* prefer /dev/urandom, fall back to random() */
+		if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
+		    read(rfd, sb->set_uuid, 16) != 16) {
+			__u32 r[4] = {random(), random(), random(), random()};
+			memcpy(sb->set_uuid, r, 16);
+		}
+		if (rfd >= 0) close(rfd);
+	}
+
+	if (name == NULL || *name == 0) {
+		snprintf(defname, sizeof(defname), "%d", info->md_minor);
+		name = defname;
+	}
+	if (homehost &&
+	    strchr(name, ':')== NULL &&
+	    strlen(homehost)+1+strlen(name) < 32) {
+		strcpy(sb->set_name, homehost);
+		strcat(sb->set_name, ":");
+		strcat(sb->set_name, name);
+	} else
+		/* name has no length limit here but set_name is a fixed
+		 * field; the buffer is already zeroed, so copying at
+		 * most sizeof-1 bytes keeps it in bounds and
+		 * NUL-terminated.
+		 */
+		strncpy(sb->set_name, name, sizeof(sb->set_name) - 1);
+
+	sb->ctime = __cpu_to_le64((unsigned long long)time(0));
+	sb->level = __cpu_to_le32(info->level);
+	sb->layout = __cpu_to_le32(info->layout);
+	sb->size = __cpu_to_le64(size*2ULL);
+	sb->chunksize = __cpu_to_le32(info->chunk_size>>9);
+	sb->raid_disks = __cpu_to_le32(info->raid_disks);
+
+	sb->data_offset = __cpu_to_le64(data_offset);
+	sb->data_size = __cpu_to_le64(0);
+	sb->super_offset = __cpu_to_le64(0);
+	sb->recovery_offset = __cpu_to_le64(0);
+
+	sb->utime = sb->ctime;
+	sb->events = __cpu_to_le64(1);
+	if (info->state & (1<<MD_SB_CLEAN))
+		sb->resync_offset = MaxSector;
+	else
+		sb->resync_offset = 0;
+	/* two bytes of role per device, rounded up to a whole 512 bytes */
+	sbsize = sizeof(struct mdp_superblock_1) + 2 * (info->raid_disks + spares);
+	sbsize = ROUND_UP(sbsize, 512);
+	sb->max_dev = __cpu_to_le32((sbsize - sizeof(struct mdp_superblock_1)) / 2);
+
+	/* every role starts out as 0xffff */
+	memset(sb->dev_roles, 0xff, MAX_SB_SIZE - sizeof(struct mdp_superblock_1));
+
+	return 1;
+}
+
+/* One device queued by add_to_super1() for write_init_super1();
+ * the entries form a singly linked list hung off st->info.
+ */
+struct devinfo {
+ int fd; /* open descriptor on the device; write_init_super1 sets it to -1 after closing */
+ char *devname; /* device name, used in messages */
+ long long data_offset; /* requested data offset; may be INVALID_SECTORS */
+ mdu_disk_info_t disk; /* copy of the disk description passed to add_to_super1 */
+ struct devinfo *next; /* next queued device, NULL at the end */
+};
+#ifndef MDASSEMBLE
+/* Add a device to the superblock being created.
+ *
+ * Records the device's role in sb->dev_roles[dk->number], grows
+ * sb->max_dev when needed (while below MAX_DEVS), refreshes the
+ * checksum, and appends the device to the list at st->info.
+ * Always returns 0.
+ */
+static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
+			 int fd, char *devname, unsigned long long data_offset)
+{
+	struct mdp_superblock_1 *sb = st->sb;
+	__u16 *rp = sb->dev_roles + dk->number;
+	struct devinfo *di, **dip;
+
+	/* dev_roles is read with __le16_to_cpu everywhere else, so every
+	 * value stored here goes through __cpu_to_le16 as well.
+	 */
+	if ((dk->state & 6) == 6) /* active, sync */
+		*rp = __cpu_to_le16(dk->raid_disk);
+	else if ((dk->state & ~2) == 0) /* active or idle -> spare */
+		*rp = __cpu_to_le16(0xffff);
+	else
+		*rp = __cpu_to_le16(0xfffe);
+
+	if (dk->number >= (int)__le32_to_cpu(sb->max_dev) &&
+	    __le32_to_cpu(sb->max_dev) < MAX_DEVS)
+		sb->max_dev = __cpu_to_le32(dk->number+1);
+
+	sb->dev_number = __cpu_to_le32(dk->number);
+	sb->devflags = 0; /* don't copy another disks flags */
+	sb->sb_csum = calc_sb_1_csum(sb);
+
+	/* append to the tail of the pending-device list */
+	dip = (struct devinfo **)&st->info;
+	while (*dip)
+		dip = &(*dip)->next;
+	di = xmalloc(sizeof(struct devinfo));
+	di->fd = fd;
+	di->devname = devname;
+	di->disk = *dk;
+	di->data_offset = data_offset;
+	di->next = NULL;
+	*dip = di;
+
+	return 0;
+}
+#endif
+
+static void locate_bitmap1(struct supertype *st, int fd);
+
+/*
+ * Write the superblock in st->sb (and, when present and valid, the
+ * bitmap superblock that follows it in memory) to the device fd.
+ *
+ * Returns 0 on success, 1 if the device size cannot be read, 2 if the
+ * device is under 24 sectors, 3 if the seek fails, 4 if the
+ * superblock write is short, 5 if the bitmap superblock write is
+ * short, and -EINVAL for an unknown minor version.  Aborts when the
+ * superblock records a non-zero super_offset that differs from the
+ * computed position.
+ */
+static int store_super1(struct supertype *st, int fd)
+{
+	struct mdp_superblock_1 *super = st->sb;
+	struct align_fd afd;
+	unsigned long long sectors;
+	unsigned long long where;
+	int nbytes;
+
+	if (!get_dev_size(fd, NULL, &sectors))
+		return 1;
+
+	sectors >>= 9;
+	if (sectors < 24)
+		return 2;
+
+	init_afd(&afd, fd);
+
+	/*
+	 * The superblock sits on a 4K boundary:
+	 *   1.0: at least 8K, but less than 12K, from end of device
+	 *   1.1: at start of device
+	 *   1.2: 4K from start of device
+	 */
+	if (st->minor_version == 0) {
+		where = sectors;
+		where -= 8*2;
+		where &= ~(4*2-1);
+	} else if (st->minor_version == 1)
+		where = 0;
+	else if (st->minor_version == 2)
+		where = 4*2;
+	else
+		return -EINVAL;
+
+	/* a recorded offset of 0 is accepted without comparison */
+	if (0 != __le64_to_cpu(super->super_offset) &&
+	    where != __le64_to_cpu(super->super_offset)) {
+		pr_err("internal error - sb_offset is wrong\n");
+		abort();
+	}
+
+	if (lseek64(fd, where << 9, 0)< 0LL)
+		return 3;
+
+	nbytes = ROUND_UP(sizeof(*super) + 2 * __le32_to_cpu(super->max_dev), 512);
+	if (awrite(&afd, super, nbytes) != nbytes)
+		return 4;
+
+	if (super->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
+		struct bitmap_super_s *bms =
+			(struct bitmap_super_s*)(((char*)super)+MAX_SB_SIZE);
+
+		if (__le32_to_cpu(bms->magic) == BITMAP_MAGIC) {
+			locate_bitmap1(st, fd);
+			if (awrite(&afd, bms, sizeof(*bms)) != sizeof(*bms))
+				return 5;
+		}
+	}
+	fsync(fd);
+	return 0;
+}
+
+static int load_super1(struct supertype *st, int fd, char *devname);
+
+/*
+ * Choose how many sectors to set aside for a bitmap on a device of
+ * devsize sectors:
+ *   under 64K                -> nothing
+ *   200Gig or more past 64K  -> 128K
+ *   more than 8Gig past 4K   -> 64K
+ *   anything else            -> 4K
+ * NOTE: result must be multiple of 4K else bad things happen
+ * on 4K-sector devices.
+ */
+static unsigned long choose_bm_space(unsigned long devsize)
+{
+	unsigned long reserve;
+
+	if (devsize < 64*2)
+		reserve = 0;
+	else if (devsize - 64*2 >= 200*1024*1024*2)
+		reserve = 128*2;
+	else if (devsize - 4*2 > 8*1024*1024*2)
+		reserve = 64*2;
+	else
+		reserve = 4*2;
+
+	return reserve;
+}
+
+static void free_super1(struct supertype *st);
+
+#ifndef MDASSEMBLE
+/*
+ * Write the superblock in st->sb to every device queued on st->info
+ * by add_to_super1(), customising the per-device fields (device
+ * number, flags, device UUID, offsets, sizes, bad-block log location)
+ * for each one.
+ *
+ * Devices flagged faulty, or whose fd is negative, are skipped.
+ * Each device's fd is closed once it has been written.
+ *
+ * Returns 0 on success.  On failure it returns 1 (device size
+ * unavailable), 2 (device under 24 sectors), -EINVAL (unknown minor
+ * version), or whatever non-zero value store_super1() or the
+ * write_bitmap method returned, and stops at the first failing device.
+ */
+static int write_init_super1(struct supertype *st)
+{
+ struct mdp_superblock_1 *sb = st->sb;
+ struct supertype *refst;
+ int rfd;
+ int rv = 0;
+ unsigned long long bm_space;
+ struct devinfo *di;
+ unsigned long long dsize, array_size;
+ unsigned long long sb_offset;
+ unsigned long long data_offset;
+
+ for (di = st->info; di; di = di->next) {
+ if (di->disk.state & (1 << MD_DISK_FAULTY))
+ continue;
+ if (di->fd < 0)
+ continue;
+
+ /* keep calling Kill() on the device until it stops returning 0 */
+ while (Kill(di->devname, NULL, 0, -1, 1) == 0)
+ ;
+
+ sb->dev_number = __cpu_to_le32(di->disk.number);
+ if (di->disk.state & (1<<MD_DISK_WRITEMOSTLY))
+ sb->devflags |= WriteMostly1;
+ else
+ sb->devflags &= ~WriteMostly1;
+
+ /* fresh random device UUID; fall back to random() if
+ * /dev/urandom cannot be opened or read */
+ if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
+ read(rfd, sb->device_uuid, 16) != 16) {
+ __u32 r[4] = {random(), random(), random(), random()};
+ memcpy(sb->device_uuid, r, 16);
+ }
+ if (rfd >= 0)
+ close(rfd);
+
+ sb->events = 0;
+
+ /* if the device already carries a v1 superblock, reuse its
+ * device UUID */
+ refst = dup_super(st);
+ if (load_super1(refst, di->fd, NULL)==0) {
+ struct mdp_superblock_1 *refsb = refst->sb;
+
+ memcpy(sb->device_uuid, refsb->device_uuid, 16);
+ if (memcmp(sb->set_uuid, refsb->set_uuid, 16)==0) {
+ /* same array, so preserve events and
+ * dev_number */
+ sb->events = refsb->events;
+ /* bugs in 2.6.17 and earlier mean the
+ * dev_number chosen in Manage must be preserved
+ */
+ if (get_linux_version() >= 2006018)
+ sb->dev_number = refsb->dev_number;
+ }
+ free_super1(refst);
+ }
+ free(refst);
+
+ if (!get_dev_size(di->fd, NULL, &dsize)) {
+ rv = 1;
+ goto error_out;
+ }
+ dsize >>= 9;
+
+ if (dsize < 24) {
+ close(di->fd);
+ rv = 2;
+ goto error_out;
+ }
+
+ /*
+ * Calculate the position of the superblock.
+ * It is always aligned to a 4K boundary and
+ * depending on minor_version, it can be:
+ * 0: At least 8K, but less than 12K, from end of device
+ * 1: At start of device
+ * 2: 4K from start of device.
+ * data_offset has already been set.
+ */
+ array_size = __le64_to_cpu(sb->size);
+ /* work out how much space we left for a bitmap,
+ * Add 8 sectors for bad block log */
+ bm_space = choose_bm_space(array_size) + 8;
+
+ /* per-device data_offset wins; otherwise use the one on st */
+ data_offset = di->data_offset;
+ if (data_offset == INVALID_SECTORS)
+ data_offset = st->data_offset;
+ switch(st->minor_version) {
+ case 0:
+ if (data_offset == INVALID_SECTORS)
+ data_offset = 0;
+ sb_offset = dsize;
+ sb_offset -= 8*2;
+ sb_offset &= ~(4*2-1);
+ sb->data_offset = __cpu_to_le64(data_offset);
+ sb->super_offset = __cpu_to_le64(sb_offset);
+ if (sb_offset < array_size + bm_space)
+ bm_space = sb_offset - array_size;
+ sb->data_size = __cpu_to_le64(sb_offset - bm_space);
+ if (bm_space >= 8) {
+ /* bad-block log occupies the 8 sectors just before the superblock */
+ sb->bblog_size = __cpu_to_le16(8);
+ sb->bblog_offset = __cpu_to_le32((unsigned)-8);
+ }
+ break;
+ case 1:
+ sb->super_offset = __cpu_to_le64(0);
+ if (data_offset == INVALID_SECTORS)
+ data_offset = 16;
+
+ sb->data_offset = __cpu_to_le64(data_offset);
+ sb->data_size = __cpu_to_le64(dsize - data_offset);
+ if (data_offset >= 8 + 32*2 + 8) {
+ sb->bblog_size = __cpu_to_le16(8);
+ sb->bblog_offset = __cpu_to_le32(8 + 32*2);
+ } else if (data_offset >= 16) {
+ sb->bblog_size = __cpu_to_le16(8);
+ sb->bblog_offset = __cpu_to_le32(data_offset-8);
+ }
+ break;
+ case 2:
+ sb_offset = 4*2;
+ sb->super_offset = __cpu_to_le64(sb_offset);
+ if (data_offset == INVALID_SECTORS)
+ data_offset = 24;
+
+ sb->data_offset = __cpu_to_le64(data_offset);
+ sb->data_size = __cpu_to_le64(dsize - data_offset);
+ if (data_offset >= 16 + 32*2 + 8) {
+ sb->bblog_size = __cpu_to_le16(8);
+ sb->bblog_offset = __cpu_to_le32(8 + 32*2);
+ } else if (data_offset >= 16+16) {
+ sb->bblog_size = __cpu_to_le16(8);
+ /* '8' sectors for the bblog, and another '8'
+ * because we want offset from superblock, not
+ * start of device.
+ */
+ sb->bblog_offset = __cpu_to_le32(data_offset-8-8);
+ }
+ break;
+ default:
+ pr_err("Failed to write invalid "
+ "metadata format 1.%i to %s\n",
+ st->minor_version, di->devname);
+ rv = -EINVAL;
+ goto out;
+ }
+ /* configuration may disable the bad-block list entirely */
+ if (conf_get_create_info()->bblist == 0) {
+ sb->bblog_size = 0;
+ sb->bblog_offset = 0;
+ }
+
+ sb->sb_csum = calc_sb_1_csum(sb);
+ rv = store_super1(st, di->fd);
+ /* feature bit 1 set: also write the bitmap via the superswitch */
+ if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
+ rv = st->ss->write_bitmap(st, di->fd);
+ close(di->fd);
+ di->fd = -1;
+ if (rv)
+ goto error_out;
+ }
+error_out:
+ /* di is only dereferenced when rv is non-zero, i.e. when the loop
+ * was left early with di still pointing at the failing device */
+ if (rv)
+ pr_err("Failed to write metadata to %s\n",
+ di->devname);
+out:
+ return rv;
+}
+#endif
+
+/*
+ * Check whether the superblock in tst describes the same array as the
+ * one in st.  If st has no superblock yet, a copy of tst's is
+ * installed in st.
+ *
+ * return:
+ *  0 same, or first was empty, and second was copied
+ *  1 second had wrong number (magic or major version), or the copy
+ *    could not be allocated
+ *  2 wrong uuid
+ *  3 wrong other info
+ */
+static int compare_super1(struct supertype *st, struct supertype *tst)
+{
+	struct mdp_superblock_1 *ours = st->sb;
+	struct mdp_superblock_1 *theirs = tst->sb;
+
+	if (theirs->magic != __cpu_to_le32(MD_SB_MAGIC) ||
+	    theirs->major_version != __cpu_to_le32(1))
+		return 1;
+
+	if (ours == NULL) {
+		/* nothing to compare against yet: adopt a copy */
+		if (posix_memalign((void**)&ours, 4096, SUPER1_SIZE) != 0) {
+			pr_err("%s could not allocate superblock\n", __func__);
+			return 1;
+		}
+		memcpy(ours, theirs, SUPER1_SIZE);
+		st->sb = ours;
+		return 0;
+	}
+
+	if (memcmp(ours->set_uuid, theirs->set_uuid, 16) != 0)
+		return 2;
+
+	if (ours->ctime != theirs->ctime)
+		return 3;
+	if (ours->level != theirs->level)
+		return 3;
+	if (ours->layout != theirs->layout)
+		return 3;
+	if (ours->size != theirs->size)
+		return 3;
+	if (ours->chunksize != theirs->chunksize)
+		return 3;
+	if (ours->raid_disks != theirs->raid_disks)
+		return 3;
+
+	return 0;
+}
+
+/*
+ * Read a v1.x superblock from fd into a freshly allocated buffer and
+ * attach it to st->sb, releasing any superblock st held before.
+ *
+ * When st->ss is NULL or st->minor_version is -1, minor versions 0, 1
+ * and 2 are each tried and the one with the latest ctime is loaded.
+ *
+ * devname is used only for error messages; when it is NULL most
+ * failures are silent.
+ *
+ * Returns 0 on success, 1 when the device is unusable (size unknown,
+ * under 24 sectors, seek/read/allocation failure), 2 when no valid
+ * v1 superblock is found at the expected position, and -EINVAL for an
+ * unknown minor version.
+ */
+static int load_super1(struct supertype *st, int fd, char *devname)
+{
+ unsigned long long dsize;
+ unsigned long long sb_offset;
+ struct mdp_superblock_1 *super;
+ int uuid[4];
+ struct bitmap_super_s *bsb;
+ struct misc_dev_info *misc;
+ struct align_fd afd;
+
+ free_super1(st);
+
+ init_afd(&afd, fd);
+
+ if (st->ss == NULL || st->minor_version == -1) {
+ int bestvers = -1;
+ struct supertype tst;
+ __u64 bestctime = 0;
+ /* guess... choose latest ctime */
+ memset(&tst, 0, sizeof(tst));
+ tst.ss = &super1;
+ for (tst.minor_version = 0; tst.minor_version <= 2 ; tst.minor_version++) {
+ switch(load_super1(&tst, fd, devname)) {
+ case 0: super = tst.sb;
+ if (bestvers == -1 ||
+ bestctime < __le64_to_cpu(super->ctime)) {
+ bestvers = tst.minor_version;
+ bestctime = __le64_to_cpu(super->ctime);
+ }
+ free(super);
+ tst.sb = NULL;
+ break;
+ case 1: return 1; /*bad device */
+ case 2: break; /* bad, try next */
+ }
+ }
+ if (bestvers != -1) {
+ /* reload the winning version and hand the whole
+ * supertype over to the caller */
+ int rv;
+ tst.minor_version = bestvers;
+ tst.ss = &super1;
+ tst.max_devs = MAX_DEVS;
+ rv = load_super1(&tst, fd, devname);
+ if (rv == 0)
+ *st = tst;
+ return rv;
+ }
+ return 2;
+ }
+ if (!get_dev_size(fd, devname, &dsize))
+ return 1;
+ dsize >>= 9;
+
+ if (dsize < 24) {
+ if (devname)
+ pr_err("%s is too small for md: size is %llu sectors.\n",
+ devname, dsize);
+ return 1;
+ }
+
+ /*
+ * Calculate the position of the superblock.
+ * It is always aligned to a 4K boundary and
+ * depending on minor_version, it can be:
+ * 0: At least 8K, but less than 12K, from end of device
+ * 1: At start of device
+ * 2: 4K from start of device.
+ */
+ switch(st->minor_version) {
+ case 0:
+ sb_offset = dsize;
+ sb_offset -= 8*2;
+ sb_offset &= ~(4*2-1);
+ break;
+ case 1:
+ sb_offset = 0;
+ break;
+ case 2:
+ sb_offset = 4*2;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (lseek64(fd, sb_offset << 9, 0)< 0LL) {
+ if (devname)
+ pr_err("Cannot seek to superblock on %s: %s\n",
+ devname, strerror(errno));
+ return 1;
+ }
+
+ /* the buffer has room for the superblock plus the bitmap superblock
+ * and misc_dev_info that are addressed past MAX_SB_SIZE below */
+ if (posix_memalign((void**)&super, 4096, SUPER1_SIZE) != 0) {
+ pr_err("%s could not allocate superblock\n",
+ __func__);
+ return 1;
+ }
+
+ if (aread(&afd, super, MAX_SB_SIZE) != MAX_SB_SIZE) {
+ if (devname)
+ pr_err("Cannot read superblock on %s\n",
+ devname);
+ free(super);
+ return 1;
+ }
+
+ if (__le32_to_cpu(super->magic) != MD_SB_MAGIC) {
+ if (devname)
+ pr_err("No super block found on %s (Expected magic %08x, got %08x)\n",
+ devname, MD_SB_MAGIC, __le32_to_cpu(super->magic));
+ free(super);
+ return 2;
+ }
+
+ if (__le32_to_cpu(super->major_version) != 1) {
+ if (devname)
+ pr_err("Cannot interpret superblock on %s - version is %d\n",
+ devname, __le32_to_cpu(super->major_version));
+ free(super);
+ return 2;
+ }
+ /* the superblock must record the position it was read from */
+ if (__le64_to_cpu(super->super_offset) != sb_offset) {
+ if (devname)
+ pr_err("No superblock found on %s (super_offset is wrong)\n",
+ devname);
+ free(super);
+ return 2;
+ }
+ st->sb = super;
+
+ bsb = (struct bitmap_super_s *)(((char*)super)+MAX_SB_SIZE);
+
+ misc = (struct misc_dev_info*) (((char*)super)+MAX_SB_SIZE+BM_SUPER_SIZE);
+ misc->device_size = dsize;
+ if (st->data_offset == INVALID_SECTORS)
+ st->data_offset = __le64_to_cpu(super->data_offset);
+
+ /* Now check on the bitmap superblock */
+ if ((__le32_to_cpu(super->feature_map)&MD_FEATURE_BITMAP_OFFSET) == 0)
+ return 0;
+ /* Read the bitmap superblock and make sure it looks
+ * valid. If it doesn't clear the bit. An --assemble --force
+ * should get that written out.
+ */
+ locate_bitmap1(st, fd);
+ if (aread(&afd, bsb, 512) != 512)
+ goto no_bitmap;
+
+ uuid_from_super1(st, uuid);
+ if (__le32_to_cpu(bsb->magic) != BITMAP_MAGIC ||
+ memcmp(bsb->uuid, uuid, 16) != 0)
+ goto no_bitmap;
+ return 0;
+
+ no_bitmap:
+ /* the load still succeeds; only the in-memory feature bit is dropped */
+ super->feature_map = __cpu_to_le32(__le32_to_cpu(super->feature_map)
+ & ~MD_FEATURE_BITMAP_OFFSET);
+ return 0;
+}
+
+/*
+ * Turn a metadata name given by the user into a new supertype for the
+ * v1.x format.  Recognised names, after dropping leading zeros:
+ *   "1.0" / "1.00"  -> minor version 0
+ *   "1.1" / "1.01"  -> minor version 1
+ *   "1.2" / "1.02"  -> minor version 2 (and "default", unless
+ *                      DEFAULT_OLD_METADATA is defined)
+ *   "1" / "default" -> minor version -1 (not yet chosen)
+ * Returns NULL for any other name.
+ */
+static struct supertype *match_metadata_desc1(char *arg)
+{
+	struct supertype *st = xcalloc(1, sizeof(*st));
+	int minor;
+
+	st->container_devnm[0] = 0;
+	st->ss = &super1;
+	st->max_devs = MAX_DEVS;
+	st->sb = NULL;
+	st->data_offset = INVALID_SECTORS;
+
+	/* leading zeros can be safely ignored. --detail generates them. */
+	for (; *arg == '0'; arg++)
+		;
+
+	if (strcmp(arg, "1.0") == 0 || strcmp(arg, "1.00") == 0)
+		minor = 0;
+	else if (strcmp(arg, "1.1") == 0 || strcmp(arg, "1.01") == 0)
+		minor = 1;
+	else if (strcmp(arg, "1.2") == 0 ||
+#ifndef DEFAULT_OLD_METADATA /* ifdef in super0.c */
+		 strcmp(arg, "default") == 0 ||
+#endif /* DEFAULT_OLD_METADATA */
+		 strcmp(arg, "1.02") == 0)
+		minor = 2;
+	else if (strcmp(arg, "1") == 0 || strcmp(arg, "default") == 0)
+		minor = -1;
+	else {
+		free(st);
+		return NULL;
+	}
+
+	st->minor_version = minor;
+	return st;
+}
+
+/* Work out how many sectors of a device of 'devsize' sectors are
+ * available for data with superblock type st, allowing for the
+ * superblock, any bitmap recorded in st->sb and the bad block log.
+ * Devices under 24 sectors yield 0.  If st has no minor version yet,
+ * 1.2 is selected as a side effect.  When a data offset is known
+ * (from the argument or from st) the result is based on it instead of
+ * the default reservations.
+ */
+static __u64 avail_size1(struct supertype *st, __u64 devsize,
+			 unsigned long long data_offset)
+{
+	struct mdp_superblock_1 *sb = st->sb;
+	int bm_reserve = 0;
+	int bbl_reserve = 0;
+
+	if (devsize < 24)
+		return 0;
+
+#ifndef MDASSEMBLE
+	if (__le32_to_cpu(sb->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
+		/* hot-add. allow for actual size of bitmap */
+		struct bitmap_super_s *bm =
+			(struct bitmap_super_s *)(((char*)sb)+MAX_SB_SIZE);
+
+		bm_reserve = bitmap_sectors(bm);
+	}
+#endif
+	/* Allow space for bad block log */
+	if (sb->bblog_size)
+		bbl_reserve = __le16_to_cpu(sb->bblog_size);
+
+	/* not specified, so time to set default */
+	if (st->minor_version < 0)
+		st->minor_version = 2;
+
+	if (data_offset == INVALID_SECTORS)
+		data_offset = st->data_offset;
+
+	if (data_offset != INVALID_SECTORS) {
+		if (st->minor_version == 0)
+			return devsize - data_offset - 8*2 - bbl_reserve;
+		if (st->minor_version == 1 || st->minor_version == 2)
+			return devsize - data_offset;
+		return 0;
+	}
+
+	devsize -= bm_reserve;
+
+	if (st->minor_version == 0)
+		/* at end */
+		return ((devsize - 8*2 - bbl_reserve ) & ~(4*2-1));
+	if (st->minor_version == 1)
+		/* at start, 4K for superblock and possible bitmap */
+		return devsize - 4*2 - bbl_reserve;
+	if (st->minor_version == 2)
+		/* 4k from start, 4K for superblock and possible bitmap */
+		return devsize - (4+4)*2 - bbl_reserve;
+	return 0;
+}
+
+/* Describe an internal bitmap in the in-memory v1.x superblock.
+ *
+ * Chooses where the bitmap lives relative to the superblock and how
+ * large a chunk each bit covers, then records the bitmap offset, sets
+ * MD_FEATURE_BITMAP_OFFSET and fills in the bitmap superblock held
+ * MAX_SB_SIZE bytes after the superblock.  Nothing is written to disk
+ * here.
+ *
+ * chunkp: in: requested chunk size in bytes, or UnSet;
+ * out: chunk size actually chosen.
+ * delay, write_behind, major: stored in the bitmap superblock as
+ * daemon_sleep, write_behind and version.
+ * size: stored as sync_size and used to count bits (in sectors).
+ * may_change: non-zero if the bitmap location may be chosen freely.
+ *
+ * Returns 1 on success, 0 if there is no room, the requested chunk is
+ * too small, or the minor version is not 0, 1 or 2.
+ */
+static int
+add_internal_bitmap1(struct supertype *st,
+ int *chunkp, int delay, int write_behind,
+ unsigned long long size,
+ int may_change, int major)
+{
+ /*
+ * If not may_change, then this is a 'Grow' without sysfs support for
+ * bitmaps, and the bitmap must fit after the superblock at 1K offset.
+ * If may_change, then this is create or a Grow with sysfs support,
+ * and we can put the bitmap wherever we like.
+ *
+ * size is in sectors, chunk is in bytes !!!
+ */
+
+ unsigned long long bits;
+ unsigned long long max_bits;
+ unsigned long long min_chunk;
+ long offset;
+ long bbl_offset, bbl_size;
+ unsigned long long chunk = *chunkp;
+ int room = 0;
+ int creating = 0;
+ struct mdp_superblock_1 *sb = st->sb;
+ bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
+ int uuid[4];
+
+ if (__le64_to_cpu(sb->data_size) == 0)
+ /* Must be creating the array, else data_size would be non-zero */
+ creating = 1;
+ /* Work out 'room' (sectors usable for bitmap + bad block log),
+ * 'offset' (bitmap position relative to the superblock, 0 meaning
+ * "choose later") and 'bbl_size' (sectors kept for the bad block log).
+ */
+ switch(st->minor_version) {
+ case 0:
+ /* either 3K after the superblock (when hot-add),
+ * or some amount of space before.
+ */
+ if (creating) {
+ /* We are creating array, so we *know* how much room has
+ * been left.
+ */
+ offset = 0;
+ bbl_size = 8;
+ room = choose_bm_space(__le64_to_cpu(sb->size)) + bbl_size;
+ } else {
+ room = __le64_to_cpu(sb->super_offset)
+ - __le64_to_cpu(sb->data_offset)
+ - __le64_to_cpu(sb->data_size);
+ bbl_size = __le16_to_cpu(sb->bblog_size);
+ if (bbl_size < 8)
+ bbl_size = 8;
+ bbl_offset = (__s32)__le32_to_cpu(sb->bblog_offset);
+ if (bbl_size < -bbl_offset)
+ bbl_size = -bbl_offset;
+
+ if (!may_change || (room < 3*2 &&
+ __le32_to_cpu(sb->max_dev) <= 384)) {
+ room = 3*2;
+ offset = 1*2;
+ bbl_size = 0;
+ } else {
+ offset = 0; /* means movable offset */
+ }
+ }
+ break;
+ case 1:
+ case 2: /* between superblock and data */
+ if (creating) {
+ offset = 4*2;
+ bbl_size = 8;
+ room = choose_bm_space(__le64_to_cpu(sb->size)) + bbl_size;
+ } else {
+ room = __le64_to_cpu(sb->data_offset)
+ - __le64_to_cpu(sb->super_offset);
+ bbl_size = __le16_to_cpu(sb->bblog_size);
+ if (bbl_size)
+ room = __le32_to_cpu(sb->bblog_offset) + bbl_size;
+ else
+ bbl_size = 8;
+
+ if (!may_change) {
+ room -= 2; /* Leave 1K for superblock */
+ offset = 2;
+ bbl_size = 0;
+ } else {
+ room -= 4*2; /* leave 4K for superblock */
+ offset = 4*2;
+ }
+ }
+ break;
+ default:
+ return 0;
+ }
+
+ room -= bbl_size;
+ if (chunk == UnSet && room > 128*2)
+ /* Limit to 128K of bitmap when chunk size not requested */
+ room = 128*2;
+
+ if (room <= 1)
+ /* No room for a bitmap */
+ return 0;
+
+ /* number of bits that fit in 'room' sectors after the bitmap superblock */
+ max_bits = (room * 512 - sizeof(bitmap_super_t)) * 8;
+
+ min_chunk = 4096; /* sub-page chunks don't work yet.. */
+ bits = (size*512)/min_chunk +1;
+ /* double the chunk size until the bit count fits */
+ while (bits > max_bits) {
+ min_chunk *= 2;
+ bits = (bits+1)/2;
+ }
+ if (chunk == UnSet) {
+ /* For practical purpose, 64Meg is a good
+ * default chunk size for internal bitmaps.
+ */
+ chunk = min_chunk;
+ if (chunk < 64*1024*1024)
+ chunk = 64*1024*1024;
+ } else if (chunk < min_chunk)
+ return 0; /* chunk size too small */
+ if (chunk == 0) /* rounding problem */
+ return 0;
+
+ if (offset == 0) {
+ /* start bitmap on a 4K boundary with enough space for
+ * the bitmap
+ */
+ bits = (size*512) / chunk + 1;
+ room = ((bits+7)/8 + sizeof(bitmap_super_t) +4095)/4096;
+ room *= 8; /* convert 4K blocks to sectors */
+ /* negative: the bitmap is placed before the superblock */
+ offset = -room - bbl_size;
+ }
+
+ sb->bitmap_offset = (int32_t)__cpu_to_le32(offset);
+
+ sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map)
+ | MD_FEATURE_BITMAP_OFFSET);
+ memset(bms, 0, sizeof(*bms));
+ bms->magic = __cpu_to_le32(BITMAP_MAGIC);
+ bms->version = __cpu_to_le32(major);
+ uuid_from_super1(st, uuid);
+ memcpy(bms->uuid, uuid, 16);
+ bms->chunksize = __cpu_to_le32(chunk);
+ bms->daemon_sleep = __cpu_to_le32(delay);
+ bms->sync_size = __cpu_to_le64(size);
+ bms->write_behind = __cpu_to_le32(write_behind);
+
+ /* report the chunk size actually used back to the caller */
+ *chunkp = chunk;
+ return 1;
+}
+
+/* Position 'fd' at the start of the internal bitmap.
+ *
+ * The location is super_offset plus the signed bitmap_offset from the
+ * superblock, both in 512-byte sectors.  If no superblock is loaded in
+ * 'st', one is loaded from 'fd' just for this call and released again;
+ * if that load fails the file position is left unchanged.
+ */
+static void locate_bitmap1(struct supertype *st, int fd)
+{
+	unsigned long long offset;
+	struct mdp_superblock_1 *sb;
+	int mustfree = 0;
+
+	if (!st->sb) {
+		if (st->ss->load_super(st, fd, NULL))
+			return; /* no error I hope... */
+		mustfree = 1;
+	}
+	sb = st->sb;
+
+	offset = __le64_to_cpu(sb->super_offset);
+	offset += (int32_t) __le32_to_cpu(sb->bitmap_offset);
+	if (mustfree) {
+		free(sb);
+		/* Do not leave st->sb pointing at freed memory: a later
+		 * use or free_super would otherwise touch it again.
+		 */
+		st->sb = NULL;
+	}
+	lseek64(fd, offset<<9, 0);
+}
+
+/* Write the internal bitmap to 'fd': the bitmap superblock kept
+ * MAX_SB_SIZE bytes after the in-memory superblock, followed by
+ * bitmap bytes that are all 0xff.
+ *
+ * Returns 0 on success, -ENOMEM if the aligned buffer cannot be
+ * allocated, -2 if not everything could be written.
+ */
+static int write_bitmap1(struct supertype *st, int fd)
+{
+ struct mdp_superblock_1 *sb = st->sb;
+ bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE);
+ int rv = 0;
+ void *buf;
+ int towrite, n;
+ struct align_fd afd;
+
+ init_afd(&afd, fd);
+
+ /* seek fd to where the bitmap belongs */
+ locate_bitmap1(st, fd);
+
+ if (posix_memalign(&buf, 4096, 4096))
+ return -ENOMEM;
+
+ /* first block: bitmap superblock, rest filled with 0xff */
+ memset(buf, 0xff, 4096);
+ memcpy(buf, (char *)bms, sizeof(bitmap_super_t));
+
+ /* one bit per chunk; chunksize is in bytes, sync_size in sectors */
+ towrite = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
+ towrite = (towrite+7) >> 3; /* bits to bytes */
+ towrite += sizeof(bitmap_super_t);
+ towrite = ROUND_UP(towrite, 512);
+ while (towrite > 0) {
+ n = towrite;
+ if (n > 4096)
+ n = 4096;
+ n = awrite(&afd, buf, n);
+ if (n > 0)
+ towrite -= n;
+ else
+ break;
+ /* every block after the first is pure 0xff */
+ memset(buf, 0xff, 4096);
+ }
+ fsync(fd);
+ if (towrite)
+ rv = -2;
+
+ free(buf);
+ return rv;
+}
+
+/* Release everything hanging off 'st': the loaded superblock and the
+ * list of per-device records, closing any descriptor those still hold.
+ */
+static void free_super1(struct supertype *st)
+{
+	struct devinfo *di, *following;
+
+	free(st->sb);
+
+	for (di = st->info; di != NULL; di = following) {
+		following = di->next;
+		st->info = following;
+		if (di->fd >= 0)
+			close(di->fd);
+		free(di);
+	}
+
+	st->sb = NULL;
+}
+
+#ifndef MDASSEMBLE
+/* Check whether an array with this geometry can use v1.x metadata
+ * and, when 'subdev' is given, report how much of that device would
+ * be available for data.
+ *
+ * Side effects: *chunk is set to DEFAULT_CHUNK if it was UnSet; an
+ * unspecified st->minor_version becomes 2; st->data_offset is set to
+ * the offset used here if it was still INVALID_SECTORS.
+ *
+ * Returns 1 if acceptable (with *freesize in sectors when 'subdev' is
+ * non-NULL), 0 otherwise.  'layout', 'raiddisks' and 'size' are not
+ * used in this function.
+ */
+static int validate_geometry1(struct supertype *st, int level,
+ int layout, int raiddisks,
+ int *chunk, unsigned long long size,
+ unsigned long long data_offset,
+ char *subdev, unsigned long long *freesize,
+ int verbose)
+{
+ unsigned long long ldsize, devsize;
+ int bmspace;
+ unsigned long long headroom;
+ int fd;
+
+ if (level == LEVEL_CONTAINER) {
+ if (verbose)
+ pr_err("1.x metadata does not support containers\n");
+ return 0;
+ }
+ if (*chunk == UnSet)
+ *chunk = DEFAULT_CHUNK;
+
+ /* no device to inspect: the geometry itself is acceptable */
+ if (!subdev)
+ return 1;
+
+ if (st->minor_version < 0)
+ /* not specified, so time to set default */
+ st->minor_version = 2;
+
+ fd = open(subdev, O_RDONLY|O_EXCL, 0);
+ if (fd < 0) {
+ if (verbose)
+ pr_err("super1.x cannot open %s: %s\n",
+ subdev, strerror(errno));
+ return 0;
+ }
+
+ if (!get_dev_size(fd, subdev, &ldsize)) {
+ close(fd);
+ return 0;
+ }
+ close(fd);
+
+ /* bytes to 512-byte sectors */
+ devsize = ldsize >> 9;
+ if (devsize < 24) {
+ *freesize = 0;
+ return 0;
+ }
+
+ /* creating: allow suitable space for bitmap */
+ bmspace = choose_bm_space(devsize);
+
+ if (data_offset == INVALID_SECTORS)
+ data_offset = st->data_offset;
+ if (data_offset == INVALID_SECTORS)
+ switch (st->minor_version) {
+ case 0:
+ data_offset = 0;
+ break;
+ case 1:
+ case 2:
+ /* Choose data offset appropriate for this device
+ * and use as default for whole array.
+ * The data_offset must allow for bitmap space
+ * and base metadata, should allow for some headroom
+ * for reshape, and should be rounded to multiple
+ * of 1M.
+ * Headroom is limited to 128M, but aim for about 0.1%
+ */
+ headroom = 128*1024*2;
+ while ((headroom << 10) > devsize &&
+ (*chunk == 0 ||
+ headroom / 2 >= ((unsigned)(*chunk)*2)*2))
+ headroom >>= 1;
+ data_offset = 12*2 + bmspace + headroom;
+ #define ONE_MEG (2*1024)
+ if (data_offset > ONE_MEG)
+ data_offset = (data_offset / ONE_MEG) * ONE_MEG;
+ break;
+ }
+ /* remember the chosen offset for later calls on this supertype */
+ if (st->data_offset == INVALID_SECTORS)
+ st->data_offset = data_offset;
+ switch(st->minor_version) {
+ case 0: /* metadata at end. Round down and subtract space to reserve */
+ devsize = (devsize & ~(4ULL*2-1));
+ /* space for metadata, bblog, bitmap */
+ devsize -= 8*2 + 8 + bmspace;
+ break;
+ case 1:
+ case 2:
+ devsize -= data_offset;
+ break;
+ }
+ *freesize = devsize;
+ return 1;
+}
+#endif /* MDASSEMBLE */
+
+/* Build a v1.0 superblock in a newly allocated 1024-byte, 4096-byte
+ * aligned buffer from the array description in 'info' and the v0.90
+ * superblock 'sb0' (its md_minor becomes the set name, its disk table
+ * supplies the device roles).
+ *
+ * Returns the buffer, or NULL if the allocation fails.  The caller
+ * owns the returned memory.
+ */
+void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0)
+{
+ /* Create a v1.0 superblock based on 'info'*/
+ void *ret;
+ struct mdp_superblock_1 *sb;
+ int i;
+ int rfd;
+ unsigned long long offset;
+
+ if (posix_memalign(&ret, 4096, 1024) != 0)
+ return NULL;
+ sb = ret;
+ memset(ret, 0, 1024);
+ sb->magic = __cpu_to_le32(MD_SB_MAGIC);
+ sb->major_version = __cpu_to_le32(1);
+
+ copy_uuid(sb->set_uuid, info->uuid, super1.swapuuid);
+ sprintf(sb->set_name, "%d", sb0->md_minor);
+ sb->ctime = __cpu_to_le32(info->array.ctime+1);
+ sb->level = __cpu_to_le32(info->array.level);
+ sb->layout = __cpu_to_le32(info->array.layout);
+ sb->size = __cpu_to_le64(info->component_size);
+ /* chunk_size is divided by 512 before being stored */
+ sb->chunksize = __cpu_to_le32(info->array.chunk_size/512);
+ sb->raid_disks = __cpu_to_le32(info->array.raid_disks);
+ if (info->array.level > 0)
+ sb->data_size = sb->size;
+ else
+ /* NOTE(review): stored without __cpu_to_le64, unlike the
+ * neighbouring fields - confirm this is intended.
+ */
+ sb->data_size = st->ss->avail_size(st, st->devsize/512, 0);
+ sb->resync_offset = MaxSector;
+ sb->max_dev = __cpu_to_le32(MD_SB_DISKS);
+ sb->dev_number = __cpu_to_le32(info->disk.number);
+ sb->utime = __cpu_to_le64(info->array.utime);
+
+ /* superblock goes 8K before the end of the device, rounded
+ * down to a 4K boundary (units of 512-byte sectors)
+ */
+ offset = st->devsize/512 - 8*2;
+ offset &= ~(4*2-1);
+ sb->super_offset = __cpu_to_le64(offset);
+ //*(__u64*)(st->other + 128 + 8 + 8) = __cpu_to_le64(offset);
+
+ /* device uuid from /dev/urandom, falling back to random() */
+ if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
+ read(rfd, sb->device_uuid, 16) != 16) {
+ __u32 r[4] = {random(), random(), random(), random()};
+ memcpy(sb->device_uuid, r, 16);
+ }
+ if (rfd >= 0)
+ close(rfd);
+
+ /* only in-sync, non-faulty disks keep their role; everything
+ * else is marked 0xFFFF
+ */
+ for (i = 0; i < MD_SB_DISKS; i++) {
+ int state = sb0->disks[i].state;
+ sb->dev_roles[i] = 0xFFFF;
+ if ((state & (1<<MD_DISK_SYNC)) &&
+ !(state & (1<<MD_DISK_FAULTY)))
+ sb->dev_roles[i] = __cpu_to_le16(sb0->disks[i].raid_disk);
+ }
+ sb->sb_csum = calc_sb_1_csum(sb);
+ return ret;
+}
+
+/* Method table for v1.x metadata.  The entries inside the
+ * #ifndef MDASSEMBLE section are left out of the table when
+ * MDASSEMBLE is defined.
+ */
+struct superswitch super1 = {
+#ifndef MDASSEMBLE
+ .examine_super = examine_super1,
+ .brief_examine_super = brief_examine_super1,
+ .export_examine_super = export_examine_super1,
+ .detail_super = detail_super1,
+ .brief_detail_super = brief_detail_super1,
+ .export_detail_super = export_detail_super1,
+ .write_init_super = write_init_super1,
+ .validate_geometry = validate_geometry1,
+ .add_to_super = add_to_super1,
+ .examine_badblocks = examine_badblocks_super1,
+ .copy_metadata = copy_metadata1,
+#endif
+ .match_home = match_home1,
+ .uuid_from_super = uuid_from_super1,
+ .getinfo_super = getinfo_super1,
+ .container_content = container_content1,
+ .update_super = update_super1,
+ .init_super = init_super1,
+ .store_super = store_super1,
+ .compare_super = compare_super1,
+ .load_super = load_super1,
+ .match_metadata_desc = match_metadata_desc1,
+ .avail_size = avail_size1,
+ .add_internal_bitmap = add_internal_bitmap1,
+ .locate_bitmap = locate_bitmap1,
+ .write_bitmap = write_bitmap1,
+ .free_super = free_super1,
+ /* swapuuid depends on host byte order: 0 on big-endian, 1 otherwise */
+#if __BYTE_ORDER == BIG_ENDIAN
+ .swapuuid = 0,
+#else
+ .swapuuid = 1,
+#endif
+ .name = "1.x",
+};
diff --git a/swap_super.c b/swap_super.c
new file mode 100644
index 00000000..b6db5743
--- /dev/null
+++ b/swap_super.c
@@ -0,0 +1,81 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/mount.h>
+/*
+ * This is a tiny test program to endian-swap
+ * the superblock on a given device.
+ * We simply read 4k from where the superblock should be,
+ * do the swap, and write it back.
+ * Don't use this on a real array; use mdadm.
+ */
+
+#define MD_RESERVED_BYTES (64 * 1024)
+#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512)
+
+#define MD_NEW_SIZE_SECTORS(x) ((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS)
+
+extern long long lseek64(int, long long, int);
+
+/* Usage: swap_super device
+ *
+ * Reads the 4096 bytes at the position given by
+ * MD_NEW_SIZE_SECTORS(device size in sectors), byte-swaps every 32-bit
+ * word, additionally exchanges the two halves of the two 64-bit event
+ * counters, and writes the block back in place.
+ * Exits with status 1 (after perror) on any failure, 0 on success.
+ */
+int main(int argc, char *argv[])
+{
+ int fd, i;
+ unsigned long size;
+ unsigned long long offset;
+ char super[4096];
+ if (argc != 2) {
+ fprintf(stderr, "Usage: swap_super device\n");
+ exit(1);
+ }
+ fd = open(argv[1], O_RDWR);
+ if (fd<0) {
+ perror(argv[1]);
+ exit(1);
+ }
+ if (ioctl(fd, BLKGETSIZE, &size)) {
+ perror("BLKGETSIZE");
+ exit(1);
+ }
+ /* sector position of the superblock, converted to a byte offset */
+ offset = MD_NEW_SIZE_SECTORS(size) * 512LL;
+ if (lseek64(fd, offset, 0) < 0LL) {
+ perror("lseek64");
+ exit(1);
+ }
+ if (read(fd, super, 4096) != 4096) {
+ perror("read");
+ exit(1);
+ }
+
+ /* reverse the byte order of every 4-byte word */
+ for (i=0; i < 4096 ; i+=4) {
+ char t = super[i];
+ super[i] = super[i+3];
+ super[i+3] = t;
+ t=super[i+1];
+ super[i+1]=super[i+2];
+ super[i+2]=t;
+ }
+ /* swap the u64 events counters */
+ for (i=0; i<4; i++) {
+ /* events_hi and events_lo */
+ char t=super[32*4+7*4 +i];
+ super[32*4+7*4 +i] = super[32*4+8*4 +i];
+ super[32*4+8*4 +i] = t;
+
+ /* cp_events_hi and cp_events_lo */
+ t=super[32*4+9*4 +i];
+ super[32*4+9*4 +i] = super[32*4+10*4 +i];
+ super[32*4+10*4 +i] = t;
+ }
+
+ /* go back to where the block came from and overwrite it */
+ if (lseek64(fd, offset, 0) < 0LL) {
+ perror("lseek64");
+ exit(1);
+ }
+ if (write(fd, super, 4096) != 4096) {
+ perror("write");
+ exit(1);
+ }
+ exit(0);
+
+}
diff --git a/sysfs.c b/sysfs.c
new file mode 100644
index 00000000..9a1d8569
--- /dev/null
+++ b/sysfs.c
@@ -0,0 +1,921 @@
+/*
+ * sysfs - extract md related information from sysfs. Part of:
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include <dirent.h>
+#include <ctype.h>
+
+/* Read the file 'path' into 'buf' as a NUL-terminated string with one
+ * trailing newline removed.  'buf' must have room for 1024 bytes.
+ * Returns 0 on success, -1 if the file cannot be opened or read, or
+ * if it holds 1024 bytes or more.
+ */
+int load_sys(char *path, char *buf)
+{
+	int fd;
+	int got;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	got = read(fd, buf, 1024);
+	close(fd);
+	if (got < 0 || got >= 1024)
+		return -1;
+
+	buf[got] = '\0';
+	if (got > 0 && buf[got - 1] == '\n')
+		buf[got - 1] = '\0';
+	return 0;
+}
+
+/* Free a chain of mdinfo records linked through ->next, together
+ * with the ->devs list hanging off each of them.  NULL is accepted.
+ */
+void sysfs_free(struct mdinfo *sra)
+{
+	struct mdinfo *array, *next_array;
+
+	for (array = sra; array != NULL; array = next_array) {
+		struct mdinfo *dev;
+
+		next_array = array->next;
+		while ((dev = array->devs) != NULL) {
+			array->devs = dev->next;
+			free(dev);
+		}
+		free(array);
+	}
+}
+
+/* Open a sysfs attribute of an md array.
+ *
+ * The path is /sys/block/<devnm>/md/<attr>, or
+ * /sys/block/<devnm>/md/<devname>/<attr> when 'devname' is non-NULL.
+ * The file is opened read-write; if that fails with EACCES a
+ * read-only open is tried instead.
+ *
+ * Returns the file descriptor, or -1 on failure.  A path that does
+ * not fit the buffer fails with errno set to ENAMETOOLONG instead of
+ * overrunning it.
+ */
+int sysfs_open(char *devnm, char *devname, char *attr)
+{
+	char fname[PATH_MAX];
+	int len;
+	int fd;
+
+	if (devname)
+		len = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s/%s",
+			       devnm, devname, attr);
+	else
+		len = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s",
+			       devnm, attr);
+	if (len < 0 || (size_t)len >= sizeof(fname)) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+
+	fd = open(fname, O_RDWR);
+	if (fd < 0 && errno == EACCES)
+		fd = open(fname, O_RDONLY);
+	return fd;
+}
+
+/* Fill in mdi->sys_name.
+ *
+ * With a valid 'fd' the name comes from fd2devnm(fd), provided the
+ * RAID_VERSION ioctl succeeds on it; otherwise 'devnm' is used.
+ * sys_name is left empty when no name can be determined.
+ */
+void sysfs_init(struct mdinfo *mdi, int fd, char *devnm)
+{
+	const char *name = devnm;
+
+	mdi->sys_name[0] = 0;
+
+	if (fd >= 0) {
+		mdu_version_t vers;
+
+		if (ioctl(fd, RAID_VERSION, &vers) != 0)
+			return;
+		name = fd2devnm(fd);
+	}
+
+	if (name != NULL)
+		strcpy(mdi->sys_name, name);
+}
+
+/* Collect information about an md array from /sys/block/<name>/md/.
+ *
+ * The array is identified by 'fd' (if >= 0) or else by 'devnm', as
+ * resolved by sysfs_init().  'options' is a mask of GET_* flags that
+ * selects which attributes are read; with GET_DEVS the dev-* member
+ * directories are scanned too and returned as the ->devs list, in
+ * which case GET_OFFSET, GET_SIZE, GET_STATE and GET_ERROR select the
+ * per-device attributes.
+ *
+ * Returns a newly allocated mdinfo (release it with sysfs_free()), or
+ * NULL if the array cannot be identified or a requested attribute
+ * cannot be read.
+ */
+struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
+{
+	char fname[PATH_MAX];
+	char buf[PATH_MAX];
+	char *base;
+	char *dbase;
+	struct mdinfo *sra;
+	struct mdinfo *dev, **devp;
+	DIR *dir = NULL;
+	struct dirent *de;
+
+	sra = xcalloc(1, sizeof(*sra));
+	sysfs_init(sra, fd, devnm);
+	if (sra->sys_name[0] == 0) {
+		free(sra);
+		return NULL;
+	}
+
+	/* 'base' marks where attribute names get appended to the path */
+	sprintf(fname, "/sys/block/%s/md/", sra->sys_name);
+	base = fname + strlen(fname);
+
+	sra->devs = NULL;
+	if (options & GET_VERSION) {
+		strcpy(base, "metadata_version");
+		if (load_sys(fname, buf))
+			goto abort;
+		if (strncmp(buf, "none", 4) == 0) {
+			sra->array.major_version =
+				sra->array.minor_version = -1;
+			strcpy(sra->text_version, "");
+		} else if (strncmp(buf, "external:", 9) == 0) {
+			sra->array.major_version = -1;
+			sra->array.minor_version = -2;
+			strcpy(sra->text_version, buf+9);
+		} else {
+			sscanf(buf, "%d.%d",
+			       &sra->array.major_version,
+			       &sra->array.minor_version);
+			strcpy(sra->text_version, buf);
+		}
+	}
+	if (options & GET_LEVEL) {
+		strcpy(base, "level");
+		if (load_sys(fname, buf))
+			goto abort;
+		sra->array.level = map_name(pers, buf);
+	}
+	if (options & GET_LAYOUT) {
+		strcpy(base, "layout");
+		if (load_sys(fname, buf))
+			goto abort;
+		sra->array.layout = strtoul(buf, NULL, 0);
+	}
+	if (options & GET_DISKS) {
+		strcpy(base, "raid_disks");
+		if (load_sys(fname, buf))
+			goto abort;
+		sra->array.raid_disks = strtoul(buf, NULL, 0);
+	}
+	if (options & GET_DEGRADED) {
+		strcpy(base, "degraded");
+		if (load_sys(fname, buf))
+			goto abort;
+		sra->array.failed_disks = strtoul(buf, NULL, 0);
+	}
+	if (options & GET_COMPONENT) {
+		strcpy(base, "component_size");
+		if (load_sys(fname, buf))
+			goto abort;
+		sra->component_size = strtoull(buf, NULL, 0);
+		/* sysfs reports "K", but we want sectors */
+		sra->component_size *= 2;
+	}
+	if (options & GET_CHUNK) {
+		strcpy(base, "chunk_size");
+		if (load_sys(fname, buf))
+			goto abort;
+		sra->array.chunk_size = strtoul(buf, NULL, 0);
+	}
+	if (options & GET_CACHE) {
+		strcpy(base, "stripe_cache_size");
+		if (load_sys(fname, buf))
+			/* Probably level doesn't support it */
+			sra->cache_size = 0;
+		else
+			sra->cache_size = strtoul(buf, NULL, 0);
+	}
+	if (options & GET_MISMATCH) {
+		strcpy(base, "mismatch_cnt");
+		if (load_sys(fname, buf))
+			goto abort;
+		sra->mismatch_cnt = strtoul(buf, NULL, 0);
+	}
+	if (options & GET_SAFEMODE) {
+		int scale = 1;
+		int dot = 0;
+		unsigned i;
+		unsigned long msec;
+		size_t len;
+
+		strcpy(base, "safe_mode_delay");
+		if (load_sys(fname, buf))
+			goto abort;
+
+		/* remove a period, and count digits after it */
+		len = strlen(buf);
+		for (i = 0; i < len; i++) {
+			if (dot) {
+				if (isdigit(buf[i])) {
+					buf[i-1] = buf[i];
+					scale *= 10;
+				}
+				buf[i] = 0;
+			} else if (buf[i] == '.') {
+				dot=1;
+				buf[i] = 0;
+			}
+		}
+		/* value is seconds scaled by 'scale'; store milliseconds */
+		msec = strtoul(buf, NULL, 10);
+		msec = (msec * 1000) / scale;
+		sra->safe_mode_delay = msec;
+	}
+	if (options & GET_BITMAP_LOCATION) {
+		strcpy(base, "bitmap/location");
+		if (load_sys(fname, buf))
+			goto abort;
+		if (strncmp(buf, "file", 4) == 0)
+			sra->bitmap_offset = 1;
+		else if (strncmp(buf, "none", 4) == 0)
+			sra->bitmap_offset = 0;
+		else if (buf[0] == '+')
+			sra->bitmap_offset = strtol(buf+1, NULL, 10);
+		else
+			goto abort;
+	}
+
+	if (! (options & GET_DEVS))
+		return sra;
+
+	/* Get all the devices as well */
+	*base = 0;
+	dir = opendir(fname);
+	if (!dir)
+		goto abort;
+	sra->array.spare_disks = 0;
+
+	devp = &sra->devs;
+	sra->devs = NULL;
+	while ((de = readdir(dir)) != NULL) {
+		char *ep;
+		if (de->d_ino == 0 ||
+		    strncmp(de->d_name, "dev-", 4) != 0)
+			continue;
+		/* 'dbase' marks where per-device attribute names go */
+		strcpy(base, de->d_name);
+		dbase = base + strlen(base);
+		*dbase++ = '/';
+
+		dev = xmalloc(sizeof(*dev));
+
+		/* Always get slot, major, minor */
+		strcpy(dbase, "slot");
+		if (load_sys(fname, buf)) {
+			/* hmm... unable to read 'slot' maybe the device
+			 * is going away?
+			 */
+			strcpy(dbase, "block");
+			if (readlink(fname, buf, sizeof(buf)) < 0 &&
+			    errno != ENAMETOOLONG) {
+				/* ...yup device is gone */
+				free(dev);
+				continue;
+			} else {
+				/* slot is unreadable but 'block' link
+				 * still intact... something bad is happening
+				 * so abort
+				 */
+				free(dev);
+				goto abort;
+			}
+
+		}
+		strcpy(dev->sys_name, de->d_name);
+		dev->disk.raid_disk = strtoul(buf, &ep, 10);
+		/* a non-numeric slot is recorded as -1 */
+		if (*ep) dev->disk.raid_disk = -1;
+
+		strcpy(dbase, "block/dev");
+		if (load_sys(fname, buf)) {
+			/* assume this is a stale reference to a hot
+			 * removed device
+			 */
+			free(dev);
+			continue;
+		}
+		sra->array.nr_disks++;
+		sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
+
+		/* special case check for block devices that can go 'offline' */
+		strcpy(dbase, "block/device/state");
+		if (load_sys(fname, buf) == 0 &&
+		    strncmp(buf, "offline", 7) == 0) {
+			free(dev);
+			continue;
+		}
+
+		/* finally add this disk to the array */
+		*devp = dev;
+		devp = & dev->next;
+		dev->next = NULL;
+
+		if (options & GET_OFFSET) {
+			strcpy(dbase, "offset");
+			if (load_sys(fname, buf))
+				goto abort;
+			dev->data_offset = strtoull(buf, NULL, 0);
+			strcpy(dbase, "new_offset");
+			if (load_sys(fname, buf) == 0)
+				dev->new_data_offset = strtoull(buf, NULL, 0);
+			else
+				dev->new_data_offset = dev->data_offset;
+		}
+		if (options & GET_SIZE) {
+			strcpy(dbase, "size");
+			if (load_sys(fname, buf))
+				goto abort;
+			dev->component_size = strtoull(buf, NULL, 0) * 2;
+		}
+		if (options & GET_STATE) {
+			dev->disk.state = 0;
+			strcpy(dbase, "state");
+			if (load_sys(fname, buf))
+				goto abort;
+			if (strstr(buf, "in_sync"))
+				dev->disk.state |= (1<<MD_DISK_SYNC);
+			if (strstr(buf, "faulty"))
+				dev->disk.state |= (1<<MD_DISK_FAULTY);
+			if (dev->disk.state == 0)
+				sra->array.spare_disks++;
+		}
+		if (options & GET_ERROR) {
+			/* The attribute name must go into the path, not
+			 * into the read buffer; otherwise the previously
+			 * selected attribute is read again and parsed as
+			 * the error count.
+			 */
+			strcpy(dbase, "errors");
+			if (load_sys(fname, buf))
+				goto abort;
+			dev->errors = strtoul(buf, NULL, 0);
+		}
+	}
+	closedir(dir);
+	return sra;
+
+ abort:
+	if (dir)
+		closedir(dir);
+	sysfs_free(sra);
+	return NULL;
+}
+
+/* Does 'attr', as read from a sysfs file, match 'str'?
+ * It matches when it equals 'str', or when it starts with 'str' and
+ * the next character is a comma or a newline.
+ * Returns 1 on a match, 0 otherwise.
+ */
+int sysfs_attr_match(const char *attr, const char *str)
+{
+	size_t i = 0;
+
+	/* walk over the common prefix */
+	while (str[i] != '\0' && attr[i] == str[i])
+		i++;
+
+	if (str[i] != '\0')
+		return 0;	/* 'str' was not fully consumed */
+
+	switch (attr[i]) {
+	case '\0':
+	case ',':
+	case '\n':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Return the index of the first entry in the NULL-terminated 'list'
+ * that 'word' matches according to sysfs_attr_match(), or the number
+ * of entries in the list if none matches.
+ */
+int sysfs_match_word(const char *word, char **list)
+{
+	int idx = 0;
+
+	while (list[idx] != NULL && !sysfs_attr_match(word, list[idx]))
+		idx++;
+	return idx;
+}
+
+/* Return the component size, in sectors, of the md array open on 'fd'.
+ *
+ * The GET_ARRAY_INFO ioctl cannot be trusted for this as its size
+ * field is only 32 bits wide, so the value is taken from
+ * /sys/block/mdXXX/md/component_size (or md_dXXX when the device has
+ * the mdp major number) and doubled.
+ * Returns 0 if anything goes wrong.
+ */
+unsigned long long get_component_size(int fd)
+{
+	struct stat stb;
+	char path[50];
+	char text[50];
+	int sysfd;
+	int got;
+
+	if (fstat(fd, &stb))
+		return 0;
+
+	if (major(stb.st_rdev) == (unsigned)get_mdp_major())
+		sprintf(path, "/sys/block/md_d%d/md/component_size",
+			(int)minor(stb.st_rdev)>>MdpMinorShift);
+	else
+		sprintf(path, "/sys/block/md%d/md/component_size",
+			(int)minor(stb.st_rdev));
+
+	sysfd = open(path, O_RDONLY);
+	if (sysfd < 0)
+		return 0;
+	got = read(sysfd, text, sizeof(text));
+	close(sysfd);
+	/* a completely filled buffer leaves no room for the terminator */
+	if (got < 0 || got == sizeof(text))
+		return 0;
+	text[got] = 0;
+	return strtoull(text, NULL, 10) * 2;
+}
+
+/* Write the string 'val' to a sysfs attribute.
+ *
+ * The attribute is /sys/block/<sra->sys_name>/md/<dev->sys_name>/<name>;
+ * with dev == NULL the device component of the path is empty.
+ *
+ * Returns 0 on success and -1 on failure.  On failure errno is the
+ * one reported by open() or write(); it is no longer overwritten by
+ * the close() that follows.  A path too long for the buffer fails
+ * with ENAMETOOLONG rather than overrunning it.
+ */
+int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
+		  char *name, char *val)
+{
+	char fname[PATH_MAX];
+	size_t len = strlen(val);
+	ssize_t n;
+	int plen;
+	int fd;
+	int err;
+
+	plen = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s/%s",
+			sra->sys_name, dev?dev->sys_name:"", name);
+	if (plen < 0 || (size_t)plen >= sizeof(fname)) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+	fd = open(fname, O_WRONLY);
+	if (fd < 0)
+		return -1;
+	n = write(fd, val, len);
+	/* remember why write() failed before close() can change errno */
+	err = errno;
+	close(fd);
+	if (n < 0 || (size_t)n != len) {
+		dprintf(Name ": failed to write '%s' to '%s' (%s)\n",
+			val, fname, strerror(err));
+		errno = err;
+		return -1;
+	}
+	return 0;
+}
+
+/* Write the unsigned number 'val', in decimal, to the sysfs
+ * attribute 'name' via sysfs_set_str(); returns its result.
+ */
+int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev,
+		  char *name, unsigned long long val)
+{
+	char text[50];
+
+	snprintf(text, sizeof(text), "%llu", val);
+	return sysfs_set_str(sra, dev, name, text);
+}
+
+/* Write the signed number 'val', in decimal, to the sysfs
+ * attribute 'name' via sysfs_set_str(); returns its result.
+ */
+int sysfs_set_num_signed(struct mdinfo *sra, struct mdinfo *dev,
+			 char *name, long long val)
+{
+	char text[50];
+
+	snprintf(text, sizeof(text), "%lli", val);
+	return sysfs_set_str(sra, dev, name, text);
+}
+
+/* Write 'event' to /sys/block/<sra->sys_name>/uevent.
+ *
+ * Returns 0 on success and -1 on failure.  The errno of a failed
+ * write() is saved before close() so that the reported reason is the
+ * real one, and an over-long path fails with ENAMETOOLONG instead of
+ * overrunning the buffer.
+ */
+int sysfs_uevent(struct mdinfo *sra, char *event)
+{
+	char fname[PATH_MAX];
+	int plen;
+	int n;
+	int fd;
+	int err;
+
+	plen = snprintf(fname, sizeof(fname), "/sys/block/%s/uevent",
+			sra->sys_name);
+	if (plen < 0 || (size_t)plen >= sizeof(fname)) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+	fd = open(fname, O_WRONLY);
+	if (fd < 0)
+		return -1;
+	n = write(fd, event, strlen(event));
+	err = errno;
+	close(fd);
+	if (n != (int)strlen(event)) {
+		dprintf(Name ": failed to write '%s' to '%s' (%s)\n",
+			event, fname, strerror(err));
+		errno = err;
+		return -1;
+	}
+	return 0;
+}
+
+/* Report whether the sysfs attribute
+ * /sys/block/<sra->sys_name>/md/<dev->sys_name>/<name> exists (the
+ * device component is empty when dev is NULL).
+ * Returns 1 if stat() succeeds on it, 0 otherwise; a path too long
+ * for the buffer counts as not available.
+ */
+int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev, char *name)
+{
+	char fname[PATH_MAX];
+	struct stat st;
+	int plen;
+
+	plen = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s/%s",
+			sra->sys_name, dev?dev->sys_name:"", name);
+	if (plen < 0 || (size_t)plen >= sizeof(fname))
+		return 0;
+
+	return stat(fname, &st) == 0;
+}
+
+/* Open the sysfs attribute
+ * /sys/block/<sra->sys_name>/md/<dev->sys_name>/<name> (the device
+ * component is empty when dev is NULL), read-write if possible and
+ * read-only otherwise.
+ * Returns the file descriptor, or a negative value on failure; a path
+ * too long for the buffer fails with errno set to ENAMETOOLONG.
+ */
+int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev,
+		 char *name)
+{
+	char fname[PATH_MAX];
+	int plen;
+	int fd;
+
+	plen = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s/%s",
+			sra->sys_name, dev?dev->sys_name:"", name);
+	if (plen < 0 || (size_t)plen >= sizeof(fname)) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+	fd = open(fname, O_RDWR);
+	if (fd < 0)
+		fd = open(fname, O_RDONLY);
+	return fd;
+}
+
+/* Read one unsigned number from the start of the open sysfs file 'fd'.
+ *
+ * The number is parsed with base auto-detection and may be followed
+ * only by end of data, a newline or a space.
+ * Returns 0 and stores the value in *val on success, -2 if nothing
+ * could be read or the content fills the whole buffer (leaving no room
+ * for the terminator), -1 if the content is not a number.
+ */
+int sysfs_fd_get_ll(int fd, unsigned long long *val)
+{
+	char buf[50];
+	int n;
+	char *ep;
+
+	lseek(fd, 0, 0);
+	n = read(fd, buf, sizeof(buf));
+	/* n == sizeof(buf) would put the terminator past the buffer */
+	if (n <= 0 || n == (int)sizeof(buf))
+		return -2;
+	buf[n] = 0;
+	*val = strtoull(buf, &ep, 0);
+	if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))
+		return -1;
+	return 0;
+}
+
+/* Open the sysfs attribute 'name' with sysfs_get_fd() and parse one
+ * unsigned number from it with sysfs_fd_get_ll().
+ * Returns -1 if the attribute cannot be opened, otherwise whatever
+ * sysfs_fd_get_ll() returns.
+ */
+int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
+		 char *name, unsigned long long *val)
+{
+	int rv = -1;
+	int fd = sysfs_get_fd(sra, dev, name);
+
+	if (fd >= 0) {
+		rv = sysfs_fd_get_ll(fd, val);
+		close(fd);
+	}
+	return rv;
+}
+
+/* Read up to two unsigned numbers from the start of the open sysfs
+ * file 'fd'.  The accepted layouts are
+ *    NNN (NNN)
+ * or
+ *    NNN / NNN
+ *
+ * Returns 2 when both numbers were found, 1 when only the first was
+ * (then *v2 is set equal to *v1), -1 if the first is not a number,
+ * and -2 if nothing could be read or the content fills the whole
+ * buffer (leaving no room for the terminator).
+ */
+int sysfs_fd_get_two(int fd, unsigned long long *v1, unsigned long long *v2)
+{
+	char buf[80];
+	int n;
+	char *ep, *ep2;
+
+	lseek(fd, 0, 0);
+	n = read(fd, buf, sizeof(buf));
+	/* n == sizeof(buf) would put the terminator past the buffer */
+	if (n <= 0 || n == (int)sizeof(buf))
+		return -2;
+	buf[n] = 0;
+	*v1 = strtoull(buf, &ep, 0);
+	if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))
+		return -1;
+	/* step over the separator between the two numbers */
+	while (*ep == ' ' || *ep == '/' || *ep == '(')
+		ep++;
+	*v2 = strtoull(ep, &ep2, 0);
+	if (ep2 == ep || (*ep2 != 0 && *ep2 != '\n' && *ep2 != ' ' && *ep2 != ')')) {
+		*v2 = *v1;
+		return 1;
+	}
+	return 2;
+}
+
+/* Open the sysfs attribute 'name' with sysfs_get_fd() and parse up
+ * to two numbers from it with sysfs_fd_get_two().
+ * Returns -1 if the attribute cannot be opened, otherwise whatever
+ * sysfs_fd_get_two() returns.
+ */
+int sysfs_get_two(struct mdinfo *sra, struct mdinfo *dev,
+		  char *name, unsigned long long *v1, unsigned long long *v2)
+{
+	int rv = -1;
+	int fd = sysfs_get_fd(sra, dev, name);
+
+	if (fd >= 0) {
+		rv = sysfs_fd_get_two(fd, v1, v2);
+		close(fd);
+	}
+	return rv;
+}
+
+/* Read the content of the open sysfs file 'fd', from its start, into
+ * 'val' as a NUL-terminated string.  'size' is the size of 'val' in
+ * bytes, terminator included.
+ *
+ * Returns the number of bytes read (> 0), or -1 if nothing could be
+ * read or the content does not fit together with its terminator.
+ */
+int sysfs_fd_get_str(int fd, char *val, int size)
+{
+	int n;
+
+	lseek(fd, 0, 0);
+	n = read(fd, val, size);
+	/* n == size would put the terminator one byte past 'val' */
+	if (n <= 0 || n >= size)
+		return -1;
+	val[n] = 0;
+	return n;
+}
+
+/* Open the sysfs attribute 'name' with sysfs_get_fd() and read its
+ * content into 'val' (of 'size' bytes) with sysfs_fd_get_str().
+ * Returns -1 if the attribute cannot be opened, otherwise whatever
+ * sysfs_fd_get_str() returns.
+ */
+int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
+		  char *name, char *val, int size)
+{
+	int rv = -1;
+	int fd = sysfs_get_fd(sra, dev, name);
+
+	if (fd >= 0) {
+		rv = sysfs_fd_get_str(fd, val, size);
+		close(fd);
+	}
+	return rv;
+}
+
+/* Set the array's "safe_mode_delay" attribute to 'ms' milliseconds.
+ * The value is written as seconds with three decimals, for example
+ * 1500 becomes "1.500\n".
+ * Returns the result of sysfs_set_str().
+ */
+int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms)
+{
+	unsigned long sec;
+	unsigned long msec;
+	char delay[30];
+
+	sec = ms / 1000;
+	msec = ms % 1000;
+
+	/* both values are unsigned long, so they need %lu, not %ld */
+	sprintf(delay, "%lu.%03lu\n", sec, msec);
+	/*               this '\n' ^ needed for kernels older than 2.6.28 */
+	return sysfs_set_str(sra, NULL, "safe_mode_delay", delay);
+}
+
+/* Push the array description in 'info' to the kernel through sysfs:
+ * metadata version (external metadata only), level, raid_disks,
+ * chunk_size, layout, component_size, optionally array_size and
+ * resync_start, and the reshape parameters when a reshape is active.
+ *
+ * 'vers' is the kernel version number; external metadata is refused
+ * when (vers % 100) < 2.
+ *
+ * Returns 0 on success, 1 if external metadata is not supported, or
+ * the OR of the results of the individual sysfs writes.
+ */
+int sysfs_set_array(struct mdinfo *info, int vers)
+{
+ int rv = 0;
+ char ver[100];
+ int raid_disks = info->array.raid_disks;
+
+ ver[0] = 0;
+ /* major -1 / minor -2 is how external metadata is marked */
+ if (info->array.major_version == -1 &&
+ info->array.minor_version == -2) {
+ char buf[1024];
+
+ strcat(strcpy(ver, "external:"), info->text_version);
+
+ /* meta version might already be set if we are setting
+ * new geometry for a reshape. In that case we don't
+ * want to over-write the 'readonly' flag that is
+ * stored in the metadata version. So read the current
+ * version first, and preserve the flag
+ */
+ if (sysfs_get_str(info, NULL, "metadata_version",
+ buf, 1024) > 0)
+ if (strlen(buf) >= 9 && buf[9] == '-')
+ ver[9] = '-';
+
+ if ((vers % 100) < 2 ||
+ sysfs_set_str(info, NULL, "metadata_version",
+ ver) < 0) {
+ pr_err("This kernel does not "
+ "support external metadata.\n");
+ return 1;
+ }
+ }
+ if (info->array.level < 0)
+ return 0; /* FIXME */
+ rv |= sysfs_set_str(info, NULL, "level",
+ map_num(pers, info->array.level));
+ /* during a reshape, first report the disk count from before the change */
+ if (info->reshape_active && info->delta_disks != UnSet)
+ raid_disks -= info->delta_disks;
+ rv |= sysfs_set_num(info, NULL, "raid_disks", raid_disks);
+ rv |= sysfs_set_num(info, NULL, "chunk_size", info->array.chunk_size);
+ rv |= sysfs_set_num(info, NULL, "layout", info->array.layout);
+ rv |= sysfs_set_num(info, NULL, "component_size", info->component_size/2);
+ if (info->custom_array_size) {
+ int rc;
+
+ rc = sysfs_set_num(info, NULL, "array_size",
+ info->custom_array_size/2);
+ /* a missing attribute is only warned about, not treated as failure */
+ if (rc && errno == ENOENT) {
+ pr_err("This kernel does not "
+ "have the md/array_size attribute, "
+ "the array may be larger than expected\n");
+ rc = 0;
+ }
+ rv |= rc;
+ }
+
+ if (info->array.level > 0)
+ rv |= sysfs_set_num(info, NULL, "resync_start", info->resync_start);
+
+ if (info->reshape_active) {
+ rv |= sysfs_set_num(info, NULL, "reshape_position",
+ info->reshape_progress);
+ rv |= sysfs_set_num(info, NULL, "chunk_size", info->new_chunk);
+ rv |= sysfs_set_num(info, NULL, "layout", info->new_layout);
+ rv |= sysfs_set_num(info, NULL, "raid_disks",
+ info->array.raid_disks);
+ /* We don't set 'new_level' here. That can only happen
+ * once the reshape completes.
+ */
+ }
+ return rv;
+}
+
+/* Add the device described by 'sd' to the array 'sra' through sysfs.
+ *
+ * Writes "major:minor" to new_dev, fills in sd->sys_name as
+ * "dev-<kernel name>", then sets offset and size and, unless the
+ * array is a container, the state, slot and recovery_start attributes.
+ * With 'resume' set and a recovery in progress, the device is removed
+ * again and -1 returned if 'recovery_start' cannot be written.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
+{
+ char dv[PATH_MAX];
+ char nm[PATH_MAX];
+ char *dname;
+ int rv;
+
+ sprintf(dv, "%d:%d", sd->disk.major, sd->disk.minor);
+ rv = sysfs_set_str(sra, NULL, "new_dev", dv);
+ if (rv)
+ return rv;
+
+ /* NOTE(review): 'nm' is cleared here but not used afterwards */
+ memset(nm, 0, sizeof(nm));
+ /* NOTE(review): assumes devid2kname() never returns NULL - confirm */
+ dname = devid2kname(makedev(sd->disk.major, sd->disk.minor));
+ strcpy(sd->sys_name, "dev-");
+ strcpy(sd->sys_name+4, dname);
+
+ /* test write to see if 'recovery_start' is available */
+ if (resume && sd->recovery_start < MaxSector &&
+ sysfs_set_num(sra, sd, "recovery_start", 0)) {
+ sysfs_set_str(sra, sd, "state", "remove");
+ return -1;
+ }
+
+ rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
+ /* component_size is halved, rounding up, before being written */
+ rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
+ if (sra->array.level != LEVEL_CONTAINER) {
+ if (sd->recovery_start == MaxSector)
+ /* This can correctly fail if array isn't started,
+ * yet, so just ignore status for now.
+ */
+ sysfs_set_str(sra, sd, "state", "insync");
+ if (sd->disk.raid_disk >= 0)
+ rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk);
+ if (resume)
+ sysfs_set_num(sra, sd, "recovery_start", sd->recovery_start);
+ }
+ return rv;
+}
+
+#if 0
+/* NOTE: this function is compiled out by the surrounding "#if 0".
+ *
+ * Given an open block device, find its scsi_generic node through
+ * sysfs, create a temporary character device node for it under /dev
+ * and return a read-only descriptor for that node (the node itself
+ * is unlinked again straight away).  Returns a negative value on
+ * failure.
+ */
+int sysfs_disk_to_sg(int fd)
+{
+ /* from an open block device, try find and open its corresponding
+ * scsi_generic interface
+ */
+ struct stat st;
+ char path[256];
+ char sg_path[256];
+ char sg_major_minor[8];
+ char *c;
+ DIR *dir;
+ struct dirent *de;
+ int major, minor, rv;
+
+ if (fstat(fd, &st))
+ return -1;
+
+ snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device",
+ major(st.st_rdev), minor(st.st_rdev));
+
+ dir = opendir(path);
+ if (!dir)
+ return -1;
+
+ /* look for the "scsi_generic:..." entry */
+ de = readdir(dir);
+ while (de) {
+ if (strncmp("scsi_generic:", de->d_name,
+ strlen("scsi_generic:")) == 0)
+ break;
+ de = readdir(dir);
+ }
+ closedir(dir);
+
+ if (!de)
+ return -1;
+
+ /* NOTE(review): 'de' is still used below, after closedir() */
+ snprintf(sg_path, sizeof(sg_path), "%s/%s/dev", path, de->d_name);
+ fd = open(sg_path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ rv = read(fd, sg_major_minor, sizeof(sg_major_minor));
+ close(fd);
+ if (rv < 0)
+ return -1;
+ else
+ /* NOTE(review): rv == 0 would index sg_major_minor[-1] */
+ sg_major_minor[rv - 1] = '\0';
+
+ /* split "major:minor" at the colon */
+ c = strchr(sg_major_minor, ':');
+ *c = '\0';
+ c++;
+ major = strtol(sg_major_minor, NULL, 10);
+ minor = strtol(c, NULL, 10);
+ snprintf(path, sizeof(path), "/dev/.tmp.md.%d:%d:%d",
+ (int) getpid(), major, minor);
+ if (mknod(path, S_IFCHR|0600, makedev(major, minor))==0) {
+ fd = open(path, O_RDONLY);
+ unlink(path);
+ return fd;
+ }
+
+ return -1;
+}
+#endif
+
+/* From an open block device, work out its SCSI address.
+ *
+ * Looks in /sys/dev/block/<major>:<minor>/device/scsi_device for a
+ * directory named "host:bus:target:lun" and packs the four numbers
+ * into *id as (host << 24) | (bus << 16) | (target << 8) | lun.
+ * Returns 0 on success, 1 on failure (*id is then left untouched).
+ */
+int sysfs_disk_to_scsi_id(int fd, __u32 *id)
+{
+	struct stat st;
+	char path[256];
+	DIR *dir;
+	struct dirent *de;
+	int host, bus, target, lun;
+	int found = 0;
+
+	if (fstat(fd, &st))
+		return 1;
+
+	snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device/scsi_device",
+		 major(st.st_rdev), minor(st.st_rdev));
+
+	dir = opendir(path);
+	if (!dir)
+		return 1;
+
+	while (!found && (de = readdir(dir)) != NULL) {
+		/* only directories can carry the address */
+		if (de->d_type != DT_DIR)
+			continue;
+		if (sscanf(de->d_name, "%d:%d:%d:%d",
+			   &host, &bus, &target, &lun) == 4)
+			found = 1;
+	}
+	closedir(dir);
+
+	if (!found)
+		return 1;
+
+	*id = (host << 24) | (bus << 16) | (target << 8) | (lun << 0);
+	return 0;
+}
+
+/* Check the holders of block device 'rdev' as listed in
+ * /sys/dev/block/<major>:<minor>/holders.  Each entry is resolved
+ * with readlink() and the last path component of the link target is
+ * compared with 'devnm'.  Bit 0 of the result is set when 'devnm' is
+ * found, bit 1 when any other holder is found.
+ */
+int sysfs_unique_holder(char *devnm, long rdev)
+{
+ /* Check that devnm is a holder of rdev,
+ * and is the only holder.
+ * we should be locked against races by
+ * an O_EXCL on devnm
+ * Return values:
+ * 0 - not unique, not even a holder
+ * 1 - unique, this is the only holder.
+ * 2/3 - not unique, there is another holder
+ * -1 - error, cannot find the holders
+ */
+ DIR *dir;
+ struct dirent *de;
+ char dirname[100];
+ char l;
+ int ret = 0;
+ sprintf(dirname, "/sys/dev/block/%d:%d/holders",
+ major(rdev), minor(rdev));
+ dir = opendir(dirname);
+ if (!dir)
+ return -1;
+ /* remember where the directory name ends so that each entry
+ * name can be appended at the same place
+ */
+ l = strlen(dirname);
+ while ((de = readdir(dir)) != NULL) {
+ char buf[100];
+ char *sl;
+ int n;
+
+ if (de->d_ino == 0)
+ continue;
+ /* skip ".", ".." and other dot entries */
+ if (de->d_name[0] == '.')
+ continue;
+ /* NOTE(review): d_name is appended to the 100-byte
+ * 'dirname' without a length check - confirm it always fits
+ */
+ strcpy(dirname+l, "/");
+ strcat(dirname+l, de->d_name);
+ n = readlink(dirname, buf, sizeof(buf)-1);
+ if (n <= 0)
+ continue;
+ buf[n] = 0;
+ /* the holder's name is the last component of the link target */
+ sl = strrchr(buf, '/');
+ if (!sl)
+ continue;
+ sl++;
+
+ if (strcmp(devnm, sl) == 0)
+ ret |= 1;
+ else
+ ret |= 2;
+ }
+ closedir(dir);
+ return ret;
+}
+
+/* Try to freeze resync/rebuild on this array/container by writing
+ * "frozen" to its sync_action attribute.
+ *
+ * Returns  1 if it worked, or if there is no sync_action attribute
+ *            at all (which counts as frozen),
+ *          0 if sync_action cannot be read, already reads "frozen",
+ *            or "frozen" cannot be written,
+ *         -1 if the array is busy, i.e. sync_action is neither
+ *            "idle" nor "recover".
+ */
+int sysfs_freeze_array(struct mdinfo *sra)
+{
+	char action[20];
+	int idle, recovering;
+
+	if (!sysfs_attribute_available(sra, NULL, "sync_action"))
+		return 1; /* no sync_action == frozen */
+
+	if (sysfs_get_str(sra, NULL, "sync_action", action, 20) <= 0)
+		return 0;
+
+	if (!strcmp(action, "frozen\n"))
+		/* Already frozen */
+		return 0;
+
+	idle = !strcmp(action, "idle\n");
+	recovering = !strcmp(action, "recover\n");
+	if (!idle && !recovering)
+		return -1;
+
+	return sysfs_set_str(sra, NULL, "sync_action", "frozen") < 0 ? 0 : 1;
+}
+
+/* Wait with select() for an exceptional condition on 'fd'.
+ *
+ * msec == NULL: wait without a timeout.
+ * *msec < 0: do not wait at all and return 0.
+ * otherwise: wait for at most about *msec milliseconds, then reduce
+ * *msec by the measured elapsed time plus one.
+ *
+ * Returns the result of select(), or 0 when *msec was negative.
+ */
+int sysfs_wait(int fd, int *msec)
+{
+ /* Wait up to '*msec' for fd to have an exception condition.
+ * if msec == NULL, wait indefinitely.
+ */
+ fd_set fds;
+ int n;
+ FD_ZERO(&fds);
+ FD_SET(fd, &fds);
+ if (msec == NULL)
+ n = select(fd+1, NULL, NULL, &fds, NULL);
+ else if (*msec < 0)
+ n = 0;
+ else {
+ struct timeval start, end, tv;
+ gettimeofday(&start, NULL);
+ /* NOTE(review): for *msec >= 1000 only whole seconds are
+ * used as the timeout; the remainder is dropped
+ */
+ if (*msec < 1000) {
+ tv.tv_sec = 0;
+ tv.tv_usec = (*msec)*1000;
+ } else {
+ tv.tv_sec = (*msec)/1000;
+ tv.tv_usec = 0;
+ }
+ n = select(fd+1, NULL, NULL, &fds, &tv);
+ gettimeofday(&end, NULL);
+ /* charge the elapsed milliseconds, plus one, to the caller's budget */
+ end.tv_sec -= start.tv_sec;
+ *msec -= (end.tv_sec * 1000 + end.tv_usec/1000
+ - start.tv_usec/1000) + 1;
+ }
+ return n;
+}
diff --git a/systemd/SUSE-mdadm_env.sh b/systemd/SUSE-mdadm_env.sh
new file mode 100644
index 00000000..10b2e749
--- /dev/null
+++ b/systemd/SUSE-mdadm_env.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+# extract configuration from /etc/sysconfig/mdadm and write
+# environment to /run/sysconfig/mdadm to be used by
+# systemd unit files.
+#
+# Each optional setting is turned into the matching mdadm option.
+# All expansions inside '[ ]' are quoted so that a value containing
+# whitespace does not make the test fail with "too many arguments".
+
+MDADM_SCAN="yes"
+
+# Following adapted from /etc/init.d/mdadmd on openSUSE
+
+mdadmd_CONFIG=/etc/sysconfig/mdadm
+if test -r "$mdadmd_CONFIG"; then
+	. "$mdadmd_CONFIG"
+fi
+
+if [ "x$MDADM_DELAY" != "x" ]; then
+	MDADM_DELAY="-d $MDADM_DELAY"
+fi
+
+if [ "x$MDADM_MAIL" != "x" ]; then
+	MDADM_MAIL="-m \"$MDADM_MAIL\""
+fi
+
+if [ "x$MDADM_PROGRAM" != "x" ]; then
+	MDADM_PROGRAM="-p \"$MDADM_PROGRAM\""
+fi
+
+if [ "x$MDADM_SCAN" = "xyes" ]; then
+	MDADM_SCAN="--scan"
+else
+	MDADM_SCAN=""
+fi
+
+if [ "x$MDADM_SEND_MAIL_ON_START" = "xyes" ]; then
+	MDADM_SEND_MAIL="-t"
+else
+	MDADM_SEND_MAIL=""
+fi
+
+if [ "x$MDADM_CONFIG" != "x" ]; then
+	MDADM_CONFIG="-c \"$MDADM_CONFIG\""
+fi
+
+mkdir -p /run/sysconfig
+echo "MDADM_MONITOR_ARGS=$MDADM_RAIDDEVICES $MDADM_DELAY $MDADM_MAIL $MDADM_PROGRAM $MDADM_SCAN $MDADM_SEND_MAIL $MDADM_CONFIG" > /run/sysconfig/mdadm
diff --git a/systemd/mdadm-grow-continue@.service b/systemd/mdadm-grow-continue@.service
new file mode 100644
index 00000000..5c667d2a
--- /dev/null
+++ b/systemd/mdadm-grow-continue@.service
@@ -0,0 +1,17 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# Template unit: one instance per array. systemd expands %I to the
+# unescaped instance name, so instance "md0" operates on /dev/md0.
+# NOTE(review): BINDIR looks like a placeholder that is replaced with
+# the real install directory at install time - confirm in the Makefile.
+[Unit]
+Description=Manage MD Reshape on /dev/%I
+DefaultDependencies=no
+
+[Service]
+ExecStart=BINDIR/mdadm --grow --continue /dev/%I
+# All three standard streams are connected to /dev/null.
+StandardInput=null
+StandardOutput=null
+StandardError=null
+# With KillMode=none systemd does not kill the process when the unit
+# is stopped.
+KillMode=none
diff --git a/systemd/mdadm-last-resort@.service b/systemd/mdadm-last-resort@.service
new file mode 100644
index 00000000..5179f194
--- /dev/null
+++ b/systemd/mdadm-last-resort@.service
@@ -0,0 +1,7 @@
+# Template unit: %i is the instance name, i.e. the md device name
+# under /dev. Runs "mdadm --run" once on that device and exits.
+[Unit]
+Description=Activate md array even though degraded
+DefaultDependencies=no
+
+[Service]
+Type=oneshot
+ExecStart=BINDIR/mdadm --run /dev/%i
diff --git a/systemd/mdadm-last-resort@.timer b/systemd/mdadm-last-resort@.timer
new file mode 100644
index 00000000..52b3f227
--- /dev/null
+++ b/systemd/mdadm-last-resort@.timer
@@ -0,0 +1,7 @@
+# Template timer: fires 30 seconds after the timer itself is activated.
+# By systemd convention it triggers the service unit with the same
+# name and instance.
+[Unit]
+Description=Timer to wait for more drives before activating degraded array.
+DefaultDependencies=no
+# The timer conflicts with the block device unit for the array, so it
+# is stopped when that device unit becomes active.
+Conflicts=sys-devices-virtual-block-%i.device
+
+[Timer]
+OnActiveSec=30
diff --git a/systemd/mdadm.shutdown b/systemd/mdadm.shutdown
new file mode 100644
index 00000000..33f27783
--- /dev/null
+++ b/systemd/mdadm.shutdown
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Shutdown hook.
+# We need to ensure all md arrays with external metadata
+# (e.g. IMSM, DDF) are clean before completing the shutdown.
+# The exit status of this script is that of the mdadm command.
+BINDIR/mdadm --wait-clean --scan
diff --git a/systemd/mdmon@.service b/systemd/mdmon@.service
new file mode 100644
index 00000000..85a3a7c5
--- /dev/null
+++ b/systemd/mdmon@.service
@@ -0,0 +1,28 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# Template unit: %I is the unescaped instance name and is passed to
+# mdmon as the device to monitor.
+[Unit]
+Description=MD Metadata Monitor on /dev/%I
+DefaultDependencies=no
+Before=initrd-switch-root.target
+
+[Service]
+# mdmon should never complain due to lack of a platform,
+# that is mdadm's job if at all.
+Environment=IMSM_NO_PLATFORM=1
+# The mdmon starting in the initramfs (with dracut at least)
+# cannot see sysfs after root is mounted, so we will have to
+# 'takeover'. As the '--offroot --takeover' don't hurt when
+# not necessary, and are useful with root-on-md in dracut,
+# have them always present.
+ExecStart=BINDIR/mdmon --offroot --takeover %I
+Type=forking
+# Don't set the PIDFile. It isn't necessary (systemd can work
+# it out) and systemd will remove it when transitioning from
+# initramfs to rootfs.
+#PIDFile=/run/mdadm/%I.pid
+# With KillMode=none systemd does not kill mdmon when the unit is
+# stopped.
+KillMode=none
diff --git a/systemd/mdmonitor.service b/systemd/mdmonitor.service
new file mode 100644
index 00000000..9aff2f56
--- /dev/null
+++ b/systemd/mdmonitor.service
@@ -0,0 +1,13 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# Runs "mdadm --monitor --scan" in the foreground as a service.
+# NOTE(review): the arguments are fixed here; this unit does not read
+# the MDADM_MONITOR_ARGS file written by SUSE-mdadm_env.sh.
+[Unit]
+Description=MD array monitor
+DefaultDependencies=no
+
+[Service]
+ExecStart=BINDIR/mdadm --monitor --scan
diff --git a/test b/test
new file mode 100755
index 00000000..8e61f01c
--- /dev/null
+++ b/test
@@ -0,0 +1,402 @@
+#!/bin/bash
+#
+# run test suite for mdadm
+#
+# Must be run as root from the build directory, which is expected to
+# contain a freshly built ./mdadm binary.
+user=`id -un`
+if [ " $user" != " root" ]
+then echo >&2 "test: testing can only be done as 'root'."
+	exit 1;
+fi
+
+# default glob selecting which scripts in $testdir are run
+prefix='[0-9][0-9]'
+
+dir=`pwd`
+mdadm=$dir/mdadm
+if [ \! -x $mdadm ]
+then
+	# Nothing below can work without the binary, so stop here rather
+	# than letting every test fail with a confusing message.
+	echo >&2 "test: $mdadm isn't usable."
+	exit 1
+fi
+
+# Defaults; several of these can be overridden by parse_args.
+testdir="tests"
+logdir="$testdir/logs"
+# NOTE(review): 'logsave' is not referenced anywhere else in this
+# script; --save-logs sets 'savelogs' instead.
+logsave=0
+exitonerror=1
+
+echo "Testing on linux-$(uname -r) kernel"
+
+# Check whether to run multipath tests
+modprobe multipath 2> /dev/null
+if grep -s 'Personalities : .*multipath' > /dev/null /proc/mdstat ; then
+ MULTIPATH="yes"
+fi
+INTEGRITY=yes
+DEVTYPE=loop
+LVM_VOLGROUP=mdtest
+
+# make sure to test local mdmon, not system one
+export MDADM_NO_SYSTEMCTL=1
+
+# assume md0, md1, md2 exist in /dev
+md0=/dev/md0 md1=/dev/md1 md2=/dev/md2
+mdp0=/dev/md_d0
+mdp1=/dev/md_d1
+
+# We test mdadm on loop-back block devices.
+# dir for storing files should be settable by command line maybe
+targetdir=/var/tmp
+# size of each backing device, in K (used as the dd count with bs=1K)
+size=20000
+# super0, round down to multiple of 64 and subtract 64
+mdsize0=19904
+# super00 is nested, subtract 128
+mdsize00=19840
+# super1.0 round down to multiple of 2, subtract 8
+mdsize1=19992
+mdsize1a=19988
+mdsize12=19988
+# super1.2 for linear: round to multiple of 2, subtract 4
+mdsize1_l=19996
+mdsize2_l=19996
+# subtract another 4 for bitmaps
+mdsize1b=19988
+mdsize11=19992
+mdsize11a=19456
+# NOTE(review): mdsize12 is assigned a second time here, with the same value.
+mdsize12=19988
+
+# ddf needs bigger devices as 32Meg is reserved!
+ddfsize=65536
+
+config=/tmp/mdadm.conf
+
+# Tear down the test environment: stop every array and release the
+# backing devices created by do_setup for the current $DEVTYPE.
+# Nothing device-specific is done for DEVTYPE=ram.
+cleanup() {
+	udevadm settle
+	$mdadm -Ssq 2> /dev/null
+	case $DEVTYPE in
+	loop)
+		for d in 0 1 2 3 4 5 6 7 8 9 10 11 12 13
+		do
+			losetup -d /dev/loop$d ; # rm -f $targetdir/mdtest$d
+		done
+		# the glob does not depend on $d, so once after the loop is enough
+		rm -f /dev/disk/by-path/loop*
+		;;
+	lvm)
+		for d in 0 1 2 3 4 5 6 7 8 9 10 11 12 13
+		do
+			# $dev0..$dev13 hold the logical volume paths
+			eval "lvremove --quiet -f \$dev$d"
+		done
+		;;
+	esac
+}
+
+# Signal 2 (SIGINT) handler, installed by do_setup: re-enables
+# exit-on-error so that do_test stops the run at the next failing
+# test even if --keep-going was given.
+ctrl_c() {
+ exitonerror=1
+}
+
+# Create the 14 test devices dev0..dev13 for the selected $DEVTYPE and
+# prepare the kernel md settings.
+#   - devices 0-7 are $size K, devices 8-13 are $ddfsize K
+#   - sets $devN, $devlist (all devices) and $devlistN (devices 0..N)
+#   - sets $path0/$path1 to dev6/dev7 for the multipath test
+do_setup() {
+ # run cleanup on exit and on signals 1, 3 and 15; 2 only sets a flag
+ trap cleanup 0 1 3 15
+ trap ctrl_c 2
+
+ devlist=
+ for d in 0 1 2 3 4 5 6 7 8 9 10 11 12 13
+ do
+ sz=$size
+ if [ $d -gt 7 ]; then sz=$ddfsize ; fi
+ case $DEVTYPE in
+ loop)
+ # create the backing file and the loop node only if missing
+ [ -f $targetdir/mdtest$d ] || dd if=/dev/zero of=$targetdir/mdtest$d count=$sz bs=1K > /dev/null 2>&1
+ [ -b /dev/loop$d ] || mknod /dev/loop$d b 7 $d
+ if [ $d -eq 7 ]
+ then
+ # loop7 shares the backing file of loop6: two paths to one device
+ losetup /dev/loop$d $targetdir/mdtest6 # for multipath use
+ else
+ losetup /dev/loop$d $targetdir/mdtest$d
+ fi
+ eval dev$d=/dev/loop$d
+ eval file$d=$targetdir/mdtest$d
+ ;;
+ lvm)
+ unset MULTIPATH
+ eval dev$d=/dev/mapper/${LVM_VOLGROUP}-mdtest$d
+ if ! lvcreate --quiet -L ${sz}K -n mdtest$d $LVM_VOLGROUP; then
+ trap '' 0 # make sure lvremove is not called
+ eval echo error creating \$dev$d
+ exit 129
+ fi
+ ;;
+ ram)
+ unset MULTIPATH
+ eval dev$d=/dev/ram$d
+ ;;
+ esac
+ # append this device to the running list, then snapshot it as devlistN
+ eval devlist=\"\$devlist \$dev$d\"
+ eval devlist$d=\"\$devlist\"
+ #" <-- add this quote to un-confuse vim syntax highlighting
+ done
+ path0=$dev6
+ path1=$dev7
+
+ ulimit -c unlimited
+ [ -f /proc/mdstat ] || modprobe md_mod
+ # default resync speed limit; do_test restores this value before each test
+ echo 2000 > /proc/sys/dev/raid/speed_limit_max
+ echo 0 > /sys/module/md_mod/parameters/start_ro
+}
+
+# mdadm always adds --quiet, and we want to see any unexpected messages
+mdadm() {
+ rm -f $targetdir/stderr
+ case $* in
+ *-S* ) udevadm settle;;
+ esac
+ case $* in
+ *-C* ) $mdadm 2> $targetdir/stderr --quiet "$@" --auto=yes;;
+ * ) $mdadm 2> $targetdir/stderr --quiet "$@"
+ esac
+ rv=$?
+ case $* in
+ *-S* ) udevadm settle;;
+ esac
+ cat >&2 $targetdir/stderr
+ return $rv
+}
+
+# check various things
+# Usage: check <what> [<arg>]
+# Every check inspects /proc/mdstat. On failure an ERROR line is
+# written to stderr and the (sub)shell running the test exits with 1.
+#   spares N       - exactly N devices are marked "(S)"
+#   raid*|linear   - an array of that level is active
+#   algorithm N    - " algorithm N " is present
+#   resync|recovery|reshape - that word is present
+#   nosync         - no resync/recovery/reshape progress line is present
+#   wait           - block until no sync-type activity is left
+#   state XX       - a "blocks" line ends with "[XX]"
+#   bitmap|nobitmap - a bitmap is / is not reported
+check() {
+ case $1 in
+ spares )
+ # grep -c exits non-zero on a count of 0; '|| exit 0' hides that status
+ spares=`tr '] ' '\012\012' < /proc/mdstat | grep -c '(S)' || exit 0`
+ if [ $spares -ne $2 ]
+ then
+ echo >&2 "ERROR expected $2 spares, found $spares"; exit 1;
+ fi
+ ;;
+ raid* | linear )
+ grep -s "active $1 " /proc/mdstat > /dev/null || {
+ echo >&2 "ERROR active $1 not found" ; cat /proc/mdstat ; exit 1;}
+ ;;
+ algorithm )
+ grep -s " algorithm $2 " /proc/mdstat > /dev/null || {
+ echo >&2 "ERROR algorithm $2 not found"; cat /proc/mdstat; exit 1;}
+ ;;
+ resync | recovery | reshape)
+ sleep 0.5
+ grep -s $1 /proc/mdstat > /dev/null || {
+ echo >&2 ERROR no $1 happening; cat /proc/mdstat; exit 1; }
+ ;;
+
+ nosync )
+ sleep 0.5
+ if grep -s -E '(resync|recovery|reshape) *=' > /dev/null /proc/mdstat ; then
+ echo >&2 "ERROR resync or recovery is happening!"; cat /proc/mdstat ; exit 1;
+ fi
+ ;;
+
+ wait )
+ # raise the speed limit while waiting, then restore the saved value
+ p=`cat /proc/sys/dev/raid/speed_limit_max`
+ echo 2000000 > /proc/sys/dev/raid/speed_limit_max
+ sleep 0.1
+ while grep -E '(resync|recovery|reshape|check|repair) *=' > /dev/null /proc/mdstat
+ do sleep 0.5;
+ done
+ echo $p > /proc/sys/dev/raid/speed_limit_max
+ ;;
+
+ state )
+ grep -s "blocks.*\[$2\]\$" /proc/mdstat > /dev/null || {
+ echo >&2 "ERROR state $2 not found!"; cat /proc/mdstat ; exit 1; }
+ sleep 0.5
+ ;;
+
+ bitmap )
+ grep -s bitmap > /dev/null /proc/mdstat || {
+ echo >&2 ERROR no bitmap ; cat /proc/mdstat ; exit 1; }
+ ;;
+ nobitmap )
+ if grep -s "bitmap" > /dev/null /proc/mdstat
+ then
+ echo >&2 ERROR bitmap present ; cat /proc/mdstat ; exit 1;
+ fi
+ ;;
+
+ * ) echo >&2 ERROR unknown check $1 ; exit 1;
+ esac
+}
+
+no_errors() {
+ if [ -s $targetdir/stderr ]
+ then echo Bad errors from mdadm: ; cat $targetdir/stderr; exit 2;
+ fi
+}
+# basic device test
+
+testdev() {
+ udevadm settle
+ dev=$1
+ cnt=$2
+ dvsize=$3
+ chunk=$4
+ if [ -z "$5" ]; then
+ mkfs -j $dev > /dev/null 2>&1 && fsck -fn $dev >&2
+ fi
+ dsize=$[dvsize/chunk]
+ dsize=$[dsize*chunk]
+ rasize=$[dsize*2*cnt]
+ # rasize is in sectors
+ if [ -n "$DEV_ROUND_K" ]; then
+ rasize=$[rasize/DEV_ROUND_K/2]
+ rasize=$[rasize*DEV_ROUND_K*2]
+ fi
+ if [ `/sbin/blockdev --getsize $dev` -eq 0 ]; then sleep 2 ; fi
+ _sz=`/sbin/blockdev --getsize $dev`
+ if [ $rasize -lt $_sz -o $[rasize*4/5] -gt $_sz ]
+ then
+ echo "ERROR: size is wrong for $dev: $cnt * $dvsize (chunk=$chunk) = $rasize, not $_sz"
+ exit 1
+ fi
+}
+
+# Raise the md resync speed limit from the suite default of 2000 (set
+# in do_setup and restored by do_test) to 200000.
+fast_sync() {
+ echo 200000 > /proc/sys/dev/raid/speed_limit_max
+}
+
+# rotest <dev>: run a forced fsck on <dev> with -n (answer "no" to
+# every question), sending its output to stderr.
+# Sets the global 'dev'.
+rotest() {
+ dev=$1
+ fsck -fn $dev >&2
+}
+
+do_test() {
+ _script=$1
+ _basename=`basename $_script`
+ if [ -f "$_script" ]
+ then
+ rm -f $targetdir/stderr
+ # stop all arrays, just incase some script left an array active.
+ $mdadm -Ssq 2> /dev/null
+ mdadm --zero $devlist 2> /dev/null
+ mdadm --zero $devlist 2> /dev/null
+ # this might have been reset: restore the default.
+ echo 2000 > /proc/sys/dev/raid/speed_limit_max
+ # source script in a subshell, so it has access to our
+ # namespace, but cannot change it.
+ echo -ne "$_script... "
+ if ( set -ex ; . $_script ) &> $targetdir/log
+ then
+ echo "succeeded"
+ _fail=0
+ else
+ log=log
+ cat $targetdir/stderr >> $targetdir/log
+ if [ $exitonerror == 0 ]; then
+ log=log-`basename $_script`
+ mv $targetdir/log $targetdir/$log
+ fi
+ echo "FAILED - see $targetdir/$log for details"
+ _fail=1
+ fi
+ if [ "$savelogs" == "1" ]; then
+ cp $targetdir/log $logdir/$_basename.log
+ fi
+ if [ "$_fail" == "1" -a "$exitonerror" == "1" ]; then
+ exit 1
+ fi
+ fi
+}
+
+# Print the usage summary on stdout.
+do_help() {
+	printf '%s\n' \
+		"Usage: $0 [options]" \
+		" Options:" \
+		" --tests=<test1,test2,..> Comma separated list of tests to run" \
+		" --disable-multipath Disable any tests involving multipath" \
+		" --disable-integrity Disable slow tests of RAID[56] consistency" \
+		" --logdir=<directory> Directory to save logfiles in" \
+		" --save-logs Save all logs in <logdir>" \
+		" --keep-going Don't stop on error, ie. run all tests" \
+		" --dev=[loop|lvm|ram] Use loop devices (default), LVM, or RAM disk" \
+		" --volgroup=<name> LVM volume group for LVM test" \
+		" setup Setup test environment and exit" \
+		" cleanup Cleanup test environment" \
+		" <prefix> Run tests with <prefix>"
+}
+
+parse_args() {
+ for i in $*
+ do
+ case $i in
+ [0-9]*)
+ prefix=$i
+ ;;
+ setup)
+ echo "mdadm test environment setup"
+ do_setup
+ trap 0; exit 0
+ ;;
+ cleanup)
+ cleanup
+ exit 0
+ ;;
+ --tests=*)
+ TESTLIST=`expr "x$i" : 'x[^=]*=\(.*\)' | sed -e 's/,/ /g'`
+ ;;
+ --logdir=*)
+ logdir=`expr "x$i" : 'x[^=]*=\(.*\)'`
+ ;;
+ --save-logs)
+ savelogs=1
+ if [ ! -d $logdir ] ; then
+ mkdir $logdir
+ if [ $? -ne 0 ] ; then
+ exit 1;
+ fi
+ fi
+ ;;
+ --keep-going | --no-error)
+ exitonerror=0
+ ;;
+ --disable-multipath)
+ unset MULTIPATH
+ ;;
+ --disable-integrity)
+ unset INTEGRITY
+ ;;
+ --dev=loop)
+ DEVTYPE=loop
+ ;;
+ --dev=lvm)
+ DEVTYPE=lvm
+ ;;
+ --dev=ram)
+ DEVTYPE=ram
+ ;;
+ --volgroup=*)
+ LVM_VOLGROUP=`expr "x$i" : 'x[^=]*=\(.*\)'`
+ ;;
+ --help)
+ do_help
+ exit 0;
+ ;;
+ -*)
+ echo " $0: Unknown argument: $i"
+ do_help
+ exit 0;
+ ;;
+ esac
+done
+}
+
+parse_args $@
+
+do_setup
+
+if [ "$savelogs" == "1" ]; then
+ echo "Saving logs to $logdir"
+fi
+
+if [ "x$TESTLIST" != "x" ]; then
+ for script in $TESTLIST
+ do
+ do_test $testdir/$script
+ done
+else
+ for script in $testdir/$prefix $testdir/$prefix*[^~]
+ do
+ do_test $script
+ done
+fi
+exit 0
diff --git a/tests/00linear b/tests/00linear
new file mode 100644
index 00000000..30b2c65c
--- /dev/null
+++ b/tests/00linear
@@ -0,0 +1,25 @@
+
+# create a simple linear
+
+mdadm -CR $md0 -l linear -n3 $dev0 $dev1 $dev2
+check linear
+testdev $md0 3 $mdsize2_l 1
+mdadm -S $md0
+
+# now with version-0.90 superblock
+mdadm -CR $md0 -e0.90 --level=linear -n4 $dev0 $dev1 $dev2 $dev3
+check linear
+testdev $md0 4 $mdsize0 1
+mdadm -S $md0
+
+# now with version-1.0 superblock
+mdadm -CR $md0 -e1.0 --level=linear -n4 $dev0 $dev1 $dev2 $dev3
+check linear
+testdev $md0 4 $mdsize1 1
+mdadm -S $md0
+
+# now with no superblock
+mdadm -B $md0 -l linear -n5 $dev0 $dev1 $dev2 $dev3 $dev4
+check linear
+testdev $md0 5 $size 64
+mdadm -S $md0
diff --git a/tests/00multipath b/tests/00multipath
new file mode 100644
index 00000000..84e4d693
--- /dev/null
+++ b/tests/00multipath
@@ -0,0 +1,29 @@
+
+#
+# create a multipath, and fail and stuff
+#
+# $path0 and $path1 are dev6 and dev7; in loop mode both loop devices
+# use the same backing file (see do_setup in the 'test' script).
+
+# MULTIPATH is only "yes" when mdstat lists the multipath personality
+# and the tests were not disabled on the command line.
+if [ "$MULTIPATH" != "yes" ]; then
+ echo -ne 'skipping... '
+ exit 0
+fi
+
+mdadm -CR $md1 -l multipath -n2 $path0 $path1
+
+testdev $md1 1 $mdsize12 1
+
+# fail the first path: the array must stay usable through the other
+mdadm $md1 -f $path0
+rotest $md1
+testdev $md1 1 $mdsize12 1
+
+mdadm $md1 -r $path0
+mdadm $md1 -a $path0
+
+rotest $md1
+testdev $md1 1 $mdsize12 1
+
+# now the same for the second path
+mdadm $md1 -f $path1
+mdadm $md1 -r $path1
+rotest $md1
+testdev $md1 1 $mdsize12 1
+
+mdadm -S $md1
diff --git a/tests/00names b/tests/00names
new file mode 100644
index 00000000..7a066d8f
--- /dev/null
+++ b/tests/00names
@@ -0,0 +1,13 @@
+set -x -e
+
+# create arrays with non-numeric names
+conf=$targetdir/mdadm.conf
+echo "CREATE names=yes" > $conf
+
+for i in linear raid0 raid1 raid4 raid5 raid6
+do
+ mdadm -CR --config $conf /dev/md/$i -l $i -n 4 $dev4 $dev3 $dev2 $dev1
+ check $i
+ [ -d /sys/class/block/md_$i/md ]
+ mdadm -S md_$i
+done
diff --git a/tests/00raid0 b/tests/00raid0
new file mode 100644
index 00000000..7f87e6ca
--- /dev/null
+++ b/tests/00raid0
@@ -0,0 +1,43 @@
+
+# create a simple raid0
+
+mdadm -CR $md0 -l raid0 -n3 $dev0 $dev1 $dev2
+check raid0
+testdev $md0 3 $mdsize2_l 512
+mdadm -S $md0
+
+# now with version-0.90 superblock
+mdadm -CR $md0 -e0.90 -l0 -n4 $dev0 $dev1 $dev2 $dev3
+check raid0
+testdev $md0 4 $mdsize0 512
+mdadm -S $md0
+
+# now with no superblock
+mdadm -B $md0 -l0 -n5 $dev0 $dev1 $dev2 $dev3 $dev4
+check raid0
+testdev $md0 5 $size 512
+mdadm -S $md0
+
+
+# now same again with different chunk size
+for chunk in 4 32 256
+do
+ mdadm -CR $md0 -e0.90 -l raid0 --chunk $chunk -n3 $dev0 $dev1 $dev2
+ check raid0
+ testdev $md0 3 $mdsize0 $chunk
+ mdadm -S $md0
+
+ # now with version-1 superblock
+ mdadm -CR $md0 -e1.0 -l0 -c $chunk -n4 $dev0 $dev1 $dev2 $dev3
+ check raid0
+ testdev $md0 4 $mdsize1 $chunk
+ mdadm -S $md0
+
+ # now with no superblock
+ mdadm -B $md0 -l0 -n5 --chun=$chunk $dev0 $dev1 $dev2 $dev3 $dev4
+ check raid0
+ testdev $md0 5 $size $chunk
+ mdadm -S $md0
+
+done
+exit 0
diff --git a/tests/00raid1 b/tests/00raid1
new file mode 100644
index 00000000..eb9a658c
--- /dev/null
+++ b/tests/00raid1
@@ -0,0 +1,34 @@
+
+# create a simple mirror
+# test version0, version1, and no super
+# test resync and recovery.
+
+mdadm -CR $md0 -l 1 -n2 $dev0 $dev1
+check resync
+check raid1
+testdev $md0 1 $mdsize1a 64
+mdadm -S $md0
+
+# now with version-0.90 superblock, spare
+mdadm -CR $md0 -e0.90 --level=raid1 -n3 -x2 $dev0 missing missing $dev1 $dev2
+check recovery
+check raid1
+testdev $md0 1 $mdsize0 64
+mdadm -S $md0
+
+# now with no superblock
+mdadm -B $md0 -l mirror -n2 $dev0 $dev1
+check resync
+check raid1
+testdev $md0 1 $size 1
+mdadm -S $md0
+
+# again, but with no resync
+mdadm -B $md0 -l 1 --assume-clean -n2 $dev0 $dev1
+check raid1
+check nosync
+testdev $md0 1 $size 1
+mdadm -S $md0
+
+
+exit 0
diff --git a/tests/00raid10 b/tests/00raid10
new file mode 100644
index 00000000..796b9702
--- /dev/null
+++ b/tests/00raid10
@@ -0,0 +1,18 @@
+
+# Create some raid10 arrays, all with 6 devices and one spare
+devs="$dev0 $dev1 $dev2 $dev3 $dev4 $dev5 $dev6"
+
+# For each layout:
+#   m  - device-count multiplier passed to testdev (3 or 2 of the 6)
+#   cm - factor applied to the 512K chunk passed to testdev
+for lo in n2 n3 f2 f3
+do
+ cm=1
+ case $lo in
+ f2 ) m=3 cm=2;;
+ f3 ) m=2 cm=3;;
+ n2 ) m=3;;
+ n3 ) m=2;;
+ esac
+ mdadm --create --run --level=raid10 --layout $lo --raid-disks 6 -x 1 $md0 $devs
+ check resync ; check raid10
+ testdev $md0 $m $mdsize1 $[512*cm]
+ mdadm -S $md0
+done
diff --git a/tests/00raid4 b/tests/00raid4
new file mode 100644
index 00000000..97d3e3fd
--- /dev/null
+++ b/tests/00raid4
@@ -0,0 +1,16 @@
+
+# create a simple raid4 set
+
+mdadm -CfR $md0 -l 4 -n3 $dev0 $dev1 $dev2
+check resync ; check raid[45]
+testdev $md0 2 $mdsize1 512
+mdadm -S $md0
+
+# now with version-1 superblock
+mdadm -CR $md0 -e1 --level=raid4 -n4 $dev0 $dev1 $dev2 $dev3
+check recovery; check raid[45]
+testdev $md0 3 $mdsize1 512
+mdadm -S $md0
+
+
+exit 0
diff --git a/tests/00raid5 b/tests/00raid5
new file mode 100644
index 00000000..a288c839
--- /dev/null
+++ b/tests/00raid5
@@ -0,0 +1,33 @@
+
+# create a simple raid5 set
+
+mdadm -CfR $md0 -e 0.90 -l 5 -n3 $dev0 $dev1 $dev2
+check resync
+testdev $md0 2 $mdsize0 512
+mdadm -S $md0
+
+# now with version-1 superblock
+mdadm -CR $md0 -e1 --level=raid5 -n4 $dev0 $dev1 $dev2 $dev3
+check recovery
+testdev $md0 3 $mdsize1 512
+mdadm -S $md0
+
+# now same again with explicit layout
+
+for lo in la ra left-symmetric right-symmetric
+do
+
+ mdadm -CfR $md0 -l 5 -p $lo -n3 $dev0 $dev1 $dev2
+ check resync ; check raid5
+ testdev $md0 2 $mdsize1 512
+ mdadm -S $md0
+
+ # now with version-1 superblock
+ mdadm -CR $md0 -e1 --level=raid5 --layout $lo -n4 $dev0 $dev1 $dev2 $dev3
+ check recovery ; check raid5
+ testdev $md0 3 $mdsize1 512
+ mdadm -S $md0
+
+done
+
+exit 0
diff --git a/tests/00raid6 b/tests/00raid6
new file mode 100644
index 00000000..63d60f5c
--- /dev/null
+++ b/tests/00raid6
@@ -0,0 +1,16 @@
+
+# create a simple raid6 set
+
+mdadm -CfR $md0 -e0.90 -l 6 -n4 $dev0 $dev1 $dev2 $dev3
+check resync ; check raid6
+testdev $md0 2 $mdsize0 512
+mdadm -S $md0
+
+# now with version-1 superblock
+mdadm -CR $md0 -e1 --level=raid6 -n5 $dev0 $dev1 $dev2 $dev3 $dev4
+check resync ; check raid6
+testdev $md0 3 $mdsize1 512
+mdadm -S $md0
+
+
+exit 0
diff --git a/tests/01r1fail b/tests/01r1fail
new file mode 100644
index 00000000..9f556321
--- /dev/null
+++ b/tests/01r1fail
@@ -0,0 +1,29 @@
+
+# create a raid1, fail and remove a drive during initial sync
+# Add two more, fail and remove one
+# wait for sync to complete, fail, remove, re-add
+
+mdadm -CR $md0 -l1 -n4 $dev0 $dev1 $dev2 missing
+check resync
+mdadm $md0 --fail $dev2
+check resync
+mdadm $md0 --fail $dev1
+sleep 1
+check nosync
+check state U___
+mdadm $md0 --add $dev4 $dev3
+check recovery
+# there could be two separate recoveries, one for each dev
+check wait
+check wait
+mdadm $md0 --remove $dev2 $dev1
+check nosync
+check state UUU_
+
+mdadm --zero-superblock $dev2
+mdadm $md0 -a $dev2
+check recovery
+check wait
+check state UUUU
+
+mdadm -S $md0
diff --git a/tests/01r5fail b/tests/01r5fail
new file mode 100644
index 00000000..1d629321
--- /dev/null
+++ b/tests/01r5fail
@@ -0,0 +1,27 @@
+
+
+# create a raid5, fail and remove a drive during initial sync
+# Add two more, fail and remove one
+# wait for sync to complete, fail, remove, re-add
+
+mdadm -CR $md0 -l5 -n4 $dev0 $dev1 $dev2 $dev3
+check recovery
+mdadm $md0 --fail $dev3
+sleep 1
+check nosync
+check state UUU_
+
+mdadm $md0 --add $dev4 $dev5
+check recovery
+check wait
+mdadm $md0 --fail $dev0
+mdadm $md0 --remove $dev3 $dev0
+check recovery
+check state _UUU
+
+mdadm $md0 -a $dev3
+check recovery
+check wait
+check state UUUU
+
+mdadm -S $md0
\ No newline at end of file
diff --git a/tests/01r5integ b/tests/01r5integ
new file mode 100644
index 00000000..ffb30ce9
--- /dev/null
+++ b/tests/01r5integ
@@ -0,0 +1,33 @@
+
+# Check integrity of raid5 in degraded mode
+# Create a 4 disk raid5, create a filesystem and
+# sha1sum it with each device failed
+
+# INTEGRITY is "yes" unless --disable-integrity was given
+if [ "$INTEGRITY" != "yes" ]; then
+ echo -ne 'skipping... '
+ exit 0
+fi
+
+for layout in ls rs la ra
+do
+ mdadm -CR $md0 -l5 --layout $layout -n4 $dev0 $dev1 $dev2 $dev3
+ check wait
+ # fill the array with some data (a tar stream of /etc) and record its checksum
+ tar cf - /etc > $md0
+ sum=`sha1sum $md0`
+
+ for i in $dev0 $dev1 $dev2 $dev3
+ do
+ mdadm $md0 -f $i
+ mdadm $md0 -r $i
+ # flush buffers so the checksum is computed from a fresh read of the degraded array
+ blockdev --flushbufs $md0
+ sum1=`sha1sum $md0`
+ if [ "$sum" != "$sum1" ]
+ then
+ echo $sum does not match $sum1 with $i missing
+ exit 1
+ fi
+ mdadm $md0 -a $i
+ check wait
+ done
+ mdadm -S $md0
+done
diff --git a/tests/01raid6integ b/tests/01raid6integ
new file mode 100644
index 00000000..c6fcdae6
--- /dev/null
+++ b/tests/01raid6integ
@@ -0,0 +1,57 @@
+
+# Check integrity of raid6 in degraded modes
+# Create a 5 disk raid6, dump some data to it, then
+# sha1sum it with different pairs of devices failed
+
+# INTEGRITY is "yes" unless --disable-integrity was given
+if [ "$INTEGRITY" != "yes" ]; then
+ echo -ne 'skipping... '
+ exit 0
+fi
+
+layouts='ls rs la ra'
+lv=`uname -r`
+# The extra layouts are only tested on kernels from 2.6.30 on.
+# NOTE(review): neither operand is an integer, so expr compares them
+# as strings; a release such as "10.0" would sort before "2.6.30".
+if expr $lv '>=' 2.6.30 > /dev/null
+then
+ layouts="$layouts parity-first ddf-zero-restart ddf-N-restart ddf-N-continue \
+ left-asymmetric-6 right-asymmetric-6 left-symmetric-6 right-symmetric-6 parity-first-6"
+fi
+
+for layout in $layouts
+do
+ mdadm -CR $md0 -l6 --layout $layout -n5 $dev0 $dev1 $dev2 $dev3 $dev4
+ check wait
+ tar cf - /etc > $md0
+ sum=`sha1sum $md0`
+
+ # 'totest' collects the devices already handled as 'second', so every
+ # unordered pair of devices is failed together exactly once.
+ totest=
+ for second in $dev0 $dev1 $dev2 $dev3 $dev4
+ do
+ mdadm $md0 -f $second
+ mdadm $md0 -r $second
+ blockdev --flushbufs $md0
+ sum1=`sha1sum $md0`
+ if [ "$sum" != "$sum1" ]
+ then
+ echo $sum does not match $sum1 with $second missing
+ exit 1
+ fi
+ for first in $totest
+ do
+ mdadm $md0 -f $first
+ mdadm $md0 -r $first
+ blockdev --flushbufs $md0
+ sum1=`sha1sum $md0`
+ if [ "$sum" != "$sum1" ]
+ then
+ echo $sum does not match $sum1 with $first and $second missing
+ exit 1
+ fi
+ mdadm $md0 -a $first
+ check wait
+ done
+ mdadm $md0 -a $second
+ check wait
+ totest="$totest $second"
+ done
+ mdadm -S $md0
+done
diff --git a/tests/01replace b/tests/01replace
new file mode 100644
index 00000000..6223a223
--- /dev/null
+++ b/tests/01replace
@@ -0,0 +1,52 @@
+set -x -e
+
+## test --replace for raid5 raid6 raid1 and raid10
+#1/ after replace, can remove replaced device
+#2/ after --replace-with cannot remove the 'with' device
+#3/ preserve integrity with concurrent failure
+
+for level in 1 5 6 10
+do
+ dd if=/dev/zero of=$dev4 bs=1M || true
+ dd if=/dev/zero of=$dev5 bs=1M || true
+ mdadm -CR $md0 -l $level -n4 -x2 $devlist5
+ dd if=/dev/urandom of=$md0 bs=1M || true
+ sum=`sha1sum < $md0`
+ check wait
+ mdadm $md0 --replace $dev1
+ check wait
+ mdadm $md0 --remove $dev1
+ mdadm $md0 --remove $dev5 && exit 1
+ mdadm -S $md0
+ dd if=/dev/zero of=$dev4 bs=1M || true
+ dd if=/dev/zero of=$dev5 bs=1M || true
+ mdadm -CR $md0 -l $level -n4 -x2 $devlist5
+ check wait
+ sum1=`sha1sum < $md0`
+ [ "$sum" == "$sum1" ]
+
+ mdadm $md0 --replace $dev1 --with $dev4
+ check wait
+ mdadm $md0 --remove $dev1
+ mdadm $md0 --remove $dev5
+ mdadm $md0 --remove $dev4 && exit 1
+
+ mdadm $md0 --add $dev1 $dev5
+ mdadm $md0 --replace $dev0
+ sleep 1
+ mdadm $md0 --fail $dev2
+ check wait
+ sum2=`sha1sum < $md0`
+ [ "$sum" == "$sum2" ]
+
+ mdadm $md0 --remove $dev0 $dev2
+ mdadm $md0 --add $dev0 $dev2
+ mdadm $md0 --replace $dev3
+ sleep 1
+ mdadm $md0 --fail $dev0 $dev2
+ check wait
+ sum3=`sha1sum < $md0`
+ [ "$sum" == "$sum3" ]
+
+ mdadm -S $md0
+done
diff --git a/tests/02lineargrow b/tests/02lineargrow
new file mode 100644
index 00000000..e05c219d
--- /dev/null
+++ b/tests/02lineargrow
@@ -0,0 +1,23 @@
+
+# create a linear array, and add more drives to it.
+
+for e in 0.90 1 1.1 1.2
+do
+ # expected per-device size for this metadata version
+ # NOTE(review): the '1.0' arm is never taken with the list above.
+ case $e in
+ 0.90 ) sz=$mdsize0 ;;
+ 1 ) sz=$mdsize2_l ;;
+ 1.0 ) sz=$mdsize1 ;;
+ 1.1 ) sz=$mdsize1_l ;;
+ 1.2 ) sz=$mdsize2_l ;;
+ esac
+ mdadm -CRf $md0 --level linear -e $e --raid-disks=1 $dev1
+ testdev $md0 1 $sz 1
+
+ mdadm --grow $md0 --add $dev2
+ testdev $md0 2 $sz 1
+
+ mdadm --grow $md0 --add $dev3
+ testdev $md0 3 $sz 1
+
+ mdadm -S $md0
+done
diff --git a/tests/02r1add b/tests/02r1add
new file mode 100644
index 00000000..c07ec977
--- /dev/null
+++ b/tests/02r1add
@@ -0,0 +1,40 @@
+
+# Make a raid1, add a device, then remove it again.
+
+mdadm -CR $md0 -l1 -n2 -x1 $dev0 $dev1 $dev2
+check resync
+check wait
+check state UU
+
+mdadm --grow $md0 -n 3
+check recovery
+check wait
+check state UUU
+
+mdadm $md0 --fail $dev0
+check state _UU
+
+mdadm --grow $md0 -n 2
+check state UU
+
+mdadm -S $md0
+# same again for version-1
+
+
+mdadm -CR $md0 -l1 -n2 -e1.2 -x1 $dev0 $dev1 $dev2
+check resync
+check wait
+check state UU
+
+mdadm --grow $md0 -n 3
+check recovery
+check wait
+check state UUU
+
+mdadm $md0 --fail $dev0
+check state _UU
+
+mdadm --grow $md0 -n 2
+check state UU
+
+mdadm -S $md0
diff --git a/tests/02r1grow b/tests/02r1grow
new file mode 100644
index 00000000..6604208d
--- /dev/null
+++ b/tests/02r1grow
@@ -0,0 +1,36 @@
+
+
+# create a small raid1 array, make it larger. Then make it smaller
+
+mdadm -CR $md0 -e 0.90 --level raid1 --raid-disks 3 --size $[size/2] $dev1 $dev2 $dev3
+check wait
+check state UUU
+testdev $md0 1 $[size/2] 1
+
+mdadm --grow $md0 --size max
+check resync
+check wait
+testdev $md0 1 $mdsize0 1
+
+mdadm --grow $md0 --size $[size/2]
+check nosync
+testdev $md0 1 $[size/2] 1
+
+mdadm -S $md0
+
+# same again with version 1.1 superblock
+mdadm -CR $md0 --level raid1 --metadata=1.1 --raid-disks 3 --size $[size/2] $dev1 $dev2 $dev3
+check wait
+check state UUU
+testdev $md0 1 $[size/2] 1
+
+mdadm --grow $md0 --size max
+check resync
+check wait
+testdev $md0 1 $mdsize1_l 1
+
+mdadm --grow $md0 --size $[size/2]
+check nosync
+testdev $md0 1 $[size/2] 1
+
+mdadm -S $md0
diff --git a/tests/02r5grow b/tests/02r5grow
new file mode 100644
index 00000000..b91ffcd4
--- /dev/null
+++ b/tests/02r5grow
@@ -0,0 +1,36 @@
+
+
+# create a small raid5 array, make it larger. Then make it smaller
+
+mdadm -CR $md0 -e0.90 --level raid5 --chunk=64 --raid-disks 3 --size $[size/2] $dev1 $dev2 $dev3
+check wait
+check state UUU
+testdev $md0 2 $[size/2] 32
+
+mdadm --grow $md0 --size max
+check resync
+check wait
+testdev $md0 2 $mdsize0 32
+
+mdadm --grow $md0 --size $[size/2]
+check nosync
+testdev $md0 2 $[size/2] 32
+
+mdadm -S $md0
+
+# same again with version 1.1 superblock
+mdadm -CR $md0 --level raid5 --metadata=1.1 --chunk=128 --raid-disks 4 --size $[size/2] $dev1 $dev2 $dev3 $dev4
+check wait
+check state UUUU
+testdev $md0 3 $[size/2] 128
+
+mdadm --grow $md0 --size max
+check resync
+check wait
+testdev $md0 3 $[mdsize1_l] 128
+
+mdadm --grow $md0 --size $[size/2]
+check nosync
+sh tests/testdev $md0 3 $[size/2] 128
+
+mdadm -S $md0
diff --git a/tests/02r6grow b/tests/02r6grow
new file mode 100644
index 00000000..38c77810
--- /dev/null
+++ b/tests/02r6grow
@@ -0,0 +1,36 @@
+
+
+# create a small raid6 array, make it larger. Then make it smaller
+
+mdadm -CR $md0 -e 0.90 --level raid6 --chunk=64 --raid-disks 4 --size $[size/2] $dev1 $dev2 $dev3 $dev4
+check wait
+check state UUUU
+testdev $md0 2 $[size/2] 32
+
+mdadm --grow $md0 --size max
+check resync
+check wait
+testdev $md0 2 $mdsize0 32
+
+mdadm --grow $md0 --size $[size/2]
+check nosync
+testdev $md0 2 $[size/2] 32
+
+mdadm -S $md0
+
+# same again with version 1.1 superblock
+mdadm -CR $md0 --level raid6 --metadata=1.1 --chunk=128 --raid-disks 4 --size $[size/2] $dev1 $dev2 $dev3 $dev4
+check wait
+check state UUUU
+testdev $md0 2 $[size/2] 128
+
+mdadm --grow $md0 --size max
+check resync
+check wait
+testdev $md0 2 $[mdsize1_l] 128
+
+mdadm --grow $md0 --size $[size/2]
+check nosync
+testdev $md0 2 $[size/2] 128
+
+mdadm -S $md0
diff --git a/tests/03assem-incr b/tests/03assem-incr
new file mode 100644
index 00000000..f10a1a48
--- /dev/null
+++ b/tests/03assem-incr
@@ -0,0 +1,17 @@
+set -x -e
+
+# Test interaction between -I and -A
+# there are locking issue too, but those are hard to test for.
+#
+# Here just test that a partly "-I" assembled array can
+# be completed with "-A"
+
+for l in 0 1 5 linear
+do
+ mdadm -CR $md0 -l $l -n5 $dev0 $dev1 $dev2 $dev3 $dev4 --assume-clean
+ mdadm -S md0
+ mdadm -I $dev1
+ mdadm -I $dev3
+ mdadm -A /dev/md0 $dev0 $dev1 $dev2 $dev3 $dev4
+ mdadm -S /dev/md0
+done
diff --git a/tests/03r0assem b/tests/03r0assem
new file mode 100644
index 00000000..0b998ec1
--- /dev/null
+++ b/tests/03r0assem
@@ -0,0 +1,137 @@
+
+# create a raid0 array from 3 devices, and assemble it in a multitude of ways.
+# explicitly list devices
+# uuid, md-minor on command line with wildcard devices
+# mdadm.conf file
+
+mdadm -CR $md2 -l0 -n3 $dev0 $dev1 $dev2
+check raid0
+tst="testdev $md2 3 $mdsize1_l 512"
+$tst
+uuid=`mdadm -Db $md2 | sed 's/.*UUID=//'`
+mdadm -S $md2
+
+mdadm -A $md2 $dev0 $dev1 $dev2
+$tst
+mdadm -S $md2
+
+mdadm -A $md2 -u $uuid $devlist
+$tst
+mdadm -S $md2
+
+mdadm --assemble $md2 --name=2 $devlist
+$tst
+mdadm -S $md2
+
+conf=$targetdir/mdadm.conf
+{
+ echo DEVICE $devlist
+ echo array $md2 UUID=$uuid
+} > $conf
+
+mdadm -As -c $conf $md2
+$tst
+mdadm -S $md2
+
+{
+ echo DEVICE $devlist
+ echo array $md2 name=2
+} > $conf
+
+mdadm -As -c $conf $md2
+$tst
+mdadm -S $md2
+
+
+{
+ echo DEVICE $devlist
+ echo array $md2 devices=$dev0,$dev1,$dev2
+} > $conf
+
+mdadm -As -c $conf $md2
+$tst
+
+echo "DEVICE $devlist" > $conf
+mdadm -Db $md2 >> $conf
+mdadm -S $md2
+
+mdadm --assemble --scan --config=$conf $md2
+$tst
+mdadm -S $md2
+
+echo " metadata=0.90 devices=$dev0,$dev1,$dev2" >> $conf
+mdadm --assemble --scan --config=$conf $md2
+$tst
+mdadm -S $md2
+
+
+### Now for version 0...
+
+mdadm --zero-superblock $dev0 $dev1 $dev2
+mdadm -CR $md2 -l0 --metadata=0.90 -n3 $dev0 $dev1 $dev2
+check raid0
+tst="testdev $md2 3 $mdsize0 512"
+$tst
+
+uuid=`mdadm -Db $md2 | sed 's/.*UUID=//'`
+mdadm -S $md2
+
+mdadm -A $md2 $dev0 $dev1 $dev2
+$tst
+mdadm -S $md2
+
+mdadm -A $md2 -u $uuid $devlist
+$tst
+mdadm -S $md2
+
+mdadm --assemble $md2 --super-minor=2 $devlist #
+$tst
+mdadm -S $md2
+
+conf=$targetdir/mdadm.conf
+{
+ echo DEVICE $devlist
+ echo array $md2 UUID=$uuid
+} > $conf
+
+mdadm -As -c $conf $md2
+$tst
+mdadm -S $md2
+
+{
+ echo DEVICE $devlist
+ echo array $md2 super-minor=2
+} > $conf
+
+mdadm -As -c $conf $md2
+$tst
+mdadm -S $md2
+
+
+{
+ echo DEVICE $devlist
+ echo array $md2 devices=$dev0,$dev1,$dev2
+} > $conf
+
+mdadm -As -c $conf $md2
+$tst
+
+echo "DEVICE $devlist" > $conf
+mdadm -Db $md2 >> $conf
+mdadm -S $md2
+
+mdadm --assemble --scan --config=$conf $md2
+$tst
+mdadm -S $md2
+
+echo " metadata=1 devices=$dev0,$dev1,$dev2" >> $conf
+mdadm --assemble --scan --config=$conf $md2
+$tst
+mdadm -S $md2
+
+# Now use incremental assembly.
+mdadm -I --config=$conf $dev0
+mdadm -I --config=$conf $dev1
+mdadm -I --config=$conf $dev2
+$tst
+mdadm -S $md2
diff --git a/tests/03r5assem b/tests/03r5assem
new file mode 100644
index 00000000..de0d56b6
--- /dev/null
+++ b/tests/03r5assem
@@ -0,0 +1,109 @@
+
+# create a raid5 array and assemble it in various ways,
+# including with missing devices.
+
+mdadm -CR -e 0.90 $md1 -l5 -n3 $dev0 $dev1 $dev2
+tst="check raid5 ;testdev $md1 2 $mdsize0 512 ; mdadm -S $md1"
+uuid=`mdadm -Db $md1 | sed 's/.*UUID=//'`
+check wait
+eval $tst
+
+mdadm -A $md1 $dev0 $dev1 $dev2
+eval $tst
+
+mdadm -A $md1 -u $uuid $devlist
+eval $tst
+
+mdadm -A $md1 -m 1 $devlist
+eval $tst
+
+
+conf=$targetdir/mdadm.conf
+{
+ echo DEVICE $devlist
+ echo array $md1 UUID=$uuid
+} > $conf
+
+mdadm -As -c $conf $md1
+eval $tst
+
+{
+ echo DEVICE $devlist
+ echo array $md1 super-minor=1
+} > $conf
+
+mdadm -As -c $conf
+eval $tst
+
+{
+ echo DEVICE $devlist
+ echo array $md1 devices=$dev0,$dev1,$dev2
+} > $conf
+
+mdadm -As -c $conf
+
+echo "DEVICE $devlist" > $conf
+mdadm -Db $md1 >> $conf
+eval $tst
+
+mdadm --assemble --scan --config=$conf $md1
+eval $tst
+
+echo " metadata=0.90 devices=$dev0,$dev1,$dev2" >> $conf
+mdadm --assemble --scan --config=$conf $md1
+eval $tst
+
+### Now with a missing device
+
+mdadm -AR $md1 $dev0 $dev2 #
+check state U_U
+eval $tst
+
+mdadm -A $md1 -u $uuid $devlist
+check state U_U
+eval $tst
+
+mdadm -A $md1 -m 1 $devlist
+check state U_U
+eval $tst
+
+
+conf=$targetdir/mdadm.conf
+{
+ echo DEVICE $devlist
+ echo array $md1 UUID=$uuid
+} > $conf
+
+mdadm -As -c $conf $md1
+check state U_U
+eval $tst
+
+{
+ echo DEVICE $devlist
+ echo array $md1 super-minor=1
+} > $conf
+
+mdadm -As -c $conf
+check state U_U
+eval $tst
+
+{
+ echo DEVICE $devlist
+ echo array $md1 devices=$dev0,$dev1,$dev2
+} > $conf
+
+mdadm -As -c $conf
+
+echo "DEVICE $devlist" > $conf
+mdadm -Db $md1 >> $conf
+check state U_U
+eval $tst
+
+mdadm --assemble --scan --config=$conf $md1
+check state U_U
+eval $tst
+
+echo " metadata=0.90 devices=$dev0,$dev1,$dev2" >> $conf
+mdadm --assemble --scan --config=$conf $md1
+check state U_U
+eval $tst
diff --git a/tests/03r5assem-failed b/tests/03r5assem-failed
new file mode 100644
index 00000000..d38241df
--- /dev/null
+++ b/tests/03r5assem-failed
@@ -0,0 +1,12 @@
+
+# Create an array, fail one device while array is active, stop array,
+# then re-assemble listing the failed device first.
+
+mdadm -CR $md1 -l5 -n4 $dev0 $dev1 $dev2 $dev3
+check wait
+
+echo 2000 > /sys/block/md1/md/safe_mode_delay
+mkfs $md1
+mdadm $md1 -f $dev0
+mdadm -S $md1
+mdadm -A $md1 $dev0 $dev1 $dev2 $dev3 || exit 1
diff --git a/tests/03r5assemV1 b/tests/03r5assemV1
new file mode 100644
index 00000000..bca0c583
--- /dev/null
+++ b/tests/03r5assemV1
@@ -0,0 +1,128 @@
+
+# create a v-1 raid5 array and assemble in various ways
+
+mdadm -CR -e1 --name one $md1 -l5 -n3 -x2 $dev0 $dev1 $dev2 $dev3 $dev4
+tst="check raid5 ;testdev $md1 2 $mdsize1 512 ; mdadm -S $md1"
+uuid=`mdadm -Db $md1 | sed 's/.*UUID=//'`
+check wait
+
+eval $tst
+
+mdadm -A $md1 $dev0 $dev1 $dev2
+mdadm $md1 --add $dev3 $dev4
+check spares 2
+eval $tst
+
+mdadm -A $md1 -u $uuid $devlist
+check spares 2
+eval $tst
+
+mdadm -A $md1 --name one $devlist
+check spares 2
+eval $tst
+
+
+conf=$targetdir/mdadm.conf
+{
+ echo DEVICE $devlist
+ echo array $md1 UUID=$uuid
+} > $conf
+
+mdadm -As -c $conf $md1
+eval $tst
+
+{
+ echo DEVICE $devlist
+ echo array $md1 name=one
+} > $conf
+
+mdadm -As -c $conf
+eval $tst
+
+{
+ echo DEVICE $devlist
+ echo array $md1 devices=$dev0,$dev1,$dev2,$dev3,$dev4
+} > $conf
+
+mdadm -As -c $conf
+
+echo "DEVICE $devlist" > $conf
+mdadm -Db $md1 >> $conf
+eval $tst
+mdadm --assemble --scan --config=$conf $md1
+eval $tst
+echo PING >&2
+
+echo " metadata=1.0 devices=$dev0,$dev1,$dev2,$dev3,$dev4" >> $conf
+mdadm --assemble --scan --config=$conf $md1
+eval $tst
+
+### Now with a missing device
+# We don't want the recovery to complete while we are
+# messing about here.
+echo 100 > /proc/sys/dev/raid/speed_limit_max
+echo 100 > /proc/sys/dev/raid/speed_limit_min
+
+mdadm -AR $md1 $dev0 $dev2 $dev3 $dev4 #
+check state U_U
+check spares 1
+eval $tst
+
+mdadm -A $md1 -u $uuid $devlist
+check state U_U
+eval $tst
+
+mdadm -A $md1 --name=one $devlist
+check state U_U
+check spares 1
+eval $tst
+
+
+conf=$targetdir/mdadm.conf
+{
+ echo DEVICE $devlist
+ echo array $md1 UUID=$uuid
+} > $conf
+
+mdadm -As -c $conf $md1
+check state U_U
+eval $tst
+
+{
+ echo DEVICE $devlist
+ echo array $md1 name=one
+} > $conf
+
+mdadm -As -c $conf
+check state U_U
+eval $tst
+
+{
+ echo DEVICE $devlist
+ echo array $md1 devices=$dev0,$dev1,$dev2
+} > $conf
+
+mdadm -As -c $conf
+
+echo "DEVICE $devlist" > $conf
+mdadm -Db $md1 >> $conf
+check state U_U
+eval $tst
+
+mdadm --assemble --scan --config=$conf $md1
+check state U_U
+eval $tst
+
+echo " metadata=1.0 devices=$dev0,$dev1,$dev2" >> $conf
+mdadm --assemble --scan --config=$conf $md1
+check state U_U
+eval $tst
+
+# And now assemble with -I
+mdadm -Ss
+mdadm -I -c $conf $dev0
+mdadm -I -c $conf $dev1
+mdadm -I -c $conf $dev2
+eval $tst
+echo 2000 > /proc/sys/dev/raid/speed_limit_max
+echo 1000 > /proc/sys/dev/raid/speed_limit_min
diff --git a/tests/04r0update b/tests/04r0update
new file mode 100644
index 00000000..0cd815ac
--- /dev/null
+++ b/tests/04r0update
@@ -0,0 +1,20 @@
+
+# create a raid0, re-assemble with a different super-minor
+mdadm -CR -e 0.90 $md0 -l0 -n3 $dev0 $dev1 $dev2
+testdev $md0 3 $mdsize0 512
+minor1=`mdadm -E $dev0 | sed -n -e 's/.*Preferred Minor : //p'`
+mdadm -S /dev/md0
+
+mdadm -A $md1 $dev0 $dev1 $dev2
+minor2=`mdadm -E $dev0 | sed -n -e 's/.*Preferred Minor : //p'`
+mdadm -S /dev/md1
+
+mdadm -A $md1 --update=super-minor $dev0 $dev1 $dev2
+minor3=`mdadm -E $dev0 | sed -n -e 's/.*Preferred Minor : //p'`
+mdadm -S /dev/md1
+
+case "$minor1 $minor2 $minor3" in
+ "0 0 1" ) ;;
+ * ) echo >&2 "ERROR minors should be '0 0 1' but are '$minor1 $minor2 $minor3'"
+ exit 1
+esac
diff --git a/tests/04r1update b/tests/04r1update
new file mode 100644
index 00000000..7a50131f
--- /dev/null
+++ b/tests/04r1update
@@ -0,0 +1,15 @@
+set -i
+
+# create a raid1 array, let it sync, then re-assemble with a force-sync
+
+mdadm -CR $md0 -l1 -n2 $dev0 $dev1
+check wait
+mdadm -S $md0
+
+mdadm -A $md0 $dev0 $dev1
+check nosync
+mdadm -S $md0
+
+mdadm -A $md0 -U resync $dev0 $dev1
+check resync
+mdadm -S $md0
diff --git a/tests/04r5swap b/tests/04r5swap
new file mode 100644
index 00000000..5373a607
--- /dev/null
+++ b/tests/04r5swap
@@ -0,0 +1,18 @@
+
+# make a raid5 array, byte swap the superblocks, then assemble...
+
+mdadm -CR $md0 -e 0.90 -l5 -n4 $dev0 $dev1 $dev2 $dev3
+sleep 4
+mdadm -S $md0
+
+mdadm -E --metadata=0 $dev1 > $targetdir/d1
+for d in $dev0 $dev1 $dev2 $dev3
+do $dir/swap_super $d
+done
+mdadm -E --metadata=0.swap $dev1 > $targetdir/d1s
+diff -u $targetdir/d1 $targetdir/d1s
+
+mdadm --assemble --update=byteorder $md0 $dev0 $dev1 $dev2 $dev3
+sleep 3
+check recovery
+mdadm -S $md0
diff --git a/tests/04update-metadata b/tests/04update-metadata
new file mode 100644
index 00000000..10bb70f0
--- /dev/null
+++ b/tests/04update-metadata
@@ -0,0 +1,48 @@
+set -xe
+
+# test converting v0.90 to v1.0
+# check for different levels
+# check it fails for non-v0.90
+# check it fails during reshape or recovery
+# check it fails when bitmap is present
+
+dlist="$dev0 $dev1 $dev2 $dev3"
+
+for ls in raid0/4 linear/4 raid1/1 raid5/3 raid6/2
+do
+ s=${ls#*/} l=${ls%/*}
+ mdadm -CR --assume-clean -e 0.90 $md0 --level $l -n 4 -c 64 $dlist
+ testdev $md0 $s 19904 64
+ mdadm -S $md0
+ mdadm -A $md0 --update=metadata $dlist
+ testdev $md0 $s 19904 64 check
+ mdadm -S $md0
+done
+
+if mdadm -A $md0 --update=metadata $dlist
+then echo >&2 should fail with v1.0 metadata
+ exit 1
+fi
+
+mdadm -CR -e 0.90 $md0 --level=6 -n4 -c32 $dlist
+mdadm -S $md0
+
+if mdadm -A $md0 --update=metadata $dlist
+then echo >&2 should fail during resync
+ exit 1
+fi
+mdadm -A $md0 $dlist
+mdadm --wait $md0
+mdadm -S $md0
+
+# should succeed now
+mdadm -A $md0 --update=metadata $dlist
+
+mdadm -S /dev/md0
+mdadm -CR --assume-clean -e 0.90 $md0 --level=6 -n4 -c32 $dlist --bitmap=internal
+mdadm -S $md0
+
+if mdadm -A $md0 --update=metadata $dlist
+then echo >&2 should fail when bitmap present
+ exit 1
+fi
diff --git a/tests/04update-uuid b/tests/04update-uuid
new file mode 100644
index 00000000..a4409e78
--- /dev/null
+++ b/tests/04update-uuid
@@ -0,0 +1,82 @@
+set -x
+
+# create an array, then change the uuid.
+
+mdadm -CR --assume-clean $md0 -l5 -n3 $dev0 $dev1 $dev2
+mdadm -S /dev/md0
+mdadm -A /dev/md0 --update=uuid --uuid=0123456789abcdef:fedcba9876543210 $dev0 $dev1 $dev2
+no_errors
+mdadm -D /dev/md0 | grep -s > /dev/null 01234567:89abcdef:fedcba98:76543210 || {
+ echo Wrong uuid; mdadm -D /dev/md0 ; exit 2;
+}
+mdadm -S /dev/md0
+
+# try v1 superblock
+
+mdadm -CR --assume-clean -e1 $md0 -l5 -n3 $dev0 $dev1 $dev2
+mdadm -S /dev/md0
+mdadm -A /dev/md0 --update=uuid --uuid=0123456789abcdef:fedcba9876543210 $dev0 $dev1 $dev2
+no_errors
+mdadm -D /dev/md0 | grep -s > /dev/null 01234567:89abcdef:fedcba98:76543210 || {
+ echo Wrong uuid; mdadm -D /dev/md0 ; exit 2;
+}
+mdadm -S /dev/md0
+
+
+# now if we have a bitmap, that needs updating too.
+rm -f $targetdir/bitmap
+mdadm -CR --assume-clean -b $targetdir/bitmap $md0 -l5 -n3 $dev0 $dev1 $dev2
+mdadm -S /dev/md0
+mdadm -A /dev/md0 -b $targetdir/bitmap --update=uuid --uuid=0123456789abcdef:fedcba9876543210 $dev0 $dev1 $dev2
+no_errors
+mdadm -D /dev/md0 | grep -s > /dev/null 01234567:89abcdef:fedcba98:76543210 || {
+ echo Wrong uuid; mdadm -D /dev/md0 ; exit 2;
+}
+if mdadm -X $targetdir/bitmap | grep -s > /dev/null 01234567:89abcdef:fedcba98:76543210 ||
+ mdadm -X $targetdir/bitmap | grep -s > /dev/null 67452301:efcdab89:98badcfe:10325476
+then : ; else
+ echo Wrong uuid; mdadm -X $targetdir/bitmap ; exit 2;
+fi
+mdadm -S /dev/md0
+
+# and bitmap for version1
+rm -f $targetdir/bitmap
+mdadm -CR --assume-clean -e1.1 -b $targetdir/bitmap $md0 -l5 -n3 $dev0 $dev1 $dev2
+mdadm -S /dev/md0
+mdadm -A /dev/md0 -b $targetdir/bitmap --update=uuid --uuid=0123456789abcdef:fedcba9876543210 $dev0 $dev1 $dev2
+no_errors
+mdadm -D /dev/md0 | grep -s > /dev/null 01234567:89abcdef:fedcba98:76543210 || {
+ echo Wrong uuid; mdadm -D /dev/md0 ; exit 2;
+}
+# -X cannot tell which byteorder to use for the UUID, so allow both.
+if mdadm -X $targetdir/bitmap | grep -s > /dev/null 01234567:89abcdef:fedcba98:76543210 ||
+ mdadm -X $targetdir/bitmap | grep -s > /dev/null 67452301:efcdab89:98badcfe:10325476
+then : ; else
+ echo Wrong uuid; mdadm -X $targetdir/bitmap ; exit 2;
+fi
+mdadm -S /dev/md0
+
+# Internal bitmaps too.
+mdadm -CR --assume-clean -b internal --bitmap-chunk 4 $md0 -l5 -n3 $dev0 $dev1 $dev2
+mdadm -S /dev/md0
+mdadm -A /dev/md0 --update=uuid --uuid=0123456789abcdef:fedcba9876543210 $dev0 $dev1 $dev2
+no_errors
+mdadm -D /dev/md0 | grep -s > /dev/null 01234567:89abcdef:fedcba98:76543210 || {
+ echo Wrong uuid; mdadm -D /dev/md0 ; exit 2;
+}
+mdadm -X $dev0 | grep -s > /dev/null 01234567:89abcdef:fedcba98:76543210 || {
+ echo Wrong uuid; mdadm -X $dev0; exit 2;
+}
+mdadm -S /dev/md0
+
+mdadm -CR --assume-clean -e1.2 -b internal --bitmap-chunk=4 $md0 -l5 -n3 $dev0 $dev1 $dev2
+mdadm -S /dev/md0
+mdadm -A /dev/md0 --update=uuid --uuid=0123456789abcdef:fedcba9876543210 $dev0 $dev1 $dev2
+no_errors
+mdadm -D /dev/md0 | grep -s > /dev/null 01234567:89abcdef:fedcba98:76543210 || {
+ echo Wrong uuid; mdadm -D /dev/md0 ; exit 2;
+}
+mdadm -X $dev0 | grep -s > /dev/null 01234567:89abcdef:fedcba98:76543210 || {
+ echo Wrong uuid; mdadm -X $dev0; exit 2;
+}
+mdadm -S /dev/md0
diff --git a/tests/05r1-add-internalbitmap b/tests/05r1-add-internalbitmap
new file mode 100644
index 00000000..4e203052
--- /dev/null
+++ b/tests/05r1-add-internalbitmap
@@ -0,0 +1,20 @@
+#
+# create a raid1 without any bitmap, add the bitmap and then write to
+# the device. This should catch the case where the bitmap is created
+# but not reloaded correctly, such as the case fixed by
+# 4474ca42e2577563a919fd3ed782e2ec55bf11a2
+#
+mdadm --create --run $md0 --metadata=0.9 --level=1 -n2 --delay=1 $dev1 $dev2
+check wait
+check nobitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -Gb internal --bitmap-chunk=4 $md0
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
+
+# Re-assemble the array and verify the bitmap is still present
+mdadm --assemble $md0 $dev1 $dev2
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
diff --git a/tests/05r1-add-internalbitmap-v1a b/tests/05r1-add-internalbitmap-v1a
new file mode 100644
index 00000000..721a41c1
--- /dev/null
+++ b/tests/05r1-add-internalbitmap-v1a
@@ -0,0 +1,20 @@
+#
+# create a raid1 without any bitmap, add the bitmap and then write to
+# the device. This should catch the case where the bitmap is created
+# but not reloaded correctly, such as the case fixed by
+# 4474ca42e2577563a919fd3ed782e2ec55bf11a2
+#
+mdadm --create --run $md0 --metadata=1.0 --level=1 -n2 --delay=1 $dev1 $dev2
+check wait
+check nobitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -Gb internal --bitmap-chunk=4 $md0
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
+
+# Re-assemble the array and verify the bitmap is still present
+mdadm --assemble $md0 $dev1 $dev2
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
diff --git a/tests/05r1-add-internalbitmap-v1b b/tests/05r1-add-internalbitmap-v1b
new file mode 100644
index 00000000..b077fa7e
--- /dev/null
+++ b/tests/05r1-add-internalbitmap-v1b
@@ -0,0 +1,20 @@
+#
+# create a raid1 without any bitmap, add the bitmap and then write to
+# the device. This should catch the case where the bitmap is created
+# but not reloaded correctly, such as the case fixed by
+# 4474ca42e2577563a919fd3ed782e2ec55bf11a2
+#
+mdadm --create --run $md0 --metadata=1.1 --level=1 -n2 --delay=1 $dev1 $dev2
+check wait
+check nobitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -Gb internal --bitmap-chunk=4 $md0
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
+
+# Re-assemble the array and verify the bitmap is still present
+mdadm --assemble $md0 $dev1 $dev2
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
diff --git a/tests/05r1-add-internalbitmap-v1c b/tests/05r1-add-internalbitmap-v1c
new file mode 100644
index 00000000..7b397844
--- /dev/null
+++ b/tests/05r1-add-internalbitmap-v1c
@@ -0,0 +1,20 @@
+#
+# create a raid1 without any bitmap, add the bitmap and then write to
+# the device. This should catch the case where the bitmap is created
+# but not reloaded correctly, such as the case fixed by
+# 4474ca42e2577563a919fd3ed782e2ec55bf11a2
+#
+mdadm --create --run $md0 --metadata=1.2 --level=1 -n2 --delay=1 $dev1 $dev2
+check wait
+check nobitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -Gb internal --bitmap-chunk=4 $md0
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
+
+# Re-assemble the array and verify the bitmap is still present
+mdadm --assemble $md0 $dev1 $dev2
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
diff --git a/tests/05r1-bitmapfile b/tests/05r1-bitmapfile
new file mode 100644
index 00000000..fcd8372b
--- /dev/null
+++ b/tests/05r1-bitmapfile
@@ -0,0 +1,49 @@
+
+#
+# create a raid1 with a bitmap file
+#
+bmf=$targetdir/bitmap
+rm -f $bmf
+mdadm --create --run $md0 --level=1 -n2 --delay=1 --bitmap $bmf $dev1 $dev2
+check wait
+testdev $md0 1 $mdsize1a 64
+mdadm -S $md0
+
+mdadm --assemble $md0 --bitmap=$bmf $dev1 $dev2
+testdev $md0 1 $mdsize1a 64
+dirty1=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty2=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty1 -lt 400 -o $dirty2 -ne 0 ]
+then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
+ exit 1
+fi
+mdadm $md0 -f $dev1
+testdev $md0 1 $mdsize1a 64
+sleep 4
+dirty3=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+if [ $dirty3 -lt 400 ]
+then
+ echo >&2 "ERROR dirty count $dirty3 is too small"
+ exit 2
+fi
+
+mdadm -S $md0
+
+mdadm --assemble -R $md0 --bitmap=$bmf $dev2
+dirty4=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+mdadm --zero $dev1 # force --add, not --re-add
+mdadm $md0 --add $dev1
+#it is too fast# check recovery
+
+check wait
+sleep 4
+dirty5=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty4 -lt 400 -o $dirty5 -ne 0 ] # fail unless the earlier sample had >=400 dirty and the later one has none
+then echo >&2 "ERROR bad 'dirty' counts at end: $dirty4 $dirty5"
+ exit 1
+fi
+
+mdadm -S $md0
diff --git a/tests/05r1-grow-external b/tests/05r1-grow-external
new file mode 100644
index 00000000..49666369
--- /dev/null
+++ b/tests/05r1-grow-external
@@ -0,0 +1,33 @@
+
+#
+# create a raid1 array, add an external bitmap
+#
+mdadm --create --run $md0 -l 1 -n 2 $dev1 $dev2
+check wait
+testdev $md0 1 $mdsize1a 64
+
+bmf=$targetdir/bm
+rm -f $bmf
+#mdadm -E $dev1
+mdadm --grow $md0 --bitmap=$bmf --delay=1 || { mdadm -X $bmf ; exit 1; }
+dirty1=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty2=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+testdev $md0 1 $mdsize1a 64
+dirty3=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty4=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+#echo $dirty1 $dirty2 $dirty3 $dirty4
+if [ $dirty2 -ne 0 -o $dirty4 -ne 0 -o $dirty3 -lt 400 ]
+then
+ echo bad dirty counts
+ exit 1
+fi
+
+# now to remove the bitmap
+check bitmap
+mdadm --grow $md0 --bitmap=none
+check nobitmap
+mdadm -S $md0
diff --git a/tests/05r1-grow-internal b/tests/05r1-grow-internal
new file mode 100644
index 00000000..a648b440
--- /dev/null
+++ b/tests/05r1-grow-internal
@@ -0,0 +1,31 @@
+
+#
+# create a raid1 array, add an internal bitmap
+#
+mdadm --create --run $md0 -l 1 -n 2 $dev1 $dev2
+check wait
+testdev $md0 1 $mdsize1a 64
+
+#mdadm -E $dev1
+mdadm --grow $md0 --bitmap=internal --bitmap-chunk=4 --delay=1 || { mdadm -X $dev2 ; exit 1; }
+dirty1=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty2=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+testdev $md0 1 $mdsize1a 64
+dirty3=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty4=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+#echo $dirty1 $dirty2 $dirty3 $dirty4
+if [ $dirty2 -ne 0 -o $dirty4 -ne 0 -o $dirty3 -lt 400 ]
+then
+ echo bad dirty counts
+ exit 1
+fi
+
+# now to remove the bitmap
+check bitmap
+mdadm --grow $md0 --bitmap=none
+check nobitmap
+mdadm -S $md0
diff --git a/tests/05r1-grow-internal-1 b/tests/05r1-grow-internal-1
new file mode 100644
index 00000000..a110f3b0
--- /dev/null
+++ b/tests/05r1-grow-internal-1
@@ -0,0 +1,31 @@
+
+#
+# create a raid1 array, version 1 superblock, add an internal bitmap
+#
+mdadm --create --run $md0 -e1 -l 1 -n 2 $dev1 $dev2
+check wait
+testdev $md0 1 $mdsize1b 64
+
+#mdadm -E $dev1
+mdadm --grow $md0 --bitmap=internal --bitmap-chunk=4 --delay=1
+dirty1=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty2=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+testdev $md0 1 $mdsize1b 64
+dirty3=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty4=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+#echo $dirty1 $dirty2 $dirty3 $dirty4
+if [ $dirty2 -ne 0 -o $dirty4 -ne 0 -o $dirty3 -lt 400 ]
+then
+ echo bad dirty counts
+ exit 1
+fi
+
+# now to remove the bitmap
+check bitmap
+mdadm --grow $md0 --bitmap=none
+check nobitmap
+mdadm -S $md0
diff --git a/tests/05r1-internalbitmap b/tests/05r1-internalbitmap
new file mode 100644
index 00000000..2bfeab8e
--- /dev/null
+++ b/tests/05r1-internalbitmap
@@ -0,0 +1,47 @@
+
+#
+# create a raid1 with an internal bitmap
+#
+mdadm --create -e0.90 --run $md0 --level=1 -n2 --delay=1 --bitmap internal --bitmap-chunk=4 $dev1 $dev2
+check wait
+testdev $md0 1 $mdsize0 64
+mdadm -S $md0
+
+mdadm --assemble $md0 $dev1 $dev2
+testdev $md0 1 $mdsize0 64
+dirty1=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty2=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty1 -lt 400 -o $dirty2 -ne 0 ]
+then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
+ exit 1
+fi
+mdadm $md0 -f $dev1
+testdev $md0 1 $mdsize0 64
+sleep 4
+dirty3=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+if [ $dirty3 -lt 400 ]
+then
+ echo >&2 "ERROR dirty count $dirty3 is too small"
+ exit 2
+fi
+
+mdadm -S $md0
+
+mdadm --assemble -R $md0 $dev2
+mdadm --zero-superblock $dev1
+mdadm $md0 --add $dev1
+check recovery
+
+dirty4=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+check wait
+sleep 4
+dirty5=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty4 -lt 400 -o $dirty5 -ne 0 ] # fail unless the earlier sample had >=400 dirty and the later one has none
+then echo >&2 "ERROR bad 'dirty' counts at end: $dirty4 $dirty5"
+ exit 1
+fi
+
+mdadm -S $md0
diff --git a/tests/05r1-internalbitmap-v1a b/tests/05r1-internalbitmap-v1a
new file mode 100644
index 00000000..3fcb0249
--- /dev/null
+++ b/tests/05r1-internalbitmap-v1a
@@ -0,0 +1,48 @@
+
+#
+# create a raid1 with an internal bitmap
+#
+mdadm --create --run $md0 --metadata=1.0 --level=1 -n2 --delay=1 --bitmap internal --bitmap-chunk=4 $dev1 $dev2
+check wait
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
+
+mdadm --assemble $md0 $dev1 $dev2
+testdev $md0 1 $mdsize1b 64
+dirty1=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty2=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty1 -lt 400 -o $dirty2 -ne 0 ]
+then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
+ exit 1
+fi
+mdadm $md0 -f $dev1
+testdev $md0 1 $mdsize1b 64
+sleep 4
+dirty3=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+if [ $dirty3 -lt 400 ]
+then
+ echo >&2 "ERROR dirty count $dirty3 is too small"
+ exit 2
+fi
+
+mdadm -S $md0
+
+mdadm --zero-superblock $dev1
+mdadm --assemble -R $md0 $dev2
+mdadm $md0 --add $dev1
+check recovery
+
+dirty4=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+check wait
+sleep 4
+dirty5=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty4 -lt 400 -o $dirty5 -ne 0 ] # fail unless the earlier sample had >=400 dirty and the later one has none
+then echo >&2 "ERROR bad 'dirty' counts at end: $dirty4 $dirty5"
+ exit 1
+fi
+
+mdadm -S $md0
diff --git a/tests/05r1-internalbitmap-v1b b/tests/05r1-internalbitmap-v1b
new file mode 100644
index 00000000..1097536b
--- /dev/null
+++ b/tests/05r1-internalbitmap-v1b
@@ -0,0 +1,49 @@
+
+#
+# create a raid1 with an internal bitmap
+#
+mdadm --create --run $md0 --metadata=1.1 --level=1 -n2 --delay=1 --bitmap internal --bitmap-chunk=4 $dev1 $dev2
+check wait
+check bitmap
+testdev $md0 1 $mdsize11 64
+mdadm -S $md0
+
+mdadm --assemble $md0 $dev1 $dev2
+check bitmap
+testdev $md0 1 $mdsize11 64
+dirty1=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty2=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty1 -lt 400 -o $dirty2 -ne 0 ]
+then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
+ exit 1
+fi
+mdadm $md0 -f $dev1
+testdev $md0 1 $mdsize11 64
+sleep 4
+dirty3=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+if [ $dirty3 -lt 400 ]
+then
+ echo >&2 "ERROR dirty count $dirty3 is too small"
+ exit 2
+fi
+
+mdadm -S $md0
+
+mdadm --zero-superblock $dev1
+mdadm --assemble -R $md0 $dev2
+mdadm $md0 --add $dev1
+check recovery
+
+dirty4=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+check wait
+sleep 4
+dirty5=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty4 -lt 400 -o $dirty5 -ne 0 ] # fail unless the earlier sample had >=400 dirty and the later one has none
+then echo >&2 "ERROR bad 'dirty' counts at end: $dirty4 $dirty5"
+ exit 1
+fi
+
+mdadm -S $md0
diff --git a/tests/05r1-internalbitmap-v1c b/tests/05r1-internalbitmap-v1c
new file mode 100644
index 00000000..f6bbcf6b
--- /dev/null
+++ b/tests/05r1-internalbitmap-v1c
@@ -0,0 +1,48 @@
+
+#
+# create a raid1 with an internal bitmap
+#
+mdadm --create --run $md0 --metadata=1.2 --level=1 -n2 --delay=1 --bitmap internal --bitmap-chunk 4 $dev1 $dev2
+check wait
+check bitmap
+testdev $md0 1 $mdsize12 64
+mdadm -S $md0
+
+mdadm --assemble $md0 $dev1 $dev2
+testdev $md0 1 $mdsize12 64
+dirty1=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty2=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty1 -lt 400 -o $dirty2 -ne 0 ]
+then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
+ exit 1
+fi
+mdadm $md0 -f $dev1
+testdev $md0 1 $mdsize12 64
+sleep 4
+dirty3=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+if [ $dirty3 -lt 400 ]
+then
+ echo >&2 "ERROR dirty count $dirty3 is too small"
+ exit 2
+fi
+
+mdadm -S $md0
+
+mdadm --zero-superblock $dev1
+mdadm --assemble -R $md0 $dev2
+mdadm $md0 --add $dev1
+check recovery
+
+dirty4=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+check wait
+sleep 4
+dirty5=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty4 -lt 400 -o $dirty5 -ne 0 ] # fail unless the earlier sample had >=400 dirty and the later one has none
+then echo >&2 "ERROR bad 'dirty' counts at end: $dirty4 $dirty5"
+ exit 1
+fi
+
+mdadm -S $md0
diff --git a/tests/05r1-n3-bitmapfile b/tests/05r1-n3-bitmapfile
new file mode 100644
index 00000000..1bb9af16
--- /dev/null
+++ b/tests/05r1-n3-bitmapfile
@@ -0,0 +1,53 @@
+
+#
+# create a raid1 with 3 devices and a bitmap file
+# make sure resync does right thing.
+#
+#
+bmf=$targetdir/bitmap
+rm -f $bmf
+mdadm --create -e0.90 --run $md0 --level=1 -n3 --delay=1 --bitmap $bmf $dev1 $dev2 $dev3
+check wait
+testdev $md0 1 $mdsize0 64
+mdadm -S $md0
+
+mdadm --assemble $md0 --bitmap=$bmf $dev1 $dev2 $dev3
+testdev $md0 1 $mdsize0 64
+dirty1=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty2=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty1 -lt 400 -o $dirty2 -ne 0 ]
+then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
+ exit 1
+fi
+mdadm $md0 -f $dev2
+testdev $md0 1 $mdsize0 64
+sleep 4
+dirty3=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+if [ $dirty3 -lt 400 ]
+then
+ echo >&2 "ERROR dirty count $dirty3 is too small"
+ exit 2
+fi
+
+mdadm -S $md0
+
+mdadm --assemble -R $md0 --bitmap=$bmf $dev1 $dev3
+check nosync
+mdadm --zero-superblock $dev2
+mdadm $md0 --add $dev2
+check recovery
+
+dirty4=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+check wait
+sleep 4
+dirty5=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty4 -lt 400 -o $dirty5 -ne 0 ] # fail unless the earlier sample had >=400 dirty and the later one has none
+then echo >&2 "ERROR bad 'dirty' counts at end: $dirty4 $dirty5"
+ exit 1
+fi
+
+mdadm -S $md0
+exit 0
diff --git a/tests/05r1-re-add b/tests/05r1-re-add
new file mode 100644
index 00000000..44da8908
--- /dev/null
+++ b/tests/05r1-re-add
@@ -0,0 +1,36 @@
+
+#
+# create a raid1, remove a drive, and readd it.
+# resync should be instant.
+# Then do some IO first. Resync should still be very fast
+#
+
+mdadm -CR $md0 -l1 -n2 -binternal --bitmap-chunk=4 -d1 $dev1 $dev2
+check resync
+check wait
+testdev $md0 1 $mdsize1a 64
+sleep 4
+
+mdadm $md0 -f $dev2
+sleep 1
+mdadm $md0 -r $dev2
+mdadm $md0 -a $dev2
+#cat /proc/mdstat
+check nosync
+
+mdadm $md0 -f $dev2
+sleep 1
+mdadm $md0 -r $dev2
+testdev $md0 1 $mdsize1a 64
+mdadm $md0 -a $dev2
+check wait
+cmp --ignore-initial=$[16*512] --bytes=$[$mdsize0*1024] $dev1 $dev2
+
+mdadm $md0 -f $dev2; sleep 1
+mdadm $md0 -r $dev2
+if dd if=/dev/zero of=$md0 ; then : ; fi
+mdadm $md0 -a $dev2
+check recovery
+check wait
+cmp --ignore-initial=$[16*512] --bytes=$[$mdsize0*1024] $dev1 $dev2
+mdadm -S $md0
diff --git a/tests/05r1-re-add-nosuper b/tests/05r1-re-add-nosuper
new file mode 100644
index 00000000..8025a685
--- /dev/null
+++ b/tests/05r1-re-add-nosuper
@@ -0,0 +1,37 @@
+
+#
+# create a raid1, remove a drive, and readd it.
+# resync should be instant.
+# Then do some IO first. Resync should still be very fast
+#
+bmf=$targetdir/bitmap2
+rm -f $bmf
+mdadm -B $md0 -l1 -n2 -b$bmf -d1 $dev1 $dev2
+check resync
+check wait
+testdev $md0 1 $size 1
+sleep 4
+
+mdadm $md0 -f $dev2
+sleep 1
+mdadm $md0 -r $dev2
+mdadm $md0 --re-add $dev2
+check nosync
+
+mdadm $md0 -f $dev2
+sleep 1
+mdadm $md0 -r $dev2
+testdev $md0 1 $size 1
+mdadm $md0 --re-add $dev2
+check wait
+cmp --bytes=$[$mdsize0*1024] $dev1 $dev2
+
+mdadm $md0 -f $dev2; sleep 1
+mdadm $md0 -r $dev2
+if dd if=/dev/zero of=$md0 ; then : ; fi
+mdadm $md0 --re-add $dev2
+check recovery
+check wait
+# should BLKFLSBUF and then read $dev1/$dev2...
+cmp --bytes=$[$mdsize0*1024] $file1 $file2
+mdadm -S $md0
diff --git a/tests/05r1-remove-internalbitmap b/tests/05r1-remove-internalbitmap
new file mode 100644
index 00000000..712fd56f
--- /dev/null
+++ b/tests/05r1-remove-internalbitmap
@@ -0,0 +1,18 @@
+#
+# create a raid1 with bitmap, remove the bitmap and verify it is still
+# gone when re-assembling the array
+#
+mdadm --create --run $md0 --metadata=0.9 --level=1 -n2 --bitmap internal --bitmap-chunk=4 --delay=1 $dev1 $dev2
+check wait
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -Gb none $md0
+check nobitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
+
+# Re-assemble the array and verify the bitmap is still absent
+mdadm --assemble $md0 $dev1 $dev2
+check nobitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
diff --git a/tests/05r1-remove-internalbitmap-v1a b/tests/05r1-remove-internalbitmap-v1a
new file mode 100644
index 00000000..a4a9aaf1
--- /dev/null
+++ b/tests/05r1-remove-internalbitmap-v1a
@@ -0,0 +1,18 @@
+#
+# create a raid1 with bitmap, remove the bitmap and verify it is still
+# gone when re-assembling the array
+#
+mdadm --create --run $md0 --metadata=1.0 --level=1 -n2 --bitmap internal --bitmap-chunk=4 --delay=1 $dev1 $dev2
+check wait
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -Gb none $md0
+check nobitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
+
+# Re-assemble the array and verify the bitmap is still absent
+mdadm --assemble $md0 $dev1 $dev2
+check nobitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
diff --git a/tests/05r1-remove-internalbitmap-v1b b/tests/05r1-remove-internalbitmap-v1b
new file mode 100644
index 00000000..c0918eb6
--- /dev/null
+++ b/tests/05r1-remove-internalbitmap-v1b
@@ -0,0 +1,18 @@
+#
+# create a raid1 with bitmap, remove the bitmap and verify it is still
+# gone when re-assembling the array
+#
+mdadm --create --run $md0 --metadata=1.1 --level=1 -n2 --bitmap internal --bitmap-chunk=4 --delay=1 $dev1 $dev2
+check wait
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -Gb none $md0
+check nobitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
+
+# Re-assemble the array and verify the bitmap is still absent
+mdadm --assemble $md0 $dev1 $dev2
+check nobitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
diff --git a/tests/05r1-remove-internalbitmap-v1c b/tests/05r1-remove-internalbitmap-v1c
new file mode 100644
index 00000000..15f1fbb0
--- /dev/null
+++ b/tests/05r1-remove-internalbitmap-v1c
@@ -0,0 +1,18 @@
+#
+# create a raid1 with bitmap, remove the bitmap and verify it is still
+# gone when re-assembling the array
+#
+mdadm --create --run $md0 --metadata=1.2 --level=1 -n2 --bitmap internal --bitmap-chunk=4 --delay=1 $dev1 $dev2
+check wait
+check bitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -Gb none $md0
+check nobitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
+
+# Re-assemble the array and verify the bitmap is still absent
+mdadm --assemble $md0 $dev1 $dev2
+check nobitmap
+testdev $md0 1 $mdsize1b 64
+mdadm -S $md0
diff --git a/tests/05r5-bitmapfile b/tests/05r5-bitmapfile
new file mode 100644
index 00000000..6d173d88
--- /dev/null
+++ b/tests/05r5-bitmapfile
@@ -0,0 +1,49 @@
+
+#
+# create a raid5 with a bitmap file
+#
+bmf=$targetdir/bitmap
+rm -f $bmf
+mdadm --create --run $md0 --level=5 -n3 --delay=1 --bitmap $bmf $dev1 $dev2 $dev3
+check wait
+testdev $md0 2 $mdsize1 512
+mdadm -S $md0
+
+mdadm --assemble $md0 --bitmap=$bmf $dev1 $dev2 $dev3
+testdev $md0 2 $mdsize1 512
+dirty1=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty2=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty1 -lt 400 -o $dirty2 -ne 0 ]
+then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
+ exit 1
+fi
+mdadm $md0 -f $dev1
+testdev $md0 2 $mdsize1 512
+sleep 4
+dirty3=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+if [ $dirty3 -lt 400 ]
+then
+ echo >&2 "ERROR dirty count $dirty3 is too small"
+ exit 2
+fi
+
+mdadm -S $md0
+
+mdadm --assemble -R $md0 --bitmap=$bmf $dev2 $dev3
+mdadm --zero $dev1 # force add, not re-add
+mdadm $md0 --add $dev1
+check recovery
+
+dirty4=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+check wait
+sleep 4
+dirty5=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty4 -lt 400 -o $dirty5 -ne 0 ] # dirty4: read after 'check recovery'; dirty5: read after 'check wait' + sleep
+then echo >&2 "ERROR bad 'dirty' counts at end: $dirty4 $dirty5"
+ exit 1
+fi
+
+mdadm -S $md0
diff --git a/tests/05r5-internalbitmap b/tests/05r5-internalbitmap
new file mode 100644
index 00000000..13dc5921
--- /dev/null
+++ b/tests/05r5-internalbitmap
@@ -0,0 +1,47 @@
+
+#
+# create a raid5 with an internal bitmap
+#
+mdadm --create --run $md0 --level=5 -n3 --delay=1 --bitmap internal --bitmap-chunk=4 $dev1 $dev2 $dev3
+check wait
+testdev $md0 2 $mdsize1 512
+mdadm -S $md0
+
+mdadm --assemble $md0 $dev1 $dev2 $dev3
+testdev $md0 2 $mdsize1 512
+dirty1=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty2=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty1 -lt 400 -o $dirty2 -ne 0 ]
+then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
+ exit 1
+fi
+mdadm $md0 -f $dev1
+testdev $md0 2 $mdsize1 512
+sleep 4
+dirty3=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+if [ $dirty3 -lt 400 ]
+then
+ echo >&2 "ERROR dirty count $dirty3 is too small"
+ exit 2
+fi
+
+mdadm -S $md0
+
+mdadm --assemble -R $md0 $dev2 $dev3
+mdadm --zero $dev1 # force --add, not --re-add
+mdadm $md0 --add $dev1
+check recovery
+
+dirty4=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+check wait
+sleep 4
+dirty5=`mdadm -X $dev2 | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty4 -lt 400 -o $dirty5 -ne 0 ] # dirty4: read after 'check recovery'; dirty5: read after 'check wait' + sleep
+then echo >&2 "ERROR bad 'dirty' counts at end: $dirty4 $dirty5"
+ exit 1
+fi
+
+mdadm -S $md0
diff --git a/tests/05r6-bitmapfile b/tests/05r6-bitmapfile
new file mode 100644
index 00000000..d11896db
--- /dev/null
+++ b/tests/05r6-bitmapfile
@@ -0,0 +1,49 @@
+
+#
+# create a raid6 with a bitmap file
+#
+bmf=$targetdir/bitmap
+rm -f $bmf
+mdadm --create --run $md0 --level=6 -n4 --delay=1 --bitmap $bmf $dev1 $dev2 $dev3 $dev4
+check wait
+testdev $md0 2 $mdsize1 512
+mdadm -S $md0
+
+mdadm --assemble $md0 --bitmap=$bmf $dev1 $dev2 $dev3 $dev4
+testdev $md0 2 $mdsize1 512
+dirty1=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+sleep 4
+dirty2=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty1 -lt 400 -o $dirty2 -ne 0 ]
+then echo >&2 "ERROR bad 'dirty' counts: $dirty1 and $dirty2"
+ exit 1
+fi
+mdadm $md0 -f $dev3
+testdev $md0 2 $mdsize1 512
+sleep 4
+dirty3=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+if [ $dirty3 -lt 400 ]
+then
+ echo >&2 "ERROR dirty count $dirty3 is too small"
+ exit 2
+fi
+
+mdadm -S $md0
+
+mdadm --assemble -R $md0 --bitmap=$bmf $dev1 $dev2 $dev4
+mdadm --zero $dev3 # force --add, not --re-add
+mdadm $md0 --add $dev3
+check recovery
+
+dirty4=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+check wait
+sleep 4
+dirty5=`mdadm -X $bmf | sed -n -e 's/.*Bitmap.* \([0-9]*\) dirty.*/\1/p'`
+
+if [ $dirty4 -lt 400 -o $dirty5 -ne 0 ] # dirty4: read after 'check recovery'; dirty5: read after 'check wait' + sleep
+then echo >&2 "ERROR bad 'dirty' counts at end: $dirty4 $dirty5"
+ exit 1
+fi
+
+mdadm -S $md0
diff --git a/tests/05r6tor0 b/tests/05r6tor0
new file mode 100644
index 00000000..2e0f1853
--- /dev/null
+++ b/tests/05r6tor0
@@ -0,0 +1,27 @@
+set -x -e
+
+# reshape a RAID6 to RAID5 and then RAID0.
+# then reshape back up to RAID5 and RAID6
+
+mdadm -CR $md0 -l6 -n5 $dev0 $dev1 $dev2 $dev3 $dev4
+check wait
+check raid6
+testdev $md0 3 19456 512
+mdadm -G $md0 -l5
+check wait
+check raid5
+testdev $md0 3 19456 512
+mdadm -G $md0 -l0
+check wait
+check raid0
+testdev $md0 3 19456 512
+mdadm -G $md0 -l5 --add $dev3 $dev4
+check wait
+check raid5
+check algorithm 2
+testdev $md0 3 19456 512
+mdadm -G $md0 -l 6
+check wait
+check raid6
+check algorithm 2
+testdev $md0 3 19456 512
diff --git a/tests/06name b/tests/06name
new file mode 100644
index 00000000..4d5e824d
--- /dev/null
+++ b/tests/06name
@@ -0,0 +1,12 @@
+set -x
+
+# create an array with a name
+
+mdadm -CR $md0 -l0 -n2 --metadata=1 --name="Fred" $dev0 $dev1
+mdadm -E $dev0 | grep 'Name : [^:]*:Fred ' > /dev/null || exit 1
+mdadm -D $md0 | grep 'Name : [^:]*:Fred ' > /dev/null || exit 1
+mdadm -S $md0
+
+mdadm -A $md0 --name="Fred" $devlist
+#mdadm -Db $md0
+mdadm -S $md0
diff --git a/tests/06sysfs b/tests/06sysfs
new file mode 100644
index 00000000..af63ef45
--- /dev/null
+++ b/tests/06sysfs
@@ -0,0 +1,11 @@
+exit 0
+mdadm -CR $md0 -l1 -n3 $dev1 $dev2 $dev3
+
+ls -Rl /sys/block/md0
+
+cat /sys/block/md0/md/level
+cat /sys/block/md0/md/raid_disks
+
+mdadm -S $md0
+
+exit 1
diff --git a/tests/06wrmostly b/tests/06wrmostly
new file mode 100644
index 00000000..968c1974
--- /dev/null
+++ b/tests/06wrmostly
@@ -0,0 +1,13 @@
+
+# create a raid1 array with a wrmostly device
+
+mdadm -CR $md0 -l1 -n3 $dev0 $dev1 --write-mostly $dev2
+testdev $md0 1 $mdsize1a 64
+
+# unfortunately, we cannot measure if any read requests are going to $dev2
+
+mdadm -S $md0
+
+mdadm -CR $md0 -l1 -n3 --write-behind --bitmap=internal --bitmap-chunk=4 $dev0 $dev1 --write-mostly $dev2
+testdev $md0 1 $mdsize1a 64
+mdadm -S $md0
diff --git a/tests/07autoassemble b/tests/07autoassemble
new file mode 100644
index 00000000..e689be7c
--- /dev/null
+++ b/tests/07autoassemble
@@ -0,0 +1,24 @@
+
+# create two raid1s, build a raid0 on top, then
+# tear it down and get auto-assemble to rebuild it.
+
+mdadm -CR $md1 -l1 -n2 $dev0 $dev1 --homehost=testing
+mdadm -CR $md2 -l1 -n2 $dev2 $dev3 --homehost=testing
+mdadm -CR $md0 -l0 -n2 $md1 $md2 --homehost=testing
+
+mdadm -Ss
+mdadm -As -c /dev/null --homehost=testing -vvv
+testdev $md1 1 $mdsize1a 64
+testdev $md2 1 $mdsize1a 64
+testdev $md0 2 $mdsize11a 512
+mdadm -Ss
+
+mdadm --zero-superblock $dev0 $dev1 $dev2 $dev3
+## Now the raid0 uses one stacked and one not
+mdadm -CR $md1 -l1 -n2 $dev0 $dev1 --homehost=testing
+mdadm -CR $md0 -l0 -n2 $md1 $dev2 --homehost=testing
+mdadm -Ss
+mdadm -As -c /dev/null --homehost=testing -vvv
+testdev $md1 1 $mdsize1a 64
+testdev $md0 1 $[mdsize1a+mdsize11a] 512
+mdadm -Ss
diff --git a/tests/07autodetect b/tests/07autodetect
new file mode 100644
index 00000000..917e0d66
--- /dev/null
+++ b/tests/07autodetect
@@ -0,0 +1,34 @@
+
+#
+# Test in-kernel autodetect.
+# Create a partitionable array on each of two devices,
+# put a partition on each, create an array, and see if we can
+# use autodetect to restart the array.
+
+if lsmod | grep md_mod > /dev/null 2>&1
+then
+ echo md is a module - cannot test autodetect
+ exit 0
+fi
+
+
+mdadm -CR -e 0 $mdp0 -l0 -f -n1 $dev0
+mdadm -CR -e 0 $mdp1 -l0 -f -n1 $dev1
+udevadm settle
+sfdisk $mdp0 >&2 << END
+,,FD
+END
+sfdisk $mdp1 >&2 << END
+,,FD
+END
+udevadm settle
+mdadm -CR -e 0 $md0 -l1 -n2 ${mdp0}p1 ${mdp1}p1
+check resync
+check raid1
+check wait
+mdadm -S $md0
+mdadm --auto-detect
+check raid1
+
+mdadm -Ss
+exit 0
diff --git a/tests/07changelevelintr b/tests/07changelevelintr
new file mode 100644
index 00000000..89cfff0c
--- /dev/null
+++ b/tests/07changelevelintr
@@ -0,0 +1,60 @@
+
+#
+# test that we can stop and restart a level change.
+# just test a few in-place changes, and a few
+# size-reducing changes.
+
+
+checkgeo() {
+ # Check the geometry of an array against the values under /sys/block/<dev>/md.
+ # usage: checkgeo <dev> [level [raid_disks [chunk_size [layout]]]]
+ dev=$1
+ shift
+ sleep 0.5
+ check wait
+ for attr in level raid_disks chunk_size layout
+ do
+ if [ $# -gt 0 ] ; then # only attributes the caller supplied are compared
+ val=$1
+ shift
+ if [ " `cat /sys/block/$dev/md/$attr`" != " $val" ]
+ then echo "$attr doesn't match for $dev"
+ exit 1
+ fi
+ fi
+ done
+}
+
+restart() { # stop $md0 and re-assemble it from $devs with backup file $bu
+ sleep 0.5
+ check reshape # NOTE(review): 'check' is defined outside this file; presumably asserts a reshape is running
+ mdadm -S $md0
+ mdadm -A $md0 $devs --backup-file=$bu
+ sleep 0.5
+ check reshape
+}
+
+bu=/tmp/md-backup
+rm -f $bu
+devs="$dev0 $dev1 $dev2 $dev3 $dev4"
+mdadm -CR $md0 -l5 -n5 -c 256 $devs
+checkgeo md0 raid5 5 $[256*1024] 2
+
+mdadm -G $md0 -c 128 --backup-file=$bu
+restart
+checkgeo md0 raid5 5 $[128*1024] 2
+
+mdadm -G $md0 --layout rs --backup-file=$bu
+restart
+checkgeo md0 raid5 5 $[128*1024] 3
+
+mdadm -G $md0 --array-size 58368
+mdadm -G $md0 --raid-disks 4 -c 64 --backup-file=$bu
+restart
+checkgeo md0 raid5 4 $[64*1024] 3
+
+devs="$dev0 $dev1 $dev2 $dev3"
+mdadm -G $md0 --array-size 19456
+mdadm -G $md0 -n 2 -c 256 --backup-file=$bu
+restart
+checkgeo md0 raid5 2 $[256*1024] 3
diff --git a/tests/07changelevels b/tests/07changelevels
new file mode 100644
index 00000000..d33e007c
--- /dev/null
+++ b/tests/07changelevels
@@ -0,0 +1,113 @@
+
+# Test changing of level, chunksize etc.
+# Create a RAID1, convert to RAID5, add a disk, add another disk
+# convert to RAID6, back to RAID5 and ultimately to RAID1
+
+testK=$[64*3*6]
+dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$testK
+export MDADM_GROW_VERIFY=1
+
+dotest() { # compare $md0 with /tmp/RandFile, then write new data; $1 is forwarded to testdev
+ sleep 2
+ check wait
+ testdev $md0 $1 19968 64 nd
+ blockdev --flushbufs $md0
+ cmp -s -n $[testK*1024] $md0 /tmp/RandFile || { echo cmp failed; exit 2; }
+ # write something new - shift chars 4 space
+ tr ' -~' '$-~ -#' < /tmp/RandFile > /tmp/RandFile2
+ mv /tmp/RandFile2 /tmp/RandFile
+ dd if=/tmp/RandFile of=$md0
+}
+
+checkgeo() {
+ # Check the geometry of an array against the values under /sys/block/<dev>/md.
+ # usage: checkgeo <dev> [level [raid_disks [chunk_size [layout]]]]
+ dev=$1
+ shift
+ sleep 0.5
+ check wait
+ for attr in level raid_disks chunk_size layout
+ do
+ if [ $# -gt 0 ] ; then # only attributes the caller supplied are compared
+ val=$1
+ shift
+ if [ " `cat /sys/block/$dev/md/$attr`" != " $val" ]
+ then echo "$attr doesn't match for $dev"
+ exit 1
+ fi
+ fi
+ done
+}
+
+
+bu=/tmp/md-test-backup
+rm -f $bu
+mdadm -CR $md0 -l1 -n2 -x1 $dev0 $dev1 $dev2 -z 19968
+testdev $md0 1 $mdsize1a 64
+dd if=/tmp/RandFile of=$md0
+dotest 1
+
+mdadm --grow $md0 -l5 -n3 --chunk 64
+dotest 2
+
+mdadm $md0 --add $dev3 $dev4
+mdadm --grow $md0 -n4 --chunk 32
+dotest 3
+
+mdadm -G $md0 -l6 --backup-file $bu
+dotest 3
+
+mdadm -G /dev/md0 --array-size 39936
+mdadm -G $md0 -n4 --backup-file $bu
+checkgeo md0 raid6 4 $[32*1024]
+dotest 2
+
+mdadm -G $md0 -l5 --backup-file $bu
+checkgeo md0 raid5 3 $[32*1024]
+dotest 2
+
+mdadm -G /dev/md0 --array-size 19968
+mdadm -G $md0 -n2 --backup-file $bu
+checkgeo md0 raid5 2 $[32*1024]
+dotest 1
+
+mdadm -G --level=1 $md0
+dotest 1
+
+# now repeat the last few steps, only with a degraded array.
+mdadm -S $md0
+mdadm -CR $md0 -l6 -n5 $dev0 $dev1 $dev2 $dev3 $dev4
+dd if=/tmp/RandFile of=$md0
+dotest 3
+
+mdadm $md0 --fail $dev0
+
+mdadm -G /dev/md0 --array-size 37888
+mdadm -G $md0 -n4 --backup-file $bu
+dotest 2
+checkgeo md0 raid6 4 $[512*1024]
+mdadm $md0 --fail $dev4
+
+mdadm $md0 --fail $dev3
+# now double-degraded.
+# switch layout to a DDF layout and back to make sure that works.
+
+mdadm -G /dev/md0 --layout=ddf-N-continue --backup-file $bu
+checkgeo md0 raid6 4 $[512*1024] 10
+dotest 2
+mdadm -G /dev/md0 --layout=ra --backup-file $bu
+checkgeo md0 raid6 4 $[512*1024] 1
+dotest 2
+
+mdadm -G $md0 -l5 --backup-file $bu
+dotest 2
+
+mdadm -G /dev/md0 --array-size 18944
+mdadm -G $md0 -n2 --backup-file $bu
+dotest 1
+checkgeo md0 raid5 2 $[512*1024]
+mdadm $md0 --fail $dev2
+
+mdadm -G --level=1 $md0
+dotest 1
+checkgeo md0 raid1 2
diff --git a/tests/07layouts b/tests/07layouts
new file mode 100644
index 00000000..4c20eb8b
--- /dev/null
+++ b/tests/07layouts
@@ -0,0 +1,91 @@
+
+# check that kernel and restripe interpret all the different layouts
+# the same
+# This involves changing the layout to each different possibility
+# while MDADM_GROW_VERIFY is set.
+
+testK=$[64*3*6] # size of the random reference file in KiB (dd uses bs=1024)
+dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$testK
+export MDADM_GROW_VERIFY=1
+
+
+dotest() { # compare $md0 with /tmp/RandFile, then write new data; $1 is forwarded to testdev
+ sleep 0.5
+ check wait
+ testdev $md0 $1 $mdsize1 512 nd
+ blockdev --flushbufs $md0
+ cmp -s -n $[testK*1024] $md0 /tmp/RandFile || { echo cmp failed; exit 2; }
+ # write something new - shift chars 4 space
+ tr ' -~' '$-~ -#' < /tmp/RandFile > /tmp/RandFile2
+ mv /tmp/RandFile2 /tmp/RandFile
+ dd if=/tmp/RandFile of=$md0
+}
+
+checkgeo() {
+ # Check the geometry of an array against the values under /sys/block/<dev>/md.
+ # usage: checkgeo <dev> [level [raid_disks [chunk_size [layout]]]]
+ dev=$1
+ shift
+ sleep 0.5
+ check wait
+ for attr in level raid_disks chunk_size layout
+ do
+ if [ $# -gt 0 ] ; then # only attributes the caller supplied are compared
+ val=$1
+ shift
+ if [ " `sed 's/ .*//' /sys/block/$dev/md/$attr`" != " $val" ] # sed keeps only the text before the first space
+ then echo "$attr doesn't match for $dev"
+ exit 1
+ fi
+ fi
+ done
+}
+
+
+bu=/tmp/md-test-backup
+rm -f $bu
+
+# first a degraded 5 device raid5
+mdadm -CR $md0 -l5 -n5 $dev0 $dev1 missing $dev2 $dev3
+dd if=/tmp/RandFile of=$md0
+dotest 4
+
+l5[0]=la
+l5[1]=ra
+l5[2]=ls
+l5[3]=rs
+l5[4]=parity-first
+l5[5]=parity-last
+for layout in 0 1 2 3 4 5 0
+do
+ mdadm -G $md0 --layout=${l5[$layout]} --backup-file $bu
+ checkgeo md0 raid5 5 $[512*1024] $layout
+ dotest 4
+done
+
+mdadm -S $md0
+# now a doubly degraded raid6
+mdadm -CR $md0 -l6 -n5 $dev0 missing $dev2 missing $dev4
+dd if=/tmp/RandFile of=$md0
+dotest 3
+
+l6[0]=la
+l6[1]=ra
+l6[2]=ls
+l6[3]=rs
+l6[4]=parity-first
+l6[5]=parity-last
+l6[8]=ddf-zero-restart
+l6[9]=ddf-N-restart
+l6[10]=ddf-N-continue
+l6[16]=left-asymmetric-6
+l6[17]=right-asymmetric-6
+l6[18]=left-symmetric-6
+l6[19]=right-symmetric-6
+l6[20]=parity-first-6
+for layout in 0 1 2 3 4 5 8 9 10 16 17 18 19 20 0
+do
+ mdadm -G $md0 --layout=${l6[$layout]} --backup-file $bu
+ checkgeo md0 raid6 5 $[512*1024] $layout
+ dotest 3
+done
diff --git a/tests/07reshape5intr b/tests/07reshape5intr
new file mode 100644
index 00000000..71ef2c78
--- /dev/null
+++ b/tests/07reshape5intr
@@ -0,0 +1,41 @@
+
+#
+# test interrupting and restarting raid5 reshape.
+set -x
+devs="$dev1"
+st=UU
+for disks in 2 3 4 5
+do
+ eval devs=\"$devs \$dev$disks\"
+ st=U$st
+ for d in $devs
+ do dd if=/dev/urandom of=$d bs=1024 || true
+ done
+
+ case $disks in
+ 2 | 3) chunk=1024;;
+ 4 ) chunk=512;;
+ 5 ) chunk=256;;
+ esac
+
+ mdadm -CR $md0 -amd -l5 -c $chunk -n$disks --assume-clean $devs
+ mdadm $md0 --add $dev6
+ echo 20 > /proc/sys/dev/raid/speed_limit_min
+ echo 20 > /proc/sys/dev/raid/speed_limit_max
+ mdadm --grow $md0 -n $[disks+1]
+ check reshape
+ check state $st
+ mdadm --stop $md0
+ mdadm --assemble $md0 $devs $dev6
+ check reshape
+ echo 1000 > /proc/sys/dev/raid/speed_limit_min
+ echo 2000 > /proc/sys/dev/raid/speed_limit_max
+ check wait
+ echo check > /sys/block/md0/md/sync_action
+ check wait
+ mm=`cat /sys/block/md0/md/mismatch_cnt`
+ if [ $mm -gt 0 ]
+ then echo >&2 "ERROR mismatch_cnt non-zero : $mm" ; exit 1
+ fi
+ mdadm -S $md0
+done
diff --git a/tests/07revert-grow b/tests/07revert-grow
new file mode 100644
index 00000000..c8c4e855
--- /dev/null
+++ b/tests/07revert-grow
@@ -0,0 +1,52 @@
+set -e -x
+
+# revert a reshape that is increasing the number of devices,
+# raid5, raid6, and raid10
+
+# metadata 0.90 cannot handle RAID10 growth
+# metadata 1.0 doesn't get a default headspace, so don't try it either.
+
+for metadata in 0.90 1.1 1.2
+do
+# RAID5
+mdadm -CR --assume-clean $md0 -l5 -n4 -x1 $devlist4 --metadata=$metadata
+check raid5
+testdev $md0 3 $mdsize1 512
+mdadm -G $md0 -n 5
+sleep 3
+mdadm -S $md0
+mdadm -A $md0 --update=revert-reshape $devlist4 --backup-file=/tmp/md-backup
+check wait
+check raid5
+testdev $md0 3 $mdsize1 512
+mdadm -S $md0
+
+# RAID6
+mdadm -CR --assume-clean $md0 -l6 -n4 -x1 $devlist4 --metadata=$metadata
+check raid6
+testdev $md0 2 $mdsize1 512
+mdadm -G $md0 -n 5
+sleep 3
+mdadm -S $md0
+mdadm -A $md0 --update=revert-reshape $devlist4 --backup-file=/tmp/md-backup
+check wait
+check raid6
+testdev $md0 2 $mdsize1 512
+mdadm -S $md0
+
+if [ $metadata = 0.90 ]; then continue; fi
+
+# RAID10
+mdadm -CR --assume-clean $md0 -l10 -n4 -x1 $devlist4 --metadata=$metadata
+check raid10
+testdev $md0 2 $mdsize1 512
+mdadm -G $md0 -n 5
+sleep 3
+mdadm -S $md0
+strace -o /tmp/str ./mdadm -A $md0 --update=revert-reshape $devlist4
+check wait
+check raid10
+testdev $md0 2 $mdsize1 512
+mdadm -S $md0
+
+done
diff --git a/tests/07revert-inplace b/tests/07revert-inplace
new file mode 100644
index 00000000..48416c8f
--- /dev/null
+++ b/tests/07revert-inplace
@@ -0,0 +1,44 @@
+set -e -x
+
+# revert a reshape that is not changing the number of data devices,
+# raid5, raid6, and raid10
+
+# RAID5 -> RAID6
+mdadm -CR --assume-clean $md0 -l5 -n4 -x1 $devlist4
+check raid5
+testdev $md0 3 $mdsize1 512
+mdadm -G $md0 -l 6
+sleep 3
+mdadm -S $md0
+mdadm -A $md0 --update=revert-reshape $devlist4 --backup-file=/tmp/md-backup
+check wait
+check raid6
+check algorithm 18
+testdev $md0 3 $mdsize1 512
+mdadm -S $md0
+
+# RAID6 -> RAID5
+mdadm -CR --assume-clean $md0 -l6 -n5 $devlist4
+check raid6
+testdev $md0 3 $mdsize1 512
+mdadm -G $md0 -l 5
+sleep 3
+mdadm -S $md0
+mdadm -A $md0 --update=revert-reshape $devlist4 --backup-file=/tmp/md-backup
+check wait
+check raid6
+testdev $md0 3 $mdsize1 512
+mdadm -S $md0
+
+# RAID10 - decrease chunk size
+mdadm -CR --assume-clean $md0 -l10 -n6 -c 64 $devlist5
+check raid10
+testdev $md0 3 $mdsize1 64
+mdadm -G $md0 -c 32
+sleep 3
+mdadm -S $md0
+strace -o /tmp/str ./mdadm -A $md0 --update=revert-reshape $devlist5
+check wait
+check raid10
+testdev $md0 3 $mdsize1 64
+mdadm -S $md0
diff --git a/tests/07revert-shrink b/tests/07revert-shrink
new file mode 100644
index 00000000..62b5ae02
--- /dev/null
+++ b/tests/07revert-shrink
@@ -0,0 +1,56 @@
+set -e -x
+
+# revert a reshape that is decreasing the number of devices,
+# raid5, raid6, and raid10
+
+bu=$targetdir/md-backup
+rm -f $bu
+# RAID5
+mdadm -CR --assume-clean $md0 -l5 -n5 $devlist4
+check raid5
+testdev $md0 4 $mdsize1 512
+mdadm --grow $md0 --array-size 56832
+testdev $md0 3 $mdsize1 512
+mdadm -G $md0 -n 4 --backup=$bu
+sleep 3
+mdadm -S $md0
+mdadm -A $md0 --update=revert-reshape $devlist4 --backup-file=$bu
+check wait
+check raid5
+fsck -f -n $md0
+testdev $md0 4 $mdsize1 512
+mdadm -S $md0
+
+#FIXME
+rm -f $bu
+# RAID6
+mdadm -CR --assume-clean $md0 -l6 -n5 $devlist4
+check raid6
+testdev $md0 3 $mdsize1 512
+mdadm --grow $md0 --array-size 37888
+testdev $md0 2 $mdsize1 512
+mdadm -G $md0 -n 4 --backup=$bu
+sleep 2
+mdadm -S $md0
+mdadm -A $md0 --update=revert-reshape $devlist4 --backup-file=$bu
+check wait
+check raid6
+fsck -f -n $md0
+testdev $md0 3 $mdsize1 512
+mdadm -S $md0
+
+# RAID10
+mdadm -CR --assume-clean $md0 -l10 -n6 $devlist5
+check raid10
+testdev $md0 3 $mdsize1 512
+mdadm --grow $md0 --array-size 36864
+testdev $md0 2 $mdsize1 512
+mdadm -G $md0 -n 4
+sleep 3
+mdadm -S $md0
+mdadm -A $md0 --update=revert-reshape $devlist5
+check wait
+check raid10
+fsck -f -n $md0
+testdev $md0 3 $mdsize1 512
+mdadm -S $md0
diff --git a/tests/07testreshape5 b/tests/07testreshape5
new file mode 100644
index 00000000..0e1f25f9
--- /dev/null
+++ b/tests/07testreshape5
@@ -0,0 +1,45 @@
+
+#
+# test the reshape code by using test_reshape and the
+# kernel md code to move data into and out of variously
+# shaped md arrays.
+set -x
+layouts=(la ra ls rs)
+for level in 5 6
+do
+for chunk in 4 8 16 32 64 128
+do
+ devs="$dev1"
+ for disks in 2 3 4 5 6
+ do
+ eval devs=\"$devs \$dev$disks\"
+ if [ " $level $disks" = " 6 3" -o " $level $disks" = " 6 2" ]
+ then continue
+ fi
+ for nlayout in 0 1 2 3
+ do
+ layout=${layouts[$nlayout]}
+
+ size=$[chunk*(disks-(level-4))*disks]
+
+ # test restore: make a raid5 from a file, then do a compare
+ dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$size
+ $dir/test_stripe restore /tmp/RandFile $disks $[chunk*1024] $level $nlayout 0 $[size*1024] $devs
+ mdadm -CR -e 1.0 $md0 -amd -l$level -n$disks --assume-clean -c $chunk -p $layout $devs
+ cmp -s -n $[size*1024] $md0 /tmp/RandFile || { echo cmp failed ; exit 2; }
+
+ # FIXME check parity
+
+ # test save
+ dd if=/dev/urandom of=$md0 bs=1024 count=$size
+ blockdev --flushbufs $md0 $devs; sync
+ > /tmp/NewRand
+ $dir/test_stripe save /tmp/NewRand $disks $[chunk*1024] $level $nlayout 0 $[size*1024] $devs
+ cmp -s -n $[size*1024] $md0 /tmp/NewRand || { echo cmp failed ; exit 2; }
+ mdadm -S $md0
+ udevadm settle
+ done
+ done
+done
+done
+exit 0
diff --git a/tests/09imsm-assemble b/tests/09imsm-assemble
new file mode 100644
index 00000000..2a42369e
--- /dev/null
+++ b/tests/09imsm-assemble
@@ -0,0 +1,73 @@
+# validate the prodigal member disk scenario i.e. a former container
+# member is returned after having been rebuilt on another system
+
+
+imsm_check_hold() { # usage: imsm_check_hold <array> <device>; fails the test if the removal succeeds
+ if mdadm --remove $1 $2; then
+ echo "$2 removal from $1 should have been blocked" >&2
+ cat /proc/mdstat >&2
+ mdadm -E $2
+ exit 1
+ fi
+}
+
+imsm_check_removal() { # usage: imsm_check_removal <array> <device>; fails the test if the removal is refused
+ if ! mdadm --remove $1 $2 ; then
+ echo "$2 removal from $1 should have succeeded" >&2
+ cat /proc/mdstat >&2
+ mdadm -E $2
+ exit 1
+ fi
+}
+
+export IMSM_DEVNAME_AS_SERIAL=1
+export IMSM_TEST_OROM=1
+export IMSM_NO_PLATFORM=1
+container=/dev/md/container
+member=/dev/md/vol0
+
+
+num_disks=4
+size=$((10*1024))
+mdadm -CR $container -e imsm -n $num_disks $dev0 $dev1 $dev2 $dev3
+mdadm -CR $member $dev0 $dev2 -n 2 -l 1 -z $size
+mdadm --wait $member
+mdadm -Ss
+
+# make dev0 and dev1 a new rebuild family
+mdadm -A $container $dev0 $dev1
+mdadm -IR $container
+mdadm --wait ${member}_0
+mdadm -Ss
+
+# make dev2 and dev3 a new rebuild family
+mdadm -A $container $dev2 $dev3
+mdadm -IR $container
+mdadm --wait ${member}_0
+mdadm -Ss
+
+# reassemble and make sure one of the families falls out
+mdadm -A $container $dev0 $dev1 $dev2 $dev3
+mdadm -IR $container
+testdev ${member}_0 1 $size 64
+if mdadm --remove $container $dev0 ; then
+ # the dev[23] family won
+ imsm_check_removal $container $dev1
+ imsm_check_hold $container $dev2
+ imsm_check_hold $container $dev3
+else
+ # the dev[01] family won
+ imsm_check_hold $container $dev1
+ imsm_check_removal $container $dev2
+ imsm_check_removal $container $dev3
+fi
+mdadm -Ss
+
+# reassemble with a new id for the dev[23] family
+mdadm -A $container $dev0 $dev1
+mdadm -IR $container
+mdadm -A ${container}2 $dev2 $dev3 --update=uuid
+mdadm -IR ${container}2
+
+testdev ${member}_0 1 $size 64
+testdev ${member}_1 1 $size 64
diff --git a/tests/09imsm-create-fail-rebuild b/tests/09imsm-create-fail-rebuild
new file mode 100644
index 00000000..4b656b53
--- /dev/null
+++ b/tests/09imsm-create-fail-rebuild
@@ -0,0 +1,78 @@
+# sanity check array creation
+
+imsm_check_hold() { # usage: imsm_check_hold <array> <device>; fails the test if the removal succeeds
+ if mdadm --remove $1 $2; then
+ echo "$2 removal from $1 should have been blocked" >&2
+ cat /proc/mdstat >&2
+ mdadm -E $2
+ exit 1
+ fi
+}
+
+imsm_check_removal() { # usage: imsm_check_removal <array> <device>; fails the test if the removal is refused
+ if ! mdadm --remove $1 $2 ; then
+ echo "$2 removal from $1 should have succeeded" >&2
+ cat /proc/mdstat >&2
+ mdadm -E $2
+ exit 1
+ fi
+}
+
+. tests/env-imsm-template
+
+# IMSM rounds to multiples of one mebibyte - 1024K
+DEV_ROUND_K=1024
+
+num_disks=2
+mdadm -CR $container -e imsm -n $num_disks $dev0 $dev1
+imsm_check container $num_disks
+
+# RAID0 + RAID1
+size=9000
+level=0
+chunk=64
+offset=0
+mdadm -CR $member0 $dev0 $dev1 -n $num_disks -l $level -z $size -c $chunk
+imsm_check member $member0 $num_disks $level $size $((size*2)) $offset $chunk
+testdev $member0 $num_disks $size $chunk
+
+offset=$(((size & ~(chunk - 1)) + 2048))
+size=4000
+level=1
+chunk=0
+mdadm -CR $member1 $dev0 $dev1 -n $num_disks -l $level -z $size
+imsm_check member $member1 $num_disks $level $size $size $offset $chunk
+testdev $member1 1 $size 64
+check wait
+
+mdadm -Ss
+
+# RAID10 + RAID5
+num_disks=4
+mdadm -CR $container -e imsm -n $num_disks $dev0 $dev1 $dev2 $dev3
+imsm_check container $num_disks
+
+size=9000
+level=10
+chunk=64
+offset=0
+mdadm -CR $member0 $dev0 $dev1 $dev2 $dev3 -n $num_disks -l $level -z $size -c $chunk
+imsm_check member $member0 $num_disks $level $size $((size*2)) $offset $chunk
+testdev $member0 $((num_disks-2)) $size $chunk
+
+offset=$(((size & ~(chunk - 1)) + 2048))
+size=4000
+level=5
+mdadm -CR $member1 $dev0 $dev1 $dev2 $dev3 -n $num_disks -l $level -z $size -c $chunk
+imsm_check member $member1 $num_disks $level $size $((size*3)) $offset $chunk
+testdev $member1 $((num_disks-1)) $size $chunk
+check wait
+
+# FAIL / REBUILD
+imsm_check_hold $container $dev0
+mdadm --fail $member0 $dev0
+mdadm --wait-clean --scan
+imsm_check_removal $container $dev0
+mdadm --add $container $dev4
+check wait
+imsm_check_hold $container $dev4
diff --git a/tests/09imsm-overlap b/tests/09imsm-overlap
new file mode 100644
index 00000000..e832257c
--- /dev/null
+++ b/tests/09imsm-overlap
@@ -0,0 +1,30 @@
+
+. tests/env-imsm-template
+
+# create raid arrays with varying degrees of overlap
+mdadm -CR $container -e imsm -n 6 $dev0 $dev1 $dev2 $dev3 $dev4 $dev5
+imsm_check container 6
+
+size=1910
+level=1
+num_disks=2
+mdadm -CR $member0 $dev0 $dev1 -n $num_disks -l $level -z $size
+mdadm -CR $member1 $dev1 $dev2 -n $num_disks -l $level -z $size
+mdadm -CR $member2 $dev2 $dev3 -n $num_disks -l $level -z $size
+mdadm -CR $member3 $dev3 $dev4 -n $num_disks -l $level -z $size
+mdadm -CR $member4 $dev4 $dev5 -n $num_disks -l $level -z $size
+
+udevadm settle
+
+offset=0
+imsm_check member $member0 $num_disks $level $size 1024 $offset
+offset=$((offset+size+2048))
+imsm_check member $member1 $num_disks $level $size 1024 $offset
+offset=$((offset+size+2048))
+imsm_check member $member2 $num_disks $level $size 1024 $offset
+# at this point there should be more freespace at the start of the disk
+# than the end
+offset=0
+imsm_check member $member3 $num_disks $level $size 1024 $offset
+offset=$((offset+size+2048))
+imsm_check member $member4 $num_disks $level $size 1024 $offset
diff --git a/tests/10ddf-assemble-missing b/tests/10ddf-assemble-missing
new file mode 100644
index 00000000..17f17244
--- /dev/null
+++ b/tests/10ddf-assemble-missing
@@ -0,0 +1,61 @@
+# An array is assembled incompletely.
+# Re missing disks get marked as missing and are not allowed back in
+
+. tests/env-ddf-template
+tmp=$(mktemp /tmp/mdtest-XXXXXX)
+rm -f $tmp /var/tmp/mdmon.log
+ret=0
+
+mdadm -CR $container -e ddf -n 4 $dev8 $dev9 $dev10 $dev11
+ddf_check container 4
+
+mdadm -CR $member1 -n 4 -l 10 $dev8 $dev10 $dev9 $dev11 -z 10000
+mdadm -CR $member0 -n 2 -l 1 $dev8 $dev9 -z 10000
+
+mdadm --wait $member0
+mdadm --wait $member1
+
+mdadm -Ss
+sleep 1
+
+# Add all devices except those for $member0
+mdadm -I $dev10
+mdadm -I $dev11
+
+# Start runnable members
+mdadm -IRs
+mdadm -Ss
+
+#[ -f /var/tmp/mdmon.log ] && cat /var/tmp/mdmon.log
+
+# Now reassemble
+# This should work because BVDs weren't written to
+for d in $dev8 $dev9 $dev10 $dev11; do
+ mdadm -I $d
+done
+mdadm -Ss
+
+# Expect consistent state
+for d in $dev10 $dev11; do
+ mdadm -E $d>$tmp
+ egrep 'state\[0\] : Degraded, Consistent' $tmp || {
+ ret=1
+ echo ERROR: $member0 has unexpected state on $d
+ }
+ egrep 'state\[1\] : Optimal, Consistent' $tmp || {
+ ret=1
+ echo ERROR: $member1 has unexpected state on $d
+ }
+
+ if [ x$(egrep -c 'active/Online$' $tmp) != x2 ]; then
+ ret=1
+ echo ERROR: unexpected number of online disks on $d
+ fi
+done
+
+if [ $ret -ne 0 ]; then
+ mdadm -E $dev10
+ mdadm -E $dev8
+fi
+rm -f $tmp /var/tmp/mdmon.log
+[ $ret -eq 0 ]
diff --git a/tests/10ddf-create b/tests/10ddf-create
new file mode 100644
index 00000000..2f7747cd
--- /dev/null
+++ b/tests/10ddf-create
@@ -0,0 +1,89 @@
+#
+# Test basic DDF functionality.
+#
+# Create a container with 5 drives
+# create a small raid0 across them all,
+# then a small raid10 using 4 drives, then a 2disk raid1
+# and a 3disk raid5 using the remaining space
+#
+# add some data, tear down the array, reassemble
+# and make sure it is still there.
+set -e
+. tests/env-ddf-template
+sda=$(get_rootdev) || exit 1
+
+mdadm -CR /dev/md/ddf0 -e ddf -n 5 $dev8 $dev9 $dev10 $dev11 $dev12
+mdadm -CR r5 -l5 -n5 /dev/md/ddf0 -z 5000
+if mdadm -CR r5 -l1 -n2 /dev/md/ddf0 -z 5000
+then echo >&2 create with same name should fail ; exit 1
+fi
+mdadm -CR r10 -l10 -n4 -pn2 /dev/md/ddf0 -z 5000
+mdadm -CR r1 -l1 -n2 /dev/md/ddf0
+mdadm -CR r0 -l0 -n3 /dev/md/ddf0
+testdev /dev/md/r5 4 5000 512
+testdev /dev/md/r10 2 5000 512
+# r0/r10 will use 4608 due to chunk size, so that leaves 23552 for the rest
+testdev /dev/md/r1 1 23552 64
+testdev /dev/md/r0 3 23552 512
+dd if=$sda of=/dev/md/r0 || true
+dd if=$sda of=/dev/md/r10 || true
+dd if=$sda of=/dev/md/r1 || true
+dd if=$sda of=/dev/md/r5 || true
+
+s0=`sha1sum /dev/md/r0`
+s10=`sha1sum /dev/md/r10`
+s1=`sha1sum /dev/md/r1`
+s5=`sha1sum /dev/md/r5`
+
+
+mdadm -Ss
+mdadm -A /dev/md/ddf0 $dev8 $dev9 $dev10 $dev11 $dev12
+mdadm -I /dev/md/ddf0
+
+udevadm settle
+s0a=`sha1sum /dev/md/r0`
+s10a=`sha1sum /dev/md/r10`
+s1a=`sha1sum /dev/md/r1`
+s5a=`sha1sum /dev/md/r5`
+
+if [ "$s0" != "$s0a" ]; then
+ echo r0 did not match ; exit 1;
+fi
+if [ "$s10" != "$s10a" ]; then
+ echo r10 did not match ; exit 1;
+fi
+if [ "$s1" != "$s1a" ]; then
+ echo r1 did not match ; exit 1;
+fi
+if [ "$s5" != "$s5a" ]; then
+ echo r5 did not match ; exit 1;
+fi
+
+# failure status just means it has completed already, so ignore it.
+mdadm --wait /dev/md/r1 || true
+mdadm --wait /dev/md/r10 || true
+mdadm --wait /dev/md/r5 || true
+
+mdadm -Dbs > /var/tmp/mdadm.conf
+
+mdadm -Ss
+
+# Now try to assemble using mdadm.conf
+mdadm -Asc /var/tmp/mdadm.conf
+check nosync # This failed once. The raid5 was resyncing.
+udevadm settle
+mdadm -Dbs | sort > /tmp/mdadm.conf
+sort /var/tmp/mdadm.conf | diff /tmp/mdadm.conf -
+mdadm -Ss
+
+# and now assemble fully incrementally.
+for i in $dev8 $dev9 $dev10 $dev11 $dev12
+do
+ mdadm -I $i -c /var/tmp/mdadm.conf
+done
+check nosync
+udevadm settle
+mdadm -Dbs | sort > /tmp/mdadm.conf
+sort /var/tmp/mdadm.conf | diff /tmp/mdadm.conf -
+mdadm -Ss
+rm /tmp/mdadm.conf /var/tmp/mdadm.conf
diff --git a/tests/10ddf-create-fail-rebuild b/tests/10ddf-create-fail-rebuild
new file mode 100644
index 00000000..525c24d3
--- /dev/null
+++ b/tests/10ddf-create-fail-rebuild
@@ -0,0 +1,77 @@
+# sanity check array creation
+
+ddf_check_hold() { # usage: ddf_check_hold <array> <device>; fails the test if the removal succeeds
+ if mdadm --remove $1 $2; then
+ echo "$2 removal from $1 should have been blocked" >&2
+ cat /proc/mdstat >&2
+ mdadm -E $2
+ exit 1
+ fi
+}
+
+ddf_check_removal() { # usage: ddf_check_removal <array> <device>; fails the test if the removal is refused
+ if ! mdadm --remove $1 $2 ; then
+ echo "$2 removal from $1 should have succeeded" >&2
+ cat /proc/mdstat >&2
+ mdadm -E $2
+ exit 1
+ fi
+}
+
+. tests/env-ddf-template
+
+num_disks=2
+mdadm -CR $container -e ddf -n $num_disks $dev8 $dev9
+ddf_check container $num_disks
+
+# RAID0 + RAID1
+size=9000
+level=0
+chunk=64
+offset=0
+layout=0
+mdadm -CR $member0 $dev8 $dev9 -n $num_disks -l $level -z $size -c $chunk
+ddf_check member $member0 $num_disks $level $size $((size*2)) $offset $chunk $layout
+testdev $member0 $num_disks $size $chunk
+
+offset=$(((size & ~(chunk - 1))))
+size=4000
+level=1
+chunk=0
+mdadm -CR $member1 $dev8 $dev9 -n $num_disks -l $level -z $size
+ddf_check member $member1 $num_disks $level $size $size $offset $chunk $layout
+testdev $member1 1 $size 1
+check wait
+
+mdadm -Ss
+
+# RAID10 + RAID5
+num_disks=4
+mdadm -CR $container -e ddf -n $num_disks $dev8 $dev9 $dev10 $dev11
+ddf_check container $num_disks
+
+size=9000
+level=10
+chunk=64
+offset=0
+layout=2
+mdadm -CR $member0 $dev8 $dev9 $dev10 $dev11 -n $num_disks -l $level -z $size -c $chunk
+ddf_check member $member0 $num_disks $level $size $((size*2)) $offset $chunk $layout
+testdev $member0 $((num_disks-2)) $size $chunk
+
+offset=$(((size & ~(chunk - 1))))
+size=4000
+level=5
+mdadm -CR $member1 $dev8 $dev9 $dev10 $dev11 -n $num_disks -l $level -z $size -c $chunk
+ddf_check member $member1 $num_disks $level $size $((size*3)) $offset $chunk $layout
+testdev $member1 $((num_disks-1)) $size $chunk
+check wait
+
+# FAIL / REBUILD
+ddf_check_hold $container $dev8
+mdadm --fail $member0 $dev8
+mdadm --wait-clean --scan
+ddf_check_removal $container $dev8
+mdadm --add $container $dev12
+check wait
+ddf_check_hold $container $dev12
diff --git a/tests/10ddf-fail-create-race b/tests/10ddf-fail-create-race
new file mode 100644
index 00000000..bd5dfb51
--- /dev/null
+++ b/tests/10ddf-fail-create-race
@@ -0,0 +1,66 @@
+# This test creates a RAID1, fails a disk, and immediately
+# (simultaneously) creates a new array. This tests for a possible
+# race where the meta data reflecting the disk failure may not
+# be written when the 2nd array is created.
+. tests/env-ddf-template
+
+mdadm --zero-superblock $dev8 $dev9 $dev10 $dev11 $dev12 $dev13
+
+mdadm -CR $container -e ddf -l container -n 2 $dev11 $dev12
+#$dir/mdadm -CR $member0 -l raid1 -n 2 $container -z 10000 >/tmp/mdmon.txt 2>&1
+mdadm -CR $member0 -l raid1 -n 2 $container -z 10000
+check wait
+fail0=$dev11
+mdadm --fail $member0 $fail0 &
+
+# The test can succeed two ways:
+# 1) mdadm -C member1 fails - in this case the meta data
+# was already on disk when the create attempt was made
+# 2) mdadm -C succeeds in the first place (meta data not on disk yet),
+# but mdmon detects the problem and sets the disk faulty.
+
+if mdadm -CR $member1 -l raid1 -n 2 $container; then
+
+ echo create should have failed / race condition?
+
+ check wait
+ set -- $(get_raiddisks $member0)
+ d0=$1
+ ret=0
+ if [ $1 = $fail0 -o $2 = $fail0 ]; then
+ ret=1
+ else
+ set -- $(get_raiddisks $member1)
+ if [ $1 = $fail0 -o $2 = $fail0 ]; then
+ ret=1
+ fi
+ fi
+ if [ $ret -eq 1 ]; then
+ echo ERROR: failed disk $fail0 is still a RAID member
+ echo $member0: $(get_raiddisks $member0)
+ echo $member1: $(get_raiddisks $member1)
+ fi
+ tmp=$(mktemp /tmp/mdest-XXXXXX)
+ mdadm -E $d0 >$tmp
+ if [ x$(grep -c 'state\[[01]\] : Degraded' $tmp) != x2 ]; then
+ echo ERROR: non-degraded array found
+ mdadm -E $d0
+ ret=1
+ fi
+ if ! grep -q '^ *0 *[0-9a-f]\{8\} .*Offline, Failed' $tmp; then
+ echo ERROR: disk 0 not marked as failed in meta data
+ mdadm -E $d0
+ ret=1
+ fi
+ rm -f $tmp
+else
+ ret=0
+fi
+
+[ -f /tmp/mdmon.txt ] && {
+ cat /tmp/mdmon.txt
+ rm -f /tmp/mdmon.txt
+}
+
+[ $ret -eq 0 ]
+
diff --git a/tests/10ddf-fail-readd b/tests/10ddf-fail-readd
new file mode 100644
index 00000000..746f049e
--- /dev/null
+++ b/tests/10ddf-fail-readd
@@ -0,0 +1,55 @@
+# Simple fail / re-add test
+. tests/env-ddf-template
+
+tmp=$(mktemp /tmp/mdtest-XXXXXX)
+rm -f $tmp
+
+mdadm --zero-superblock $dev8 $dev9
+mdadm -CR $container -e ddf -l container -n 2 $dev8 $dev9
+
+mdadm -CR $member0 -l raid1 -n 2 $container
+#$dir/mdadm -CR $member0 -l raid1 -n 2 $container >/tmp/mdmon.txt 2>&1
+
+mke2fs $member0
+check wait
+
+set -- $(get_raiddisks $member0)
+fail0=$1
+mdadm $member0 --fail $fail0
+
+sleep 1
+mdadm $container --remove $fail0
+
+set -- $(get_raiddisks $member0)
+case $1 in MISSING) shift;; esac
+good0=$1
+
+# We re-add the disk now
+mdadm $container --add $fail0
+
+sleep 1
+mdadm --wait $member0
+
+ret=0
+set -- $(get_raiddisks $member0)
+case $1:$2 in
+ $dev8:$dev9|$dev9:$dev8);;
+ *) echo ERROR: bad raid disks "$@"; ret=1;;
+esac
+
+mdadm -Ss
+for x in $@; do
+ mdadm -E $x >$tmp
+ if ! grep -q 'state\[0\] : Optimal, Consistent' $tmp; then
+ echo ERROR: member 0 should be optimal in meta data on $x
+ ret=1
+ fi
+done
+
+rm -f $tmp
+if [ $ret -ne 0 ]; then
+ mdadm -E $dev8
+ mdadm -E $dev9
+fi
+
+[ $ret -eq 0 ]
diff --git a/tests/10ddf-fail-readd-readonly b/tests/10ddf-fail-readd-readonly
new file mode 100644
index 00000000..ed24585d
--- /dev/null
+++ b/tests/10ddf-fail-readd-readonly
@@ -0,0 +1,71 @@
+# Simple fail / re-add test
+. tests/env-ddf-template
+
+tmp=$(mktemp /tmp/mdtest-XXXXXX)
+rm -f $tmp
+
+mdadm --zero-superblock $dev8 $dev9
+mdadm -CR $container -e ddf -l container -n 2 $dev8 $dev9
+
+mdadm -CR $member0 -l raid1 -n 2 $container
+#$dir/mdadm -CR $member0 -l raid1 -n 2 $container >/tmp/mdmon.txt 2>&1
+
+check wait
+
+set -- $(get_raiddisks $member0)
+fail0=$1
+mdadm $member0 --fail $fail0
+
+sleep 1
+set -- $(get_raiddisks $member0)
+case $1 in MISSING) shift;; esac
+good0=$1
+
+# Check that the meta data now show one disk as failed
+ret=0
+for x in $@; do
+ mdadm -E $x >$tmp
+ if ! grep -q 'state\[0\] : Degraded, Consistent' $tmp; then
+ echo ERROR: member 0 should be degraded in meta data on $x
+ ret=1
+ fi
+ phys=$(grep $x $tmp)
+ case $x:$phys in
+ $fail0:*active/Offline,\ Failed);;
+ $good0:*active/Online);;
+ *) echo ERROR: wrong phys disk state for $x
+ ret=1
+ ;;
+ esac
+done
+
+mdadm $container --remove $fail0
+
+# We re-add the disk now
+mdadm $container --add $fail0
+
+sleep 1
+mdadm --wait $member0
+
+set -- $(get_raiddisks $member0)
+case $1:$2 in
+ $dev8:$dev9|$dev9:$dev8);;
+ *) echo ERROR: bad raid disks "$@"; ret=1;;
+esac
+
+mdadm -Ss
+for x in $@; do
+ mdadm -E $x >$tmp
+ if ! grep -q 'state\[0\] : Optimal, Consistent' $tmp; then
+ echo ERROR: member 0 should be optimal in meta data on $x
+ ret=1
+ fi
+done
+
+rm -f $tmp
+if [ $ret -ne 0 ]; then
+ mdadm -E $dev8
+ mdadm -E $dev9
+fi
+
+[ $ret -eq 0 ]
diff --git a/tests/10ddf-fail-spare b/tests/10ddf-fail-spare
new file mode 100644
index 00000000..ab737ca4
--- /dev/null
+++ b/tests/10ddf-fail-spare
@@ -0,0 +1,86 @@
+# Test suggested by Albert Pauw: Create, fail one disk, have mdmon
+# activate the spare,
+# then run create again. Shouldn't use the failed disk for Create,
+. tests/env-ddf-template
+
+tmp=$(mktemp /tmp/mdtest-XXXXXX)
+rm -f $tmp
+
+mdadm --zero-superblock $dev8 $dev9 $dev10 $dev11 $dev12 $dev13
+mdadm -CR $container -e ddf -l container -n 5 $dev8 $dev9 $dev10 $dev11 $dev12
+
+mdadm -CR $member0 -l raid1 -n 2 $container
+#$dir/mdadm -CR $member0 -l raid1 -n 2 $container >/tmp/mdmon.txt 2>&1
+
+check wait
+
+set -- $(get_raiddisks $member0)
+fail0=$1
+mdadm --fail $member0 $fail0
+
+# To make sure the spare is activated, we may have to sleep
+# 2s has always been enough for me
+sleep 2
+check wait
+
+# This test can succeed both ways - if spare was activated
+# before new array was created, we see only member 0.
+# otherwise, we see both, and member0 is degraded because the
+# new array grabbed the spare
+# which case occurs depends on the sleep time above.
+ret=0
+if mdadm -CR $member1 -l raid5 -n 3 $container; then
+ # Creation successful - must have been quicker than spare activation
+
+ check wait
+ set -- $(get_raiddisks $member1)
+ if [ $1 = $fail0 -o $2 = $fail0 -o $3 = $fail0 ]; then
+ echo ERROR: $member1 must not contain $fail0: $@
+ ret=1
+ fi
+ d1=$1
+ mdadm -E $d1 >$tmp
+ if ! grep -q 'state\[1\] : Optimal, Consistent' $tmp; then
+ echo ERROR: member 1 should be optimal in meta data
+ ret=1
+ fi
+ state0=Degraded
+else
+ # Creation unsuccessful - spare was used for member 0
+ state0=Optimal
+fi
+
+# need to delay a little bit, sometimes the meta data aren't
+# up-to-date yet
+sleep 0.5
+set -- $(get_raiddisks $member0)
+if [ $1 = $fail0 -o $2 = $fail0 ]; then
+ echo ERROR: $member0 must not contain $fail0: $@
+ ret=1
+fi
+d0=$1
+
+[ -f $tmp ] || mdadm -E $d0 >$tmp
+
+if ! grep -q 'state\[0\] : '$state0', Consistent' $tmp; then
+ echo ERROR: member 0 should be $state0 in meta data
+ ret=1
+fi
+if ! grep -q 'Offline, Failed' $tmp; then
+ echo ERROR: Failed disk expected in meta data
+ ret=1
+fi
+if [ $ret -eq 1 ]; then
+ cat /proc/mdstat
+ mdadm -E $d0
+ mdadm -E $d1
+ mdadm -E $fail0
+fi
+
+[ -f /tmp/mdmon.txt ] && {
+ cat /tmp/mdmon.txt
+ rm -f /tmp/mdmon.txt
+}
+
+rm -f $tmp
+[ $ret -eq 0 ]
diff --git a/tests/10ddf-fail-stop-readd b/tests/10ddf-fail-stop-readd
new file mode 100644
index 00000000..4a0511a1
--- /dev/null
+++ b/tests/10ddf-fail-stop-readd
@@ -0,0 +1,66 @@
+# Simple fail / re-add test
+. tests/env-ddf-template
+
+tmp=$(mktemp /tmp/mdtest-XXXXXX)
+rm -f $tmp
+
+mdadm --zero-superblock $dev8 $dev9
+mdadm -CR $container -e ddf -l container -n 2 $dev8 $dev9
+
+mdadm -CR $member0 -l raid1 -n 2 $container
+#$dir/mdadm -CR $member0 -l raid1 -n 2 $container >/tmp/mdmon.txt 2>&1
+
+# Write to the array
+mke2fs $member0
+check wait
+
+set -- $(get_raiddisks $member0)
+fail0=$1
+mdadm $member0 --fail $fail0
+
+sleep 1
+mdadm $container --remove $fail0
+
+set -- $(get_raiddisks $member0)
+case $1 in MISSING) shift;; esac
+good0=$1
+
+mdadm -Ss
+
+sleep 1
+# Now simulate incremental assembly
+mdadm -I $good0
+mdadm -IRs
+
+# Write to the array
+mke2fs $member0
+
+# We re-add the disk now
+mdadm $container --add $fail0
+
+sleep 1
+mdadm --wait $member0
+
+ret=0
+set -- $(get_raiddisks $member0)
+case $1:$2 in
+ $dev8:$dev9|$dev9:$dev8);;
+ *) echo ERROR: bad raid disks "$@"; ret=1;;
+esac
+
+mdadm -Ss
+for x in $@; do
+ mdadm -E $x >$tmp
+ if ! grep -q 'state\[0\] : Optimal, Consistent' $tmp; then
+ echo ERROR: member 0 should be optimal in meta data on $x
+ ret=1
+ fi
+done
+
+rm -f $tmp
+if [ $ret -ne 0 ]; then
+ mdadm -E $dev8
+ mdadm -E $dev9
+fi
+
+[ $ret -eq 0 ]
diff --git a/tests/10ddf-fail-twice b/tests/10ddf-fail-twice
new file mode 100644
index 00000000..d0b4ac61
--- /dev/null
+++ b/tests/10ddf-fail-twice
@@ -0,0 +1,59 @@
+. tests/env-ddf-template
+
+num_disks=5
+mdadm -CR $container -e ddf -n $num_disks $dev8 $dev9 $dev10 $dev11 $dev12
+ddf_check container $num_disks
+
+mdadm -CR $member0 -n 2 -l 1 $container
+mdadm -CR $member1 -n 3 -l 5 $container
+
+mdadm --wait $member1 $member0 || mdadm --wait $member1 $member0
+
+set -- $(get_raiddisks $member0)
+fail0=$1
+mdadm $member0 --fail $fail0
+set -- $(get_raiddisks $member1)
+fail1=$1
+mdadm $member1 --fail $fail1
+
+mdadm $container --add $dev13
+
+mdadm --wait $member1 $member0 || mdadm --wait $member1 $member0
+
+
+devs0="$(get_raiddisks $member0)"
+devs1="$(get_raiddisks $member1)"
+
+present=$(($(get_present $member0) + $(get_present $member1)))
+[ $present -eq 4 ] || { # on mismatch, print both member lists and fail
+ echo expected 4 present disks, got $present
+ echo devices for $member0: $devs0
+ echo devices for $member1: $devs1
+ exit 1
+}
+
+if echo "$devs0" | grep -q MISSING; then
+ good=1
+ bad=0
+else
+ good=0
+ bad=1
+fi
+
+# find a good device
+eval "set -- \$devs$good"
+check=$1
+
+tmp=$(mktemp /tmp/mdtest-XXXXXX)
+mdadm -E $check >$tmp
+
+{ grep -q 'state\['$bad'\] : Degraded, Consistent' $tmp &&
+ grep -q 'state\['$good'\] : Optimal, Consistent' $tmp; } || {
+ echo unexpected meta data state on $check
+ mdadm -E $check
+ rm -f $tmp
+ exit 1
+}
+
+rm -f $tmp
+exit 0
diff --git a/tests/10ddf-fail-two-spares b/tests/10ddf-fail-two-spares
new file mode 100644
index 00000000..fa6e2e8c
--- /dev/null
+++ b/tests/10ddf-fail-two-spares
@@ -0,0 +1,86 @@
+# Simulate two disks failing shortly after each other
+. tests/env-ddf-template
+sda=$(get_rootdev) || exit 1
+tmp=$(mktemp /tmp/mdtest-XXXXXX)
+
+mdadm --zero-superblock $dev8 $dev9 $dev10 $dev11 $dev12 $dev13
+mdadm -CR $container -e ddf -l container -n 6 \
+ $dev8 $dev9 $dev10 $dev11 $dev12 $dev13
+
+#fast_sync
+
+mdadm -CR $member0 -l raid6 -n 4 $dev10 $dev11 $dev12 $dev13 -z 16384
+#$dir/mdadm -CR $member0 -l raid6 -n 4 $dev10 $dev11 $dev12 $dev13 -z 16384 \
+# >/tmp/mdmon.txt 2>&1
+mdadm -CR $member1 -l raid10 -n 4 $dev10 $dev11 $dev12 $dev13 -z 16384
+
+dd if=$sda of=$member0 bs=1M
+dd if=$sda of=$member1 bs=1M skip=16
+
+check wait
+
+sum0=$(sha1sum $member0)
+sum1=$(sha1sum $member1)
+
+mdadm --fail $member1 $dev11
+sleep 1
+mdadm --fail $member1 $dev12
+
+# We will have 4 resync procedures, 2 spares for 2 arrays.
+mdadm --wait $member1 $member0
+mdadm --wait $member1 $member0
+
+devs0="$(get_raiddisks $member0)"
+devs1="$(get_raiddisks $member1)"
+expected="$dev10
+$dev13
+$dev8
+$dev9"
+
+ret=0
+if [ "$(echo "$devs0" | sort)" != "$expected" \
+ -o "$(echo "$devs1" | sort)" != "$expected" ]; then
+ echo ERROR: unexpected members
+ echo $member0: $devs0
+ echo $member1: $devs1
+ ret=1
+fi
+
+mdadm -E $dev10 >$tmp
+if ! grep -q 'state\[0\] : Optimal, Consistent' $tmp; then
+ echo ERROR: $member0 should be optimal in meta data
+ ret=1
+fi
+if ! grep -q 'state\[1\] : Optimal, Consistent' $tmp; then
+ echo ERROR: $member1 should be optimal in meta data
+ ret=1
+fi
+if [ x"$(grep -c active/Online $tmp)" != x4 ]; then
+ echo ERROR: expected 4 online disks
+ ret=1
+fi
+if [ x"$(grep -c "Offline, Failed" $tmp)" != x2 ]; then
+ echo ERROR: expected 2 failed disks
+ ret=1
+fi
+
+sum0a=$(sha1sum $member0)
+sum1a=$(sha1sum $member1)
+
+if [ "$sum0" != "$sum0a" -o "$sum1" != "$sum1a" ]; then
+ echo ERROR: checksum mismatch
+ ret=1
+fi
+
+if [ $ret -eq 1 ]; then
+ cat /proc/mdstat
+ cat $tmp
+fi
+
+[ -f /tmp/mdmon.txt ] && {
+ cat /tmp/mdmon.txt
+ rm -f /tmp/mdmon.txt
+}
+rm -f $tmp
+
+[ $ret -eq 0 ]
diff --git a/tests/10ddf-geometry b/tests/10ddf-geometry
new file mode 100644
index 00000000..b0cce2f6
--- /dev/null
+++ b/tests/10ddf-geometry
@@ -0,0 +1,82 @@
+#
+# Test various RAID geometries, creation and deletion of subarrays
+#
+
+assert_fail() {
+ # Run mdadm with the given arguments; return 0 only if mdadm fails.
+ if ! mdadm "$@"; then
+ return 0
+ fi
+ echo mdadm "$@" must fail
+ return 1
+}
+
+assert_kill() {
+ # Stop array $1, delete subarray number $2 from /dev/md/ddf0 and
+ # report failure if $1 still shows up in the "mdadm -Dbs" listing.
+ local victim=$1 idx=$2
+ mdadm -S $victim
+ mdadm --kill-subarray=$idx /dev/md/ddf0
+ mdadm -Dbs | grep -q $victim || return 0
+ echo >&2 $victim should be deleted
+ return 1
+}
+
+set -e
+mdadm -CR /dev/md/ddf0 -e ddf -n 6 $dev8 $dev9 $dev10 $dev11 $dev12 $dev13
+
+# RAID1 geometries
+# Use different sizes to make offset calculation harder
+mdadm -CR l1s -l1 -n2 /dev/md/ddf0 -z 8000
+mdadm -CR l1m -l1 -n3 $dev8 $dev9 $dev10 -z 10000
+assert_fail -CR badl1 -l1 -n4 /dev/md/ddf0
+
+# RAID10 geometries (assert_fail prepends "mdadm" itself, so pass options only)
+mdadm -CR l10_0 -l10 -n3 /dev/md/ddf0 -z 1000
+mdadm -CR l10_1 -l10 -n5 /dev/md/ddf0 -z 1000
+assert_fail -CR badl10 -l10 -n4 -pn3 /dev/md/ddf0
+mdadm -CR l10_2 -l10 -n6 -pn2 /dev/md/ddf0 -z 4000
+mdadm -CR l10_3 -l10 -n6 -pn3 /dev/md/ddf0 -z 4000
+
+assert_fail -CR l10_2 -l10 -n6 -pn2 /dev/md/ddf0 -z 5000
+assert_kill /dev/md/l10_2 4
+# gone now, must be able to create it again
+mdadm -CR l10_2 -l10 -n6 -pn2 /dev/md/ddf0 -z 5000
+
+# Now stop and reassemble
+mdadm -Ss
+mdadm -A /dev/md/ddf0 $dev8 $dev9 $dev10 $dev11 $dev12 $dev13
+
+# Same as above, on inactive container
+assert_fail -CR l10_3 -l10 -n6 -pn2 /dev/md/ddf0 -z 5000
+# Kill subarray without having started anything (no mdmon)
+mdadm --kill-subarray=5 /dev/md/ddf0
+mdadm -I /dev/md/ddf0
+mdadm -CR l10_3 -l10 -n6 -pn3 /dev/md/ddf0 -z 5000
+
+assert_kill /dev/md/l10_2 4
+assert_kill /dev/md/l10_3 5
+
+# RAID5 geometries
+mdadm -CR l5la -l5 -n3 --layout=ddf-N-restart /dev/md/ddf0 -z 5000
+mdadm -CR l5ra -l5 -n3 --layout=ddf-zero-restart /dev/md/ddf0 -z 5000
+mdadm -CR l5ls -l5 -n3 --layout=ddf-N-continue /dev/md/ddf0 -z 5000
+assert_fail -CR l5rs -l5 -n3 -prs /dev/md/ddf0 -z 5000
+
+# Stop and reassemble
+mdadm -Ss
+mdadm -A /dev/md/ddf0 $dev8 $dev9 $dev10 $dev11 $dev12 $dev13
+mdadm -I /dev/md/ddf0
+
+assert_kill /dev/md/l5la 4
+assert_kill /dev/md/l5ls 6
+assert_kill /dev/md/l5ra 5
+
+# RAID6 geometries (the two assert_fail cases use layouts expected to be rejected)
+assert_fail -CR l6la -l6 -n3 -pla /dev/md/ddf0 -z 5000
+assert_fail -CR l6rs -l6 -n4 -prs /dev/md/ddf0 -z 5000
+mdadm -CR l6la -l6 -n4 --layout=ddf-N-restart /dev/md/ddf0 -z 5000
+mdadm -CR l6ra -l6 -n4 --layout=ddf-zero-restart $dev8 $dev9 $dev10 $dev11 -z 5000
+mdadm -CR l6ls -l6 -n4 --layout=ddf-N-continue $dev13 $dev8 $dev9 $dev12 -z 5000
+
+mdadm -Ss
diff --git a/tests/10ddf-incremental-wrong-order b/tests/10ddf-incremental-wrong-order
new file mode 100644
index 00000000..2324f1d7
--- /dev/null
+++ b/tests/10ddf-incremental-wrong-order
@@ -0,0 +1,131 @@
+# An array is assembled incompletely. Some disks will
+# have later metadata than others.
+# The array is then reassembled in the "wrong" order -
+# older meta data first.
+# This FAILS with mdadm 3.3
+. tests/env-ddf-template
+tmp=$(mktemp /tmp/mdtest-XXXXXX)
+rm -f $tmp /var/tmp/mdmon.log
+ret=0
+
+mdadm -CR $container -e ddf -n 4 $dev8 $dev9 $dev10 $dev11
+ddf_check container 4
+
+mdadm -CR $member1 -n 4 -l 10 $dev8 $dev10 $dev9 $dev11 -z 10000
+mdadm -CR $member0 -n 2 -l 1 $dev8 $dev9 -z 10000
+
+mdadm --wait $member0
+mdadm --wait $member1
+
+mke2fs $member0
+mke2fs $member1
+sha_0a=$(sha1_sum $member0)
+sha_1a=$(sha1_sum $member1)
+
+mdadm -Ss
+sleep 1
+
+# Add all devices except those for $member0
+mdadm -I $dev10
+mdadm -I $dev11
+
+# Start runnable members ($member1) and write
+mdadm -IRs
+e2fsck -fy $member1
+sha_1b=$(sha1_sum $member1)
+
+mdadm -Ss
+sleep 1
+
+# Seq number should be different now
+seq8a=$(mdadm -E $dev8 | sed -n 's/^ *Seq : //p')
+seq10a=$(mdadm -E $dev10 | sed -n 's/^ *Seq : //p')
+
+if [ $seq8a -ge $seq10a ]; then
+ ret=1
+ echo ERROR: sequential number of $dev10 not bigger than $dev8
+fi
+if [ x$sha_1a = x$sha_1b ]; then
+ ret=1
+ echo ERROR: sha1sums equal after write
+fi
+
+#[ -f /var/tmp/mdmon.log ] && cat /var/tmp/mdmon.log
+
+# Now reassemble
+# Note that we add the previously missing disks first.
+# $dev10 should have a higher seq number than $dev8
+for d in $dev8 $dev9 $dev10 $dev11; do
+ mdadm -I $d
+done
+
+mdadm -IRs
+sha_0c=$(sha1_sum $member0)
+sha_1c=$(sha1_sum $member1)
+
+mdadm -Ss
+sleep 1
+
+seq8c=$(mdadm -E $dev8 | sed -n 's/^ *Seq : //p')
+seq10c=$(mdadm -E $dev10 | sed -n 's/^ *Seq : //p')
+
+if [ x$sha_0a != x$sha_0c ]; then
+ ret=1
+ echo ERROR: sha1sum of $member0 has changed
+fi
+if [ x$sha_1b != x$sha_1c ]; then
+ ret=1
+ echo ERROR: sha1sum of $member1 has changed
+fi
+if [ \( $seq10a -ge $seq10c \) -o \( $seq8c -ne $seq10c \) ]; then
+ ret=1
+ echo ERROR: sequential numbers are wrong
+fi
+
+# Expect state[0] and state[1] "Optimal, Consistent" and 4 online disks on both
+for d in $dev10 $dev8; do
+ mdadm -E $d>$tmp
+ for x in 0 1; do
+ egrep 'state\['$x'\] : Optimal, Consistent' $tmp || {
+ ret=1
+ echo ERROR: subarray $x has unexpected state on $d
+ }
+ done
+ if [ x$(egrep -c 'active/Online$' $tmp) != x4 ]; then
+ ret=1
+ echo ERROR: unexpected number of online disks on $d
+ fi
+done
+
+# Now try assembly
+if mdadm -A $container $dev8 $dev9 $dev10 $dev11; then
+ mdadm -IR $container
+ sha_0d=$(sha1_sum $member0)
+ sha_1d=$(sha1_sum $member1)
+ mdadm -Ss
+ sleep 1
+ seq8d=$(mdadm -E $dev8 | sed -n 's/^ *Seq : //p')
+ seq10d=$(mdadm -E $dev10 | sed -n 's/^ *Seq : //p')
+ if [ x$sha_0a != x$sha_0d ]; then
+ ret=1
+ echo ERROR: sha1sum of $member0 has changed
+ fi
+ if [ x$sha_1b != x$sha_1d ]; then
+ ret=1
+ echo ERROR: sha1sum of $member1 has changed
+ fi
+ if [ \( $seq10a -ge $seq10d \) -o \( $seq8d -ne $seq10d \) ]; then
+ ret=1
+ echo ERROR: sequential numbers are wrong
+ fi
+else
+ ret=1
+ echo ERROR: assembly failed
+fi
+
+if [ $ret -ne 0 ]; then
+ mdadm -E $dev10
+ mdadm -E $dev8
+fi
+rm -f $tmp /var/tmp/mdmon.log
+[ $ret -eq 0 ]
diff --git a/tests/10ddf-sudden-degraded b/tests/10ddf-sudden-degraded
new file mode 100644
index 00000000..1eab361a
--- /dev/null
+++ b/tests/10ddf-sudden-degraded
@@ -0,0 +1,18 @@
+#
+# An array is assembled with one device missing.
+# The other device must be marked as Failed in metadata
+
+. tests/env-ddf-template
+
+mdadm -CR $container -e ddf -n 2 $dev8 $dev9
+ddf_check container 2
+
+mdadm -CR $member1 -n 2 -l1 $dev8 $dev9
+mdadm --wait $member1
+mdadm -Ss
+
+mdadm -I $dev8
+mdadm -R $container
+mkfs $member1
+# There must be a missing device recorded
+mdadm --examine $dev8 | grep 'Raid Devices.*--' || exit 1
diff --git a/tests/11spare-migration b/tests/11spare-migration
new file mode 100644
index 00000000..24b6ec69
--- /dev/null
+++ b/tests/11spare-migration
@@ -0,0 +1,454 @@
+# Set of tests for autorebuild functionality using mdadm -F
+# To be able to test ddf one must have all loop devices of bigger size, with the ones
+# above number 7 bigger again by any amount (this is not changed for now as it
+# could affect other tests)
+
+export IMSM_DEVNAME_AS_SERIAL=1
+export IMSM_TEST_OROM=1
+export IMSM_NO_PLATFORM=1
+
+. tests/utils
+set -ex
+verbose="yes"
+sleeptime=10
+
+# if listfailed=yes then don't exit if test failed due to wrong
+# spare-migration and just print a list at the end. Other errors still
+# stop the test.
+# if listfailed=no then exit on first failure
+listfailed="yes"
+
+# monitor [N...] - start "mdadm -F" in the background and store its pid in $monitorpid.
+# Does nothing when $monitorpid is already set. Uses the global $scan variable:
+# with scan="yes" it monitors via --scan and the arguments are not used; otherwise
+# each N adds container $cN and its volumes $vN (eg. "monitor 0 1" -> c0, c1, v0, v1)
+monitor(){
+ [ -z "$monitorpid" ] || return
+ if [ "$scan" == "yes" ]; then
+ $mdadm -F -d 1 --scan --mail root@localhost -c $config &
+ monitorpid=$!
+ return
+ fi
+ unset mddevs
+ while [ -n "$1" ]
+ do
+ eval container=\$c$1
+ eval volumes=\$v$1
+ mddevs="$mddevs /dev/$container"
+ if [ "$container" != "$volumes" ]; then # skip volumes when identical to the container name
+ for vol in $volumes; do
+ mddevs="$mddevs /dev/$vol"
+ done
+ fi
+ shift
+ done
+ if [ -n "$mddevs" ]; then
+ if [ "$verbose" != "yes" ]; then
+ $mdadm -F -d 1 $mddevs -c $config >&2 &
+ monitorpid=$!
+ else
+ $mdadm -F -t -d 1 $mddevs -c $config &
+ monitorpid=$!
+ fi
+ fi
+ [ "$verbose" != "yes" ] || echo $mddevs $monitorpid
+}
+
+test0()
+{
+dsc "Test 0: No config file, no spare should be moved"
+> $config
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/$v0 $dev0
+# check that spare loop2 was not moved from container c1 to container c0
+chksparemoved $c1 $c0 $dev2 n
+tidyup
+}
+
+test0a()
+{
+dsc "Test 0a: No domains in config file, no spare should be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+createconfig a
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/$v0 $dev0
+# check that spare loop2 was not moved from container c1 to container c0
+chksparemoved $c1 $c0 $dev2 n
+tidyup
+}
+
+test1()
+{
+dsc "Test 1: Common domain, add disk to one container and fail first one in another container, spare should be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+# create config file with arrays and common domain
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 3 4
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/$v0 $dev0
+# check that spare loop2 was moved from container c1 to container c0
+chksparemoved $c1 $c0 $dev2
+tidyup
+}
+
+test1a()
+{
+dsc "Test 1a: Common domain, add disk to one container and fail second one in another container, spare should be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 3 4
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/$v0 $dev1
+# check that spare loop2 was moved from container c1 to container c0
+chksparemoved $c1 $c0 $dev2
+tidyup
+}
+
+test2()
+{
+dsc "Test 2: Common domain, fail disk in one container and add one to another container, spare should be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 3 4
+monitor 0 1
+mdadm --fail /dev/$v0 $dev1
+mdadm -a /dev/$c1 $dev2
+chksparemoved $c1 $c0 $dev2
+tidyup
+}
+
+test3()
+{
+dsc "Test 3: Two domains, fail a disk in one domain, add a disk to another domain, the spare should not be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+# create config file with 2 domains
+createconfig a
+createconfig domain-$platform"1" $platform spare 0 1 2
+createconfig domain-$platform"2" $platform spare 3 4 5
+monitor 0 1
+mdadm --fail /dev/$v0 $dev1
+mdadm -a /dev/$c1 $dev5
+chksparemoved $c1 $c0 $dev5 n
+tidyup
+}
+
+test4()
+{
+dsc "Test 4: One domain holds one container, fail a disk in domain, and add disk to a container not described by domain, move if metadata allows"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2
+monitor 0 1
+mdadm --fail /dev/$v0 $dev1
+mdadm -a /dev/$c1 $dev5
+unset shouldmove
+[ "$platform" == "imsm" ] || shouldmove="n"
+chksparemoved $c1 $c0 $dev5 $shouldmove
+tidyup
+}
+
+test5()
+{
+dsc "Test 5: Two domains, two containers in each domain"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+setupdevs 2 5 6 $platform
+setupdevs 3 8 10 $platform
+# 2 and 9 for spares
+createconfig a
+createconfig domain-$platform"1" $platform spare 0 1 2 3 4
+createconfig domain-$platform"2" $platform spare 5 6 8 9 10
+monitor 0 1 2 3
+test5a
+test5b
+test5c
+tidyup
+}
+
+test5a()
+{
+dsc "Test 5a: Two containers in each domain, add spare loop2 to domain1 and fail disk in the other domain, the spare should not be moved"
+mdadm -a /dev/$c0 $dev2
+mdadm --fail /dev/$v2 $dev5
+chksparemoved $c0 $c2 $dev2 n
+}
+
+test5b()
+{
+dsc "Test 5b: Fail disk in the same domain but different container, spare loop2 should be moved"
+mdadm --fail /dev/$v1 $dev3
+chksparemoved $c0 $c1 $dev2
+}
+
+test5c()
+{
+dsc "Test 5c: Add spare loop9 to different container in domain with degraded array, spare should be moved"
+mdadm -a /dev/$c3 $dev9
+chksparemoved $c3 $c2 $dev9
+}
+
+test6()
+{
+dsc "Test 6: One domain has two containers, fail a disk in one container, there is a spare in other container too small to use for rebuild"
+setupdevs 0 0 1 $platform
+setupdevs 1 8 9 $platform
+# all devices in one domain
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 8 9
+monitor 0 1
+mdadm -a /dev/$c0 $dev2
+mdadm --fail /dev/$v1 $dev8
+chksparemoved $c0 $c1 $dev2 n
+tidyup
+}
+
+test7()
+{
+dsc "Test 7: One domain, add small spare to container, fail disk in array, spare not used, add suitable spare to other container, spare should be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 8 9 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 8 9 10
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/$v1 $dev8
+mdadm -a /dev/$c0 $dev10
+chksparemoved $c0 $c1 $dev10
+tidyup
+}
+
+
+test7a()
+{
+dsc "Test 7a: Small spare in parent, suitable one in other container, $dev2 in $c1 is not in common domain"
+setupdevs 0 0 1 $platform
+setupdevs 1 8 9 $platform
+#all $platform devices in one domain
+createconfig a
+createconfig domain-$platform"1" $platform spare 0 1 8 9 10
+createconfig domain-$platform"2" $platform spare 2
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+chkspare $c1 $dev2
+mdadm --fail /dev/$v1 $dev8
+mdadm -a /dev/$c0 $dev10
+chksparemoved $c0 $c1 $dev10
+tidyup
+}
+
+test8()
+{
+# ddf does not have getinfo_super_disks implemented so skip this test
+return
+dsc "Test 8: imsm and ddf - spare should not be migrated"
+setupdevs 0 10 11 imsm
+setupdevs 1 8 9 ddf
+createconfig a
+createconfig domain0 noplatform spare 8 9 10 11 12
+monitor 0 1
+mdadm -a /dev/$c1 $dev12
+mdadm --fail /dev/$v0 $dev10
+chksparemoved $c1 $c0 $dev12 n
+tidyup
+}
+
+test9()
+{
+dsc "Test 9: imsm and native 1.2 - one domain, no metadata specified, spare should be moved"
+setupdevs 0 10 11 imsm
+setupdevs 1 8 9 1.2
+createconfig a
+createconfig domain0 noplatform spare 8 9 10 11 12
+monitor 0 1
+mdadm -a /dev/$c1 $dev12
+mdadm --fail /dev/$v0 $dev10
+chksparemoved $c1 $c0 $dev12
+tidyup
+}
+
+test9a()
+{
+dsc "Test 9a: imsm and native 1.2 - spare in global domain, should be moved"
+setupdevs 0 10 11 imsm
+setupdevs 1 8 9 1.2
+createconfig a
+createconfig domain-global noplatform spare 8 9 10 11 12
+createconfig domain-1.2 1.2 spare 8 9
+createconfig domain-imsm imsm spare 10 11
+monitor 0 1
+mdadm -a /dev/$c1 $dev12
+mdadm --fail /dev/$v0 $dev10
+chksparemoved $c1 $c0 $dev12
+tidyup
+}
+
+test10()
+{
+dsc "Test 10: Two arrays on the same devices in container"
+setupdevs 0 0 1 $platform 10000
+setupdevs 1 3 4 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 3 4 5
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/md/sub0_ $dev0
+chksparemoved $c1 $c0 $dev2
+if [ $failed -eq 0 ]; then
+# now fail the spare and see if we get another one
+ mdadm --fail /dev/md/sub0_ $dev2
+ mdadm -a /dev/$c1 $dev5
+ chksparemoved $c1 $c0 $dev5
+fi
+tidyup
+}
+
+test11()
+{
+dsc "Test 11: Failed spare from other container should not be used"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 3 4
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/$v1 $dev3
+#wait until recovery finishes so no degraded array in c1
+check wait
+mdadm --fail /dev/$v0 $dev0
+chksparemoved $c1 $c0 $dev3 n
+tidyup
+}
+
+test12()
+{
+dsc "Test 12: Only one spare should be taken for rebuild, second not needed"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 3 4 5
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm -a /dev/$c1 $dev5
+mdadm --fail /dev/$v0 $dev0
+sleep $sleeptime
+chkarray $dev2 n
+sc1=$c
+chkarray $dev5 n
+sc2=$c
+[ "$sc1" != "$sc2" ] || err "both spares in the same container $sc1"
+tidyup
+}
+
+test13()
+{
+dsc "Test 13: Common domain, two containers, fail a disk in container, action is below spare, the spare should be moved regadless of action"
+setupdevs 0 0 1 $platform
+setupdevs 1 4 5 $platform
+# same domain name for both entries, but action "include" instead of "spare" on 4 5 6
+createconfig a
+createconfig domain-$platform $platform spare 0 1
+createconfig domain-$platform $platform include 4 5 6
+monitor 0 1
+mdadm -a /dev/$c1 $dev6
+mdadm --fail /dev/$v0 $dev0
+chksparemoved $c1 $c0 $d6 # NOTE(review): $d6, not $dev6 as added above - confirm $d6 is defined (tests/utils?)
+tidyup
+}
+
+test14()
+{
+dsc "Test 14: One domain, small array on big disks, check if small spare is accepted"
+setupdevs 0 8 9 $platform 10000 1
+setupdevs 1 0 1 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 8 9
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/$v0 $dev9
+chksparemoved $c1 $c0 $d2 # NOTE(review): $d2, not $dev2 as added above - confirm $d2 is defined (tests/utils?)
+tidyup
+}
+
+test15()
+{
+dsc "Test 15: spare in global domain for $platform metadata, should be moved"
+# this is like 9a but only one metadata used
+setupdevs 0 10 11 $platform
+setupdevs 1 8 9 $platform
+createconfig a
+createconfig domain-global $platform spare 8 9 10 11 12
+createconfig domain-1 $platform spare 8 9
+createconfig domain-2 $platform spare 10 11
+monitor 0 1
+mdadm -a /dev/$c1 $dev12
+mdadm --fail /dev/$v0 $dev10
+chksparemoved $c1 $c0 $dev12
+tidyup
+}
+
+try()
+{
+test0
+test0a
+test1
+test1a
+test2
+test3
+test4
+test5
+test6
+if [ "$platform" != "1.2" ]; then
+# this is because we can't have a small spare added to native array
+ test7
+ test7a
+fi
+test8
+test9
+test9a
+if [ "$platform" != "1.2" ]; then
+# we can't create two subarrays on the same devices for native (without
+# partitions)
+ test10
+fi
+test11
+test12
+test13
+test14
+test15
+}
+
+try_failed()
+{
+platform="1.2"
+scan="no"
+test5
+test9
+test13
+scan="yes"
+test9
+}
+
+#try_failed
+
+for scan in no yes; do
+ for platform in 1.2 imsm; do
+ try
+ done
+done
+
+[ "$listfailed" == "no" ] || [ -z "$flist" ] || echo -e "\n FAILED TESTS: $flist" # print only if flist is non-empty
+
+#cat $targetdir/log
+rm -f /dev/disk/by-path/loop*
diff --git a/tests/12imsm-r0_2d-grow-r0_3d b/tests/12imsm-r0_2d-grow-r0_3d
new file mode 100644
index 00000000..3c6cf743
--- /dev/null
+++ b/tests/12imsm-r0_2d-grow-r0_3d
@@ -0,0 +1,20 @@
+. tests/env-imsm-template
+
+# RAID 0 volume, 2 disks grow to RAID 0 volume, 3 disks
+# POSITIVE test
+
+num_disks=2
+device_list="$dev0 $dev1"
+spare_list="$dev2"
+
+# Before: RAID 0 volume, 2 disks, 64k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+# After: RAID 0 volume, 3 disks, 64k chunk size
+vol0_new_num_comps=$((num_disks + 1))
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/12imsm-r0_2d-grow-r0_4d b/tests/12imsm-r0_2d-grow-r0_4d
new file mode 100644
index 00000000..e4fccda5
--- /dev/null
+++ b/tests/12imsm-r0_2d-grow-r0_4d
@@ -0,0 +1,20 @@
+. tests/env-imsm-template
+
+# RAID 0 volume, 2 disks grow to RAID 0 volume, 4 disks
+# POSITIVE test
+
+num_disks=2
+device_list="$dev0 $dev1"
+spare_list="$dev2 $dev3"
+
+# Before: RAID 0 volume, 2 disks, 64k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+# After: RAID 0 volume, 4 disks, 64k chunk size
+vol0_new_num_comps=$((num_disks + 2))
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/12imsm-r0_2d-grow-r0_5d b/tests/12imsm-r0_2d-grow-r0_5d
new file mode 100644
index 00000000..388a5bbd
--- /dev/null
+++ b/tests/12imsm-r0_2d-grow-r0_5d
@@ -0,0 +1,20 @@
+. tests/env-imsm-template
+
+# RAID 0 volume, 2 disks grow to RAID 0 volume, 5 disks
+# POSITIVE test
+
+num_disks=2
+device_list="$dev0 $dev1"
+spare_list="$dev2 $dev3 $dev4"
+
+# Before: RAID 0 volume, 2 disks, 64k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+# After: RAID 0 volume, 5 disks, 64k chunk size
+vol0_new_num_comps=$((num_disks + 3))
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/12imsm-r0_3d-grow-r0_4d b/tests/12imsm-r0_3d-grow-r0_4d
new file mode 100644
index 00000000..7065f07b
--- /dev/null
+++ b/tests/12imsm-r0_3d-grow-r0_4d
@@ -0,0 +1,20 @@
+. tests/env-imsm-template
+
+# RAID 0 volume, 3 disks grow to RAID 0 volume, 4 disks
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+spare_list="$dev3"
+
+# Before: RAID 0 volume, 3 disks, 64k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+# After: RAID 0 volume, 4 disks, 64k chunk size
+vol0_new_num_comps=$((num_disks + 1))
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/12imsm-r5_3d-grow-r5_4d b/tests/12imsm-r5_3d-grow-r5_4d
new file mode 100644
index 00000000..097da0a7
--- /dev/null
+++ b/tests/12imsm-r5_3d-grow-r5_4d
@@ -0,0 +1,20 @@
+. tests/env-imsm-template
+
+# RAID 5 volume, 3 disks grow to RAID 5 volume, 4 disks
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+spare_list="$dev3"
+
+# Before: RAID 5 volume, 3 disks, 64k chunk size
+vol0_level=5
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+# After: RAID 5 volume, 4 disks, 64k chunk size
+vol0_new_num_comps=$num_disks
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/12imsm-r5_3d-grow-r5_5d b/tests/12imsm-r5_3d-grow-r5_5d
new file mode 100644
index 00000000..2e5c7d25
--- /dev/null
+++ b/tests/12imsm-r5_3d-grow-r5_5d
@@ -0,0 +1,20 @@
+. tests/env-imsm-template
+
+# RAID 5 volume, 3 disks grow to RAID 5 volume, 5 disks
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+spare_list="$dev3 $dev4"
+
+# Before: RAID 5 volume, 3 disks, 64k chunk size
+vol0_level=5
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+# After: RAID 5 volume, 5 disks, 64k chunk size
+vol0_new_num_comps=$((num_disks + 1))
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/13imsm-r0_r0_2d-grow-r0_r0_4d b/tests/13imsm-r0_r0_2d-grow-r0_r0_4d
new file mode 100644
index 00000000..f85efa5d
--- /dev/null
+++ b/tests/13imsm-r0_r0_2d-grow-r0_r0_4d
@@ -0,0 +1,29 @@
+. tests/env-imsm-template
+
+# Grow the container (arrays inside) from 2 disks to 4 disks
+# POSITIVE test
+
+num_disks=2
+device_list="$dev0 $dev1"
+spare_list="$dev2 $dev3"
+
+# Before: RAID 0 volume in slot #0, 2 disks, 128k chunk size
+# RAID 0 volume in slot #1, 2 disks, 64k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=128
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+vol1_level=0
+vol1_comp_size=$((5 * 1024))
+vol1_chunk=64
+vol1_num_comps=$num_disks
+vol1_offset=$((vol0_comp_size + 2048))
+
+# After: RAID 0 volume in slot #0, 4 disks, 128k chunk size
+# RAID 0 volume in slot #1, 4 disks, 64k chunk size
+vol0_new_num_comps=$((num_disks + 2))
+vol1_new_num_comps=$vol0_new_num_comps
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/13imsm-r0_r0_2d-grow-r0_r0_5d b/tests/13imsm-r0_r0_2d-grow-r0_r0_5d
new file mode 100644
index 00000000..1b851a9b
--- /dev/null
+++ b/tests/13imsm-r0_r0_2d-grow-r0_r0_5d
@@ -0,0 +1,29 @@
+. tests/env-imsm-template
+
+# Grow both members from 2 disks to 5 disks
+# POSITIVE test
+
+num_disks=2
+device_list="$dev0 $dev1"
+spare_list="$dev2 $dev3 $dev4"
+
+# Before: RAID 0 volume in slot #0, 2 disks, 64k chunk size
+# RAID 0 volume in slot #1, 2 disks, 256k chunk size
+vol0_level=0
+vol0_comp_size=$((4 * 1024))
+vol0_chunk=64
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+vol1_level=0
+vol1_comp_size=$((6 * 1024))
+vol1_chunk=256
+vol1_num_comps=$num_disks
+vol1_offset=$((vol0_comp_size + 2048))
+
+# After: RAID 0 volume in slot #0, 5 disks, 64k chunk size
+# RAID 0 volume in slot #1, 5 disks, 256k chunk size
+vol0_new_num_comps=$((num_disks + 3))
+vol1_new_num_comps=$vol0_new_num_comps
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/13imsm-r0_r0_3d-grow-r0_r0_4d b/tests/13imsm-r0_r0_3d-grow-r0_r0_4d
new file mode 100644
index 00000000..27ba83b3
--- /dev/null
+++ b/tests/13imsm-r0_r0_3d-grow-r0_r0_4d
@@ -0,0 +1,29 @@
+. tests/env-imsm-template
+
+# Grow a container (arrays inside) from 3 disks to 4 disks
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+spare_list="$dev3"
+
+# Before: RAID 0 volume in slot #0, 3 disks, 128k chunk size
+# RAID 0 volume in slot #1, 3 disks, 128k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=128
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+vol1_level=0
+vol1_comp_size=$((5 * 1024))
+vol1_chunk=128
+vol1_num_comps=$num_disks
+vol1_offset=$((vol0_comp_size + 2048))
+
+# After: RAID0 volume in slot #0, 4 disks, 128k chunk size
+# RAID0 volume in slot #1, 4 disks, 128k chunk size
+vol0_new_num_comps=$((num_disks + 1))
+vol1_new_num_comps=$vol0_new_num_comps
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/13imsm-r0_r5_3d-grow-r0_r5_4d b/tests/13imsm-r0_r5_3d-grow-r0_r5_4d
new file mode 100644
index 00000000..b4bde449
--- /dev/null
+++ b/tests/13imsm-r0_r5_3d-grow-r0_r5_4d
@@ -0,0 +1,29 @@
+. tests/env-imsm-template
+
+# Grow the container (arrays inside) from 3 disks to 4 disks
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+spare_list="$dev3"
+
+# Before: RAID 0 volume in slot #0, 3 disks, 64k chunk size
+# RAID 5 volume in slot #1, 3 disks, 128k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+vol1_level=5
+vol1_comp_size=$((5 * 1024))
+vol1_chunk=128
+vol1_num_comps=$((num_disks - 1))
+vol1_offset=$((vol0_comp_size + 2048))
+
+# After: RAID 0 volume in slot #0, 4 disks, 64k chunk size
+# RAID 5 volume in slot #1, 4 disks, 128k chunk size
+vol1_new_num_comps=$num_disks
+vol0_new_num_comps=$((num_disks + 1))
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/13imsm-r0_r5_3d-grow-r0_r5_5d b/tests/13imsm-r0_r5_3d-grow-r0_r5_5d
new file mode 100644
index 00000000..d0db9aeb
--- /dev/null
+++ b/tests/13imsm-r0_r5_3d-grow-r0_r5_5d
@@ -0,0 +1,29 @@
+. tests/env-imsm-template
+
+# Grow the container (arrays inside) from 3 disks to 5 disks
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+spare_list="$dev3 $dev4"
+
+# Before: RAID 0 volume in slot #0, 3 disks, 128k chunk size
+# RAID 5 volume in slot #1, 3 disks, 128k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=128
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+vol1_level=5
+vol1_comp_size=$((5 * 1024))
+vol1_chunk=128
+vol1_num_comps=$((num_disks - 1))
+vol1_offset=$((vol0_comp_size + 2048))
+
+# After: RAID 0 volume in slot #0, 5 disks, 128k chunk size
+# RAID 5 volume in slot #1, 5 disks, 128k chunk size
+vol0_new_num_comps=$((num_disks + 2))
+vol1_new_num_comps=$((num_disks + 1))
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/13imsm-r5_r0_3d-grow-r5_r0_4d b/tests/13imsm-r5_r0_3d-grow-r5_r0_4d
new file mode 100644
index 00000000..32ebc924
--- /dev/null
+++ b/tests/13imsm-r5_r0_3d-grow-r5_r0_4d
@@ -0,0 +1,29 @@
+. tests/env-imsm-template
+
+# Grow the container (arrays inside) from 3 disks to 4 disks
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+spare_list="$dev3"
+
+# Before: RAID 5 volume in slot #0, 3 disks, 64k chunk size
+# RAID 0 volume in slot #1, 3 disks, 64k chunk size
+vol0_level=5
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+vol1_level=0
+vol1_comp_size=$((5 * 1024))
+vol1_chunk=64
+vol1_offset=$((vol0_comp_size + 2048))
+vol1_num_comps=$num_disks
+
+# After: RAID 5 volume in slot #0, 4 disks, 64k chunk size
+# RAID 0 volume in slot #1, 4 disks, 64k chunk size
+vol0_new_num_comps=$num_disks
+vol1_new_num_comps=$((num_disks + 1))
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/13imsm-r5_r0_3d-grow-r5_r0_5d b/tests/13imsm-r5_r0_3d-grow-r5_r0_5d
new file mode 100644
index 00000000..a97002d0
--- /dev/null
+++ b/tests/13imsm-r5_r0_3d-grow-r5_r0_5d
@@ -0,0 +1,29 @@
+. tests/env-imsm-template
+
+# Grow the container (arrays inside) from 3 disks to 5 disks
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+spare_list="$dev3 $dev4"
+
+# Before: RAID 5 volume in slot #0, 3 disks, 64k chunk size
+# RAID 0 volume in slot #1, 3 disks, 64k chunk size
+vol0_level=5
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+vol1_level=0
+vol1_comp_size=$((5 * 1024))
+vol1_chunk=64
+vol1_offset=$((vol0_comp_size + 2048))
+vol1_num_comps=$num_disks
+
+# After: RAID 5 volume in slot #0, 5 disks, 64k chunk size
+# RAID 0 volume in slot #1, 5 disks, 64k chunk size
+vol0_new_num_comps=$((num_disks + 1))
+vol1_new_num_comps=$((num_disks + 2))
+
+. tests/imsm-grow-template 0 0
diff --git a/tests/14imsm-r0_3d-r5_3d-migrate-r5_4d-r5_4d b/tests/14imsm-r0_3d-r5_3d-migrate-r5_4d-r5_4d
new file mode 100644
index 00000000..386abeee
--- /dev/null
+++ b/tests/14imsm-r0_3d-r5_3d-migrate-r5_4d-r5_4d
@@ -0,0 +1,29 @@
+. tests/env-imsm-template
+
+# RAID 0 and RAID 5 volumes (3 disks) migrate to RAID 5 and RAID 5 volumes (4 disks)
+# NEGATIVE test - migration is not allowed if there is more than one array in a container
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+spare_list="$dev3"
+
+# Before: RAID 0 volume, 3 disks, 64k chunk size, as member #0
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+# Extra: RAID 5 volume, 3 disks, 64k chunk size, as member #1
+vol1_level=5
+vol1_comp_size=$((5 * 1024))
+vol1_chunk=64
+vol1_num_comps=$((num_disks - 1))
+vol1_offset=$((vol0_comp_size + 2048))
+
+# After: RAID 5 volume, 4 disks, 64k chunk size (only member #0)
+vol0_new_level=5
+vol0_new_num_comps=$num_disks
+vol0_new_chunk=64
+
+. tests/imsm-grow-template 1 1
diff --git a/tests/14imsm-r0_3d_no_spares-migrate-r5_3d b/tests/14imsm-r0_3d_no_spares-migrate-r5_3d
new file mode 100644
index 00000000..10bbab6d
--- /dev/null
+++ b/tests/14imsm-r0_3d_no_spares-migrate-r5_3d
@@ -0,0 +1,21 @@
+. tests/env-imsm-template
+
+# RAID 0 volume (3 disks, no spares) migrate to RAID 5 volume (3 disks)
+# NEGATIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+
+# Before: RAID 0 volume, 3 disks, 64k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+# After: RAID 5, 3 disks, 64k chunk size
+vol0_new_level=5
+vol0_new_num_comps=$((num_disks - 1))
+vol0_new_chunk=64
+
+. tests/imsm-grow-template 1
diff --git a/tests/14imsm-r0_r0_2d-takeover-r10_4d b/tests/14imsm-r0_r0_2d-takeover-r10_4d
new file mode 100644
index 00000000..df5b0ce3
--- /dev/null
+++ b/tests/14imsm-r0_r0_2d-takeover-r10_4d
@@ -0,0 +1,30 @@
+. tests/env-imsm-template
+
+
+# Two RAID 0 volumes (2 disks) migrate to RAID 10 volume (4 disks)
+# NEGATIVE test
+
+num_disks=2
+device_list="$dev0 $dev1"
+
+# Before: RAID 0 volume in slot #0, 2 disks, 64k chunk size
+# RAID 0 volume in slot #1, 2 disks, 64k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+# Before (slot #1): RAID 0 volume, 2 disks, 64k chunk size; RAID 0 uses every disk for data
+vol1_level=0
+vol1_comp_size=$((5 * 1024))
+vol1_chunk=64
+vol1_num_comps=$num_disks
+vol1_offset=$(( $vol0_comp_size + 2048 ))
+
+# After: RAID 10, 4 disks, 64k chunk size
+vol0_new_level=10
+vol0_new_num_comps=$((num_disks - 1))
+vol0_new_chunk=64
+
+. tests/imsm-grow-template 1 1
diff --git a/tests/14imsm-r10_4d-grow-r10_5d b/tests/14imsm-r10_4d-grow-r10_5d
new file mode 100644
index 00000000..bcbe1476
--- /dev/null
+++ b/tests/14imsm-r10_4d-grow-r10_5d
@@ -0,0 +1,20 @@
+. tests/env-imsm-template
+
+# RAID 10 volume, 4 disks grow to RAID 10 volume, 5 disks
+# NEGATIVE test
+
+num_disks=4
+device_list="$dev0 $dev1 $dev2 $dev3"
+spare_list="$dev4"
+
+# Before: RAID 10 volume, 4 disks, 128k chunk size
+vol0_level=10
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=128
+vol0_num_comps=$((num_disks - 2))
+vol0_offset=0
+
+# After: RAID 10 volume, 5 disks, 128k chunk size (test should fail)
+vol0_new_num_comps=$((num_disks + 1))
+
+. tests/imsm-grow-template 1 0
diff --git a/tests/14imsm-r10_r5_4d-takeover-r0_2d b/tests/14imsm-r10_r5_4d-takeover-r0_2d
new file mode 100644
index 00000000..9e5205e2
--- /dev/null
+++ b/tests/14imsm-r10_r5_4d-takeover-r0_2d
@@ -0,0 +1,30 @@
+. tests/env-imsm-template
+
+
+# Two RAID volumes: RAID10 and RAID5 (4 disks) migrate to RAID 0 volume (2 disks)
+# NEGATIVE test
+
+num_disks=4
+device_list="$dev0 $dev1 $dev2 $dev3"
+
+# Before: RAID 10 volume in slot #0, 4 disks, 64k chunk size
+# RAID 5 volume in slot #1, 4 disks, 64k chunk size
+vol0_level=10
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$(( $num_disks - 2 ))
+vol0_offset=0
+
+# Before (slot #1): RAID 5 volume, 4 disks, 64k chunk size
+vol1_level=5
+vol1_comp_size=$((5 * 1024))
+vol1_chunk=64
+vol1_num_comps=$(( $num_disks - 1 ))
+vol1_offset=$(( $vol0_comp_size + 2048 ))
+
+# After: RAID 0, 2 disks, 64k chunk size
+vol0_new_level=0
+vol0_new_num_comps=2
+vol0_new_chunk=64
+
+. tests/imsm-grow-template 1 1
diff --git a/tests/14imsm-r1_2d-grow-r1_3d b/tests/14imsm-r1_2d-grow-r1_3d
new file mode 100644
index 00000000..1edd50e4
--- /dev/null
+++ b/tests/14imsm-r1_2d-grow-r1_3d
@@ -0,0 +1,20 @@
+. tests/env-imsm-template
+
+# RAID 1 volume, 2 disks grow to RAID 1 volume, 3 disks
+# NEGATIVE test
+
+num_disks=2
+device_list="$dev0 $dev1"
+spare_list="$dev4"
+
+# Before: RAID 1 volume, 2 disks, 64k chunk size
+vol0_level=1
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+# After: RAID 1 volume, 3 disks, 64k chunk size (test should fail)
+vol0_new_num_comps=$num_disks
+
+. tests/imsm-grow-template 1 0
diff --git a/tests/14imsm-r1_2d-takeover-r0_2d b/tests/14imsm-r1_2d-takeover-r0_2d
new file mode 100644
index 00000000..d8296815
--- /dev/null
+++ b/tests/14imsm-r1_2d-takeover-r0_2d
@@ -0,0 +1,22 @@
+. tests/env-imsm-template
+
+# RAID 1 volume, 2 disks change to RAID 0 volume, 2 disks
+#
+#NEGATIVE test
+
+num_disks=2
+device_list="$dev0 $dev1"
+
+# Before: RAID 1 volume, 2 disks, 64k chunk size
+vol0_level=1
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+# After: RAID 0 volume, 2 disks, 64k chunk size
+vol0_new_level=0
+vol0_new_num_comps=$num_disks
+vol0_new_chunk=64
+
+. tests/imsm-grow-template 1
diff --git a/tests/14imsm-r5_3d-grow-r5_5d-no-spares b/tests/14imsm-r5_3d-grow-r5_5d-no-spares
new file mode 100644
index 00000000..ed18e72b
--- /dev/null
+++ b/tests/14imsm-r5_3d-grow-r5_5d-no-spares
@@ -0,0 +1,20 @@
+. tests/env-imsm-template
+
+# RAID 5 volume, 3 disks grow to RAID 5 volume, 5 disks (no spares available)
+# NEGATIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+
+# Before: RAID 5 volume, 3 disks, 64k chunk size
+vol0_level=5
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+# After: RAID 5 volume, 5 disks, 64k chunk size
+add_to_num_disks=2
+vol0_new_num_comps=$((num_disks + 2))
+
+. tests/imsm-grow-template 1 0
diff --git a/tests/14imsm-r5_3d-migrate-r4_3d b/tests/14imsm-r5_3d-migrate-r4_3d
new file mode 100644
index 00000000..e3b971cc
--- /dev/null
+++ b/tests/14imsm-r5_3d-migrate-r4_3d
@@ -0,0 +1,21 @@
+. tests/env-imsm-template
+
+# RAID 5 volume (3 disks) migrate to RAID 4 volume (3 disks)
+# NEGATIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+
+# Before: RAID 5 volume, 3 disks, 64k chunk size
+vol0_level=5
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+# After: RAID 4, 3 disks, 64k chunk size
+vol0_new_level=4
+vol0_new_num_comps=$((num_disks - 1))
+vol0_new_chunk=64
+
+. tests/imsm-grow-template 1
diff --git a/tests/15imsm-r0_3d_64k-migrate-r0_3d_256k b/tests/15imsm-r0_3d_64k-migrate-r0_3d_256k
new file mode 100644
index 00000000..4fe3807e
--- /dev/null
+++ b/tests/15imsm-r0_3d_64k-migrate-r0_3d_256k
@@ -0,0 +1,21 @@
+. tests/env-imsm-template
+
+# RAID 0 volume, Migration from 64k to 256k chunk size.
+# POSITIVE test
+
+num_disks=2
+device_list="$dev0 $dev1"
+
+# RAID 0, 2 disks, 64k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+# RAID 0, 2 disks, 256k chunk size
+vol0_new_level=0
+vol0_new_num_comps=$vol0_num_comps
+vol0_new_chunk=256
+
+. tests/imsm-grow-template 0 1
diff --git a/tests/15imsm-r5_3d_4k-migrate-r5_3d_256k b/tests/15imsm-r5_3d_4k-migrate-r5_3d_256k
new file mode 100644
index 00000000..025e9efb
--- /dev/null
+++ b/tests/15imsm-r5_3d_4k-migrate-r5_3d_256k
@@ -0,0 +1,21 @@
+. tests/env-imsm-template
+
+# RAID 5 volume, Migration from 4k to 256k chunk size.
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+
+# RAID 5, 3 disks, 4k chunk size
+vol0_level=5
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=4
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+# RAID 5, 3 disks, 256k chunk size
+vol0_new_level=5
+vol0_new_num_comps=$vol0_num_comps
+vol0_new_chunk=256
+
+. tests/imsm-grow-template 0 1
diff --git a/tests/15imsm-r5_3d_64k-migrate-r5_3d_256k b/tests/15imsm-r5_3d_64k-migrate-r5_3d_256k
new file mode 100644
index 00000000..37547b74
--- /dev/null
+++ b/tests/15imsm-r5_3d_64k-migrate-r5_3d_256k
@@ -0,0 +1,21 @@
+. tests/env-imsm-template
+
+# RAID 5 volume, Migration from 64k to 256k chunk size.
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+
+# RAID 5, 3 disks, 64k chunk size
+vol0_level=5
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+# RAID 5, 3 disks, 256k chunk size
+vol0_new_level=5
+vol0_new_num_comps=$vol0_num_comps
+vol0_new_chunk=256
+
+. tests/imsm-grow-template 0 1
diff --git a/tests/15imsm-r5_6d_4k-migrate-r5_6d_256k b/tests/15imsm-r5_6d_4k-migrate-r5_6d_256k
new file mode 100644
index 00000000..d2f6c707
--- /dev/null
+++ b/tests/15imsm-r5_6d_4k-migrate-r5_6d_256k
@@ -0,0 +1,21 @@
+. tests/env-imsm-template
+
+# RAID 5 volume, Migration from 4k to 256k chunk size.
+# POSITIVE test
+
+num_disks=6
+device_list="$dev0 $dev1 $dev2 $dev3 $dev4 $dev5"
+
+# RAID 5, 6 disks, 4k chunk size
+vol0_level=5
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=4
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+# RAID 5, 6 disks, 256k chunk size
+vol0_new_level=5
+vol0_new_num_comps=$vol0_num_comps
+vol0_new_chunk=256
+
+. tests/imsm-grow-template 0 1
diff --git a/tests/15imsm-r5_r0_3d_64k-migrate-r5_r0_3d_256k b/tests/15imsm-r5_r0_3d_64k-migrate-r5_r0_3d_256k
new file mode 100644
index 00000000..da218efa
--- /dev/null
+++ b/tests/15imsm-r5_r0_3d_64k-migrate-r5_r0_3d_256k
@@ -0,0 +1,34 @@
+. tests/env-imsm-template
+
+# Member 0: RAID 5 volume, Member 1: RAID 0 volume
+# Migration from 64k to 256k chunk size (both members)
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+
+# RAID 5, 3 disks, 64k chunk size
+vol0_level=5
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+# After migration parameters
+vol0_new_level=5
+vol0_new_num_comps=$vol0_num_comps
+vol0_new_chunk=256
+
+# RAID 0, 3 disks, 64k chunk size
+vol1_level=0
+vol1_comp_size=$((5 * 1024))
+vol1_chunk=64
+vol1_num_comps=$num_disks
+vol1_offset=$((vol0_comp_size + 2048))
+
+# After migration parameters
+vol1_new_level=0
+vol1_new_num_comps=$vol1_num_comps
+vol1_new_chunk=256
+
+. tests/imsm-grow-template 0 1
diff --git a/tests/16imsm-r0_3d-migrate-r5_4d b/tests/16imsm-r0_3d-migrate-r5_4d
new file mode 100644
index 00000000..4f45479a
--- /dev/null
+++ b/tests/16imsm-r0_3d-migrate-r5_4d
@@ -0,0 +1,21 @@
+. tests/env-imsm-template
+
+# RAID 0 volume (3 disks) migrate to RAID 5 volume (4 disks)
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+
+# Before: RAID 0, 3 disks, 64k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+# After: RAID 5, 4 disks, 64k chunk size
+vol0_new_level=5
+vol0_new_num_comps=$num_disks
+vol0_new_chunk=64
+
+. tests/imsm-grow-template 0 1
diff --git a/tests/16imsm-r0_5d-migrate-r5_6d b/tests/16imsm-r0_5d-migrate-r5_6d
new file mode 100644
index 00000000..bee505bf
--- /dev/null
+++ b/tests/16imsm-r0_5d-migrate-r5_6d
@@ -0,0 +1,21 @@
+. tests/env-imsm-template
+
+# RAID 0 volume (5 disks) migrate to RAID 5 volume (6 disks)
+# POSITIVE test
+
+num_disks=5
+device_list="$dev0 $dev1 $dev2 $dev3 $dev4"
+
+# Before: RAID 0, 5 disks, 64k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+# After: RAID 5, 6 disks, 64k chunk size
+vol0_new_level=5
+vol0_new_num_comps=$num_disks
+vol0_new_chunk=64
+
+. tests/imsm-grow-template 0 1
diff --git a/tests/16imsm-r5_3d-migrate-r0_3d b/tests/16imsm-r5_3d-migrate-r0_3d
new file mode 100644
index 00000000..b1459cc1
--- /dev/null
+++ b/tests/16imsm-r5_3d-migrate-r0_3d
@@ -0,0 +1,21 @@
+. tests/env-imsm-template
+
+# RAID 5 volume (3 disks) migrate to RAID 0 volume (2 disks)
+# POSITIVE test
+
+num_disks=3
+device_list="$dev0 $dev1 $dev2"
+
+# Before: RAID 5, 3 disks, 64k chunk size
+vol0_level=5
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+# After: RAID 0, 3 disks, 64k chunk size
+vol0_new_level=0
+vol0_new_num_comps=$((num_disks-1))
+vol0_new_chunk=64
+
+. tests/imsm-grow-template 0 1
diff --git a/tests/16imsm-r5_5d-migrate-r0_5d b/tests/16imsm-r5_5d-migrate-r0_5d
new file mode 100644
index 00000000..323ca52e
--- /dev/null
+++ b/tests/16imsm-r5_5d-migrate-r0_5d
@@ -0,0 +1,21 @@
+. tests/env-imsm-template
+
+# RAID 5 volume (5 disks) migration to RAID 0 volume (4 disks)
+# POSITIVE test
+
+num_disks=5
+device_list="$dev0 $dev1 $dev2 $dev3 $dev4"
+
+# Before: RAID 5 volume, 5 disks, 64k chunk size
+vol0_level=5
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$((num_disks - 1))
+vol0_offset=0
+
+# After: RAID 0 volume, 5 disks, 64k chunk size
+vol0_new_level=0
+vol0_new_num_comps=$((num_disks - 1))
+vol0_new_chunk=64
+
+. tests/imsm-grow-template 0 1
diff --git a/tests/18imsm-1d-takeover-r0_1d b/tests/18imsm-1d-takeover-r0_1d
new file mode 100644
index 00000000..bf462e27
--- /dev/null
+++ b/tests/18imsm-1d-takeover-r0_1d
@@ -0,0 +1,23 @@
+. tests/env-imsm-template
+
+# Create RAID 0 from a single disk.
+# POSITIVE test
+
+vol0_num_comps=1
+vol0_comp_size=$((10 * 1024))
+
+# Create container
+mdadm --create --run $container --auto=md --metadata=imsm --force --raid-disks=$vol0_num_comps $dev0
+wait
+imsm_check container $vol0_num_comps
+
+# Create RAID 0 volume
+mdadm --create --run $member0 --auto=md --level=0 --size=$vol0_comp_size --chunk=64 --force --raid-disks=$vol0_num_comps $dev0
+wait
+check wait
+
+# Test the member
+imsm_check member $member0 $vol0_num_comps 0 $vol0_comp_size $((vol0_num_comps * vol0_comp_size)) 0 64
+testdev $member0 $vol0_num_comps $vol0_comp_size 64
+
+exit 0
diff --git a/tests/18imsm-1d-takeover-r1_2d b/tests/18imsm-1d-takeover-r1_2d
new file mode 100644
index 00000000..fa02b6c3
--- /dev/null
+++ b/tests/18imsm-1d-takeover-r1_2d
@@ -0,0 +1,21 @@
+. tests/env-imsm-template
+
+# Create RAID 1 from a single disk
+# POSITIVE test
+
+vol0_num_comps=1
+vol0_comp_size=$((10 * 1024))
+
+# Create container
+mdadm --create --run $container --auto=md --metadata=imsm --force --raid-disks=$vol0_num_comps $dev0
+wait
+imsm_check container $vol0_num_comps
+
+# Create RAID 1 volume
+mdadm --create --run $member0 --auto=md --level=1 --size=$vol0_comp_size --chunk=64 --raid-disks=$((vol0_num_comps + 1)) $dev0 missing
+wait
+check wait
+
+# Test member0; the expected disk count is the --raid-disks value used at creation (one slot is "missing")
+imsm_check member $member0 $((vol0_num_comps + 1)) 1 $vol0_comp_size $((vol0_num_comps * vol0_comp_size)) 0 64
+testdev $member0 $vol0_num_comps $vol0_comp_size 64
diff --git a/tests/18imsm-r0_2d-takeover-r10_4d b/tests/18imsm-r0_2d-takeover-r10_4d
new file mode 100644
index 00000000..0e77e5da
--- /dev/null
+++ b/tests/18imsm-r0_2d-takeover-r10_4d
@@ -0,0 +1,22 @@
+. tests/env-imsm-template
+
+# RAID 0 volume, 2 disks change to RAID 10 volume, 4 disks
+# POSITIVE test
+
+num_disks=2
+device_list="$dev0 $dev1"
+spare_list="$dev2 $dev3"
+
+# Before: RAID 0 volume, 2 disks, 128k chunk size
+vol0_level=0
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=128
+vol0_num_comps=$num_disks
+vol0_offset=0
+
+# After: RAID 10 volume, 4 disks, 128k chunk size
+vol0_new_level=10
+vol0_new_num_comps=$vol0_num_comps
+vol0_new_chunk=128
+
+. tests/imsm-grow-template 0 1
diff --git a/tests/18imsm-r10_4d-takeover-r0_2d b/tests/18imsm-r10_4d-takeover-r0_2d
new file mode 100644
index 00000000..8a9606b4
--- /dev/null
+++ b/tests/18imsm-r10_4d-takeover-r0_2d
@@ -0,0 +1,22 @@
+. tests/env-imsm-template
+
+# RAID 10 volume, 4 disks change to RAID 0 volume, 2 disks
+# POSITIVE test
+
+num_disks=4
+device_list="$dev0 $dev1 $dev2 $dev3"
+
+# Before: RAID 10 volume, 4 disks, 128k chunk size
+vol0_level=10
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=128
+vol0_num_comps=$((num_disks - 2))
+vol0_offset=0
+
+# After: RAID 0 volume, 2 disks, 128k chunk size
+vol0_new_level=0
+vol0_new_num_comps=2
+vol0_new_chunk=128
+new_num_disks=2
+
+. tests/imsm-grow-template 0 1
diff --git a/tests/18imsm-r1_2d-takeover-r0_1d b/tests/18imsm-r1_2d-takeover-r0_1d
new file mode 100644
index 00000000..1697d60e
--- /dev/null
+++ b/tests/18imsm-r1_2d-takeover-r0_1d
@@ -0,0 +1,22 @@
+. tests/env-imsm-template
+
+# RAID 1 volume, 2 disks change to RAID 0 volume, 1 disk
+# POSITIVE test
+
+num_disks=2
+device_list="$dev0 $dev1"
+
+# Before: RAID 1 volume, 2 disks
+vol0_level=1
+vol0_comp_size=$((5 * 1024))
+vol0_chunk=64
+vol0_num_comps=$(( $num_disks - 1 ))
+vol0_offset=0
+
+# After: RAID 0 volume, 1 disk, 64k chunk size
+vol0_new_level=0
+vol0_new_num_comps=1
+vol0_new_chunk=64
+new_num_disks=0
+
+. tests/imsm-grow-template 0 1
diff --git a/tests/19raid6auto-repair b/tests/19raid6auto-repair
new file mode 100644
index 00000000..23b14e4b
--- /dev/null
+++ b/tests/19raid6auto-repair
@@ -0,0 +1,43 @@
+number_of_disks=5
+chunksize_in_kib=512
+chunksize_in_b=$((chunksize_in_kib*1024))
+array_data_size_in_kib=$((chunksize_in_kib*(number_of_disks-2)*number_of_disks))
+array_data_size_in_b=$((array_data_size_in_kib*1024))
+devs="$dev0 $dev1 $dev2 $dev3 $dev4"
+
+# default data offset of 2048 sectors, expressed in KiB
+data_offset_in_kib=$((2048/2))
+
+# make a raid6 array and fill it from a file of random data
+dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$array_data_size_in_kib
+mdadm -CR $md0 -l6 -n$number_of_disks -c $chunksize_in_kib $devs
+dd if=/tmp/RandFile of=$md0 bs=1024 count=$array_data_size_in_kib
+blockdev --flushbufs $md0; sync
+check wait
+blockdev --flushbufs $devs; sync
+echo 3 > /proc/sys/vm/drop_caches
+cmp -s -n $array_data_size_in_b $md0 /tmp/RandFile || { echo sanity cmp failed ; exit 2; }
+
+# wipe out 5 chunks on each device, each device at a different chunk offset
+dd if=/dev/urandom of=$dev0 bs=1024 count=$((5*chunksize_in_kib)) seek=$((data_offset_in_kib+chunksize_in_kib*0))
+dd if=/dev/urandom of=$dev1 bs=1024 count=$((5*chunksize_in_kib)) seek=$((data_offset_in_kib+chunksize_in_kib*5))
+dd if=/dev/urandom of=$dev2 bs=1024 count=$((5*chunksize_in_kib)) seek=$((data_offset_in_kib+chunksize_in_kib*10))
+dd if=/dev/urandom of=$dev3 bs=1024 count=$((5*chunksize_in_kib)) seek=$((data_offset_in_kib+chunksize_in_kib*15))
+dd if=/dev/urandom of=$dev4 bs=1024 count=$((5*chunksize_in_kib)) seek=$((data_offset_in_kib+chunksize_in_kib*20))
+
+blockdev --flushbufs $devs; sync
+echo 3 > /proc/sys/vm/drop_caches
+# raid6check must report the damage, and the autorepair run must succeed
+$dir/raid6check $md0 0 0 2>&1 | grep -qs "Error" || { echo should detect errors; exit 2; }
+
+$dir/raid6check $md0 0 0 autorepair > /dev/null || { echo repair failed; exit 2; }
+blockdev --flushbufs $md0 $devs; sync
+echo 3 > /proc/sys/vm/drop_caches
+# after the repair no errors may remain and the array must match the reference file
+$dir/raid6check $md0 0 0 2>&1 | grep -qs "Error" && { echo errors detected; exit 2; }
+cmp -s -n $array_data_size_in_b $md0 /tmp/RandFile || { echo cmp failed ; exit 2; }
+
+mdadm -S $md0
+udevadm settle
+blockdev --flushbufs $md0 $devs; sync
+echo 3 > /proc/sys/vm/drop_caches
diff --git a/tests/19raid6repair b/tests/19raid6repair
new file mode 100644
index 00000000..ecff2c3a
--- /dev/null
+++ b/tests/19raid6repair
@@ -0,0 +1,47 @@
+number_of_disks=4
+chunksize_in_kib=512
+chunksize_in_b=$((chunksize_in_kib*1024))
+array_data_size_in_kib=$((chunksize_in_kib*(number_of_disks-2)*number_of_disks))
+array_data_size_in_b=$((array_data_size_in_kib*1024))
+devs="$dev1 $dev2 $dev3 $dev4"
+
+# default data offset of 2048 sectors, expressed in KiB
+data_offset_in_kib=$((2048/2))
+# each case: "<device to corrupt> <stripe> <first repair arg> <second repair arg>"
+for failure in "$dev3 3 3 2" "$dev3 3 2 3" "$dev3 3 2 1" "$dev3 3 2 0" "$dev4 3 3 0" "$dev4 3 3 1" "$dev4 3 3 2" \
+ "$dev1 3 0 1" "$dev1 3 0 2" "$dev1 3 0 3" "$dev2 3 1 0" "$dev2 3 1 2" "$dev2 3 1 3" ; do
+ failure_split=( $failure )
+ device_with_error=${failure_split[0]}
+ stripe_with_error=${failure_split[1]}
+ repair_params="$stripe_with_error ${failure_split[2]} ${failure_split[3]}"
+ start_of_errors_in_kib=$((data_offset_in_kib+chunksize_in_kib*stripe_with_error))
+
+ # make a raid6 array and fill it from a file of random data
+ dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$array_data_size_in_kib
+ mdadm -CR $md0 -l6 -n$number_of_disks -c $chunksize_in_kib $devs
+ dd if=/tmp/RandFile of=$md0 bs=1024 count=$array_data_size_in_kib
+ blockdev --flushbufs $md0; sync
+
+ check wait
+ blockdev --flushbufs $devs; sync
+ echo 3 > /proc/sys/vm/drop_caches
+ cmp -s -n $array_data_size_in_b $md0 /tmp/RandFile || { echo sanity cmp failed ; exit 2; }
+ # overwrite one chunk of the selected device at the selected stripe
+ dd if=/dev/urandom of=$device_with_error bs=1024 count=$chunksize_in_kib seek=$start_of_errors_in_kib
+ blockdev --flushbufs $device_with_error; sync
+ echo 3 > /proc/sys/vm/drop_caches
+
+ $dir/raid6check $md0 0 0 2>&1 | grep -qs "Error" || { echo should detect errors; exit 2; }
+
+ $dir/raid6check $md0 repair $repair_params > /dev/null || { echo repair failed; exit 2; }
+ blockdev --flushbufs $md0 $devs; sync
+ echo 3 > /proc/sys/vm/drop_caches
+ # after the repair no errors may remain and the array must match the reference file
+ $dir/raid6check $md0 0 0 2>&1 | grep -qs "Error" && { echo errors detected; exit 2; }
+ cmp -s -n $array_data_size_in_b $md0 /tmp/RandFile || { echo cmp failed ; exit 2; }
+
+ mdadm -S $md0
+ udevadm settle
+ blockdev --flushbufs $md0 $devs; sync
+ echo 3 > /proc/sys/vm/drop_caches
+done
diff --git a/tests/19repair-does-not-destroy b/tests/19repair-does-not-destroy
new file mode 100644
index 00000000..1d3b9b45
--- /dev/null
+++ b/tests/19repair-does-not-destroy
@@ -0,0 +1,29 @@
+number_of_disks=7
+chunksize_in_kib=512
+array_data_size_in_kib=$((chunksize_in_kib*(number_of_disks-2)*number_of_disks))
+array_data_size_in_b=$((array_data_size_in_kib*1024))
+devs="$dev0 $dev1 $dev2 $dev3 $dev4 $dev5 $dev6"
+# build a raid6 array holding known random data; no device is corrupted in this test
+dd if=/dev/urandom of=/tmp/RandFile bs=1024 count=$array_data_size_in_kib
+mdadm -CR $md0 -l6 -n$number_of_disks -c $chunksize_in_kib $devs
+dd if=/tmp/RandFile of=$md0 bs=1024 count=$array_data_size_in_kib
+blockdev --flushbufs $md0; sync
+check wait
+blockdev --flushbufs $devs; sync
+echo 3 > /proc/sys/vm/drop_caches
+$dir/raid6check $md0 repair 1 2 3 > /dev/null # D D
+$dir/raid6check $md0 repair 8 2 5 > /dev/null # D P
+$dir/raid6check $md0 repair 15 4 6 > /dev/null # D Q
+$dir/raid6check $md0 repair 22 5 6 > /dev/null # P Q
+$dir/raid6check $md0 repair 3 4 0 > /dev/null # Q D
+$dir/raid6check $md0 repair 3 3 1 > /dev/null # P D
+$dir/raid6check $md0 repair 6 4 5 > /dev/null # D<D
+$dir/raid6check $md0 repair 13 5 4 > /dev/null # D>D
+blockdev --flushbufs $devs; sync
+echo 3 > /proc/sys/vm/drop_caches
+$dir/raid6check $md0 0 0 2>&1 | grep -qs "Error" && { echo errors detected; exit 2; }
+cmp -s -n $array_data_size_in_b $md0 /tmp/RandFile || { echo should not mess up correct stripe ; exit 2; }
+
+mdadm -S $md0
+udevadm settle
+blockdev --flushbufs $md0 $devs; sync
diff --git a/tests/ToTest b/tests/ToTest
new file mode 100644
index 00000000..31d70828
--- /dev/null
+++ b/tests/ToTest
@@ -0,0 +1,44 @@
+
+multipath!!
+
+add/remove/fail
+ raid1 DONE
+ raid5 DONE
+ raid6/10 needed??
+
+assemble
+ by devices DONE
+ by uuid DONE
+ by superminor DONE
+ by config file DONE
+
+ various --updates DONE (not sparc2.2 or summaries)
+
+stop
+ --scan
+
+readonly/readwrite
+
+bitmap
+ separate file
+ internal
+ filename in config file
+
+examine
+ --scan
+ --brief
+
+detail
+
+grow:
+ size
+ raid1/5/6 DONE
+ devices
+ raid1 add DONE
+ raid1 shrink DONE
+
+'--quiet' option, and remove ""
+'--name' option for v1, and configfile etc...
+
+faulty
+ errors in raid1/5/6
diff --git a/tests/check b/tests/check
new file mode 100644
index 00000000..f4ed6d5b
--- /dev/null
+++ b/tests/check
@@ -0,0 +1,35 @@
+
+# Usage: check <what> [arg]
+# Inspects /proc/mdstat and exits 1 with a diagnostic when the requested
+# condition does not hold; exits 0 otherwise.
+case $1 in
+raid* | linear )
+	# an array of personality $1 must be listed as active
+	if ! grep -s "active $1 " /proc/mdstat > /dev/null
+	then
+		echo >&2 "ERROR active $1 not found"
+		cat /proc/mdstat
+		exit 1
+	fi
+	;;
+
+resync | recovery )
+	# the named operation must show up in mdstat after a short pause
+	sleep 0.1
+	if ! grep -s $1 /proc/mdstat > /dev/null
+	then
+		echo >&2 ERROR no $1 happening
+		cat /proc/mdstat
+		exit 1
+	fi
+	;;
+
+nosync )
+	# neither a "resync =" nor a "recovery =" progress line may be present
+	sleep 0.5
+	if grep -s 're[synccovery]* =' > /dev/null /proc/mdstat
+	then
+		echo >&2 "ERROR resync or recovery is happening!"
+		cat /proc/mdstat
+		exit 1
+	fi
+	;;
+
+wait )
+	# poll every 2 seconds until no progress line remains
+	sleep 0.1
+	while grep 're[synccovery]* =' > /dev/null /proc/mdstat
+	do
+		sleep 2
+	done
+	;;
+
+state )
+	# $2 is the expected device-state string, e.g. UU or U_
+	if ! grep -s "blocks.*\[$2\]\$" /proc/mdstat > /dev/null
+	then
+		echo >&2 "ERROR state $2 not found!"
+		cat /proc/mdstat
+		exit 1
+	fi
+	sleep 0.5
+	;;
+
+* )
+	echo >&2 ERROR unknown check $1
+	exit 1
+	;;
+esac
+
+exit 0
diff --git a/tests/env-ddf-template b/tests/env-ddf-template
new file mode 100644
index 00000000..90d7272f
--- /dev/null
+++ b/tests/env-ddf-template
@@ -0,0 +1,113 @@
+# Print the SHA-1 digest of file $1: only the first 40 characters of
+# sha1sum's output, i.e. without the file name.
+sha1_sum() {
+	local file=$1
+	sha1sum "$file" | cut -c 1-40
+}
+
+# Print the /dev node of the block device holding "/".
+# stat's %D is the device number in hex; everything but its last two
+# digits is taken as the major number and the last two as the minor.
+# Exits the shell with status 1 if the resulting node is not a block device.
+get_rootdev() {
+	local devno=$(stat -c %D /)
+	local major_hex=$(expr $devno : '\(..*\)..')
+	local minor_hex=${devno#$major_hex}
+	local link=$(readlink /sys/dev/block/$((0x$major_hex)):$((0x$minor_hex)))
+	local node=/dev/$(basename $link)
+	if [ ! -b $node ]; then
+		exit 1
+	fi
+	echo $node
+}
+
+get_sysdir() {
+ local mddev=$1
+ [ -L $mddev ] && mddev=$(readlink -f $mddev)
+ echo "/sys/class/block/$(basename $mddev)/md"
+}
+
+get_raiddisks() {
+ sysdir=$(get_sysdir "$1")
+ for i in $(seq 0 $(($(cat $sysdir/raid_disks)-1))); do
+ if [ -d $sysdir/rd$i ]; then
+ readlink -f /dev/block/$(cat $sysdir/rd$i/block/dev)
+ else
+ echo MISSING
+ fi
+ done
+}
+
+# Print how many raid slots of array $1 have a device present, i.e. the
+# number of get_raiddisks output lines that do not contain MISSING.
+get_present() {
+	get_raiddisks $1 | grep -v -c MISSING
+}
+
+ddf_check() {
+ udevadm settle
+ case $1 in
+ container )
+ grep -s "blocks super external:ddf" /proc/mdstat > /dev/null || {
+ echo >&2 "**Fatal** Correctly formed container not found"; cat /proc/mdstat; exit 1; }
+ ;;
+ member )
+ t_member=$2
+ t_num_disks=$3
+ t_level=$4
+ t_rd_size=$5
+ t_size=$6
+ t_offset=$7
+ t_chunk=$8
+ t_layout=$9
+
+ if [ $t_chunk -ne 0 ]; then
+ t_rd_size=$((t_rd_size & ~(t_chunk - 1)))
+ fi
+ case $t_level in
+ 0) t_size=$((t_num_disks*$t_rd_size));;
+ 1) t_size=$t_rd_size;;
+ 4|5) t_size=$(((t_num_disks-1)*$t_rd_size));;
+ 6) t_size=$(((t_num_disks-2)*$t_rd_size));;
+ 10) t_size=$((t_num_disks*$t_rd_size/t_layout));;
+ esac
+
+ err=0
+
+ eval `stat -L -c "let major=0x%t; let minor=0x%T;" $t_member`
+ sysfs=/sys/dev/block/${major}:${minor}
+ if [ ! -f ${sysfs}/md/array_state ]; then
+ echo "**Fatal**: Array member $t_member not found" >&2; cat /proc/mdstat >&2; exit 1
+ fi
+ _chunk=`cat ${sysfs}/md/chunk_size`
+ if [ $t_chunk -ne $((_chunk/1024)) ]; then
+ echo "**Error**: Chunk size mismatch - expected $t_chunk, actual $_chunk" >&2
+ err=$((err + 1))
+ fi
+ for i in `seq 0 $((t_num_disks - 1))`; do
+ _offset=`cat ${sysfs}/md/rd${i}/offset`
+ if [ $t_offset -ne $((_offset / 2)) ]; then
+ echo "**Error**: Offset mismatch - expected $t_offset, actual $((_offset/2))" >&2
+ err=$((err + 1))
+ fi
+ _rd_size=`cat ${sysfs}/md/rd${i}/size`
+ if [ $t_rd_size -ne $_rd_size ]; then
+ echo "**Error**: Component size mismatch - expected $t_rd_size, actual $_rd_size" >&2
+ err=$((err + 1))
+ fi
+ done
+ _size=`cat ${sysfs}/md/array_size`
+ [ o$_size = odefault ] && _size=$(($(cat ${sysfs}/size)/2))
+ if [ $t_size -ne $_size ]; then
+ echo "**Error**: Array size mismatch - expected $t_size, actual $_size" >&2
+ err=$((err + 1))
+ fi
+ if [ $err -gt 0 ]; then
+ echo "$t_member failed check" >&2
+ cat /proc/mdstat >&2
+ mdadm -E /dev/loop8 >&2
+ exit 1
+ fi
+ ;;
+ * )
+ echo >&2 "**Error** unknown check $1"; exit 1;
+ esac
+}
+
+# Device node names for the container and up to five member arrays.
+# NOTE(review): presumably consumed by the DDF test scripts that source
+# this template - confirm against the callers.
+container=/dev/md/ddf0
+member0=/dev/md/vol0
+member1=/dev/md/vol1
+member2=/dev/md/vol2
+member3=/dev/md/vol3
+member4=/dev/md/vol4
+
+# We don't want systemd to start system mdmon; start our own
+export MDADM_NO_SYSTEMCTL=1
diff --git a/tests/env-imsm-template b/tests/env-imsm-template
new file mode 100644
index 00000000..bc5f5852
--- /dev/null
+++ b/tests/env-imsm-template
@@ -0,0 +1,74 @@
+imsm_check() {
+ udevadm settle
+ case $1 in
+ container )
+ grep -s "blocks super external:imsm" /proc/mdstat > /dev/null || {
+ echo >&2 "**Fatal** Correctly formed container not found"; cat /proc/mdstat; exit 1; }
+ ;;
+ member )
+ t_member=$2
+ t_num_disks=$3
+ t_level=$4
+ t_rd_size=$5
+ t_size=$6
+ t_offset=$7
+ t_chunk=$8
+
+ if [ $t_level -ne 1 ]; then
+ t_rd_size=$((t_rd_size & ~(t_chunk - 1)))
+ else
+ t_chunk=64
+ fi
+ t_size=$((t_size/1024))
+ t_size=$((t_size*1024))
+ err=0
+
+ eval `stat -L -c "let major=0x%t; let minor=0x%T;" $t_member`
+ sysfs=/sys/dev/block/${major}:${minor}
+ if [ ! -f ${sysfs}/md/array_state ]; then
+ echo "**Fatal**: Array member $t_member not found" >&2; cat /proc/mdstat >&2; exit 1
+ fi
+ _chunk=`cat ${sysfs}/md/chunk_size`
+ if [ $t_chunk -ne $((_chunk/1024)) ]; then
+ echo "**Error**: Chunk size mismatch - expected $t_chunk, actual $_chunk" >&2
+ err=$((err + 1))
+ fi
+ for i in `seq 0 $((t_num_disks - 1))`; do
+ _offset=`cat ${sysfs}/md/rd${i}/offset`
+ if [ $t_offset -ne $((_offset / 2)) ]; then
+ echo "**Error**: Offset mismatch - expected $t_offset, actual $_offset" >&2
+ err=$((err + 1))
+ fi
+ _rd_size=`cat ${sysfs}/md/rd${i}/size`
+ if [ $t_rd_size -ne $_rd_size ]; then
+ echo "**Error**: Component size mismatch - expected $t_rd_size, actual $_rd_size" >&2
+ err=$((err + 1))
+ fi
+ done
+ _size=`cat ${sysfs}/md/array_size`
+ if [ $t_size -ne $_size ]; then
+ echo "**Error**: Array size mismatch - expected $t_size, actual $_size" >&2
+ err=$((err + 1))
+ fi
+ if [ $err -gt 0 ]; then
+ echo "$t_member failed check" >&2
+ cat /proc/mdstat >&2
+ mdadm -E /dev/loop0 >&2
+ exit 1
+ fi
+ ;;
+ * )
+ echo >&2 "**Error** unknown check $1"; exit 1;
+ esac
+}
+
+export IMSM_NO_PLATFORM=1
+export IMSM_DEVNAME_AS_SERIAL=1
+export IMSM_TEST_OROM=1
+export MDADM_EXPERIMENTAL=1
+container=/dev/md/container
+member0=/dev/md/vol0
+member1=/dev/md/vol1
+member2=/dev/md/vol2
+member3=/dev/md/vol3
+member4=/dev/md/vol4
diff --git a/tests/imsm-grow-template b/tests/imsm-grow-template
new file mode 100644
index 00000000..8022e3ad
--- /dev/null
+++ b/tests/imsm-grow-template
@@ -0,0 +1,104 @@
+
+# 0 - POSITIVE test, otherwise NEGATIVE test
+negative_test=$1
+
+# 0 - On-line Capacity Expansion test, otherwise LEVEL migration or CHUNK size migration test
+migration_test=$2
+
+function grow_member() {
+ local member=$1
+ local disks=$2
+ local comps=$3
+ local level=$4
+ local size=$5
+ local offset=$6
+ local chunk=$7
+ local array_size=$((comps * size))
+
+ rm -f $backup_imsm
+ ( set -ex; mdadm --grow $member --chunk=$chunk --level=$level )
+ local status=$?
+ if [ $negative_test -ne 0 ]; then
+ if [ $status -eq 0 ]; then
+ echo >&2 "**Error**: $member: --grow should failed, but it completed successfuly"
+ exit 1
+ fi
+ return
+ fi
+ check wait
+ sleep 5
+ imsm_check member $member $disks $level $size $array_size $offset $chunk
+ testdev $member $comps $size $chunk
+}
+
+# Main flow of the grow test.  Besides $1/$2 it reads these variables,
+# which must be set by the script that sources this template:
+# num_disks, device_list, spare_list, add_to_num_disks, new_num_disks,
+# vol0_* and (optionally) vol1_*.
+
+# Create container
+mdadm --create --run $container --auto=md --metadata=imsm --raid-disks=$num_disks $device_list
+# NOTE(review): "wait" with no operand only waits for background jobs of
+# this shell; none are started in the lines visible here.
+wait
+imsm_check container $num_disks
+
+# Create first volume inside the container
+mdadm --create --run $member0 --auto=md --level=$vol0_level --size=$vol0_comp_size --chunk=$vol0_chunk --raid-disks=$num_disks $device_list
+wait
+
+# Create second volume inside the container (if defined)
+if [ ! -z $vol1_chunk ]; then
+ mdadm --create --run $member1 --auto=md --level=$vol1_level --size=$vol1_comp_size --chunk=$vol1_chunk --raid-disks=$num_disks $device_list
+ wait
+fi
+
+# Wait for any RESYNC to complete
+check wait
+
+# Test first volume
+imsm_check member $member0 $num_disks $vol0_level $vol0_comp_size $((vol0_comp_size * vol0_num_comps)) $vol0_offset $vol0_chunk
+testdev $member0 $vol0_num_comps $vol0_comp_size $vol0_chunk
+
+# Test second volume (if defined)
+if [ ! -z $vol1_chunk ]; then
+ imsm_check member $member1 $num_disks $vol1_level $vol1_comp_size $((vol1_comp_size * vol1_num_comps)) $vol1_offset $vol1_chunk
+ testdev $member1 $vol1_num_comps $vol1_comp_size $vol1_chunk
+fi
+
+# Add extra disks to container if operation requires spares in container.
+# num_disks is incremented once per added spare.
+for i in $spare_list
+do
+ mdadm --add $container $i
+ wait
+ num_disks=$((num_disks + 1))
+done
+
+imsm_check container $num_disks
+# from here on num_disks is the disk count expected after the grow
+num_disks=$((num_disks + add_to_num_disks))
+# removed before each grow attempt, here and in grow_member
+backup_imsm=/tmp/backup_imsm
+
+# Grow each member or a container depending on the type of an operation
+if [ $migration_test -ne 0 ]; then
+ # migration: grow the members one by one
+ if [ -z $new_num_disks ]; then
+ new_num_disks=$num_disks
+ fi
+ grow_member $member0 $new_num_disks $vol0_new_num_comps $vol0_new_level $vol0_comp_size $vol0_offset $vol0_new_chunk
+ if [[ $vol1_new_chunk -ne 0 ]] ; then
+ grow_member $member1 $new_num_disks $vol1_new_num_comps $vol1_new_level $vol1_comp_size $vol1_offset $vol1_new_chunk
+ fi
+else
+ # capacity expansion: grow the container to the new disk count
+ rm -f $backup_imsm
+ ( set -x; mdadm --grow $container --raid-disks=$num_disks )
+ grow_status=$?
+ if [ $negative_test -ne 0 ]; then
+ # negative test: a successful grow is the failure case
+ if [ $grow_status -eq 0 ]; then
+ echo >&2 "**Error**: $container: --grow should failed, but it completed successfuly"
+ exit 1
+ fi
+ else
+ check wait
+ sleep 5
+ imsm_check member $member0 $num_disks $vol0_level $vol0_comp_size $((vol0_comp_size * vol0_new_num_comps)) $vol0_offset $vol0_chunk
+ testdev $member0 $vol0_new_num_comps $vol0_comp_size $vol0_chunk
+ if [ $vol1_new_num_comps -ne 0 ]; then
+ imsm_check member $member1 $num_disks $vol1_level $vol1_comp_size $((vol1_comp_size * vol1_new_num_comps)) $vol1_offset $vol1_chunk
+ testdev $member1 $vol1_new_num_comps $vol1_comp_size $vol1_chunk
+ fi
+ fi
+fi
+
+exit 0
diff --git a/tests/testdev b/tests/testdev
new file mode 100644
index 00000000..8b6e6f06
--- /dev/null
+++ b/tests/testdev
@@ -0,0 +1,13 @@
+dev=$1
+cnt=$2
+size=$3
+chunk=$4
+mkfs -j $dev > /dev/null 2>&1 && fsck -fn $dev >&2
+dsize=$[size/chunk]
+dsize=$[dsize*chunk]
+rasize=$[dsize*2*cnt]
+if [ $rasize -ne `/sbin/blockdev --getsize $dev` ]
+then
+ echo "ERROR: size is wrong for $dev: $cnt * $size (chunk=$chunk) = $rasize, not `/sbin/blockdev --getsize $dev`"
+ exit 1;
+fi
diff --git a/tests/utils b/tests/utils
new file mode 100644
index 00000000..3acebd77
--- /dev/null
+++ b/tests/utils
@@ -0,0 +1,191 @@
+# set of functions used to test policy framework with assemble, incremental and Monitor
+
+set +e
+#create links to be able to use domains
+for d in 0 1 2 3 4 5 6 7 8 9 10 11 12
+do
+ eval ln -s \$dev$d /dev/disk/by-path/loop$d
+ eval d$d="loop$d"
+ eval mdadm --zero-superblock \$dev$d
+done
+
+devices="/dev/loop[0-9] /dev/loop10 /dev/loop11 /dev/loop12"
+
+# on failure print out few things before exit
+# uses testdsc and platform global variables
+#
+# Prints the message given as arguments, the config file, /proc/mdstat
+# and any running mdadm processes to stderr.  When $listfailed is "yes"
+# the failure is recorded in $flist and $failed is set to 1 so the
+# caller can continue; otherwise the shell exits with status 1.
+err(){
+	echo >&2 "ERROR: $*"
+	# quoted: with $config unset an unquoted cat would read stdin
+	cat "$config" >&2 || true
+	cat /proc/mdstat >&2
+	[ -z "$testdsc" ] || { echo >&2 $platform: $testdsc "- failed"; }
+	ps -e | grep mdadm >&2 || true
+	# quoted: with $listfailed unset an unquoted test is a syntax error
+	if [ "$listfailed" == "yes" ]; then
+		[ "$verbose" != "yes" ] || echo ---FAILED---
+		flist="$flist \n $platform $testdsc"
+		failed=1
+	else
+		exit 1
+	fi
+}
+
+# set test description
+dsc(){
+ failed=0
+ testdsc="$*"
+ [ "$verbose" != "yes" ] || echo $testdsc
+}
+
+killmonitor(){
+ [ -z "$monitorpid" ] || { kill -9 $monitorpid; unset monitorpid; }
+}
+
+# Tear down whatever a test left behind: stop the monitor, stop all
+# arrays, wipe the superblocks on the test devices, wait for udev to
+# settle and remove the generated config file.
+tidyup(){
+ killmonitor
+ # NOTE(review): the stop is issued twice; the reason is not visible here
+ mdadm -Ss || true
+ mdadm -Ss
+ mdadm --zero-superblock $devices || true
+ udevadm settle
+ rm -f $config
+}
+
+# run tidyup on shell exit (0) and on HUP, INT, QUIT and TERM
+trap tidyup 0 1 2 3 15
+
+# create a RAID 1 array or container and subarray(s) on 2 disks
+# if platform not specified imsm is used
+# if subsize is given, first subarray is created with given size and second one on remaining space
+#
+# ccv <mddevno> <devno1> <devno2> [platform] [subsize] [onearray]
+#   mddevno        suffix used to name the created array(s)
+#   devno1/devno2  indexes N of the $devN variables to build on
+#   onearray       when non-empty, the unsized subarray is not created
+ccv(){
+	local mddevno="$1"
+	local devno1="$2"
+	local devno2="$3"
+	local platform="$4"
+	local subsize="$5"
+	local onearray="$6"
+
+	if [ -z "$platform" ]; then
+		platform="imsm"
+	fi
+	case "$platform" in
+	imsm | ddf )
+		# external metadata: container first, then the subarray(s)
+		eval mdadm -CR /dev/md/con$mddevno -e $platform -n 2 \$dev$devno1 \$dev$devno2
+		udevadm settle
+		if [ -n "$subsize" ]; then
+			eval mdadm -CR sub$mddevno"_" -l 1 -n 2 /dev/md/con$mddevno -z $subsize
+		fi
+		if [ -z "$onearray" ]; then
+			eval mdadm -CR sub$mddevno -l 1 -n 2 /dev/md/con$mddevno
+		fi
+		;;
+	* )
+		# native metadata: a single array, optionally size-limited
+		if [ -n "$subsize" ]; then
+			sizepar="-z $subsize"
+		fi
+		eval mdadm -CR arr$mddevno -e $platform -l 1 -n 2 \$dev$devno1 \$dev$devno2 $sizepar
+		unset sizepar
+		;;
+	esac
+}
+
+# get container and subarray using given device from mdstat
+# sets global variables c and v
+getarray(){
+ local devname=`basename $1`
+ local platformtype=`grep -A 1 $devname /proc/mdstat | awk '/active/ {getline; print $4 }' | awk -F ":" 'END {print $1}'`
+ c=`grep "inactive.*$devname" /proc/mdstat | awk -F " " '{print $1}'`
+ v=`grep " active.*$devname" /proc/mdstat | awk -F " " '{print $1}'`
+ [ "$platformtype" == "external" ] || c=$v
+}
+
+# check if given device belongs to any container and subarray
+# if $2 given then only container checked
+chkarray(){
+ local devname="$1"
+ local subcheck="$2"
+ getarray $devname
+ [ -n "$c" ] || err "$devname not in any container"
+ [ -n "$subcheck" ] || [ -n "$v" ] || err " $devname not in subarray"
+}
+
+# test if two devices in the same container/subarray
+# $1 $2 - devices
+# $3 don't check subarrays, only containers
+# Reports through err; c and v end up describing the second device.
+tst(){
+	local device1=$(basename $1)
+	local device2=$(basename $2)
+	local subcheck="$3"
+
+	chkarray $device1 $subcheck
+	local x="$c"
+	local y="$v"
+	chkarray $device2 $subcheck
+
+	if [ "$c" != "$x" ]; then
+		err "$device1 and $device2 not in the same container"
+	fi
+	if [ -z "$subcheck" ] && [ "$v" != "$y" ]; then
+		err "$device1 and $device2 not in the same subarray"
+	fi
+}
+
+# same as tst, just use numbers of devices instead of names as parameters
+# i.e. "dtst 0 1" compares the devices named by $dev0 and $dev1
+dtst(){
+	local devno1="$1" devno2="$2" subcheck="$3"
+	# eval expands the $devN variables whose names are built here
+	eval tst \$dev$devno1 \$dev$devno2 $subcheck
+}
+
+# create containers/subarrays, check if created properly,
+# set global variables c$mddevno v$mddevno, usually c0=md127, v0=md126 , etc.
+setupdevs(){
+ local mddevno="$1"
+ local devno1="$2"
+ local devno2="$3"
+ local p="$4"
+ local subsize="$5"
+ local onearray="$6"
+ [ -n "$p" ] || p=$platform
+ ccv $mddevno $devno1 $devno2 $p $subsize $onearray
+ dtst $devno1 $devno2
+ eval c$mddevno=\"$c\"
+ eval v$mddevno=\"$v\"
+}
+
+# check if given spare in container
+# usage: chkspare container spare [n] (n if spare shouldn't be in container)
+# Reports a violated expectation through err.
+chkspare(){
+	local container=$(basename $1)
+	local spare=$2
+	local expected="${3:-y}"
+
+	getarray $spare
+	if [ "$expected" == "y" ]; then
+		if [ "$c" != "$container" ]; then
+			err "$spare not in container $container"
+		fi
+	else
+		if [ "$c" == "$container" ]; then
+			err "$spare in container $container"
+		fi
+	fi
+}
+
+#check if spare was moved from one container to another
+# args: from_container to_container spare [yn]
+# n when spare should remain in original container
+chksparemoved(){
+ sleep $sleeptime
+ from_container="$1"
+ to_container="$2"
+ spare="$3"
+ expected="$4"
+ [ -n "$expected" ] || expected="y"
+ notexpected="n"; [ "$expected" == "y" ] || notexpected="y"
+ chkspare $from_container $spare $notexpected
+ [ $failed -eq 1 ] || chkspare $to_container $spare $expected
+}
+
+
+# for domains defined through policy
+createconfig(){
+if [ "$1" != "a" ]; then
+{
+ domain=$1
+ metadata=$2
+ action=$3
+ while [ -n "$4" ]; do
+ echo="policy domain=$domain"
+ [ "$metadata" == "noplatform" ] || echo="$echo metadata=$metadata"
+ echo="$echo path=loop$4"
+ echo="$echo action=$action"
+ echo "$echo"
+ shift
+ done
+} >> $config
+else
+{
+ echo "DEVICES $devlist /dev/md1*"
+ mdadm -Ebs
+} > $config
+fi
+#[ "$verbose" != "yes" ] || cat $config | grep policy || true
+}
diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules
new file mode 100644
index 00000000..a32b6d2d
--- /dev/null
+++ b/udev-md-raid-arrays.rules
@@ -0,0 +1,39 @@
+# do not edit this file, it will be overwritten on update
+
+# Rules for md array devices themselves: decide whether the array is
+# ready, import its properties and create the /dev/md/* and
+# /dev/disk/by-* symlinks.
+# NOTE(review): BINDIR looks like a placeholder replaced at install time.
+
+SUBSYSTEM!="block", GOTO="md_end"
+
+# handle md arrays
+ACTION!="add|change", GOTO="md_end"
+KERNEL!="md*", GOTO="md_end"
+
+# partitions have no md/{array_state,metadata_version}, but should not
+# for that reason be ignored.
+ENV{DEVTYPE}=="partition", GOTO="md_ignore_state"
+
+# container devices have a metadata version of e.g. 'external:ddf' and
+# never leave state 'inactive'
+ATTR{md/metadata_version}=="external:[A-Za-z]*", ATTR{md/array_state}=="inactive", GOTO="md_ignore_state"
+# anything else without an array_state attribute, or whose state is
+# empty, "clear" or "inactive", is flagged SYSTEMD_READY=0 and skipped
+TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end"
+ATTR{md/array_state}=="|clear|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
+LABEL="md_ignore_state"
+
+# import the output of "mdadm --detail --export" (presumably the source
+# of the MD_* keys tested below) and create symlinks by name, by uuid
+# and under /dev/md/, for whole arrays and for their partitions
+IMPORT{program}="BINDIR/mdadm --detail --export $tempnode"
+ENV{DEVTYPE}=="disk", ENV{MD_NAME}=="?*", SYMLINK+="disk/by-id/md-name-$env{MD_NAME}", OPTIONS+="string_escape=replace"
+ENV{DEVTYPE}=="disk", ENV{MD_UUID}=="?*", SYMLINK+="disk/by-id/md-uuid-$env{MD_UUID}"
+ENV{DEVTYPE}=="disk", ENV{MD_DEVNAME}=="?*", SYMLINK+="md/$env{MD_DEVNAME}"
+ENV{DEVTYPE}=="partition", ENV{MD_NAME}=="?*", SYMLINK+="disk/by-id/md-name-$env{MD_NAME}-part%n", OPTIONS+="string_escape=replace"
+ENV{DEVTYPE}=="partition", ENV{MD_UUID}=="?*", SYMLINK+="disk/by-id/md-uuid-$env{MD_UUID}-part%n"
+# partition links: a "p" separator is inserted only when the array name ends in a digit
+ENV{DEVTYPE}=="partition", ENV{MD_DEVNAME}=="*[^0-9]", SYMLINK+="md/$env{MD_DEVNAME}%n"
+ENV{DEVTYPE}=="partition", ENV{MD_DEVNAME}=="*[0-9]", SYMLINK+="md/$env{MD_DEVNAME}p%n"
+
+# probe the contents with blkid and add by-uuid / by-label symlinks
+IMPORT{program}="/sbin/blkid -o udev -p -u noraid $tempnode"
+ENV{ID_FS_USAGE}=="filesystem|other|crypto", ENV{ID_FS_UUID_ENC}=="?*", SYMLINK+="disk/by-uuid/$env{ID_FS_UUID_ENC}"
+ENV{ID_FS_USAGE}=="filesystem|other", ENV{ID_FS_LABEL_ENC}=="?*", SYMLINK+="disk/by-label/$env{ID_FS_LABEL_ENC}"
+
+# arrays whose level matches raid[1-9]* pull in mdmonitor.service
+ENV{MD_LEVEL}=="raid[1-9]*", ENV{SYSTEMD_WANTS}+="mdmonitor.service"
+
+# Tell systemd to run mdmon for our container, if we need it.
+ENV{MD_LEVEL}=="raid[1-9]*", ENV{MD_CONTAINER}=="?*", PROGRAM="/bin/readlink $env{MD_CONTAINER}", ENV{MD_MON_THIS}="%c"
+ENV{MD_MON_THIS}=="?*", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@%c.service"
+
+LABEL="md_end"
diff --git a/udev-md-raid-assembly.rules b/udev-md-raid-assembly.rules
new file mode 100644
index 00000000..4d27d630
--- /dev/null
+++ b/udev-md-raid-assembly.rules
@@ -0,0 +1,35 @@
+# do not edit this file, it will be overwritten on update
+
+# Rules for block devices that may be components of an md array: hand
+# them to "mdadm --incremental" when they appear and to "mdadm -If"
+# when they are removed.
+# NOTE(review): BINDIR looks like a placeholder replaced at install time.
+
+# Don't process any events if anaconda is running as anaconda brings up
+# raid devices manually
+ENV{ANACONDA}=="?*", GOTO="md_inc_end"
+# assemble md arrays
+
+SUBSYSTEM!="block", GOTO="md_inc_end"
+
+# handle potential components of arrays (the ones supported by md)
+ENV{ID_FS_TYPE}=="linux_raid_member", GOTO="md_inc"
+
+# "noiswmd" on kernel command line stops mdadm from handling
+# "isw" (aka IMSM - Intel RAID).
+# "nodmraid" on kernel command line stops mdadm from handling
+# "isw" or "ddf".
+IMPORT{cmdline}="noiswmd"
+IMPORT{cmdline}="nodmraid"
+
+# order matters: nodmraid is tested before either type is accepted,
+# noiswmd only after ddf has been accepted
+ENV{nodmraid}=="?*", GOTO="md_inc_end"
+ENV{ID_FS_TYPE}=="ddf_raid_member", GOTO="md_inc"
+ENV{noiswmd}=="?*", GOTO="md_inc_end"
+ENV{ID_FS_TYPE}=="isw_raid_member", GOTO="md_inc"
+GOTO="md_inc_end"
+
+LABEL="md_inc"
+
+# remember you can limit what gets auto/incrementally assembled by
+# mdadm.conf(5)'s 'AUTO' and selectively whitelist using 'ARRAY'
+# add/change: run mdadm --incremental and import what it exports; when
+# MD_STARTED contains "unsafe" and MD_FOREIGN is "no", also request the
+# mdadm-last-resort timer for that array.
+# remove: run "mdadm -If", passing --path when ID_PATH is set.
+ACTION=="add|change", IMPORT{program}="BINDIR/mdadm --incremental --export $tempnode --offroot ${DEVLINKS}"
+ACTION=="add|change", ENV{MD_STARTED}=="*unsafe*", ENV{MD_FOREIGN}=="no", ENV{SYSTEMD_WANTS}+="mdadm-last-resort@$env{MD_DEVICE}.timer"
+ACTION=="remove", ENV{ID_PATH}=="?*", RUN+="BINDIR/mdadm -If $name --path $env{ID_PATH}"
+ACTION=="remove", ENV{ID_PATH}!="?*", RUN+="BINDIR/mdadm -If $name"
+
+LABEL="md_inc_end"
diff --git a/util.c b/util.c
new file mode 100644
index 00000000..37c6e0d3
--- /dev/null
+++ b/util.c
@@ -0,0 +1,1975 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include "md_p.h"
+#include <sys/socket.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <sys/un.h>
+#include <sys/resource.h>
+#include <sys/vfs.h>
+#include <linux/magic.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <signal.h>
+
+/*
+ * following taken from linux/blkpg.h because they aren't
+ * anywhere else and it isn't safe to #include linux/ * stuff.
+ */
+
+#define BLKPG _IO(0x12,105)
+
+/* The argument structure passed to the BLKPG ioctl */
+struct blkpg_ioctl_arg {
+ /* subfunction to perform: one of the BLKPG_*_PARTITION values below */
+ int op;
+ /* always set to 0 by the callers in this file */
+ int flags;
+ /* size in bytes of the object 'data' points to */
+ int datalen;
+ /* payload; the callers in this file pass a struct blkpg_partition */
+ void *data;
+};
+
+/* The subfunctions (for the op field) */
+#define BLKPG_ADD_PARTITION 1
+#define BLKPG_DEL_PARTITION 2
+
+/* Sizes of name fields. Unused at present. */
+#define BLKPG_DEVNAMELTH 64
+#define BLKPG_VOLNAMELTH 64
+
+/* The data structure for ADD_PARTITION and DEL_PARTITION */
+struct blkpg_partition {
+ long long start; /* starting offset in bytes */
+ long long length; /* length in bytes */
+ int pno; /* partition number */
+ char devname[BLKPG_DEVNAMELTH]; /* partition name, like sda5 or c0d1p2,
+ to be used in kernel messages */
+ char volname[BLKPG_VOLNAMELTH]; /* volume label */
+};
+
+#include "part.h"
+
+/* Force a compilation error if condition is true */
+#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
+
+/* Force a compilation error if condition is true, but also produce a
+ result (of value 0 and type size_t), so the expression can be used
+ e.g. in a structure initializer (or where-ever else comma expressions
+ aren't permitted). */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+
+/*
+ * Parse a 128 bit uuid into 4 integers.
+ * Format is 32 hex nibbles, optionally interspersed with any of the
+ * separator characters ':', '.', ' ' and '-'.
+ * The first 8 digits fill uuid[0] (most significant nibble first), the
+ * next 8 fill uuid[1], and so on.
+ * If not exactly 32 hex digits are found, or any other character is
+ * seen, return 0 (uuid[] then holds whatever was parsed so far),
+ * else return 1.
+ */
+int parse_uuid(char *str, int uuid[4])
+{
+	int hit = 0; /* number of Hex digIT */
+	int i;
+	char c;
+
+	for (i = 0; i < 4; i++)
+		uuid[i] = 0;
+
+	while ((c = *str++) != 0) {
+		unsigned int n;
+
+		if (c >= '0' && c <= '9')
+			n = c - '0';
+		else if (c >= 'a' && c <= 'f')
+			n = 10 + c - 'a';
+		else if (c >= 'A' && c <= 'F')
+			n = 10 + c - 'A';
+		else if (strchr(":. -", c))
+			continue;
+		else
+			return 0;
+
+		if (hit < 32) {
+			/* shift in unsigned arithmetic: shifting a signed
+			 * int into its sign bit is undefined behaviour
+			 */
+			unsigned int w = (unsigned int)uuid[hit/8];
+
+			uuid[hit/8] = (int)((w << 4) | n);
+		}
+		hit++;
+	}
+	return hit == 32;
+}
+
+/*
+ * Get the md version number.
+ * We use the RAID_VERSION ioctl if it is supported.
+ * If not, but we have a block device with major MD_MAJOR, we assume
+ * 0.36.0
+ *
+ * The version is returned as a single number:
+ *   major*10000 + minor*100 + patchlevel
+ * -1 is returned if fd is not a block device, if the ioctl failed with
+ * EACCES, or if the device is not an md device at all.
+ */
+
+int md_get_version(int fd)
+{
+	struct stat stb;
+	mdu_version_t vers;
+
+	if (fstat(fd, &stb) < 0 || (stb.st_mode & S_IFMT) != S_IFBLK)
+		return -1;
+
+	if (ioctl(fd, RAID_VERSION, &vers) == 0)
+		return vers.major * 10000 + vers.minor * 100 + vers.patchlevel;
+
+	/* ioctl not available: only guess when we were not simply refused */
+	if (errno != EACCES && major(stb.st_rdev) == MD_MAJOR)
+		return 3600;
+	return -1;
+}
+
+/*
+ * Return the running kernel's version, taken from uname()'s release
+ * string "A.B.C...", encoded as A*1000000 + B*1000 + C.
+ * Components that are missing count as 0.  Returns -1 if uname() fails.
+ */
+int get_linux_version()
+{
+	struct utsname name;
+	int part[3] = { 0, 0, 0 };
+	char *cp;
+	int i;
+
+	if (uname(&name) < 0)
+		return -1;
+
+	cp = name.release;
+	part[0] = strtoul(cp, &cp, 10);
+	/* each further component must be introduced by a '.' */
+	for (i = 1; i < 3 && *cp == '.'; i++)
+		part[i] = strtoul(cp + 1, &cp, 10);
+
+	return (part[0] * 1000000) + (part[1] * 1000) + part[2];
+}
+
+#ifndef MDASSEMBLE
+/*
+ * Extract the version number from an mdadm version string.
+ * The number is looked for after the first "- v" in 'version' (the
+ * global 'Version' when 'version' is NULL) and must have the form
+ * A.B or A.B.C followed by a space or a '-'.
+ * Returns A*1000000 + B*1000 + C, or -1 if the string does not match.
+ */
+int mdadm_version(char *version)
+{
+	int a, b, c = 0;
+	char *cp;
+
+	if (version == NULL)
+		version = Version;
+
+	cp = strchr(version, '-');
+	if (cp == NULL || cp[1] != ' ' || cp[2] != 'v')
+		return -1;
+	cp += 3;
+
+	a = strtoul(cp, &cp, 10);
+	if (*cp != '.')
+		return -1;
+	b = strtoul(cp + 1, &cp, 10);
+	/* the third component is optional */
+	if (*cp == '.')
+		c = strtoul(cp + 1, &cp, 10);
+
+	if (*cp != ' ' && *cp != '-')
+		return -1;
+	return (a * 1000000) + (b * 1000) + c;
+}
+
+unsigned long long parse_size(char *size)
+{
+	/* parse 'size' which should be a positive number optionally
+	 * followed by 'K', 'M', 'G' or 's' (sectors).
+	 * Without a suffix, K is assumed.
+	 * Number returned is in sectors (half-K)
+	 * INVALID_SECTORS returned on error: a number that is not
+	 * positive, or anything left over after the suffix.
+	 */
+	char *end;
+	long long sectors = strtoll(size, &end, 10);
+	long long scale;
+
+	if (sectors <= 0)
+		return INVALID_SECTORS;
+
+	switch (*end) {
+	case 'K':
+		end++;
+		scale = 2;
+		break;
+	case 'M':
+		end++;
+		scale = 1024 * 2;
+		break;
+	case 'G':
+		end++;
+		scale = 1024 * 1024 * 2;
+		break;
+	case 's': /* sectors */
+		end++;
+		scale = 1;
+		break;
+	default:
+		/* no suffix (or junk, rejected just below): K assumed */
+		scale = 2;
+		break;
+	}
+	sectors *= scale;
+
+	if (*end)
+		return INVALID_SECTORS;
+	return sectors;
+}
+
+/*
+ * Parse the layout string for raid10: one of the letters 'n', 'f' or
+ * 'o' followed by a decimal number of copies between 1 and 200.
+ * Returns the encoded layout:
+ *   'n': 256 + copies
+ *   'f': 1 + (copies << 8)
+ *   'o': 0x10000 + (copies << 8) + 1
+ * or -1 if the string is not of that form.
+ */
+int parse_layout_10(char *layout)
+{
+	char kind = layout[0];
+	char *end;
+	int copies;
+
+	if (kind != 'n' && kind != 'f' && kind != 'o')
+		return -1;
+
+	copies = strtoul(layout + 1, &end, 10);
+	if (copies < 1 || copies > 200 || *end != '\0')
+		return -1;
+
+	switch (kind) {
+	case 'n':
+		return 256 + copies;
+	case 'o':
+		return 0x10000 + (copies << 8) + 1;
+	default: /* 'f' */
+		return 1 + (copies << 8);
+	}
+}
+
+/*
+ * Parse the layout string for 'faulty': a mode name, looked up in
+ * 'faultylayout', optionally followed by a decimal number.
+ * Returns the mode or-ed with the number shifted left by ModeShift,
+ * or -1 if the mode name is not known.
+ */
+int parse_layout_faulty(char *layout)
+{
+	/* length of the leading part that contains no digit */
+	int ln = strcspn(layout, "0123456789");
+	char *m = xstrdup(layout);
+	int mode;
+
+	m[ln] = 0;
+	mode = map_name(faultylayout, m);
+	/* the copy was only needed for the lookup; it used to be leaked */
+	free(m);
+	if (mode == UnSet)
+		return -1;
+
+	return mode | (atoi(layout+ln) << ModeShift);
+}
+
+/*
+ * Parse 'num' as a non-negative decimal number.
+ * Returns the number, or -1 if the string is empty, has trailing
+ * characters after the number, or the number is negative.
+ */
+long parse_num(char *num)
+{
+	char *end;
+	long value = strtol(num, &end, 10);
+
+	if (num[0] == '\0' || *end != '\0' || value < 0)
+		return -1;
+	return value;
+}
+#endif
+
+void remove_partitions(int fd)
+{
+	/* remove partitions from this block devices.
+	 * This is used for components added to an array
+	 *
+	 * A BLKPG_DEL_PARTITION request is issued for each partition
+	 * number from 0 to 15; the results are deliberately ignored.
+	 */
+#ifdef BLKPG_DEL_PARTITION
+	struct blkpg_partition part;
+	struct blkpg_ioctl_arg req;
+	int pno;
+
+	memset(&part, 0, sizeof(part));
+	req.op = BLKPG_DEL_PARTITION;
+	req.flags = 0;
+	req.datalen = sizeof(part);
+	req.data = (void*)&part;
+
+	for (pno = 0; pno < 16; pno++) {
+		part.pno = pno;
+		ioctl(fd, BLKPG, &req);
+	}
+#endif
+}
+
+int test_partition(int fd)
+{
+	/* Check if fd is a whole-disk or a partition.
+	 * BLKPG will return EINVAL on a partition, and BLKPG_DEL_PARTITION
+	 * will return ENXIO on an invalid partition number.
+	 *
+	 * Returns 1 if fd looks like a partition, 0 if not.
+	 */
+	struct blkpg_partition part;
+	struct blkpg_ioctl_arg req;
+
+	memset(&part, 0, sizeof(part));
+	/* a partition number that cannot exist */
+	part.pno = 1<<30;
+
+	req.op = BLKPG_DEL_PARTITION;
+	req.flags = 0;
+	req.datalen = sizeof(part);
+	req.data = (void*)&part;
+
+	/* Success is very unlikely, but means "not a partition", as do
+	 * ENXIO and ENOTTY.
+	 */
+	if (ioctl(fd, BLKPG, &req) == 0 ||
+	    errno == ENXIO || errno == ENOTTY)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Like test_partition(), but for a device given by number.
+ * The device is opened through dev_open() using its "major:minor"
+ * name.  Returns -1 if it cannot be opened, otherwise the result of
+ * test_partition().
+ */
+int test_partition_from_id(dev_t id)
+{
+	/* room for two 32-bit unsigned numbers, ':' and the terminator */
+	char buf[32];
+	int fd, rv;
+
+	/* major()/minor() are unsigned; bound the write to the buffer */
+	snprintf(buf, sizeof(buf), "%u:%u",
+		 (unsigned int)major(id), (unsigned int)minor(id));
+	fd = dev_open(buf, O_RDONLY);
+	if (fd < 0)
+		return -1;
+	rv = test_partition(fd);
+	close(fd);
+	return rv;
+}
+
+/*
+ * Decide whether the devices flagged in avail[0..raid_disks-1] are
+ * enough to run an array of the given level and layout.
+ * 'clean' permits the degraded case for levels 4, 5 and 6.
+ * Returns 1 if there are enough devices, 0 if not (or if the level is
+ * not one handled here).
+ */
+int enough(int level, int raid_disks, int layout, int clean, char *avail)
+{
+	int near_copies, copies, first;
+	int avail_disks = 0;
+	int i;
+
+	for (i = 0; i < raid_disks; i++)
+		if (avail[i])
+			avail_disks++;
+
+	switch (level) {
+	case 10:
+		/* This is the tricky one - we need to check
+		 * which actual disks are present.
+		 */
+		near_copies = layout & 255;
+		copies = near_copies * ((layout >> 8) & 255);
+		first = 0;
+		do {
+			/* at least one of the 'copies' devices starting
+			 * from 'first' must be present
+			 */
+			int found = 0;
+			int slot = first;
+			int n;
+
+			for (n = 0; n < copies; n++) {
+				if (avail[slot])
+					found = 1;
+				slot = (slot + 1) % raid_disks;
+			}
+			if (!found)
+				return 0;
+			first = (first + near_copies) % raid_disks;
+		} while (first != 0);
+		return 1;
+
+	case LEVEL_MULTIPATH:
+	case 1:
+		/* any single device will do */
+		return avail_disks >= 1;
+
+	case LEVEL_LINEAR:
+	case 0:
+		/* no redundancy: every device is needed */
+		return avail_disks == raid_disks;
+
+	case 4:
+	case 5:
+		/* one device may be missing, but only if clean */
+		return avail_disks >= (clean ? raid_disks - 1 : raid_disks);
+
+	case 6:
+		/* two devices may be missing, but only if clean */
+		return avail_disks >= (clean ? raid_disks - 2 : raid_disks);
+
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Ask the running array behind 'fd' which of its devices are in sync
+ * and report, via enough(), whether that is sufficient.
+ * Returns 0 if the array information cannot be read or the array
+ * reports no raid disks.
+ */
+int enough_fd(int fd)
+{
+	struct mdu_array_info_s array;
+	struct mdu_disk_info_s disk;
+	char *avail;
+	int result;
+	int i;
+
+	if (ioctl(fd, GET_ARRAY_INFO, &array) != 0 ||
+	    array.raid_disks <= 0)
+		return 0;
+
+	avail = xcalloc(array.raid_disks, 1);
+	/* array.nr_disks counts down the devices still to be found */
+	for (i = 0; i < MAX_DISKS && array.nr_disks > 0; i++) {
+		disk.number = i;
+		if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+			continue;
+		if (disk.major == 0 && disk.minor == 0)
+			continue;
+		array.nr_disks--;
+
+		/* only in-sync devices in a valid slot count */
+		if ((disk.state & (1<<MD_DISK_SYNC)) &&
+		    disk.raid_disk >= 0 &&
+		    disk.raid_disk < array.raid_disks)
+			avail[disk.raid_disk] = 1;
+	}
+	/* This is used on an active array, so assume it is clean */
+	result = enough(array.level, array.raid_disks, array.layout,
+			1, avail);
+	free(avail);
+	return result;
+}
+
+const int uuid_zero[4] = { 0, 0, 0, 0 };
+
+/*
+ * Compare two uuids, each held in 4 ints.
+ * With 'swapuuid' set, the bytes of each int of 'b' are compared in
+ * reverse order with those of 'a': parsed uuids are host-endian while
+ * uuids from some superblocks are big-endian.
+ * Returns 1 if the uuids match, 0 otherwise.
+ */
+int same_uuid(int a[4], int b[4], int swapuuid)
+{
+	const unsigned char *ac = (const unsigned char *)a;
+	const unsigned char *bc = (const unsigned char *)b;
+	int word, byte;
+
+	if (!swapuuid)
+		return a[0] == b[0] && a[1] == b[1] &&
+		       a[2] == b[2] && a[3] == b[3];
+
+	for (word = 0; word < 16; word += 4)
+		for (byte = 0; byte < 4; byte++)
+			if (ac[word + byte] != bc[word + 3 - byte])
+				return 0;
+	return 1;
+}
+
+/*
+ * Copy the 16 byte uuid 'b' to 'a'.
+ * With 'swapuuid' set, the bytes of each group of 4 are reversed on
+ * the way: parsed uuids are host-endian while uuids from some
+ * superblocks are big-endian.
+ */
+void copy_uuid(void *a, int b[4], int swapuuid)
+{
+	unsigned char *dst = (unsigned char *)a;
+	unsigned char *src = (unsigned char *)b;
+	int i;
+
+	if (!swapuuid) {
+		memcpy(a, b, 16);
+		return;
+	}
+	/* byte i comes from the mirrored position within its group of 4 */
+	for (i = 0; i < 16; i++)
+		dst[i] = src[(i & ~3) + 3 - (i & 3)];
+}
+
+/*
+ * Format uuid 'id' into 'buf' as "UUID-" followed by four groups of
+ * 8 hex digits separated by 'sep'.  'swap' is passed on to copy_uuid().
+ * 'buf' must have room for 41 bytes (5 + 4*8 + 3 separators + NUL).
+ * Returns 'buf'.
+ */
+char *__fname_from_uuid(int id[4], int swap, char *buf, char sep)
+{
+	unsigned char bytes[16];
+	char *out;
+	int word, k;
+
+	strcpy(buf, "UUID-");
+	out = buf + strlen(buf);
+	copy_uuid(bytes, id, swap);
+
+	for (word = 0; word < 4; word++) {
+		if (word > 0)
+			*out++ = sep;
+		/* the bytes of each group are printed last-first */
+		for (k = 3; k >= 0; k--) {
+			sprintf(out, "%02x", bytes[4*word + k]);
+			out += 2;
+		}
+	}
+	return buf;
+}
+
+/*
+ * Format the uuid in 'info' into 'buf' (see __fname_from_uuid()),
+ * using the byte order appropriate for the metadata handler of 'st'.
+ */
+char *fname_from_uuid(struct supertype *st, struct mdinfo *info, char *buf, char sep)
+{
+	int swap;
+
+	/* dirty hack to work around an issue with super1 superblocks...
+	 * super1 superblocks need swapuuid set in order for assembly to
+	 * work, but can't have it set if we want this printout to match
+	 * all the other uuid printouts in super1.c, so we force swapuuid
+	 * to 1 to make our printout match the rest of super1
+	 */
+	if (st->ss == &super1)
+		swap = 1;
+	else
+		swap = st->ss->swapuuid;
+
+	return __fname_from_uuid(info->uuid, swap, buf, sep);
+}
+
+#ifndef MDASSEMBLE
+int check_ext2(int fd, char *name)
+{
+	/*
+	 * Check for an ext2fs file system.
+	 * Superblock is always 1K at 1K offset
+	 *
+	 * s_magic is le16 at 56 == 0xEF53
+	 * report mtime - le32 at 44
+	 * blocks - le32 at 4
+	 * logblksize - le32 at 24
+	 *
+	 * Returns 1, after printing a description, if the magic number
+	 * is found; 0 if it is not or the superblock cannot be read.
+	 */
+	unsigned char sb[1024];
+	time_t mtime;
+	unsigned long long size;
+	unsigned int bsize;
+
+	if (lseek(fd, 1024, SEEK_SET) != 1024)
+		return 0;
+	if (read(fd, sb, 1024) != 1024)
+		return 0;
+	if (sb[56] != 0x53 || sb[57] != 0xef)
+		return 0;
+
+	/* Assemble the little-endian fields in unsigned arithmetic:
+	 * done in 'int', a top byte >= 0x80 overflows and the value is
+	 * then sign-extended when widened.
+	 */
+	mtime = (unsigned int)sb[44] | ((unsigned int)sb[45] << 8) |
+		((unsigned int)sb[46] << 16) | ((unsigned int)sb[47] << 24);
+	bsize = (unsigned int)sb[24] | ((unsigned int)sb[25] << 8) |
+		((unsigned int)sb[26] << 16) | ((unsigned int)sb[27] << 24);
+	size = (unsigned int)sb[4] | ((unsigned int)sb[5] << 8) |
+		((unsigned int)sb[6] << 16) | ((unsigned int)sb[7] << 24);
+	/* blocks << logblksize gives the size in K.  An exponent this
+	 * large can only come from a damaged superblock; leave the block
+	 * count as it is rather than perform an undefined shift.
+	 */
+	if (bsize < 32)
+		size <<= bsize;
+	pr_err("%s appears to contain an ext2fs file system\n",
+		name);
+	cont_err("size=%lluK mtime=%s", size, ctime(&mtime));
+	return 1;
+}
+
+int check_reiser(int fd, char *name)
+{
+	/*
+	 * superblock is at 64K
+	 * size is 1024;
+	 * Magic string "ReIsErFs" or "ReIsEr2Fs" at 52
+	 *
+	 * Returns 1, after printing a description, if the magic string
+	 * is found; 0 if it is not or the superblock cannot be read.
+	 */
+	unsigned char sb[1024];
+	unsigned long long size;
+
+	if (lseek(fd, 64*1024, SEEK_SET) != 64*1024)
+		return 0;
+	if (read(fd, sb, 1024) != 1024)
+		return 0;
+	if (strncmp((char*)sb+52, "ReIsErFs",8) != 0 &&
+	    strncmp((char*)sb+52, "ReIsEr2Fs",9) != 0)
+		return 0;
+	pr_err("%s appears to contain a reiserfs file system\n",name);
+	/* le32 at offset 0, assembled in unsigned arithmetic: done in
+	 * 'int', a top byte >= 0x80 overflows and the value is then
+	 * sign-extended when widened.
+	 */
+	size = (unsigned int)sb[0] | ((unsigned int)sb[1] << 8) |
+		((unsigned int)sb[2] << 16) | ((unsigned int)sb[3] << 24);
+	/* NOTE(review): the factor 4 presumably assumes 4K blocks */
+	cont_err("size = %lluK\n", size*4);
+
+	return 1;
+}
+
+int check_raid(int fd, char *name)
+{
+	/* Report whether 'fd' (described as 'name' in messages) carries
+	 * metadata recognised by guess_super().
+	 * Returns 1 after printing level, device count and creation time,
+	 * 0 if nothing was recognised or the metadata could not be loaded.
+	 */
+	struct mdinfo info;
+	time_t crtime;
+	char *level;
+	struct supertype *st = guess_super(fd);
+
+	if (!st)
+		return 0;
+	/* getinfo_super() needs a loaded superblock, so give up if the
+	 * load fails rather than reading from an empty supertype.
+	 */
+	if (st->ss->load_super(st, fd, name) != 0) {
+		free(st);
+		return 0;
+	}
+	/* Looks like a raid array .. */
+	pr_err("%s appears to be part of a raid array:\n",
+		name);
+	st->ss->getinfo_super(st, &info, NULL);
+	st->ss->free_super(st);
+	/* the supertype itself is ours and no longer needed */
+	free(st);
+	crtime = info.array.ctime;
+	level = map_num(pers, info.array.level);
+	if (!level)
+		level = "-unknown-";
+	cont_err("level=%s devices=%d ctime=%s",
+		level, info.array.raid_disks, ctime(&crtime));
+	return 1;
+}
+
+int ask(char *mesg)
+{
+	/* Print 'mesg' on stderr and read a line from stdin, up to five
+	 * times.  A reply starting with 'y'/'Y' returns 1, one starting
+	 * with 'n'/'N' returns 0, and end-of-input returns 0.  After the
+	 * first unrecognised reply the prompt gains a "(y/n) " suffix;
+	 * after five of them 'no' is assumed.
+	 */
+	char *hint = "";
+	int tries;
+
+	for (tries = 5; tries > 0; tries--) {
+		char reply[100];
+
+		fprintf(stderr, "%s%s", mesg, hint);
+		fflush(stderr);
+		if (!fgets(reply, sizeof(reply), stdin))
+			return 0;
+		switch (reply[0]) {
+		case 'y':
+		case 'Y':
+			return 1;
+		case 'n':
+		case 'N':
+			return 0;
+		}
+		hint = "(y/n) ";
+	}
+	pr_err("assuming 'no'\n");
+	return 0;
+}
+#endif /* MDASSEMBLE */
+
+int is_standard(char *dev, int *nump)
+{
+	/* tests if dev is a "standard" md dev name.
+	 * i.e if the last component is "/dNN", "/md_dNN" or "/mdNN",
+	 * or is "/NN" directly after a component ending in "md",
+	 * where NN is a non-empty string of digits.
+	 * On a match *nump (when nump is not NULL) receives NN.
+	 * Returns 1 if a partitionable standard,
+	 * -1 if non-partitionable,
+	 * 0 if not a standard name.
+	 */
+	char *d = strrchr(dev, '/');
+	int type;
+	int num;
+
+	if (!d)
+		return 0;
+	if (strncmp(d, "/d", 2) == 0) {
+		d += 2;			/* /dev/md/dN{pM} */
+		type = 1;
+	} else if (strncmp(d, "/md_d", 5) == 0) {
+		d += 5;			/* /dev/md_dN{pM} */
+		type = 1;
+	} else if (strncmp(d, "/md", 3) == 0) {
+		d += 3;			/* /dev/mdN */
+		type = -1;
+	} else if (d-dev > 3 && strncmp(d-2, "md/", 3) == 0) {
+		d += 1;			/* /dev/md/N */
+		type = -1;
+	} else
+		return 0;
+	if (!*d)
+		return 0;
+	num = atoi(d);
+	/* <ctype.h> functions need an unsigned char value; a plain char
+	 * may be negative for bytes above 0x7f.
+	 */
+	while (isdigit((unsigned char)*d))
+		d++;
+	/* anything left over means the suffix was not purely digits */
+	if (*d)
+		return 0;
+	if (nump)
+		*nump = num;
+
+	return type;
+}
+
+unsigned long calc_csum(void *super, int bytes)
+{
+	/* Sum the first bytes/4 unsigned ints of 'super' into a 64-bit
+	 * accumulator, then fold the upper 32 bits into the lower 32.
+	 * Trailing bytes that do not fill a whole word are ignored.
+	 */
+	const unsigned int *word = (const unsigned int *)super;
+	unsigned long long total = 0;
+	unsigned int folded;
+	int remaining;
+
+	for (remaining = bytes / 4; remaining > 0; remaining--)
+		total += *word++;
+	folded = (total & 0xffffffff) + (total >> 32);
+#ifdef __alpha__
+/* The in-kernel checksum calculation is always 16bit on
+ * the alpha, though it is 32 bit on i386...
+ * I wonder what it is elsewhere... (it uses an API in
+ * a way that it shouldn't).
+ */
+	folded = (folded & 0xffff) + (folded >> 16);
+	folded = (folded & 0xffff) + (folded >> 16);
+#endif
+	return folded;
+}
+
+#ifndef MDASSEMBLE
+char *human_size(long long bytes)
+{
+	static char buf[30];
+
+	/* Describe 'bytes' in both binary and decimal units, as a
+	 * parenthesised string with a leading space, in a static buffer.
+	 *
+	 * We convert bytes to either centi-M{ega,ebi}bytes or
+	 * centi-G{iga,ibi}bytes, with appropriate rounding,
+	 * and then print 1/100th of those as a decimal.
+	 * We allow up to 2048Megabytes before converting to
+	 * gigabytes, as that shows more precision and isn't
+	 * too large a number.
+	 * Terabytes are not yet handled.
+	 * Anything below 5000K yields an empty string.
+	 */
+	long bin, dec;
+
+	if (bytes < 5000*1024) {
+		buf[0] = 0;
+		return buf;
+	}
+	if (bytes < 2*1024LL*1024LL*1024LL) {
+		/* divide by half a hundredth, then round to nearest */
+		bin = (bytes / ((1LL<<20) / 200LL) + 1) / 2;
+		dec = (bytes / (1000000LL / 200LL) + 1) / 2;
+		snprintf(buf, sizeof(buf), " (%ld.%02ld MiB %ld.%02ld MB)",
+			 bin/100, bin % 100,
+			 dec/100, dec % 100);
+		return buf;
+	}
+	bin = (bytes / ((1LL<<30) / 200LL) + 1) / 2;
+	dec = (bytes / (1000000000LL / 200LL) + 1) / 2;
+	snprintf(buf, sizeof(buf), " (%ld.%02ld GiB %ld.%02ld GB)",
+		 bin/100, bin % 100,
+		 dec/100, dec % 100);
+	return buf;
+}
+
+char *human_size_brief(long long bytes, int prefix)
+{
+	static char buf[30];
+
+	/* Short form of the size description: one number and a unit,
+	 * no spaces or parentheses, in a static buffer.
+	 *
+	 * We convert bytes to either centi-M{ega,ebi}bytes or
+	 * centi-G{iga,ibi}bytes, with appropriate rounding,
+	 * and then print 1/100th of those as a decimal.
+	 * We allow up to 2048Megabytes before converting to
+	 * gigabytes, as that shows more precision and isn't
+	 * too large a number.
+	 * Terabytes are not yet handled.
+	 *
+	 * If prefix == IEC, we mean prefixes like kibi,mebi,gibi etc.
+	 * If prefix == JEDEC, we mean prefixes like kilo,mega,giga etc.
+	 * Any other prefix, or a size below 5000K, gives an empty string.
+	 */
+	int below_2gib = bytes < 2*1024LL*1024LL*1024LL;
+	long centi;
+
+	buf[0] = 0;
+	if (bytes < 5000*1024)
+		return buf;
+
+	if (prefix == IEC) {
+		if (below_2gib) {
+			centi = (bytes / ((1LL<<20) / 200LL) + 1) / 2;
+			snprintf(buf, sizeof(buf), "%ld.%02ldMiB",
+				 centi/100, centi % 100);
+		} else {
+			centi = (bytes / ((1LL<<30) / 200LL) + 1) / 2;
+			snprintf(buf, sizeof(buf), "%ld.%02ldGiB",
+				 centi/100, centi % 100);
+		}
+	} else if (prefix == JEDEC) {
+		if (below_2gib) {
+			centi = (bytes / (1000000LL / 200LL) + 1) / 2;
+			snprintf(buf, sizeof(buf), "%ld.%02ldMB",
+				 centi/100, centi % 100);
+		} else {
+			centi = (bytes / (1000000000LL / 200LL) + 1) / 2;
+			snprintf(buf, sizeof(buf), "%ld.%02ldGB",
+				 centi/100, centi % 100);
+		}
+	}
+
+	return buf;
+}
+
+void print_r10_layout(int layout)
+{
+	/* Print a raid10 layout word on stdout.  The low byte is the
+	 * "near" count and the next byte the "far" count; bit 0x10000
+	 * makes the second count print as "offset" instead of "far".
+	 * Counts equal to 1 are not printed; when both are 1 the text
+	 * "NO REDUNDANCY" is printed instead.
+	 */
+	int near_copies = layout & 255;
+	int far_copies = (layout >> 8) & 255;
+	int use_offset = layout & 0x10000;
+	char *sep = "";
+
+	if (near_copies != 1) {
+		printf("%s near=%d", sep, near_copies);
+		sep = ",";
+	}
+	if (far_copies != 1)
+		printf("%s %s=%d", sep,
+		       use_offset ? "offset" : "far", far_copies);
+	if (near_copies * far_copies == 1)
+		printf("NO REDUNDANCY");
+}
+#endif
+
+/* Compute an array size from a per-device size.
+ * For level 1 the device size is returned unchanged.  Otherwise the
+ * device size is first masked down using (chunksize>>9)-1 and then
+ * multiplied by get_data_disks() for the level/layout/raid_disks.
+ */
+unsigned long long calc_array_size(int level, int raid_disks, int layout,
+				   int chunksize, unsigned long long devsize)
+{
+	unsigned long long chunk_mask;
+
+	if (level == 1)
+		return devsize;
+
+	chunk_mask = (unsigned long long)((chunksize>>9)-1);
+	devsize &= ~chunk_mask;
+	return get_data_disks(level, layout, raid_disks) * devsize;
+}
+
+/* Number of devices' worth of data for the given level:
+ *   0       - raid_disks
+ *   1       - 1
+ *   4, 5    - raid_disks - 1
+ *   6       - raid_disks - 2
+ *   10      - raid_disks divided by the low byte of 'layout', then
+ *             by the next byte of 'layout'
+ * Any other level gives 0.
+ */
+int get_data_disks(int level, int layout, int raid_disks)
+{
+	switch (level) {
+	case 0:
+		return raid_disks;
+	case 1:
+		return 1;
+	case 4:
+	case 5:
+		return raid_disks - 1;
+	case 6:
+		return raid_disks - 2;
+	case 10:
+		return raid_disks / (layout & 255) / ((layout>>8)&255);
+	default:
+		return 0;
+	}
+}
+
+int devnm2devid(char *devnm)
+{
+	/* Map a device name to a device number.
+	 * First look in /sys/block/$DEVNM/dev for %d:%d
+	 * If that fails, try parsing out a number from a name of the
+	 * form md_dNN or mdNN.
+	 * Returns the makedev() value, or 0 if neither method works.
+	 */
+	char path[100];
+	char *ep;
+	int fd = -1;
+	int len;
+	int mjr, mnr;
+
+	/* a name too long for 'path' cannot be looked up in sysfs */
+	len = snprintf(path, sizeof(path), "/sys/block/%s/dev", devnm);
+	if (len > 0 && len < (int)sizeof(path))
+		fd = open(path, O_RDONLY);
+	if (fd >= 0) {
+		char buf[20];
+		/* leave room for the terminator added below */
+		int n = read(fd, buf, sizeof(buf) - 1);
+		close(fd);
+		if (n > 0) {
+			buf[n] = 0;
+			if (sscanf(buf, "%d:%d\n", &mjr, &mnr) == 2)
+				return makedev(mjr, mnr);
+		}
+	}
+	if (strncmp(devnm, "md_d", 4) == 0 &&
+	    isdigit((unsigned char)devnm[4])) {
+		mnr = strtoul(devnm+4, &ep, 10);
+		/* whole suffix must be digits and fit in an int */
+		if (mnr >= 0 && *ep == 0)
+			return makedev(get_mdp_major(), mnr << MdpMinorShift);
+	}
+
+	if (strncmp(devnm, "md", 2) == 0 &&
+	    isdigit((unsigned char)devnm[2])) {
+		mnr = strtoul(devnm+2, &ep, 10);
+		if (mnr >= 0 && *ep == 0)
+			return makedev(MD_MAJOR, mnr);
+	}
+
+	return 0;
+}
+
+#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
+char *get_md_name(char *devnm)
+{
+ /* Find a path in /dev for the md device named 'devnm', or make
+ * a temporary node /dev/.tmp.<devnm> for it.
+ * Candidates are tried in the order coded below; one is accepted
+ * only if it is a block device whose st_rdev equals the number
+ * devnm2devid() gave for 'devnm'.
+ * Returns NULL (0) if the name cannot be mapped to a device number
+ * or no usable node can be found or created. Otherwise the result
+ * is either the static buffer 'devname' (overwritten by the next
+ * call) or whatever map_dev() returned.
+ */
+
+ static char devname[50];
+ struct stat stb;
+ dev_t rdev = devnm2devid(devnm);
+ char *dn;
+
+ if (rdev == 0)
+ return 0;
+ /* names starting "md_": try /dev/md/<rest of name> first */
+ if (strncmp(devnm, "md_", 3) == 0) {
+ snprintf(devname, sizeof(devname), "/dev/md/%s",
+ devnm + 3);
+ if (stat(devname, &stb) == 0
+ && (S_IFMT&stb.st_mode) == S_IFBLK
+ && (stb.st_rdev == rdev))
+ return devname;
+ }
+ /* then /dev/<devnm> itself */
+ snprintf(devname, sizeof(devname), "/dev/%s", devnm);
+ if (stat(devname, &stb) == 0
+ && (S_IFMT&stb.st_mode) == S_IFBLK
+ && (stb.st_rdev == rdev))
+ return devname;
+
+ /* then /dev/md/<devnm without its first two characters> */
+ snprintf(devname, sizeof(devname), "/dev/md/%s", devnm+2);
+ if (stat(devname, &stb) == 0
+ && (S_IFMT&stb.st_mode) == S_IFBLK
+ && (stb.st_rdev == rdev))
+ return devname;
+
+ /* then ask map_dev() for a name matching the device number */
+ dn = map_dev(major(rdev), minor(rdev), 0);
+ if (dn)
+ return dn;
+ /* last resort: create our own node; an existing one is tolerated
+ * (EEXIST) but is still verified by the stat() below
+ */
+ snprintf(devname, sizeof(devname), "/dev/.tmp.%s", devnm);
+ if (mknod(devname, S_IFBLK | 0600, rdev) == -1)
+ if (errno != EEXIST)
+ return NULL;
+
+ if (stat(devname, &stb) == 0
+ && (S_IFMT&stb.st_mode) == S_IFBLK
+ && (stb.st_rdev == rdev))
+ return devname;
+ /* the node is not the device we want: remove it again */
+ unlink(devname);
+ return NULL;
+}
+
+void put_md_name(char *name)
+{
+	/* Unlink 'name' if it starts with "/dev/.tmp.md"; any other
+	 * name is left alone.
+	 */
+	static const char tmp_prefix[] = "/dev/.tmp.md";
+
+	if (strncmp(name, tmp_prefix, sizeof(tmp_prefix) - 1) != 0)
+		return;
+	unlink(name);
+}
+#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
+
+/* Parse 'dev' as "MAJOR:MINOR", both numbers in any base strtoul()
+ * accepts with base 0.
+ * *major is always written.  *minor is written only once a number,
+ * a ':' and at least one further character have been seen.
+ * Returns 1 if the whole string matched and the minor value is not
+ * negative as an int, otherwise 0.
+ */
+int get_maj_min(char *dev, int *major, int *minor)
+{
+	char *end;
+
+	*major = strtoul(dev, &end, 0);
+	if (end <= dev || *end != ':' || end[1] == '\0')
+		return 0;
+
+	*minor = strtoul(end + 1, &end, 0);
+	return *minor >= 0 && *end == 0;
+}
+
+int dev_open(char *dev, int flags)
+{
+	/* like 'open' (with O_DIRECT added to 'flags'), but if 'dev'
+	 * matches %d:%d, create a temp block device and open that.
+	 * The temp node is tried in /dev first and then in /tmp, and is
+	 * unlinked again straight after the open attempt.
+	 * Returns the descriptor, or -1 (also when 'dev' is NULL).
+	 */
+	char node[32];
+	int mjr, mnr;
+	int fd = -1;
+
+	if (dev == NULL)
+		return -1;
+	flags |= O_DIRECT;
+
+	if (!get_maj_min(dev, &mjr, &mnr))
+		return open(dev, flags);
+
+	snprintf(node, sizeof(node), "/dev/.tmp.md.%d:%d:%d",
+		 (int)getpid(), mjr, mnr);
+	if (mknod(node, S_IFBLK|0600, makedev(mjr, mnr)) == 0) {
+		fd = open(node, flags);
+		unlink(node);
+	}
+	if (fd >= 0)
+		return fd;
+
+	/* Try /tmp as /dev appear to be read-only */
+	snprintf(node, sizeof(node), "/tmp/.tmp.md.%d:%d:%d",
+		 (int)getpid(), mjr, mnr);
+	if (mknod(node, S_IFBLK|0600, makedev(mjr, mnr)) == 0) {
+		fd = open(node, flags);
+		unlink(node);
+	}
+	return fd;
+}
+
+int open_dev_flags(char *devnm, int flags)
+{
+	/* Open the device named 'devnm': translate the name with
+	 * devnm2devid(), format it as "major:minor" and hand that to
+	 * dev_open() together with 'flags'.
+	 */
+	char devspec[20];
+	int id = devnm2devid(devnm);
+
+	sprintf(devspec, "%d:%d", major(id), minor(id));
+	return dev_open(devspec, flags);
+}
+
+int open_dev(char *devnm)
+{
+	/* read-only shorthand for open_dev_flags() */
+	int fd = open_dev_flags(devnm, O_RDONLY);
+
+	return fd;
+}
+
+int open_dev_excl(char *devnm)
+{
+	/* Open the device named 'devnm' with O_EXCL, making at most 25
+	 * attempts.
+	 * - EACCES while trying read-write: switch to read-only and try
+	 *   again at once (this uses up one attempt).
+	 * - EBUSY: sleep, then try again; the sleep starts at 1ms and is
+	 *   doubled while it is below 200ms.
+	 * - any other error: return the failed result immediately.
+	 * Returns the descriptor, or -1 once the attempts run out.
+	 */
+	char devspec[20];
+	int mode = O_RDWR;
+	int id = devnm2devid(devnm);
+	long pause_us = 1000;
+	int attempt;
+
+	sprintf(devspec, "%d:%d", major(id), minor(id));
+	for (attempt = 0; attempt < 25; attempt++) {
+		int fd = dev_open(devspec, mode|O_EXCL);
+
+		if (fd >= 0)
+			return fd;
+		if (errno == EACCES && mode == O_RDWR) {
+			mode = O_RDONLY;
+			continue;
+		}
+		if (errno != EBUSY)
+			return fd;
+		usleep(pause_us);
+		if (pause_us < 200000)
+			pause_us *= 2;
+	}
+	return -1;
+}
+
+int same_dev(char *one, char *two)
+{
+	/* True (1) only if both paths can be stat()ed, both are block
+	 * devices and both have the same device number; otherwise 0.
+	 */
+	struct stat a, b;
+
+	if (stat(one, &a) != 0 || stat(two, &b) != 0)
+		return 0;
+	if ((a.st_mode & S_IFMT) != S_IFBLK ||
+	    (b.st_mode & S_IFMT) != S_IFBLK)
+		return 0;
+	return a.st_rdev == b.st_rdev;
+}
+
+void wait_for(char *dev, int fd)
+{
+	/* Poll until the path 'dev' is a block device with the same
+	 * device number as the one open on 'fd'.  Up to 25 polls are
+	 * made, sleeping 1ms after the first and doubling the sleep
+	 * while it is below 200ms.  Returns at once if 'fd' cannot be
+	 * fstat()ed or is not a block device.
+	 */
+	struct stat want, have;
+	long pause_us = 1000;
+	int polls;
+
+	if (fstat(fd, &want) != 0)
+		return;
+	if ((want.st_mode & S_IFMT) != S_IFBLK)
+		return;
+
+	for (polls = 0; polls < 25; polls++) {
+		if (stat(dev, &have) == 0 &&
+		    (have.st_mode & S_IFMT) == S_IFBLK &&
+		    have.st_rdev == want.st_rdev)
+			return;
+		usleep(pause_us);
+		if (pause_us < 200000)
+			pause_us *= 2;
+	}
+	/* only reached when every poll failed */
+	dprintf("%s: timeout waiting for %s\n", __func__, dev);
+}
+
+/* Table of metadata handlers, terminated by a NULL entry.
+ * Code that walks it stops at the NULL and tries entries in the
+ * order listed here.
+ */
+struct superswitch *superlist[] =
+{
+ &super0, &super1,
+ &super_ddf, &super_imsm,
+ &mbr, &gpt,
+ NULL };
+
+#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
+
+/* Work out which metadata handler matches the array open on 'fd'.
+ *
+ * The version is read from sysfs, or from the GET_ARRAY_INFO ioctl
+ * if sysfs gives nothing. The resulting version text is offered to
+ * each superlist[] entry's match_metadata_desc() until one returns
+ * a supertype.
+ *
+ * On success the returned supertype has sb == NULL and its devnm and
+ * container_devnm filled in, and *subarrayp (when subarrayp is not
+ * NULL) holds an xstrdup()ed subarray name or NULL.
+ * Returns NULL if no handler matches.
+ */
+struct supertype *super_by_fd(int fd, char **subarrayp)
+{
+ mdu_array_info_t array;
+ int vers;
+ int minor;
+ struct supertype *st = NULL;
+ struct mdinfo *sra;
+ char *verstr;
+ char version[20];
+ int i;
+ char *subarray = NULL;
+ char container[32] = "";
+
+ sra = sysfs_read(fd, NULL, GET_VERSION);
+
+ if (sra) {
+ vers = sra->array.major_version;
+ minor = sra->array.minor_version;
+ verstr = sra->text_version;
+ } else {
+ /* no sysfs data: ask the kernel, treating failure as 0.0 */
+ if (ioctl(fd, GET_ARRAY_INFO, &array))
+ array.major_version = array.minor_version = 0;
+ vers = array.major_version;
+ minor = array.minor_version;
+ verstr = "";
+ }
+
+ /* a major version other than -1 is turned into "major.minor" text */
+ if (vers != -1) {
+ sprintf(version, "%d.%d", vers, minor);
+ verstr = version;
+ }
+ if (minor == -2 && is_subarray(verstr)) {
+ /* verstr is split in place at the first '/' after its first
+ * character: the part before names the container, the part
+ * after names the subarray
+ */
+ char *dev = verstr+1;
+
+ subarray = strchr(dev, '/');
+ if (subarray) {
+ *subarray++ = '\0';
+ subarray = xstrdup(subarray);
+ }
+ strcpy(container, dev);
+ if (sra)
+ sysfs_free(sra);
+ /* the handler is chosen from the container's version text */
+ sra = sysfs_read(-1, container, GET_VERSION);
+ if (sra && sra->text_version[0])
+ verstr = sra->text_version;
+ else
+ verstr = "-no-metadata-";
+ }
+
+ for (i = 0; st == NULL && superlist[i] ; i++)
+ st = superlist[i]->match_metadata_desc(verstr);
+
+ if (sra)
+ sysfs_free(sra);
+ if (st) {
+ st->sb = NULL;
+ /* NOTE(review): when subarrayp is NULL the xstrdup()ed
+ * subarray is neither handed back nor freed here
+ */
+ if (subarrayp)
+ *subarrayp = subarray;
+ strcpy(st->container_devnm, container);
+ strcpy(st->devnm, fd2devnm(fd));
+ } else
+ free(subarray);
+
+ return st;
+}
+#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
+
+int dev_size_from_id(dev_t id, unsigned long long *size)
+{
+	/* Look up the size of the device numbered 'id': open it
+	 * read-only through dev_open() using its "major:minor" text and
+	 * ask get_dev_size().
+	 * Returns 1 with *size set on success, 0 on any failure.
+	 */
+	char devspec[20];
+	int fd;
+	int ok;
+
+	sprintf(devspec, "%d:%d", major(id), minor(id));
+	fd = dev_open(devspec, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	ok = get_dev_size(fd, NULL, size) ? 1 : 0;
+	close(fd);
+	return ok;
+}
+
+struct supertype *dup_super(struct supertype *orig)
+{
+	/* Make a fresh supertype carrying over only ss, max_devs,
+	 * minor_version, ignore_hw_compat and data_offset from 'orig'.
+	 * The copy starts with no superblock (sb) and no info loaded.
+	 * A NULL 'orig' gives NULL.
+	 */
+	struct supertype *copy;
+
+	if (orig == NULL)
+		return NULL;
+
+	copy = xcalloc(1, sizeof(*copy));
+	copy->ss = orig->ss;
+	copy->max_devs = orig->max_devs;
+	copy->minor_version = orig->minor_version;
+	copy->ignore_hw_compat = orig->ignore_hw_compat;
+	copy->data_offset = orig->data_offset;
+	copy->sb = NULL;
+	copy->info = NULL;
+	return copy;
+}
+
+struct supertype *guess_super_type(int fd, enum guess_types guess_type)
+{
+ /* try each load_super to find the best match,
+ * and return the best superswitch
+ *
+ * "Best" is the handler whose loaded metadata reports the latest
+ * array.ctime; the first handler that loads wins ties.
+ * Returns a newly allocated supertype whose superblock has already
+ * been released again with free_super(), or NULL if nothing loads.
+ */
+ struct superswitch *ss;
+ struct supertype *st;
+ time_t besttime = 0;
+ int bestsuper = -1;
+ int i;
+
+ st = xcalloc(1, sizeof(*st));
+ st->container_devnm[0] = 0;
+
+ for (i = 0 ; superlist[i]; i++) {
+ int rv;
+ ss = superlist[i];
+ /* guess_array skips handlers without add_to_super,
+ * guess_partitions skips handlers that have it
+ */
+ if (guess_type == guess_array && ss->add_to_super == NULL)
+ continue;
+ if (guess_type == guess_partitions && ss->add_to_super != NULL)
+ continue;
+ /* start every attempt from a clean supertype */
+ memset(st, 0, sizeof(*st));
+ st->ignore_hw_compat = 1;
+ rv = ss->load_super(st, fd, NULL);
+ if (rv == 0) {
+ struct mdinfo info;
+ /* NOTE(review): st->ss was zeroed just above, so this
+ * presumably relies on load_super() setting it - confirm
+ */
+ st->ss->getinfo_super(st, &info, NULL);
+ if (bestsuper == -1 ||
+ besttime < info.array.ctime) {
+ bestsuper = i;
+ besttime = info.array.ctime;
+ }
+ ss->free_super(st);
+ }
+ }
+ if (bestsuper != -1) {
+ int rv;
+ /* load the winner once more so 'st' describes it */
+ memset(st, 0, sizeof(*st));
+ st->ignore_hw_compat = 1;
+ rv = superlist[bestsuper]->load_super(st, fd, NULL);
+ if (rv == 0) {
+ superlist[bestsuper]->free_super(st);
+ return st;
+ }
+ }
+ free(st);
+ return NULL;
+}
+
+/* Store the size in bytes of the file or device open on 'fd' in *sizep.
+ * A regular file reports its st_size.  Anything else is asked with the
+ * BLKGETSIZE64 ioctl where that is defined, falling back to BLKGETSIZE
+ * (a count that is shifted left by 9 to give bytes).
+ * Returns 1 on success.  On failure returns 0, leaves *sizep untouched
+ * and, if 'dname' is not NULL, reports the error using that name.
+ */
+int get_dev_size(int fd, char *dname, unsigned long long *sizep)
+{
+	unsigned long long ldsize;
+	struct stat st;
+
+	if (fstat(fd, &st) != -1 && S_ISREG(st.st_mode))
+		ldsize = (unsigned long long)st.st_size;
+	else
+#ifdef BLKGETSIZE64
+	if (ioctl(fd, BLKGETSIZE64, &ldsize) != 0)
+#endif
+	{
+		unsigned long dsize;
+		if (ioctl(fd, BLKGETSIZE, &dsize) == 0) {
+			ldsize = dsize;
+			ldsize <<= 9;
+		} else {
+			/* end the message with a newline, not a backspace */
+			if (dname)
+				pr_err("Cannot get size of %s: %s\n",
+					dname, strerror(errno));
+			return 0;
+		}
+	}
+	*sizep = ldsize;
+	return 1;
+}
+
+/* Return true if this can only be a container, not a member device.
+ * i.e. it is an md device and its size is zero, or its size cannot
+ * be determined at all.
+ */
+int must_be_container(int fd)
+{
+	unsigned long long size;
+
+	if (md_get_version(fd) < 0)
+		return 0;
+	if (!get_dev_size(fd, NULL, &size))
+		return 1;
+	return size == 0;
+}
+
+/* Sets endofpart parameter to the last block used by the last GPT partition on the device.
+ * The GPT header is read from byte offset 512 and the partition
+ * entries are read from the bytes that follow it.
+ * Returns: 1 if successful
+ * -1 for unknown partition type (bad signature or implausible header)
+ * 0 for other errors (seek or read failure)
+ */
+static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart)
+{
+	struct GPT gpt;
+	unsigned char empty_gpt_entry[16]= {0};
+	struct GPT_part_entry *part;
+	/* zeroed so that an entry shorter than the fields examined
+	 * below reads as an empty slot instead of uninitialised bytes
+	 */
+	char buf[512] = {0};
+	unsigned long long curr_part_end;
+	unsigned all_partitions, entry_size;
+	unsigned part_nr;
+
+	*endofpart = 0;
+
+	BUILD_BUG_ON(sizeof(gpt) != 512);
+	/* skip protective MBR */
+	if (lseek(fd, 512, SEEK_SET) != 512)
+		return 0;
+	/* read GPT header */
+	if (read(fd, &gpt, 512) != 512)
+		return 0;
+
+	/* get the number of partition entries and the entry size */
+	all_partitions = __le32_to_cpu(gpt.part_cnt);
+	entry_size = __le32_to_cpu(gpt.part_size);
+
+	/* Check GPT signature*/
+	if (gpt.magic != GPT_SIGNATURE_MAGIC)
+		return -1;
+
+	/* sanity checks */
+	if (all_partitions > 1024 ||
+	    entry_size > sizeof(buf))
+		return -1;
+
+	part = (struct GPT_part_entry *)buf;
+
+	for (part_nr = 0; part_nr < all_partitions; part_nr++) {
+		/* read partition entry */
+		if (read(fd, buf, entry_size) != (ssize_t)entry_size)
+			return 0;
+
+		/* an all-zero type GUID marks an unused slot */
+		if (memcmp(part->type_guid, empty_gpt_entry, 16) == 0)
+			continue;
+
+		/* check the last lba for the current partition */
+		curr_part_end = __le64_to_cpu(part->ending_lba);
+		if (curr_part_end > *endofpart)
+			*endofpart = curr_part_end;
+	}
+	return 1;
+}
+
+/* Sets endofpart parameter to the last block used by the last partition on the device.
+ * The MBR at offset 0 is examined; if one of its records has the GPT
+ * partition type, the answer comes from the GPT instead.
+ * Returns: 1 if successful
+ * -1 for unknown partition type
+ * 0 for other errors
+ */
+static int get_last_partition_end(int fd, unsigned long long *endofpart)
+{
+	struct MBR boot_sect;
+	struct MBR_part_record *part;
+	unsigned long long curr_part_end;
+	unsigned part_nr;
+
+	*endofpart = 0;
+
+	BUILD_BUG_ON(sizeof(boot_sect) != 512);
+	/* read MBR */
+	if (lseek(fd, 0, SEEK_SET) != 0)
+		return 0;
+	if (read(fd, &boot_sect, 512) != 512)
+		return 0;
+
+	/* check MBR signature */
+	if (boot_sect.magic != MBR_SIGNATURE_MAGIC)
+		return -1;	/* Unknown partition table */
+
+	/* found the correct signature */
+	part = boot_sect.parts;
+	for (part_nr = 0; part_nr < MBR_PARTITIONS; part_nr++, part++) {
+		/* check for GPT type */
+		if (part->part_type == MBR_GPT_PARTITION_TYPE)
+			return get_gpt_last_partition_end(fd, endofpart);
+
+		/* check the last used lba for the current partition;
+		 * widen before adding so that start + length cannot wrap
+		 * around in 32 bits
+		 */
+		curr_part_end =
+			(unsigned long long)__le32_to_cpu(part->first_sect_lba) +
+			__le32_to_cpu(part->blocks_num);
+		if (curr_part_end > *endofpart)
+			*endofpart = curr_part_end;
+	}
+	return 1;
+}
+
+int check_partitions(int fd, char *dname, unsigned long long freesize,
+			unsigned long long size)
+{
+	/*
+	 * Check where the last partition on 'fd' ends and complain
+	 * (naming the device 'dname') if it conflicts with the new array:
+	 *  - freesize == 0: any partition table is reported
+	 *  - the last partition ends beyond freesize
+	 *  - size is non-zero and the last partition ends beyond it
+	 * Returns 1 if something was reported, otherwise 0 (including
+	 * when no partition table could be read).
+	 */
+	unsigned long long endofpart;
+
+	if (get_last_partition_end(fd, &endofpart) <= 0)
+		return 0;
+
+	/* There appears to be a partition table here */
+	if (freesize == 0) {
+		/* partitions will not be visible in new device */
+		pr_err("partition table exists on %s but will be lost or\n"
+		       "       meaningless after creating array\n",
+		       dname);
+		return 1;
+	}
+	if (endofpart > freesize) {
+		/* last partition overlaps metadata */
+		pr_err("metadata will over-write last partition on %s.\n",
+		       dname);
+		return 1;
+	}
+	if (size && endofpart > size) {
+		/* partitions will be truncated in new device */
+		pr_err("array size is too small to cover all partitions on %s.\n",
+		       dname);
+		return 1;
+	}
+	return 0;
+}
+
+int open_container(int fd)
+{
+	/* 'fd' is a block device.  Find out if it is in use
+	 * by a container, and return an open fd on that container.
+	 *
+	 * Each holder listed in sysfs for the device is examined: its
+	 * md/metadata_version must start with "external" and must not
+	 * have a '/' as its tenth character.  The first such holder that
+	 * can be opened read-only is returned.
+	 * Returns -1 if there is none or sysfs cannot be read.
+	 */
+	char path[256];
+	char *e;
+	size_t room;
+	DIR *dir;
+	struct dirent *de;
+	int dfd, n;
+	char buf[200];
+	int mjr, mnr;
+	struct stat st;
+
+	if (fstat(fd, &st) != 0)
+		return -1;
+	snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/holders",
+		 (int)major(st.st_rdev), (int)minor(st.st_rdev));
+	/* per-holder names are written at 'e', within 'room' bytes */
+	e = path + strlen(path);
+	room = sizeof(path) - (size_t)(e - path);
+
+	dir = opendir(path);
+	if (!dir)
+		return -1;
+	while ((de = readdir(dir))) {
+		if (de->d_ino == 0)
+			continue;
+		if (de->d_name[0] == '.')
+			continue;
+		/* Need to make sure it is a container and not a volume */
+		n = snprintf(e, room, "/%s/md/metadata_version", de->d_name);
+		if (n < 0 || (size_t)n >= room)
+			continue;	/* name does not fit in 'path' */
+		dfd = open(path, O_RDONLY);
+		if (dfd < 0)
+			continue;
+		n = read(dfd, buf, sizeof(buf));
+		close(dfd);
+		if (n <= 0 || (unsigned)n >= sizeof(buf))
+			continue;
+		buf[n] = 0;
+		if (strncmp(buf, "external", 8) != 0 ||
+		    n < 10 ||
+		    buf[9] == '/')
+			continue;
+		n = snprintf(e, room, "/%s/dev", de->d_name);
+		if (n < 0 || (size_t)n >= room)
+			continue;
+		dfd = open(path, O_RDONLY);
+		if (dfd < 0)
+			continue;
+		n = read(dfd, buf, sizeof(buf));
+		close(dfd);
+		if (n <= 0 || (unsigned)n >= sizeof(buf))
+			continue;
+		buf[n] = 0;
+		if (sscanf(buf, "%d:%d", &mjr, &mnr) != 2)
+			continue;
+		snprintf(buf, sizeof(buf), "%d:%d", mjr, mnr);
+		dfd = dev_open(buf, O_RDONLY);
+		if (dfd >= 0) {
+			closedir(dir);
+			return dfd;
+		}
+	}
+	closedir(dir);
+	return -1;
+}
+
+struct superswitch *version_to_superswitch(char *vers)
+{
+	/* Return the superlist[] entry whose name is exactly 'vers',
+	 * or NULL if there is none.
+	 */
+	struct superswitch **ssp;
+
+	for (ssp = superlist; *ssp != NULL; ssp++)
+		if (strcmp(vers, (*ssp)->name) == 0)
+			return *ssp;
+
+	return NULL;
+}
+
+int metadata_container_matches(char *metadata, char *devnm)
+{
+	/* Check if 'devnm' is the container named in 'metadata'
+	 * which is
+	 *   /containername/componentname  or
+	 *   -containername/componentname
+	 * The container name must be followed directly by '/'.
+	 * Returns 1 on a match, 0 otherwise.
+	 */
+	size_t len;
+
+	if (metadata[0] != '/' && metadata[0] != '-')
+		return 0;
+
+	len = strlen(devnm);
+	if (strncmp(metadata + 1, devnm, len) != 0)
+		return 0;
+	return metadata[len + 1] == '/';
+}
+
+int metadata_subdev_matches(char *metadata, char *devnm)
+{
+	/* Check if 'devnm' is the subdev named in 'metadata'
+	 * which is
+	 *   /containername/subdev  or
+	 *   -containername/subdev
+	 * The subdev is everything after the first '/' that follows the
+	 * leading character.  Returns 1 on a match, 0 otherwise.
+	 */
+	char *slash;
+
+	if (metadata[0] != '/' && metadata[0] != '-')
+		return 0;
+
+	slash = strchr(metadata + 1, '/');
+	if (slash == NULL)
+		return 0;
+	return strcmp(slash + 1, devnm) == 0 ? 1 : 0;
+}
+
+int is_container_member(struct mdstat_ent *mdstat, char *container)
+{
+	/* True (1) if the entry's metadata_version is "external:"
+	 * followed by text that metadata_container_matches() accepts
+	 * for 'container'; otherwise 0.
+	 */
+	char *version = mdstat->metadata_version;
+
+	if (version == NULL)
+		return 0;
+	if (strncmp(version, "external:", 9) != 0)
+		return 0;
+	if (!metadata_container_matches(version + 9, container))
+		return 0;
+
+	return 1;
+}
+
+int is_subarray_active(char *subarray, char *container)
+{
+	/* Report whether mdstat lists an array that is a member of
+	 * 'container' and whose subarray name equals 'subarray'.
+	 * Returns 1 if so, 0 otherwise.
+	 */
+	struct mdstat_ent *mdstat = mdstat_read(0, 0);
+	struct mdstat_ent *ent;
+	int active = 0;
+
+	for (ent = mdstat; ent; ent = ent->next)
+		if (is_container_member(ent, container) &&
+		    strcmp(to_subarray(ent, container), subarray) == 0) {
+			/* record the answer now: 'ent' must not be
+			 * looked at once the list has been freed
+			 */
+			active = 1;
+			break;
+		}
+
+	free_mdstat(mdstat);
+
+	return active;
+}
+
+/* open_subarray - opens a subarray in a container
+ * @dev: container device name
+ * @subarray: name of the subarray to look for in the container
+ * @st: empty supertype
+ * @quiet: block reporting errors flag
+ *
+ * On success returns an fd to a container and fills in *st
+ * (devnm, ss and the loaded container metadata).
+ * On failure returns -1 with the fd closed; an error is printed
+ * unless @quiet is set.
+ */
+int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet)
+{
+ struct mdinfo *mdi;
+ struct mdinfo *info;
+ int fd, err = 1;
+ char *_devnm;
+
+ fd = open(dev, O_RDWR|O_EXCL);
+ if (fd < 0) {
+ if (!quiet)
+ pr_err("Couldn't open %s, aborting\n",
+ dev);
+ return -1;
+ }
+
+ _devnm = fd2devnm(fd);
+ if (_devnm == NULL) {
+ if (!quiet)
+ pr_err("Failed to determine device number for %s\n",
+ dev);
+ goto close_fd;
+ }
+ strcpy(st->devnm, _devnm);
+
+ mdi = sysfs_read(fd, st->devnm, GET_VERSION|GET_LEVEL);
+ if (!mdi) {
+ if (!quiet)
+ pr_err("Failed to read sysfs for %s\n",
+ dev);
+ goto close_fd;
+ }
+
+ /* only a device whose level is UnSet is accepted as a container */
+ if (mdi->array.level != UnSet) {
+ if (!quiet)
+ pr_err("%s is not a container\n", dev);
+ goto free_sysfs;
+ }
+
+ st->ss = version_to_superswitch(mdi->text_version);
+ if (!st->ss) {
+ if (!quiet)
+ pr_err("Operation not supported for %s metadata\n",
+ mdi->text_version);
+ goto free_sysfs;
+ }
+
+ if (st->devnm[0] == 0) {
+ if (!quiet)
+ pr_err("Failed to allocate device name\n");
+ goto free_sysfs;
+ }
+
+ if (!st->ss->load_container) {
+ if (!quiet)
+ pr_err("%s is not a container\n", dev);
+ goto free_sysfs;
+ }
+
+ if (st->ss->load_container(st, fd, NULL)) {
+ if (!quiet)
+ pr_err("Failed to load metadata for %s\n",
+ dev);
+ goto free_sysfs;
+ }
+
+ /* the lookup result is only used as an existence test */
+ info = st->ss->container_content(st, subarray);
+ if (!info) {
+ if (!quiet)
+ pr_err("Failed to find subarray-%s in %s\n",
+ subarray, dev);
+ goto free_super;
+ }
+ free(info);
+
+ err = 0;
+
+ /* The labels below are also passed through on success: the
+ * loaded metadata and the fd are released only when err is set,
+ * the sysfs data is released in both cases.
+ */
+ free_super:
+ if (err)
+ st->ss->free_super(st);
+ free_sysfs:
+ sysfs_free(mdi);
+ close_fd:
+ if (err)
+ close(fd);
+
+ if (err)
+ return -1;
+ else
+ return fd;
+}
+
+int add_disk(int mdfd, struct supertype *st,
+ struct mdinfo *sra, struct mdinfo *info)
+{
+ /* Add a device to an array, in one of 2 ways:
+ * through sysfs when the metadata handler is external (not
+ * compiled in when MDASSEMBLE is defined), otherwise with the
+ * ADD_NEW_DISK ioctl on 'mdfd'.
+ * Returns the result of whichever call was made.
+ */
+ int rv;
+#ifndef MDASSEMBLE
+ if (st->ss->external) {
+ /* a device flagged MD_DISK_SYNC needs no recovery */
+ if (info->disk.state & (1<<MD_DISK_SYNC))
+ info->recovery_start = MaxSector;
+ else
+ info->recovery_start = 0;
+ rv = sysfs_add_disk(sra, info, 0);
+ if (! rv) {
+ /* make sure sra->devs has an entry for this device:
+ * if 'info' itself is not on the list, a copy of it
+ * is pushed on the front
+ */
+ struct mdinfo *sd2;
+ for (sd2 = sra->devs; sd2; sd2=sd2->next)
+ if (sd2 == info)
+ break;
+ if (sd2 == NULL) {
+ sd2 = xmalloc(sizeof(*sd2));
+ *sd2 = *info;
+ sd2->next = sra->devs;
+ sra->devs = sd2;
+ }
+ }
+ } else
+#endif
+ rv = ioctl(mdfd, ADD_NEW_DISK, &info->disk);
+ return rv;
+}
+
+int remove_disk(int mdfd, struct supertype *st,
+ struct mdinfo *sra, struct mdinfo *info)
+{
+ int rv;
+ /* Remove the disk given by 'info' from the array.
+ * For an external metadata handler (not compiled in when
+ * MDASSEMBLE is defined) the device's sysfs "slot" is set to
+ * "none"; otherwise the HOT_REMOVE_DISK ioctl is issued on 'mdfd'
+ * with the device number from info->disk.
+ * Returns the result of whichever call was made.
+ */
+#ifndef MDASSEMBLE
+ if (st->ss->external)
+ rv = sysfs_set_str(sra, info, "slot", "none");
+ else
+#endif
+ rv = ioctl(mdfd, HOT_REMOVE_DISK, makedev(info->disk.major,
+ info->disk.minor));
+ return rv;
+}
+
+int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
+{
+ /* Initialise kernel's knowledge of array.
+ * This varies between externally managed arrays
+ * and older kernels:
+ * - external metadata (not compiled in when MDASSEMBLE is
+ * defined): sysfs_set_array()
+ * - md version with (vers % 100) >= 1: SET_ARRAY_INFO with only
+ * the major and minor metadata version filled in
+ * - otherwise: SET_ARRAY_INFO with a NULL argument
+ * Returns the result of whichever call was made.
+ */
+ int vers = md_get_version(mdfd);
+ int rv;
+
+#ifndef MDASSEMBLE
+ if (st->ss->external)
+ rv = sysfs_set_array(info, vers);
+ else
+#endif
+ if ((vers % 100) >= 1) { /* can use different versions */
+ mdu_array_info_t inf;
+ memset(&inf, 0, sizeof(inf));
+ inf.major_version = info->array.major_version;
+ inf.minor_version = info->array.minor_version;
+ rv = ioctl(mdfd, SET_ARRAY_INFO, &inf);
+ } else
+ rv = ioctl(mdfd, SET_ARRAY_INFO, NULL);
+ return rv;
+}
+
+unsigned long long min_recovery_start(struct mdinfo *array)
+{
+	/* find the minimum recovery_start in an array for metadata
+	 * formats that only record per-array recovery progress instead
+	 * of per-device.  With no devices the result is MaxSector.
+	 */
+	unsigned long long lowest = MaxSector;
+	struct mdinfo *dev = array->devs;
+
+	while (dev != NULL) {
+		lowest = min(lowest, dev->recovery_start);
+		dev = dev->next;
+	}
+
+	return lowest;
+}
+
+int mdmon_pid(char *devnm)
+{
+	/* Return the number stored in MDMON_DIR/<devnm>.pid, or -1 if
+	 * that file cannot be opened or read, or its path does not fit
+	 * in the local buffer.
+	 */
+	char path[100];
+	char pid[10];
+	int fd;
+	int n;
+
+	n = snprintf(path, sizeof(path), "%s/%s.pid", MDMON_DIR, devnm);
+	if (n < 0 || n >= (int)sizeof(path))
+		return -1;
+
+	fd = open(path, O_RDONLY | O_NOATIME, 0);
+
+	if (fd < 0)
+		return -1;
+	/* leave room for the terminator added below */
+	n = read(fd, pid, sizeof(pid) - 1);
+	close(fd);
+	if (n <= 0)
+		return -1;
+	/* read() does not terminate the buffer, and atoi() needs a string */
+	pid[n] = 0;
+	return atoi(pid);
+}
+
+int mdmon_running(char *devnm)
+{
+	/* True (1) if mdmon_pid() gives a positive pid for 'devnm' and
+	 * kill(pid, 0) succeeds for it; otherwise 0.
+	 */
+	int pid = mdmon_pid(devnm);
+
+	return pid > 0 && kill(pid, 0) == 0;
+}
+
+/* Start mdmon for the device named 'devnm'.
+ * Does nothing and returns 0 if check_env("MDADM_NO_MDMON") is true.
+ * Unless check_env("MDADM_NO_SYSTEMCTL") is true, "systemctl start
+ * mdmon@<devnm>.service" is tried first; if that does not report
+ * success, mdmon is run directly from the first path that can be
+ * exec'ed.
+ * Returns 0 on success, -1 if a fork fails or mdmon cannot be
+ * launched.
+ */
+int start_mdmon(char *devnm)
+{
+ int i, skipped;
+ int len;
+ pid_t pid;
+ int status;
+ char pathbuf[1024];
+ /* candidates for running mdmon directly, tried in this order;
+ * pathbuf is filled in below
+ */
+ char *paths[4] = {
+ pathbuf,
+ BINDIR "/mdmon",
+ "./mdmon",
+ NULL
+ };
+
+ if (check_env("MDADM_NO_MDMON"))
+ return 0;
+
+ /* pathbuf becomes "mdmon" in the directory holding the running
+ * executable, or stays empty if that cannot be determined
+ */
+ len = readlink("/proc/self/exe", pathbuf, sizeof(pathbuf)-1);
+ if (len > 0) {
+ char *sl;
+ pathbuf[len] = 0;
+ sl = strrchr(pathbuf, '/');
+ if (sl)
+ sl++;
+ else
+ sl = pathbuf;
+ strcpy(sl, "mdmon");
+ } else
+ pathbuf[0] = '\0';
+
+ /* First try to run systemctl */
+ if (!check_env("MDADM_NO_SYSTEMCTL"))
+ switch(fork()) {
+ case 0:
+ /* FIXME yuk. CLOSE_EXEC?? */
+ /* close descriptors from 3 upwards, stopping after 20
+ * failed close() calls in a row
+ */
+ skipped = 0;
+ for (i = 3; skipped < 20; i++)
+ if (close(i) < 0)
+ skipped++;
+ else
+ skipped = 0;
+
+ /* Don't want to see error messages from
+ * systemctl. If the service doesn't exist,
+ * we start mdmon ourselves.
+ */
+ close(2);
+ open("/dev/null", O_WRONLY);
+ /* pathbuf is reused for the unit name; this runs in the
+ * child, after the fork
+ */
+ snprintf(pathbuf, sizeof(pathbuf), "mdmon@%s.service",
+ devnm);
+ status = execl("/usr/bin/systemctl", "systemctl",
+ "start",
+ pathbuf, NULL);
+ status = execl("/bin/systemctl", "systemctl", "start",
+ pathbuf, NULL);
+ exit(1);
+ case -1: pr_err("cannot run mdmon. "
+ "Array remains readonly\n");
+ return -1;
+ default: /* parent - good */
+ pid = wait(&status);
+ if (pid >= 0 && status == 0)
+ return 0;
+ }
+
+ /* That failed, try running mdmon directly */
+ switch(fork()) {
+ case 0:
+ /* FIXME yuk. CLOSE_EXEC?? */
+ skipped = 0;
+ for (i = 3; skipped < 20; i++)
+ if (close(i) < 0)
+ skipped++;
+ else
+ skipped = 0;
+
+ /* execl() only returns on failure, so each candidate is
+ * tried in turn; empty entries are skipped
+ */
+ for (i = 0; paths[i]; i++)
+ if (paths[i][0]) {
+ execl(paths[i], paths[i],
+ devnm, NULL);
+ }
+ exit(1);
+ case -1: pr_err("cannot run mdmon. "
+ "Array remains readonly\n");
+ return -1;
+ default: /* parent - good */
+ pid = wait(&status);
+ if (pid < 0 || status != 0) {
+ pr_err("failed to launch mdmon. "
+ "Array remains readonly\n");
+ return -1;
+ }
+ }
+ return 0;
+}
+
+__u32 random32(void)
+{
+	/* Return 32 bits read from /dev/urandom, falling back to
+	 * random() if the device cannot be opened or does not deliver
+	 * four bytes.
+	 */
+	__u32 value;
+	int fd = open("/dev/urandom", O_RDONLY);
+
+	if (fd < 0)
+		return random();
+
+	if (read(fd, &value, 4) != 4)
+		value = random();
+	close(fd);
+	return value;
+}
+
+#ifndef MDASSEMBLE
+int flush_metadata_updates(struct supertype *st)
+{
+	/* Send every queued metadata update to the monitor for
+	 * st->container_devnm, waiting for a reply after each one, then
+	 * send an ack and wait for its reply too.  Each update and its
+	 * buffer are freed once sent.
+	 * Returns 0 when the queue has been sent, -1 if the queue was
+	 * empty (update_tail is cleared) or the monitor could not be
+	 * reached (the queue is left untouched).
+	 */
+	struct metadata_update *mu;
+	int sfd;
+
+	if (st->updates == NULL) {
+		st->update_tail = NULL;
+		return -1;
+	}
+
+	sfd = connect_monitor(st->container_devnm);
+	if (sfd < 0)
+		return -1;
+
+	while ((mu = st->updates) != NULL) {
+		/* take it off the list before handing it over */
+		st->updates = mu->next;
+
+		send_message(sfd, mu, 0);
+		wait_reply(sfd, 0);
+		free(mu->buf);
+		free(mu);
+	}
+	ack(sfd, 0);
+	wait_reply(sfd, 0);
+	close(sfd);
+	st->update_tail = NULL;
+	return 0;
+}
+
+void append_metadata_update(struct supertype *st, void *buf, int len)
+{
+	/* Queue a new update holding 'buf' and 'len' at the position
+	 * st->update_tail points to, then move update_tail on to the
+	 * new entry's 'next' field.  'buf' is stored, not copied.
+	 */
+	struct metadata_update *update = xmalloc(sizeof(*update));
+
+	update->next = NULL;
+	update->space_list = NULL;
+	update->space = NULL;
+	update->len = len;
+	update->buf = buf;
+
+	*st->update_tail = update;
+	st->update_tail = &update->next;
+}
+#endif /* MDASSEMBLE */
+
+#ifdef __TINYC__
+/* tinyc doesn't optimize this check in ioctl.h out, so when building
+ * with tinyc the symbol is given a definition here.
+ * NOTE(review): presumably this satisfies a reference left behind by
+ * the size check in ioctl.h - confirm against that header.
+ */
+unsigned int __invalid_size_argument_for_IOC = 0;
+#endif
+
+int experimental(void)
+{
+	/* Gate for experimental features: returns 1 when
+	 * check_env("MDADM_EXPERIMENTAL") is true, otherwise prints a
+	 * hint and returns 0.
+	 */
+	if (!check_env("MDADM_EXPERIMENTAL")) {
+		pr_err("To use this feature MDADM_EXPERIMENTAL"
+		       " environment variable has to be defined.\n");
+		return 0;
+	}
+	return 1;
+}
+
+/* Pick all spares matching given criteria from a container
+ * if min_size == 0 do not check size
+ * if domlist == NULL do not check domains
+ * if spare_group given add it to domains of each spare
+ * metadata allows to test domains using metadata of destination array
+ * if get_one is set, stop at the first match and drop the rest
+ *
+ * Returns the list head obtained from getinfo_super_disks() with
+ * ->devs reduced to the chosen devices and array.spare_disks set
+ * to their count, or NULL if the handler has no
+ * getinfo_super_disks() or it returns nothing. */
+struct mdinfo *container_choose_spares(struct supertype *st,
+ unsigned long long min_size,
+ struct domainlist *domlist,
+ char *spare_group,
+ const char *metadata, int get_one)
+{
+ struct mdinfo *d, **dp, *disks = NULL;
+
+ /* get list of all disks in container */
+ if (st->ss->getinfo_super_disks)
+ disks = st->ss->getinfo_super_disks(st);
+
+ if (!disks)
+ return disks;
+ /* find spare devices on the list */
+ /* 'dp' always points at the link that leads to the entry being
+ * examined, so a rejected entry can be unlinked in place
+ */
+ dp = &disks->devs;
+ disks->array.spare_disks = 0;
+ while (*dp) {
+ int found = 0;
+ d = *dp;
+ /* only entries with no state bits set are candidates */
+ if (d->disk.state == 0) {
+ /* check if size is acceptable */
+ unsigned long long dev_size;
+ dev_t dev = makedev(d->disk.major,d->disk.minor);
+
+ if (!min_size ||
+ (dev_size_from_id(dev, &dev_size) &&
+ dev_size >= min_size))
+ found = 1;
+ /* check if domain matches */
+ if (found && domlist) {
+ struct dev_policy *pol = devid_policy(dev);
+ if (spare_group)
+ pol_add(&pol, pol_domain,
+ spare_group, NULL);
+ if (domain_test(domlist, pol, metadata) != 1)
+ found = 0;
+ dev_policy_free(pol);
+ }
+ }
+ if (found) {
+ /* keep this entry and move on to its successor */
+ dp = &d->next;
+ disks->array.spare_disks++;
+ if (get_one) {
+ /* one is enough: free everything after it, which
+ * also ends the loop because *dp becomes NULL
+ */
+ sysfs_free(*dp);
+ d->next = NULL;
+ }
+ } else {
+ /* unlink the entry, then free it on its own */
+ *dp = d->next;
+ d->next = NULL;
+ sysfs_free(d);
+ }
+ }
+ return disks;
+}
+
+/* Checks if two paths refer to the same file, i.e. stat() gives the
+ * same inode number on the same device for both.
+ * Returns 0 if they do.
+ * Returns 1 if they don't.
+ * Returns -1 if something went wrong,
+ * e.g. a path is NULL or the files
+ * they point to don't exist */
+int compare_paths (char* path1, char* path2)
+{
+	struct stat a, b;
+
+	if (!path1 || !path2)
+		return -1;
+	if (stat(path1, &a) != 0 || stat(path2, &b) != 0)
+		return -1;
+
+	return (a.st_ino == b.st_ino && a.st_dev == b.st_dev) ? 0 : 1;
+}
+
+/* Make sure we can open as many devices as needed: ask for a
+ * RLIMIT_NOFILE soft limit of 20 + devices when the current one is
+ * lower, raising the hard limit as well if it is below that.
+ * Nothing is changed if the current limit cannot be read or is
+ * already sufficient; a failing setrlimit() is not reported.
+ */
+void enable_fds(int devices)
+{
+	unsigned int wanted = 20 + devices;
+	struct rlimit lim;
+
+	if (getrlimit(RLIMIT_NOFILE, &lim) != 0)
+		return;
+	if (lim.rlim_cur >= wanted)
+		return;
+
+	if (lim.rlim_max < wanted)
+		lim.rlim_max = wanted;
+	lim.rlim_cur = wanted;
+	setrlimit(RLIMIT_NOFILE, &lim);
+}
+
+int in_initrd(void)
+{
+	/* This is based on similar function in systemd.
+	 * True (1) if "/" is on a filesystem whose type is TMPFS_MAGIC
+	 * or RAMFS_MAGIC; 0 otherwise, including when statfs() fails.
+	 */
+	struct statfs s;
+	unsigned long fstype;
+
+	if (statfs("/", &s) < 0)
+		return 0;
+
+	/* statfs.f_type is signed long on s390x and MIPS, causing all
+	   sorts of sign extension problems with RAMFS_MAGIC being
+	   defined as 0x858458f6, hence only the low 32 bits are
+	   compared for it */
+	fstype = (unsigned long)s.f_type;
+	if (fstype == TMPFS_MAGIC)
+		return 1;
+	return (fstype & 0xFFFFFFFFUL) ==
+	       ((unsigned long)RAMFS_MAGIC & 0xFFFFFFFFUL);
+}
+
+void reopen_mddev(int mdfd)
+{
+	/* Re-open without any O_EXCL, but keep
+	 * the same fd
+	 *
+	 * The device name is looked up before 'mdfd' is closed.  If the
+	 * fresh open lands on a different descriptor number it is
+	 * duplicated onto 'mdfd'.  Nothing is reported if the re-open
+	 * fails; 'mdfd' is then simply left closed.
+	 */
+	char *devnm;
+	int fd;
+
+	devnm = fd2devnm(mdfd);
+	close(mdfd);
+	fd = open_dev(devnm);
+	if (fd >= 0 && fd != mdfd) {
+		dup2(fd, mdfd);
+		/* the temporary descriptor is no longer needed; left
+		 * open it would leak on every call
+		 */
+		close(fd);
+	}
+}
diff --git a/xmalloc.c b/xmalloc.c
new file mode 100644
index 00000000..8d42a7c4
--- /dev/null
+++ b/xmalloc.c
@@ -0,0 +1,72 @@
+/* mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+/*#include <sys/socket.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <sys/un.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <signal.h>
+*/
+
+/* malloc() that does not return on failure: a message is written to
+ * descriptor 2 and the process exits with status 4, plus 1 if
+ * write() returned a non-zero value.
+ */
+void *xmalloc(size_t len)
+{
+	static const char oom[] = Name ": memory allocation failure - aborting\n";
+	void *mem = malloc(len);
+
+	if (mem != NULL)
+		return mem;
+	exit(4 + !!write(2, oom, strlen(oom)));
+}
+
+/* realloc() that does not return on failure: if realloc() gives NULL
+ * a message is written to descriptor 2 and the process exits with
+ * status 4, plus 1 if write() returned a non-zero value.
+ */
+void *xrealloc(void *ptr, size_t len)
+{
+	static const char oom[] = Name ": memory allocation failure - aborting\n";
+	void *mem = realloc(ptr, len);
+
+	if (mem != NULL)
+		return mem;
+	exit(4 + !!write(2, oom, strlen(oom)));
+}
+
+/* calloc() that does not return on failure: a message is written to
+ * descriptor 2 and the process exits with status 4, plus 1 if
+ * write() returned a non-zero value.
+ */
+void *xcalloc(size_t num, size_t size)
+{
+	static const char oom[] = Name ": memory allocation failure - aborting\n";
+	void *mem = calloc(num, size);
+
+	if (mem != NULL)
+		return mem;
+	exit(4 + !!write(2, oom, strlen(oom)));
+}
+
+/* strdup() that does not return on failure: a message is written to
+ * descriptor 2 and the process exits with status 4, plus 1 if
+ * write() returned a non-zero value.
+ */
+char *xstrdup(const char *str)
+{
+	static const char oom[] = Name ": memory allocation failure - aborting\n";
+	char *copy = strdup(str);
+
+	if (copy != NULL)
+		return copy;
+	exit(4 + !!write(2, oom, strlen(oom)));
+}