summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Tokarev <mjt@tls.msk.ru>2012-01-10 16:08:36 +0400
committerMichael Tokarev <mjt@tls.msk.ru>2012-01-10 16:08:36 +0400
commitffcc983ba1ea8b0d45e1cc1e9d50eb1b3f2b2c71 (patch)
tree9caaf9aa27a945935d1e39b43459f9a0acbe9605
parentc4c92db88ab99095b3c79216ff81f83208767c1d (diff)
parent1fbc5b7a5ea9709cdfff7fe7b63b43dfd4def124 (diff)
Merge commit 'mdadm-3.2.3'
-rw-r--r--.gitignore1
-rw-r--r--ANNOUNCE-3.2.324
-rw-r--r--Assemble.c133
-rw-r--r--COPYING43
-rw-r--r--Create.c56
-rw-r--r--Detail.c27
-rw-r--r--Grow.c636
-rw-r--r--Incremental.c202
-rw-r--r--Kill.c5
-rw-r--r--Makefile5
-rw-r--r--Manage.c104
-rw-r--r--Monitor.c9
-rw-r--r--ReadMe.c4
-rw-r--r--bitmap.c7
-rw-r--r--config.c61
-rwxr-xr-xinventory2
-rwxr-xr-xmakedist14
-rw-r--r--managemon.c30
-rw-r--r--mapfile.c16
-rw-r--r--md.412
-rw-r--r--md_p.h4
-rw-r--r--mdadm.8.in144
-rw-r--r--mdadm.c99
-rw-r--r--mdadm.conf.55
-rw-r--r--mdadm.h68
-rw-r--r--mdadm.spec4
-rw-r--r--mdassemble.82
-rw-r--r--mdassemble.c2
-rw-r--r--mdmon.86
-rw-r--r--mdmon.c7
-rw-r--r--mdmon.h1
-rw-r--r--mdopen.c27
-rw-r--r--mdstat.c15
-rw-r--r--monitor.c11
-rw-r--r--msg.c70
-rw-r--r--platform-intel.h19
-rw-r--r--policy.c46
-rw-r--r--raid6check.896
-rw-r--r--restripe.c50
-rw-r--r--super-ddf.c4
-rw-r--r--super-gpt.c6
-rw-r--r--super-intel.c1596
-rw-r--r--super-mbr.c7
-rw-r--r--super0.c25
-rw-r--r--super1.c56
-rw-r--r--sysfs.c64
-rw-r--r--tests/03r5assemV14
-rw-r--r--udev-md-raid.rules18
-rw-r--r--util.c63
49 files changed, 2743 insertions, 1167 deletions
diff --git a/.gitignore b/.gitignore
index 2503bd8b..72007418 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
/*.man
/*-stamp
/mdadm
+/mdadm.8
/mdadm.udeb
/mdmon
/swap_super
diff --git a/ANNOUNCE-3.2.3 b/ANNOUNCE-3.2.3
new file mode 100644
index 00000000..8a8dba46
--- /dev/null
+++ b/ANNOUNCE-3.2.3
@@ -0,0 +1,24 @@
+Subject: ANNOUNCE: mdadm 3.2.3 - A tool for managing Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 3.2.3
+
+It is available at the usual places:
+ countrycode=xx.
+ http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://neil.brown.name/mdadm
+ http://neil.brown.name/git/mdadm
+
+This release is largely a bugfix release for the 3.2 series with many
+minor fixes with little or no impact.
+
+The largest single area of change is support for reshape of Intel
+IMSM arrays (OnLine Capacity Expansion and Level Migration).
+Among other fixes, this now has a better chance of surviving if a
+device fails during reshape.
+
+Upgrading is recommended - particularly if you use mdadm for IMSM
+arrays - but not essential.
+
+NeilBrown 23rd December 2011
diff --git a/Assemble.c b/Assemble.c
index 25cfec1d..fd944619 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -138,7 +138,7 @@ int Assemble(struct supertype *st, char *mddev,
char *backup_file, int invalid_backup,
int readonly, int runstop,
char *update, char *homehost, int require_homehost,
- int verbose, int force)
+ int verbose, int force, int freeze_reshape)
{
/*
* The task of Assemble is to find a collection of
@@ -220,7 +220,9 @@ int Assemble(struct supertype *st, char *mddev,
int change = 0;
int inargv = 0;
int report_missmatch;
+#ifndef MDASSEMBLE
int bitmap_done;
+#endif
int start_partial_ok = (runstop >= 0) &&
(force || devlist==NULL || auto_assem);
unsigned int num_devs;
@@ -293,7 +295,7 @@ int Assemble(struct supertype *st, char *mddev,
char *devname = tmpdev->devname;
int dfd;
struct stat stb;
- struct supertype *tst = dup_super(st);
+ struct supertype *tst;
struct dev_policy *pol = NULL;
int found_container = 0;
@@ -306,6 +308,8 @@ int Assemble(struct supertype *st, char *mddev,
continue;
}
+ tst = dup_super(st);
+
dfd = dev_open(devname, O_RDONLY|O_EXCL);
if (dfd < 0) {
if (report_missmatch)
@@ -439,13 +443,6 @@ int Assemble(struct supertype *st, char *mddev,
content;
content = content->next) {
- /* do not assemble arrays that might have bad blocks */
- if (content->array.state & (1<<MD_SB_BBM_ERRORS)) {
- fprintf(stderr, Name ": BBM log found in metadata. "
- "Cannot activate array(s).\n");
- tmpdev->used = 2;
- goto loop;
- }
if (!ident_matches(ident, content, tst,
homehost, update,
report_missmatch ? devname : NULL))
@@ -455,6 +452,11 @@ int Assemble(struct supertype *st, char *mddev,
fprintf(stderr, Name ": member %s in %s is already assembled\n",
content->text_version,
devname);
+ } else if (content->array.state & (1<<MD_SB_BLOCK_VOLUME)) {
+ /* do not assemble arrays with unsupported configurations */
+ fprintf(stderr, Name ": Cannot activate member %s in %s.\n",
+ content->text_version,
+ devname);
} else
break;
}
@@ -697,14 +699,13 @@ int Assemble(struct supertype *st, char *mddev,
int err;
err = assemble_container_content(st, mdfd, content, runstop,
chosen_name, verbose,
- backup_file);
+ backup_file, freeze_reshape);
close(mdfd);
return err;
}
+ bitmap_done = 0;
#endif
/* Ok, no bad inconsistancy, we can try updating etc */
- bitmap_done = 0;
- content->update_private = NULL;
devices = malloc(num_devs * sizeof(*devices));
devmap = calloc(num_devs * content->array.raid_disks, 1);
for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) if (tmpdev->used == 1) {
@@ -889,8 +890,6 @@ int Assemble(struct supertype *st, char *mddev,
}
devcnt++;
}
- free(content->update_private);
- content->update_private = NULL;
if (devcnt == 0) {
fprintf(stderr, Name ": no devices found for %s\n",
@@ -1343,9 +1342,14 @@ int Assemble(struct supertype *st, char *mddev,
int rv;
#ifndef MDASSEMBLE
if (content->reshape_active &&
- content->delta_disks <= 0)
- rv = Grow_continue(mdfd, st, content, backup_file);
- else
+ content->delta_disks <= 0) {
+ rv = sysfs_set_str(content, NULL,
+ "array_state", "readonly");
+ if (rv == 0)
+ rv = Grow_continue(mdfd, st, content,
+ backup_file,
+ freeze_reshape);
+ } else
#endif
rv = ioctl(mdfd, RUN_ARRAY, NULL);
if (rv == 0) {
@@ -1372,6 +1376,7 @@ int Assemble(struct supertype *st, char *mddev,
sysfs_set_num(sra, NULL,
"stripe_cache_size",
(4 * content->array.chunk_size / 4096) + 1);
+ sysfs_free(sra);
}
}
if (okcnt < (unsigned)content->array.raid_disks) {
@@ -1511,7 +1516,7 @@ int Assemble(struct supertype *st, char *mddev,
int assemble_container_content(struct supertype *st, int mdfd,
struct mdinfo *content, int runstop,
char *chosen_name, int verbose,
- char *backup_file)
+ char *backup_file, int freeze_reshape)
{
struct mdinfo *dev, *sra;
int working = 0, preexist = 0;
@@ -1523,10 +1528,13 @@ int assemble_container_content(struct supertype *st, int mdfd,
sra = sysfs_read(mdfd, 0, GET_VERSION);
if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0)
- if (sysfs_set_array(content, md_get_version(mdfd)) != 0)
+ if (sysfs_set_array(content, md_get_version(mdfd)) != 0) {
+ if (sra)
+ sysfs_free(sra);
return 1;
+ }
- if (content->reshape_active)
+ if (st->ss->external && content->recovery_blocked)
block_subarray(content);
if (sra)
@@ -1552,48 +1560,37 @@ int assemble_container_content(struct supertype *st, int mdfd,
(working + preexist + expansion) >=
content->array.working_disks) {
int err;
+ int start_reshape;
- if (content->reshape_active) {
+ /* There are two types of reshape: container wide or sub-array specific
+ * Check if metadata requests blocking container wide reshapes
+ */
+ start_reshape = (content->reshape_active &&
+ !((content->reshape_active == CONTAINER_RESHAPE) &&
+ (content->array.state & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE))));
+ if (start_reshape) {
int spare = content->array.raid_disks + expansion;
- int i;
- int *fdlist = malloc(sizeof(int) *
- (working + expansion
- + content->array.raid_disks));
- for (i=0; i<spare; i++)
- fdlist[i] = -1;
- for (dev = content->devs; dev; dev = dev->next) {
- char buf[20];
- int fd;
- sprintf(buf, "%d:%d",
- dev->disk.major,
- dev->disk.minor);
- fd = dev_open(buf, O_RDWR);
-
- if (dev->disk.raid_disk >= 0)
- fdlist[dev->disk.raid_disk] = fd;
- else
- fdlist[spare++] = fd;
- }
- if (st->ss->external && st->ss->recover_backup)
- err = st->ss->recover_backup(st, content);
- else
- err = Grow_restart(st, content, fdlist, spare,
- backup_file, verbose > 0);
- while (spare > 0) {
- spare--;
- if (fdlist[spare] >= 0)
- close(fdlist[spare]);
- }
- if (err) {
- fprintf(stderr, Name ": Failed to restore critical"
- " section for reshape - sorry.\n");
- if (!backup_file)
- fprintf(stderr, Name ": Possibly you need"
- " to specify a --backup-file\n");
+ if (restore_backup(st, content,
+ working,
+ spare, backup_file, verbose) == 1)
return 1;
+
+ err = sysfs_set_str(content, NULL,
+ "array_state", "readonly");
+ if (err)
+ return 1;
+
+ if (st->ss->external) {
+ if (!mdmon_running(st->container_dev))
+ start_mdmon(st->container_dev);
+ ping_monitor_by_id(st->container_dev);
+ if (mdmon_running(st->container_dev) &&
+ st->update_tail == NULL)
+ st->update_tail = &st->updates;
}
- err = Grow_continue(mdfd, st, content, backup_file);
+ err = Grow_continue(mdfd, st, content, backup_file,
+ freeze_reshape);
} else switch(content->array.level) {
case LEVEL_LINEAR:
case LEVEL_MULTIPATH:
@@ -1617,12 +1614,14 @@ int assemble_container_content(struct supertype *st, int mdfd,
if (verbose >= 0) {
if (err)
fprintf(stderr, Name
- ": array %s now has %d devices",
- chosen_name, working + preexist);
+ ": array %s now has %d device%s",
+ chosen_name, working + preexist,
+ working + preexist == 1 ? "":"s");
else
fprintf(stderr, Name
- ": Started %s with %d devices",
- chosen_name, working + preexist);
+ ": Started %s with %d device%s",
+ chosen_name, working + preexist,
+ working + preexist == 1 ? "":"s");
if (preexist)
fprintf(stderr, " (%d new)", working);
if (expansion)
@@ -1635,11 +1634,15 @@ int assemble_container_content(struct supertype *st, int mdfd,
return err;
/* FIXME should have an O_EXCL and wait for read-auto */
} else {
- if (verbose >= 0)
+ if (verbose >= 0) {
fprintf(stderr, Name
- ": %s assembled with %d devices but "
- "not started\n",
- chosen_name, working);
+ ": %s assembled with %d device%s",
+ chosen_name, preexist + working,
+ preexist + working == 1 ? "":"s");
+ if (preexist)
+ fprintf(stderr, " (%d new)", working);
+ fprintf(stderr, " but not started\n");
+ }
return 1;
}
}
diff --git a/COPYING b/COPYING
index 60549be5..d159169d 100644
--- a/COPYING
+++ b/COPYING
@@ -1,12 +1,12 @@
- GNU GENERAL PUBLIC LICENSE
- Version 2, June 1991
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.
- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
- Preamble
+ Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
@@ -15,7 +15,7 @@ software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.) You can apply it to
+the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
@@ -55,8 +55,8 @@ patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
-
- GNU GENERAL PUBLIC LICENSE
+
+ GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
@@ -110,7 +110,7 @@ above, provided that you also meet all of these conditions:
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
-
+
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
@@ -168,7 +168,7 @@ access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
-
+
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
@@ -225,7 +225,7 @@ impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
-
+
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
@@ -255,7 +255,7 @@ make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
- NO WARRANTY
+ NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
@@ -277,9 +277,9 @@ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
@@ -291,7 +291,7 @@ convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
- Copyright (C) 19yy <name of author>
+ Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -303,17 +303,16 @@ the "copyright" line and a pointer to where the full notice is found.
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
- Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
@@ -336,5 +335,5 @@ necessary. Here is a sample; alter the names:
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
-library. If this is what you want to do, use the GNU Library General
+library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.
diff --git a/Create.c b/Create.c
index 48115db8..90ff3edd 100644
--- a/Create.c
+++ b/Create.c
@@ -332,15 +332,25 @@ int Create(struct supertype *st, char *mddev,
char *name = "default";
for(i=0; !st && superlist[i]; i++) {
st = superlist[i]->match_metadata_desc(name);
+ if (!st)
+ continue;
if (do_default_layout)
layout = default_layout(st, level, verbose);
- if (st && !st->ss->validate_geometry
- (st, level, layout, raiddisks,
- &chunk, size*2, dname, &freesize,
- verbose > 0)) {
+ switch (st->ss->validate_geometry(
+ st, level, layout, raiddisks,
+ &chunk, size*2, dname, &freesize,
+ verbose > 0)) {
+ case -1: /* Not valid, message printed, and not
+ * worth checking any further */
+ exit(2);
+ break;
+ case 0: /* Geometry not valid */
free(st);
st = NULL;
chunk = do_default_chunk ? UnSet : chunk;
+ break;
+ case 1: /* All happy */
+ break;
}
}
@@ -544,14 +554,28 @@ int Create(struct supertype *st, char *mddev,
/* We need to create the device */
map_lock(&map);
mdfd = create_mddev(mddev, name, autof, LOCAL, chosen_name);
- if (mdfd < 0)
+ if (mdfd < 0) {
+ map_unlock(&map);
return 1;
+ }
+ /* verify if chosen_name is not in use,
+ * it could be in conflict with already existing device
+ * e.g. container, array
+ */
+ if (strncmp(chosen_name, "/dev/md/", 8) == 0
+ && map_by_name(&map, chosen_name+8) != NULL) {
+ fprintf(stderr, Name ": Array name %s is in use already.\n",
+ chosen_name);
+ close(mdfd);
+ map_unlock(&map);
+ return 1;
+ }
mddev = chosen_name;
vers = md_get_version(mdfd);
if (vers < 9000) {
fprintf(stderr, Name ": Create requires md driver version 0.90.0 or later\n");
- goto abort;
+ goto abort_locked;
} else {
mdu_array_info_t inf;
memset(&inf, 0, sizeof(inf));
@@ -559,7 +583,7 @@ int Create(struct supertype *st, char *mddev,
if (inf.working_disks != 0) {
fprintf(stderr, Name ": another array by this name"
" is already running.\n");
- goto abort;
+ goto abort_locked;
}
}
@@ -655,7 +679,7 @@ int Create(struct supertype *st, char *mddev,
}
}
if (!st->ss->init_super(st, &info.array, size, name, homehost, uuid))
- goto abort;
+ goto abort_locked;
total_slots = info.array.nr_disks;
st->ss->getinfo_super(st, &info, NULL);
@@ -778,6 +802,10 @@ int Create(struct supertype *st, char *mddev,
}
infos = malloc(sizeof(*infos) * total_slots);
+ if (!infos) {
+ fprintf(stderr, Name ": Unable to allocate memory\n");
+ goto abort;
+ }
for (pass=1; pass <=2 ; pass++) {
struct mddev_dev *moved_disk = NULL; /* the disk that was moved out of the insert point */
@@ -856,15 +884,6 @@ int Create(struct supertype *st, char *mddev,
/* getinfo_super might have lost these ... */
inf->disk.major = major(stb.st_rdev);
inf->disk.minor = minor(stb.st_rdev);
- /* FIXME the following should not be needed
- * as getinfo_super is suppose to set
- * them. However it doesn't for imsm,
- * so we have this hack for now
- */
- if (st->ss == &super_imsm) {
- inf->disk.number = dnum;
- inf->disk.raid_disk = dnum;
- }
}
break;
case 2:
@@ -909,7 +928,7 @@ int Create(struct supertype *st, char *mddev,
Name ": Failed to write metadata to %s\n",
dv->devname);
st->ss->free_super(st);
- goto abort;
+ goto abort_locked;
}
/* update parent container uuid */
@@ -992,6 +1011,7 @@ int Create(struct supertype *st, char *mddev,
abort:
map_lock(&map);
+ abort_locked:
map_remove(&map, fd2devnum(mdfd));
map_unlock(&map);
diff --git a/Detail.c b/Detail.c
index 375189d0..e7d16812 100644
--- a/Detail.c
+++ b/Detail.c
@@ -58,7 +58,7 @@ int Detail(char *dev, int brief, int export, int test, char *homehost)
int rv = test ? 4 : 1;
int avail_disks = 0;
- char *avail;
+ char *avail = NULL;
if (fd < 0) {
fprintf(stderr, Name ": cannot open %s: %s\n",
@@ -372,11 +372,13 @@ int Detail(char *dev, int brief, int export, int test, char *homehost)
else
st = ", degraded";
- printf(" State : %s%s%s%s\n",
- (array.state&(1<<MD_SB_CLEAN))?"clean":"active",
- st,
- (!e || e->percent < 0) ? "" : sync_action[e->resync],
- larray_size ? "": ", Not Started");
+ printf(" State : %s%s%s%s%s%s \n",
+ (array.state&(1<<MD_SB_CLEAN))?"clean":"active", st,
+ (!e || (e->percent < 0 && e->percent != PROCESS_PENDING &&
+ e->percent != PROCESS_DELAYED)) ? "" : sync_action[e->resync],
+ larray_size ? "": ", Not Started",
+ e->percent == PROCESS_DELAYED ? " (DELAYED)": "",
+ e->percent == PROCESS_PENDING ? " (PENDING)": "");
}
if (array.raid_disks)
printf(" Active Devices : %d\n", array.active_disks);
@@ -416,10 +418,8 @@ int Detail(char *dev, int brief, int export, int test, char *homehost)
}
if (e && e->percent >= 0) {
- printf(" Re%s Status : %d%% complete\n",
- (st && st->sb && info->reshape_active)?
- "shape":"build",
- e->percent);
+ static char *sync_action[] = {"Rebuild", "Resync", "Reshape", "Check"};
+ printf(" %7s Status : %d%% complete\n", sync_action[e->resync], e->percent);
is_rebuilding = 1;
}
free_mdstat(ms);
@@ -430,12 +430,9 @@ This is pretty boring
printf(" Reshape pos'n : %llu%s\n", (unsigned long long) info->reshape_progress<<9,
human_size((unsigned long long)info->reshape_progress<<9));
#endif
- if (info->delta_disks > 0)
+ if (info->delta_disks != 0)
printf(" Delta Devices : %d, (%d->%d)\n",
info->delta_disks, array.raid_disks - info->delta_disks, array.raid_disks);
- if (info->delta_disks < 0)
- printf(" Delta Devices : %d, (%d->%d)\n",
- info->delta_disks, array.raid_disks, array.raid_disks + info->delta_disks);
if (info->new_level != array.level) {
char *c = map_num(pers, info->new_level);
printf(" New Level : %s\n", c?c:"-unknown-");
@@ -590,6 +587,8 @@ This is pretty boring
out:
close(fd);
free(subarray);
+ free(avail);
+ sysfs_free(sra);
return rv;
}
diff --git a/Grow.c b/Grow.c
index 6e31b94b..b2c13602 100644
--- a/Grow.c
+++ b/Grow.c
@@ -35,6 +35,67 @@
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
#endif
+int restore_backup(struct supertype *st,
+ struct mdinfo *content,
+ int working_disks,
+ int next_spare,
+ char *backup_file,
+ int verbose)
+{
+ int i;
+ int *fdlist;
+ struct mdinfo *dev;
+ int err;
+ int disk_count = next_spare + working_disks;
+
+ dprintf("Called restore_backup()\n");
+ fdlist = malloc(sizeof(int) * disk_count);
+ if (fdlist == NULL) {
+ fprintf(stderr,
+ Name ": cannot allocate memory for disk list\n");
+ return 1;
+ }
+ for (i = 0; i < next_spare; i++)
+ fdlist[i] = -1;
+ for (dev = content->devs; dev; dev = dev->next) {
+ char buf[22];
+ int fd;
+ sprintf(buf, "%d:%d",
+ dev->disk.major,
+ dev->disk.minor);
+ fd = dev_open(buf, O_RDWR);
+
+ if (dev->disk.raid_disk >= 0)
+ fdlist[dev->disk.raid_disk] = fd;
+ else
+ fdlist[next_spare++] = fd;
+ }
+
+ if (st->ss->external && st->ss->recover_backup)
+ err = st->ss->recover_backup(st, content);
+ else
+ err = Grow_restart(st, content, fdlist, next_spare,
+ backup_file, verbose > 0);
+
+ while (next_spare > 0) {
+ next_spare--;
+ if (fdlist[next_spare] >= 0)
+ close(fdlist[next_spare]);
+ }
+ free(fdlist);
+ if (err) {
+ fprintf(stderr, Name ": Failed to restore critical"
+ " section for reshape - sorry.\n");
+ if (!backup_file)
+ fprintf(stderr, Name ": Possibly you need"
+ " to specify a --backup-file\n");
+ return 1;
+ }
+
+ dprintf("restore_backup() returns status OK.\n");
+ return 0;
+}
+
int Grow_Add_device(char *devname, int fd, char *newdev)
{
/* Add a device to an active array.
@@ -73,6 +134,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
fprintf(stderr, Name ": Cannot grow linear sub-arrays yet\n");
free(subarray);
free(st);
+ return 1;
}
nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT);
@@ -112,7 +174,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
return 1;
}
fd2 = dev_open(dv, O_RDWR);
- if (!fd2) {
+ if (fd2 < 0) {
fprintf(stderr, Name ": cannot open device file %s\n", dv);
close(nfd);
free(st);
@@ -313,12 +375,18 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
return 1;
}
if (strcmp(file, "internal") == 0) {
+ int rv;
int d;
+ int offset_setable = 0;
+ struct mdinfo *mdi;
if (st->ss->add_internal_bitmap == NULL) {
fprintf(stderr, Name ": Internal bitmaps not supported "
"with %s metadata\n", st->ss->name);
return 1;
}
+ mdi = sysfs_read(fd, -1, GET_BITMAP_LOCATION);
+ if (mdi)
+ offset_setable = 1;
for (d=0; d< st->max_devs; d++) {
mdu_disk_info_t disk;
char *dv;
@@ -339,11 +407,13 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
if (st->ss->add_internal_bitmap(
st,
&chunk, delay, write_behind,
- bitmapsize, 0, major)
+ bitmapsize, offset_setable,
+ major)
)
st->ss->write_bitmap(st, fd2);
else {
- fprintf(stderr, Name ": failed to create internal bitmap - chunksize problem.\n");
+ fprintf(stderr, Name ": failed "
+ "to create internal bitmap - chunksize problem.\n");
close(fd2);
return 1;
}
@@ -351,8 +421,16 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
close(fd2);
}
}
- array.state |= (1<<MD_SB_BITMAP_PRESENT);
- if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
+ if (offset_setable) {
+ st->ss->getinfo_super(st, mdi, NULL);
+ sysfs_init(mdi, fd, -1);
+ rv = sysfs_set_num(mdi, NULL, "bitmap/location",
+ mdi->bitmap_offset);
+ } else {
+ array.state |= (1<<MD_SB_BITMAP_PRESENT);
+ rv = ioctl(fd, SET_ARRAY_INFO, &array);
+ }
+ if (rv < 0) {
if (errno == EBUSY)
fprintf(stderr, Name
": Cannot add bitmap while array is"
@@ -380,13 +458,14 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
dv = map_dev(disk.major, disk.minor, 1);
if (!dv) continue;
fd2 = dev_open(dv, O_RDONLY);
- if (fd2 >= 0 &&
- st->ss->load_super(st, fd2, NULL) == 0) {
+ if (fd2 >= 0) {
+ if (st->ss->load_super(st, fd2, NULL) == 0) {
+ close(fd2);
+ st->ss->uuid_from_super(st, uuid);
+ break;
+ }
close(fd2);
- st->ss->uuid_from_super(st, uuid);
- break;
}
- close(fd2);
}
if (d == max_devs) {
fprintf(stderr, Name ": cannot find UUID for array!\n");
@@ -417,7 +496,6 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
return 0;
}
-
/*
* When reshaping an array we might need to backup some data.
* This is written to all spares with a 'super_block' describing it.
@@ -463,7 +541,7 @@ static int check_idle(struct supertype *st)
char container[40];
struct mdstat_ent *ent, *e;
int is_idle = 1;
-
+
fmt_devname(container, container_dev);
ent = mdstat_read(0, 0);
for (e = ent ; e; e = e->next) {
@@ -486,7 +564,7 @@ static int freeze_container(struct supertype *st)
if (!check_idle(st))
return -1;
-
+
fmt_devname(container, container_dev);
if (block_monitor(container, 1)) {
@@ -502,7 +580,7 @@ static void unfreeze_container(struct supertype *st)
int container_dev = (st->container_dev != NoMdDev
? st->container_dev : st->devnum);
char container[40];
-
+
fmt_devname(container, container_dev);
unblock_monitor(container, 1);
@@ -635,15 +713,24 @@ static int subarray_set_num(char *container, struct mdinfo *sra, char *name, int
return rc;
}
-int start_reshape(struct mdinfo *sra, int already_running)
+int start_reshape(struct mdinfo *sra, int already_running,
+ int before_data_disks, int data_disks)
{
int err;
+ unsigned long long sync_max_to_set;
+
sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
- err = sysfs_set_num(sra, NULL, "suspend_hi", 0);
- err = err ?: sysfs_set_num(sra, NULL, "suspend_lo", 0);
+ err = sysfs_set_num(sra, NULL, "suspend_hi", sra->reshape_progress);
+ err = err ?: sysfs_set_num(sra, NULL, "suspend_lo",
+ sra->reshape_progress);
+ if (before_data_disks <= data_disks)
+ sync_max_to_set = sra->reshape_progress / data_disks;
+ else
+ sync_max_to_set = (sra->component_size * data_disks
+ - sra->reshape_progress) / data_disks;
if (!already_running)
- sysfs_set_num(sra, NULL, "sync_min", 0);
- err = err ?: sysfs_set_num(sra, NULL, "sync_max", 0);
+ sysfs_set_num(sra, NULL, "sync_min", sync_max_to_set);
+ err = err ?: sysfs_set_num(sra, NULL, "sync_max", sync_max_to_set);
if (!already_running)
err = err ?: sysfs_set_str(sra, NULL, "sync_action", "reshape");
@@ -1268,7 +1355,7 @@ char *analyse_change(struct mdinfo *info, struct reshape *re)
if (re->after.data_disks < re->before.data_disks &&
get_linux_version() < 2006030)
- return "reshape to fewer devices is not supported before 2.6.32 - sorry.";
+ return "reshape to fewer devices is not supported before 2.6.30 - sorry.";
re->backup_blocks = compute_backup_blocks(
info->new_chunk, info->array.chunk_size,
@@ -1283,13 +1370,14 @@ static int reshape_array(char *container, int fd, char *devname,
struct supertype *st, struct mdinfo *info,
int force, struct mddev_dev *devlist,
char *backup_file, int quiet, int forked,
- int restart);
+ int restart, int freeze_reshape);
static int reshape_container(char *container, char *devname,
- struct supertype *st,
+ int mdfd,
+ struct supertype *st,
struct mdinfo *info,
int force,
char *backup_file,
- int quiet, int restart);
+ int quiet, int restart, int freeze_reshape);
int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
long long size,
@@ -1401,6 +1489,36 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
return 1;
}
+ /* check if operation is supported for metadata handler */
+ if (st->ss->container_content) {
+ struct mdinfo *cc = NULL;
+ struct mdinfo *content = NULL;
+
+ cc = st->ss->container_content(st, subarray);
+ for (content = cc; content ; content = content->next) {
+ int allow_reshape = 1;
+
+ /* check if reshape is allowed based on metadata
+ * indications stored in content->array.state
+ */
+ if (content->array.state & (1<<MD_SB_BLOCK_VOLUME))
+ allow_reshape = 0;
+ if (content->array.state
+ & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE))
+ allow_reshape = 0;
+ if (!allow_reshape) {
+ fprintf(stderr, Name
+ " cannot reshape arrays in"
+ " container with unsupported"
+ " metadata: %s(%s)\n",
+ devname, container_buf);
+ sysfs_free(cc);
+ free(subarray);
+ return 1;
+ }
+ }
+ sysfs_free(cc);
+ }
if (mdmon_running(container_dev))
st->update_tail = &st->updates;
}
@@ -1415,15 +1533,15 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
Name ": Need %d spare%s to avoid degraded array,"
" and only have %d.\n"
" Use --force to over-ride this check.\n",
- raid_disks - array.raid_disks,
- raid_disks - array.raid_disks == 1 ? "" : "s",
+ raid_disks - array.raid_disks,
+ raid_disks - array.raid_disks == 1 ? "" : "s",
array.spare_disks + added_disks);
return 1;
}
sra = sysfs_read(fd, 0, GET_LEVEL | GET_DISKS | GET_DEVS
| GET_STATE | GET_VERSION);
- if (sra) {
+ if (sra) {
if (st->ss->external && subarray == NULL) {
array.level = LEVEL_CONTAINER;
sra->array.level = LEVEL_CONTAINER;
@@ -1436,16 +1554,19 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
frozen = freeze(st);
if (frozen < -1) {
/* freeze() already spewed the reason */
+ sysfs_free(sra);
return 1;
} else if (frozen < 0) {
fprintf(stderr, Name ": %s is performing resync/recovery and cannot"
" be reshaped\n", devname);
+ sysfs_free(sra);
return 1;
}
/* ========= set size =============== */
if (size >= 0 && (size == 0 || size != array.size)) {
long long orig_size = get_component_size(fd)/2;
+ long long min_csize;
struct mdinfo *mdi;
if (orig_size == 0)
@@ -1461,10 +1582,40 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
/* Update the size of each member device in case
* they have been resized. This will never reduce
* below the current used-size. The "size" attribute
- * understand '0' to mean 'max'.
+ * understands '0' to mean 'max'.
*/
- for (mdi = sra->devs; mdi; mdi = mdi->next)
- sysfs_set_num(sra, mdi, "size", size);
+ min_csize = 0;
+ for (mdi = sra->devs; mdi; mdi = mdi->next) {
+ if (sysfs_set_num(sra, mdi, "size", size) < 0)
+ break;
+ if (array.not_persistent == 0 &&
+ array.major_version == 0 &&
+ get_linux_version() < 3001000) {
+ /* Dangerous to allow size to exceed 2TB */
+ unsigned long long csize;
+ if (sysfs_get_ll(sra, mdi, "size", &csize) == 0) {
+ if (csize >= 2ULL*1024*1024*1024)
+ csize = 2ULL*1024*1024*1024;
+ if ((min_csize == 0 || (min_csize
+ > (long long)csize)))
+ min_csize = csize;
+ }
+ }
+ }
+ if (min_csize && size > min_csize) {
+ fprintf(stderr, Name ": Cannot safely make this array "
+ "use more than 2TB per device on this kernel.\n");
+ rv = 1;
+ goto release;
+ }
+ if (min_csize && size == 0) {
+ /* Don't let the kernel choose a size - it will get
+ * it wrong
+ */
+ fprintf(stderr, Name ": Limited v0.90 array to "
+ "2TB per device\n");
+ size = min_csize;
+ }
array.size = size;
if (array.size != size) {
@@ -1487,14 +1638,14 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
sync_metadata(st);
fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
devname, strerror(err));
- if (err == EBUSY &&
+ if (err == EBUSY &&
(array.state & (1<<MD_SB_BITMAP_PRESENT)))
fprintf(stderr, " Bitmap must be removed before size can be changed\n");
rv = 1;
goto release;
}
if (assume_clean) {
- /* This will fail on kernels newer than 2.6.40 unless
+ /* This will fail on kernels newer than 3.0 unless
* a backport has been arranged.
*/
if (sra == NULL ||
@@ -1537,11 +1688,11 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
/* ========= check for Raid10/Raid1 -> Raid0 conversion ===============
* current implementation assumes that following conditions must be met:
* - RAID10:
- * - far_copies == 1
- * - near_copies == 2
+ * - far_copies == 1
+ * - near_copies == 2
*/
if ((level == 0 && array.level == 10 && sra &&
- array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) ||
+ array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) ||
(level == 0 && array.level == 1 && sra)) {
int err;
err = remove_disks_for_takeover(st, sra, array.layout);
@@ -1552,8 +1703,12 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
rv = 1;
goto release;
}
- /* FIXME this is added with no justification - why is it here */
- ping_monitor(container);
+ /* Make sure mdmon has seen the device removal
+ * and updated metadata before we continue with
+ * level change
+ */
+ if (container)
+ ping_monitor(container);
}
memset(&info, 0, sizeof(info));
@@ -1587,7 +1742,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
goto release;
}
} else if (strcmp(layout_str, "normalise") == 0 ||
- strcmp(layout_str, "normalize") == 0) {
+ strcmp(layout_str, "normalize") == 0) {
/* If we have a -6 RAID6 layout, remove the '-6'. */
info.new_layout = UnSet;
if (info.array.level == 6 && info.new_level == UnSet) {
@@ -1668,8 +1823,8 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
* number of devices (On-Line Capacity Expansion) must be
* performed at the level of the container
*/
- rv = reshape_container(container, devname, st, &info,
- force, backup_file, quiet, 0);
+ rv = reshape_container(container, devname, -1, st, &info,
+ force, backup_file, quiet, 0, 0);
frozen = 0;
} else {
/* get spare devices from external metadata
@@ -1697,10 +1852,11 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
}
sync_metadata(st);
rv = reshape_array(container, fd, devname, st, &info, force,
- devlist, backup_file, quiet, 0, 0);
+ devlist, backup_file, quiet, 0, 0, 0);
frozen = 0;
}
release:
+ sysfs_free(sra);
if (frozen > 0)
unfreeze(st);
return rv;
@@ -1710,7 +1866,7 @@ static int reshape_array(char *container, int fd, char *devname,
struct supertype *st, struct mdinfo *info,
int force, struct mddev_dev *devlist,
char *backup_file, int quiet, int forked,
- int restart)
+ int restart, int freeze_reshape)
{
struct reshape reshape;
int spares_needed;
@@ -1724,8 +1880,8 @@ static int reshape_array(char *container, int fd, char *devname,
struct mddev_dev *dv;
int added_disks;
- int *fdlist;
- unsigned long long *offsets;
+ int *fdlist = NULL;
+ unsigned long long *offsets = NULL;
int d;
int nrdisks;
int err;
@@ -1750,10 +1906,12 @@ static int reshape_array(char *container, int fd, char *devname,
if (info->reshape_active) {
int new_level = info->new_level;
info->new_level = UnSet;
- info->array.raid_disks -= info->delta_disks;
+ if (info->delta_disks > 0)
+ info->array.raid_disks -= info->delta_disks;
msg = analyse_change(info, &reshape);
info->new_level = new_level;
- info->array.raid_disks += info->delta_disks;
+ if (info->delta_disks > 0)
+ info->array.raid_disks += info->delta_disks;
if (!restart)
/* Make sure the array isn't read-only */
ioctl(fd, RESTART_ARRAY_RW, 0);
@@ -1767,7 +1925,7 @@ static int reshape_array(char *container, int fd, char *devname,
(reshape.level != info->array.level ||
reshape.before.layout != info->array.layout ||
reshape.before.data_disks + reshape.parity
- != info->array.raid_disks - info->delta_disks)) {
+ != info->array.raid_disks - max(0, info->delta_disks))) {
fprintf(stderr, Name ": reshape info is not in native format -"
" cannot continue.\n");
goto release;
@@ -1801,7 +1959,7 @@ static int reshape_array(char *container, int fd, char *devname,
" and only have %d.\n"
" Use --force to over-ride this check.\n",
spares_needed,
- spares_needed == 1 ? "" : "s",
+ spares_needed == 1 ? "" : "s",
info->array.spare_disks + added_disks);
goto release;
}
@@ -1815,7 +1973,7 @@ static int reshape_array(char *container, int fd, char *devname,
Name ": Need %d spare%s to create working array,"
" and only have %d.\n",
spares_needed,
- spares_needed == 1 ? "" : "s",
+ spares_needed == 1 ? "" : "s",
info->array.spare_disks + added_disks);
goto release;
}
@@ -1831,7 +1989,7 @@ static int reshape_array(char *container, int fd, char *devname,
err = errno;
fprintf(stderr, Name ": %s: could not set level to %s\n",
devname, c);
- if (err == EBUSY &&
+ if (err == EBUSY &&
(info->array.state & (1<<MD_SB_BITMAP_PRESENT)))
fprintf(stderr, " Bitmap must be removed"
" before level can be changed\n");
@@ -1839,7 +1997,7 @@ static int reshape_array(char *container, int fd, char *devname,
}
if (!quiet)
fprintf(stderr, Name ": level of %s changed to %s\n",
- devname, c);
+ devname, c);
orig_level = array.level;
sysfs_freeze_array(info);
@@ -1893,7 +2051,7 @@ static int reshape_array(char *container, int fd, char *devname,
*/
if (devlist)
Manage_subdevs(devname, fd, devlist, !quiet,
- 0,NULL);
+ 0,NULL, 0);
if (reshape.backup_blocks == 0) {
/* No restriping needed, but we might need to impose
@@ -1947,7 +2105,7 @@ static int reshape_array(char *container, int fd, char *devname,
* 1/ The array will shrink.
* We need to ensure the reshape will pause before reaching
* the 'critical section'. We also need to fork and wait for
- * that to happen. When it does we
+ * that to happen. When it does we
* suspend/backup/complete/unfreeze
*
* 2/ The array will not change size.
@@ -2000,7 +2158,7 @@ started:
* unit. The number we have so far is just a minimum
*/
blocks = reshape.backup_blocks;
- if (reshape.before.data_disks ==
+ if (reshape.before.data_disks ==
reshape.after.data_disks) {
/* Make 'blocks' bigger for better throughput, but
* not so big that we reject it below.
@@ -2114,7 +2272,7 @@ started:
Name ": Cannot set device shape for %s: %s\n",
devname, strerror(errno));
- if (err == EBUSY &&
+ if (err == EBUSY &&
(array.state & (1<<MD_SB_BITMAP_PRESENT)))
fprintf(stderr,
" Bitmap must be removed before"
@@ -2129,18 +2287,18 @@ started:
int err = 0;
if (sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
err = errno;
- if (!err && sysfs_set_num(sra, NULL, "layout",
- reshape.after.layout) < 0)
+ if (!err && sysfs_set_num(sra, NULL, "layout",
+ reshape.after.layout) < 0)
err = errno;
if (!err && subarray_set_num(container, sra, "raid_disks",
- reshape.after.data_disks +
- reshape.parity) < 0)
+ reshape.after.data_disks +
+ reshape.parity) < 0)
err = errno;
if (err) {
fprintf(stderr, Name ": Cannot set device shape for %s\n",
devname);
- if (err == EBUSY &&
+ if (err == EBUSY &&
(array.state & (1<<MD_SB_BITMAP_PRESENT)))
fprintf(stderr,
" Bitmap must be removed before"
@@ -2149,9 +2307,10 @@ started:
}
}
- err = start_reshape(sra, restart);
+ err = start_reshape(sra, restart, reshape.before.data_disks,
+ reshape.after.data_disks);
if (err) {
- fprintf(stderr,
+ fprintf(stderr,
Name ": Cannot %s reshape for %s\n",
restart ? "continue" : "start",
devname);
@@ -2159,6 +2318,15 @@ started:
}
if (restart)
sysfs_set_str(sra, NULL, "array_state", "active");
+ if (freeze_reshape) {
+ free(fdlist);
+ free(offsets);
+ sysfs_free(sra);
+ fprintf(stderr, Name ": Reshape has to be continued from"
+ " location %llu when root fileststem has been mounted\n",
+ sra->reshape_progress);
+ return 1;
+ }
/* Now we just need to kick off the reshape and watch, while
* handling backups of the data...
@@ -2171,8 +2339,12 @@ started:
abort_reshape(sra);
goto release;
default:
+ free(fdlist);
+ free(offsets);
+ sysfs_free(sra);
return 0;
case 0:
+ map_fork();
break;
}
@@ -2197,6 +2369,9 @@ started:
d - odisks, fdlist+odisks,
offsets+odisks);
+ free(fdlist);
+ free(offsets);
+
if (backup_file && done)
unlink(backup_file);
if (!done) {
@@ -2212,6 +2387,7 @@ started:
/* no need to wait for the reshape to finish as
* there is nothing more to do.
*/
+ sysfs_free(sra);
exit(0);
}
wait_reshape(sra);
@@ -2276,31 +2452,39 @@ started:
st->update_tail = NULL;
}
out:
+ sysfs_free(sra);
if (forked)
return 0;
unfreeze(st);
exit(0);
release:
+ free(fdlist);
+ free(offsets);
if (orig_level != UnSet && sra) {
c = map_num(pers, orig_level);
if (c && sysfs_set_str(sra, NULL, "level", c) == 0)
fprintf(stderr, Name ": aborting level change\n");
}
+ sysfs_free(sra);
if (!forked)
unfreeze(st);
return 1;
}
+/* mdfd handle is passed to be closed in child process (after fork).
+ */
int reshape_container(char *container, char *devname,
- struct supertype *st,
+ int mdfd,
+ struct supertype *st,
struct mdinfo *info,
int force,
char *backup_file,
- int quiet, int restart)
+ int quiet, int restart, int freeze_reshape)
{
struct mdinfo *cc = NULL;
int rv = restart;
+ int last_devnum = -1;
/* component_size is not meaningful for a container,
* so pass '-1' meaning 'no change'
@@ -2326,12 +2510,20 @@ int reshape_container(char *container, char *devname,
unfreeze(st);
return 1;
default: /* parent */
- printf(Name ": multi-array reshape continues in background\n");
+ if (!freeze_reshape)
+ printf(Name ": multi-array reshape continues"
+ " in background\n");
return 0;
case 0: /* child */
+ map_fork();
break;
}
+ /* close unused handle in child process
+ */
+ if (mdfd > -1)
+ close(mdfd);
+
while(1) {
/* For each member array with reshape_active,
* we need to perform the reshape.
@@ -2368,24 +2560,59 @@ int reshape_container(char *container, char *devname,
if (!content)
break;
- fd = open_dev(mdstat->devnum);
- if (fd < 0)
- break;
adev = map_dev(dev2major(mdstat->devnum),
dev2minor(mdstat->devnum),
0);
if (!adev)
adev = content->text_version;
+ fd = open_dev(mdstat->devnum);
+ if (fd < 0) {
+ printf(Name ": Device %s cannot be opened for reshape.",
+ adev);
+ break;
+ }
+
+ if (last_devnum == mdstat->devnum) {
+ /* Do not allow for multiple reshape_array() calls for
+ * the same array.
+ * It can happen when reshape_array() returns without
+ * error, when reshape is not finished (wrong reshape
+ * starting/continuation conditions). Mdmon doesn't
+ * switch to next array in container and reentry
+ * conditions for the same array occur.
+ * This is possibly interim until the behaviour of
+ * reshape_array() is resolved.
+ */
+ printf(Name ": Multiple reshape execution detected for "
+ "device %s.", adev);
+ close(fd);
+ break;
+ }
+ last_devnum = mdstat->devnum;
+
sysfs_init(content, fd, mdstat->devnum);
rv = reshape_array(container, fd, adev, st,
content, force, NULL,
- backup_file, quiet, 1, restart);
+ backup_file, quiet, 1, restart,
+ freeze_reshape);
close(fd);
+
+ if (freeze_reshape) {
+ sysfs_free(cc);
+ exit(0);
+ }
+
restart = 0;
if (rv)
break;
+ rv = !mdmon_running(devname2devnum(container));
+ if (rv) {
+ printf(Name ": Mdmon is not found. "
+ "Cannot continue container reshape.\n");
+ break;
+ }
}
if (!rv)
unfreeze(st);
@@ -2414,7 +2641,7 @@ int reshape_container(char *container, char *devname,
* suspend/backup/allow always come together
* wait/resume/discard do too.
* For the same-size case we have two backups to improve flow.
- *
+ *
*/
int progress_reshape(struct mdinfo *info, struct reshape *reshape,
@@ -2559,7 +2786,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
* this much.
*/
target = 64*1024*2 * min(reshape->before.data_disks,
- reshape->after.data_disks);
+ reshape->after.data_disks);
target /= reshape->backup_blocks;
if (target < 2)
target = 2;
@@ -2696,7 +2923,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
- completed;
}
*reshape_completed = completed;
-
+
close(fd);
/* We return the need_backup flag. Caller will decide
@@ -2725,15 +2952,21 @@ check_progress:
int rv = -2;
tv.tv_sec = 10;
tv.tv_usec = 0;
- while (fd >= 0 && rv < 0) {
+ while (fd >= 0 && rv < 0 && tv.tv_sec > 0) {
fd_set rfds;
FD_ZERO(&rfds);
FD_SET(fd, &rfds);
if (select(fd+1, NULL, NULL, &rfds, &tv) != 1)
break;
- if (sysfs_fd_get_ll(fd, &completed) >= 0)
+ switch (sysfs_fd_get_ll(fd, &completed)) {
+ case 0:
/* all good again */
rv = 1;
+ break;
+ case -2: /* read error - abort */
+ tv.tv_sec = 0;
+ break;
+ }
}
if (fd >= 0)
close(fd);
@@ -2750,7 +2983,6 @@ check_progress:
}
}
-
/* FIXME return status is never checked */
static int grow_backup(struct mdinfo *sra,
unsigned long long offset, /* per device */
@@ -2820,7 +3052,7 @@ static int grow_backup(struct mdinfo *sra,
else
lseek64(destfd[i], destoffsets[i], 0);
- rv = save_stripes(sources, offsets,
+ rv = save_stripes(sources, offsets,
disks, chunk, level, layout,
dests, destfd,
offset*512*odata, stripes * chunk * odata,
@@ -2868,11 +3100,11 @@ static int grow_backup(struct mdinfo *sra,
* every works.
*/
/* FIXME return value is often ignored */
-static int forget_backup(
- int dests, int *destfd, unsigned long long *destoffsets,
- int part)
+static int forget_backup(int dests, int *destfd,
+ unsigned long long *destoffsets,
+ int part)
{
- /*
+ /*
* Erase backup 'part' (which is 0 or 1)
*/
int i;
@@ -2896,7 +3128,7 @@ static int forget_backup(
if ((unsigned long long)lseek64(destfd[i], destoffsets[i]-4096, 0) !=
destoffsets[i]-4096)
rv = -1;
- if (rv == 0 &&
+ if (rv == 0 &&
write(destfd[i], &bsb, 512) != 512)
rv = -1;
fsync(destfd[i]);
@@ -2932,7 +3164,7 @@ static void validate(int afd, int bfd, unsigned long long offset)
fail("magic is bad");
if (memcmp(bsb2.magic, "md_backup_data-2", 16) == 0 &&
bsb2.sb_csum2 != bsb_csum((char*)&bsb2,
- ((char*)&bsb2.sb_csum2)-((char*)&bsb2)))
+ ((char*)&bsb2.sb_csum2)-((char*)&bsb2)))
fail("second csum bad");
if (__le64_to_cpu(bsb2.devstart)*512 != offset)
@@ -2962,7 +3194,7 @@ static void validate(int afd, int bfd, unsigned long long offset)
if ((unsigned long long)read(afd, abuf, len) != len)
fail("read first from array failed");
if (memcmp(bbuf, abuf, len) != 0) {
- #if 0
+#if 0
int i;
printf("offset=%llu len=%llu\n",
(unsigned long long)__le64_to_cpu(bsb2.arraystart)*512, len);
@@ -2971,7 +3203,7 @@ static void validate(int afd, int bfd, unsigned long long offset)
printf("first diff byte %d\n", i);
break;
}
- #endif
+#endif
fail("data1 compare failed");
}
}
@@ -3322,7 +3554,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
nonew:
if (verbose)
fprintf(stderr, Name
- ": backup-metadata found on %s but is not needed\n", devname);
+ ": backup-metadata found on %s but is not needed\n", devname);
continue; /* No new data here */
}
} else {
@@ -3357,7 +3589,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
second_fail:
if (verbose)
fprintf(stderr, Name
- ": Failed to verify secondary backup-metadata block on %s\n",
+ ": Failed to verify secondary backup-metadata block on %s\n",
devname);
continue; /* Cannot seek */
}
@@ -3398,9 +3630,10 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
if (verbose)
fprintf(stderr, Name ": Error restoring backup from %s\n",
devname);
+ free(offsets);
return 1;
}
-
+
if (bsb.magic[15] == '2' &&
restore_stripes(fdlist, offsets,
info->array.raid_disks,
@@ -3415,9 +3648,11 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
if (verbose)
fprintf(stderr, Name ": Error restoring second backup from %s\n",
devname);
+ free(offsets);
return 1;
}
+ free(offsets);
/* Ok, so the data is restored. Let's update those superblocks. */
@@ -3514,37 +3749,216 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
return 1;
}
-int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
- char *backup_file)
+int Grow_continue_command(char *devname, int fd,
+ char *backup_file, int verbose)
{
+ int ret_val = 0;
+ struct supertype *st = NULL;
+ struct mdinfo *content = NULL;
+ struct mdinfo array;
+ char *subarray = NULL;
+ struct mdinfo *cc = NULL;
+ struct mdstat_ent *mdstat = NULL;
char buf[40];
- char *container = NULL;
- int err;
+ int cfd = -1;
+ int fd2 = -1;
+ char *ep;
+ unsigned long long position;
- err = sysfs_set_str(info, NULL, "array_state", "readonly");
- if (err)
- return err;
- if (st->ss->external) {
- fmt_devname(buf, st->container_dev);
- container = buf;
- freeze(st);
+ dprintf("Grow continue from command line called for %s\n",
+ devname);
- if (!mdmon_running(st->container_dev))
- start_mdmon(st->container_dev);
- ping_monitor_by_id(st->container_dev);
+ st = super_by_fd(fd, &subarray);
+ if (!st || !st->ss) {
+ fprintf(stderr,
+ Name ": Unable to determine metadata format for %s\n",
+ devname);
+ return 1;
+ }
+ dprintf("Grow continue is run for ");
+ if (st->ss->external == 0) {
+ dprintf("native array (%s)\n", devname);
+ if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
+ fprintf(stderr, Name ": %s is not an active md array -"
+ " aborting\n", devname);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+ content = &array;
+ sysfs_init(content, fd, st->devnum);
+ } else {
+ int container_dev;
+ if (subarray) {
+ dprintf("subarray (%s)\n", subarray);
+ container_dev = st->container_dev;
+ cfd = open_dev_excl(st->container_dev);
+ } else {
+ container_dev = st->devnum;
+ close(fd);
+ cfd = open_dev_excl(st->devnum);
+ dprintf("container (%i)\n", container_dev);
+ fd = cfd;
+ }
+ if (cfd < 0) {
+ fprintf(stderr, Name ": Unable to open container "
+ "for %s\n", devname);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+ fmt_devname(buf, container_dev);
- if (info->reshape_active == 2) {
- int cfd = open_dev(st->container_dev);
- if (cfd < 0)
- return 1;
- st->ss->load_container(st, cfd, container);
- close(cfd);
- return reshape_container(container, NULL,
- st, info, 0, backup_file,
- 0, 1);
+ /* find the array under reshape in the container
+ */
+ ret_val = st->ss->load_container(st, cfd, NULL);
+ if (ret_val) {
+ fprintf(stderr,
+ Name ": Cannot read superblock for %s\n",
+ devname);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+
+ cc = st->ss->container_content(st, subarray);
+ for (content = cc; content ; content = content->next) {
+ char *array;
+ int allow_reshape = 1;
+
+ if (content->reshape_active == 0)
+ continue;
+ /* The decision about array or container wide
+ * reshape is taken in Grow_continue based on the
+ * content->reshape_active state, therefore we
+ * need to check_reshape based on
+ * reshape_active and subarray name
+ */
+ if (content->array.state & (1<<MD_SB_BLOCK_VOLUME))
+ allow_reshape = 0;
+ if (content->reshape_active == CONTAINER_RESHAPE &&
+ (content->array.state
+ & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE)))
+ allow_reshape = 0;
+
+ if (!allow_reshape) {
+ fprintf(stderr, Name
+ ": cannot continue reshape of an array"
+ " in container with unsupported"
+ " metadata: %s(%s)\n",
+ devname, buf);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+
+ array = strchr(content->text_version+1, '/')+1;
+ mdstat = mdstat_by_subdev(array, container_dev);
+ if (!mdstat)
+ continue;
+ break;
+ }
+ if (!content) {
+ fprintf(stderr,
+ Name ": Unable to determine reshaped "
+ "array for %s\n", devname);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+ fd2 = open_dev(mdstat->devnum);
+ if (fd2 < 0) {
+ fprintf(stderr, Name ": cannot open (md%i)\n",
+ mdstat->devnum);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+
+ sysfs_init(content, fd2, mdstat->devnum);
+
+ /* start mdmon in case it is not running
+ */
+ if (!mdmon_running(container_dev))
+ start_mdmon(container_dev);
+ ping_monitor(buf);
+
+ if (mdmon_running(container_dev))
+ st->update_tail = &st->updates;
+ else {
+ fprintf(stderr, Name ": No mdmon found. "
+ "Grow cannot continue.\n");
+ ret_val = 1;
+ goto Grow_continue_command_exit;
}
}
- return reshape_array(container, mdfd, "array", st, info, 1,
- NULL, backup_file, 0, 0, 1);
+
+ /* verify that array under reshape is started from
+ * correct position
+ */
+ ret_val = sysfs_get_str(content, NULL, "sync_max", buf, 40);
+ if (ret_val <= 0) {
+ fprintf(stderr, Name
+ ": cannot open verify reshape progress for %s (%i)\n",
+ content->sys_name, ret_val);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+ dprintf(Name ": Read sync_max sysfs entry is: %s\n", buf);
+ position = strtoull(buf, &ep, 0);
+ if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' ')) {
+ fprintf(stderr, Name ": Fatal error: array reshape was"
+ " not properly frozen\n");
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+ position *= get_data_disks(map_name(pers, mdstat->level),
+ content->new_layout,
+ content->array.raid_disks);
+ if (position != content->reshape_progress) {
+ fprintf(stderr, Name ": Fatal error: array reshape was"
+ " not properly frozen.\n");
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+
+ /* continue reshape
+ */
+ ret_val = Grow_continue(fd, st, content, backup_file, 0);
+
+Grow_continue_command_exit:
+ if (fd2 > -1)
+ close(fd2);
+ if (cfd > -1)
+ close(cfd);
+ st->ss->free_super(st);
+ free_mdstat(mdstat);
+ sysfs_free(cc);
+ free(subarray);
+
+ return ret_val;
+}
+
+int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
+ char *backup_file, int freeze_reshape)
+{
+ int ret_val = 2;
+
+ if (!info->reshape_active)
+ return ret_val;
+
+ if (st->ss->external) {
+ char container[40];
+ int cfd = open_dev(st->container_dev);
+
+ if (cfd < 0)
+ return 1;
+
+ fmt_devname(container, st->container_dev);
+ st->ss->load_container(st, cfd, container);
+ close(cfd);
+ ret_val = reshape_container(container, NULL, mdfd,
+ st, info, 0, backup_file,
+ 0, 1, freeze_reshape);
+ } else
+ ret_val = reshape_array(NULL, mdfd, "array", st, info, 1,
+ NULL, backup_file, 0, 0, 1,
+ freeze_reshape);
+
+ return ret_val;
}
diff --git a/Incremental.c b/Incremental.c
index 951c2a07..78c97129 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -44,16 +44,12 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
static int Incremental_container(struct supertype *st, char *devname,
char *homehost,
- int verbose, int runstop, int autof);
-
-static struct mddev_ident *search_mdstat(struct supertype *st,
- struct mdinfo *info,
- char *devname,
- int verbose, int *rvp);
+ int verbose, int runstop, int autof,
+ int freeze_reshape);
int Incremental(char *devname, int verbose, int runstop,
struct supertype *st, char *homehost, int require_homehost,
- int autof)
+ int autof, int freeze_reshape)
{
/* Add this device to an array, creating the array if necessary
* and starting the array if sensible or - if runstop>0 - if possible.
@@ -138,9 +134,16 @@ int Incremental(char *devname, int verbose, int runstop,
rv = st->ss->load_container(st, dfd, NULL);
close(dfd);
- if (!rv && st->ss->container_content)
- return Incremental_container(st, devname, homehost,
- verbose, runstop, autof);
+ if (!rv && st->ss->container_content) {
+ if (map_lock(&map))
+ fprintf(stderr, Name ": failed to get "
+ "exclusive lock on mapfile\n");
+ rv = Incremental_container(st, devname, homehost,
+ verbose, runstop, autof,
+ freeze_reshape);
+ map_unlock(&map);
+ return rv;
+ }
fprintf(stderr, Name ": %s is not part of an md array.\n",
devname);
@@ -205,7 +208,7 @@ int Incremental(char *devname, int verbose, int runstop,
st->ss->getinfo_super(st, &info, NULL);
/* 3/ Check if there is a match in mdadm.conf */
- match = search_mdstat(st, &info, devname, verbose, &rv);
+ match = conf_match(st, &info, devname, verbose, &rv);
if (!match && rv == 2)
goto out;
@@ -290,7 +293,7 @@ int Incremental(char *devname, int verbose, int runstop,
name_to_use, autof, trustworthy, chosen_name);
if (mdfd < 0)
- goto out;
+ goto out_unlock;
sysfs_init(&info, mdfd, 0);
@@ -298,7 +301,7 @@ int Incremental(char *devname, int verbose, int runstop,
fprintf(stderr, Name ": failed to set array info for %s: %s\n",
chosen_name, strerror(errno));
rv = 2;
- goto out;
+ goto out_unlock;
}
dinfo = info;
@@ -309,7 +312,7 @@ int Incremental(char *devname, int verbose, int runstop,
devname, chosen_name, strerror(errno));
ioctl(mdfd, STOP_ARRAY, 0);
rv = 2;
- goto out;
+ goto out_unlock;
}
sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
GET_OFFSET | GET_SIZE));
@@ -325,7 +328,7 @@ int Incremental(char *devname, int verbose, int runstop,
" --incremental reliably. Aborting.\n");
sysfs_free(sra);
rv = 2;
- goto out;
+ goto out_unlock;
}
info.array.working_disks = 1;
/* 6/ Make sure /var/run/mdadm.map contains this array. */
@@ -372,17 +375,23 @@ int Incremental(char *devname, int verbose, int runstop,
": not adding %s to active array (without --run) %s\n",
devname, chosen_name);
rv = 2;
- goto out;
+ goto out_unlock;
}
}
if (!sra) {
rv = 2;
- goto out;
+ goto out_unlock;
}
if (sra->devs) {
sprintf(dn, "%d:%d", sra->devs->disk.major,
sra->devs->disk.minor);
dfd2 = dev_open(dn, O_RDONLY);
+ if (dfd2 < 0) {
+ fprintf(stderr, Name
+ ": unable to open %s\n", devname);
+ rv = 2;
+ goto out_unlock;
+ }
st2 = dup_super(st);
if (st2->ss->load_super(st2, dfd2, NULL) ||
st->ss->compare_super(st, st2) != 0) {
@@ -392,7 +401,7 @@ int Incremental(char *devname, int verbose, int runstop,
devname, chosen_name);
close(dfd2);
rv = 2;
- goto out;
+ goto out_unlock;
}
close(dfd2);
st2->ss->getinfo_super(st2, &info2, NULL);
@@ -404,7 +413,7 @@ int Incremental(char *devname, int verbose, int runstop,
": unexpected difference between %s and %s.\n",
chosen_name, devname);
rv = 2;
- goto out;
+ goto out_unlock;
}
}
info2.disk.major = major(stb.st_rdev);
@@ -425,7 +434,7 @@ int Incremental(char *devname, int verbose, int runstop,
fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
devname, chosen_name, strerror(errno));
rv = 2;
- goto out;
+ goto out_unlock;
}
info.array.working_disks = 0;
for (d = sra->devs; d; d=d->next)
@@ -438,19 +447,24 @@ int Incremental(char *devname, int verbose, int runstop,
if (info.array.level == LEVEL_CONTAINER) {
int devnum = devnum; /* defined and used iff ->external */
/* Try to assemble within the container */
- map_unlock(&map);
- sysfs_uevent(&info, "change");
+ sysfs_uevent(sra, "change");
if (verbose >= 0)
fprintf(stderr, Name
- ": container %s now has %d devices\n",
- chosen_name, info.array.working_disks);
+ ": container %s now has %d device%s\n",
+ chosen_name, info.array.working_disks,
+ info.array.working_disks==1?"":"s");
wait_for(chosen_name, mdfd);
if (st->ss->external)
devnum = fd2devnum(mdfd);
+ if (st->ss->load_container)
+ rv = st->ss->load_container(st, mdfd, NULL);
close(mdfd);
sysfs_free(sra);
- rv = Incremental(chosen_name, verbose, runstop,
- NULL, homehost, require_homehost, autof);
+ if (!rv)
+ rv = Incremental_container(st, chosen_name, homehost,
+ verbose, runstop, autof,
+ freeze_reshape);
+ map_unlock(&map);
if (rv == 1)
/* Don't fail the whole -I if a subarray didn't
* have enough devices to start yet
@@ -478,9 +492,8 @@ int Incremental(char *devname, int verbose, int runstop,
fprintf(stderr, Name
": %s attached to %s, not enough to start (%d).\n",
devname, chosen_name, active_disks);
- map_unlock(&map);
rv = 0;
- goto out;
+ goto out_unlock;
}
/* 7b/ if yes, */
@@ -494,9 +507,8 @@ int Incremental(char *devname, int verbose, int runstop,
fprintf(stderr, Name
": %s attached to %s which is already active.\n",
devname, chosen_name);
- map_unlock(&map);
rv = 0;
- goto out;
+ goto out_unlock;
}
map_unlock(&map);
@@ -577,79 +589,9 @@ out:
if (sra)
sysfs_free(sra);
return rv;
-}
-
-static struct mddev_ident *search_mdstat(struct supertype *st,
- struct mdinfo *info,
- char *devname,
- int verbose, int *rvp)
-{
- struct mddev_ident *array_list, *match;
- array_list = conf_get_ident(NULL);
- match = NULL;
- for (; array_list; array_list = array_list->next) {
- if (array_list->uuid_set &&
- same_uuid(array_list->uuid, info->uuid, st->ss->swapuuid)
- == 0) {
- if (verbose >= 2 && array_list->devname)
- fprintf(stderr, Name
- ": UUID differs from %s.\n",
- array_list->devname);
- continue;
- }
- if (array_list->name[0] &&
- strcasecmp(array_list->name, info->name) != 0) {
- if (verbose >= 2 && array_list->devname)
- fprintf(stderr, Name
- ": Name differs from %s.\n",
- array_list->devname);
- continue;
- }
- if (array_list->devices &&
- !match_oneof(array_list->devices, devname)) {
- if (verbose >= 2 && array_list->devname)
- fprintf(stderr, Name
- ": Not a listed device for %s.\n",
- array_list->devname);
- continue;
- }
- if (array_list->super_minor != UnSet &&
- array_list->super_minor != info->array.md_minor) {
- if (verbose >= 2 && array_list->devname)
- fprintf(stderr, Name
- ": Different super-minor to %s.\n",
- array_list->devname);
- continue;
- }
- if (!array_list->uuid_set &&
- !array_list->name[0] &&
- !array_list->devices &&
- array_list->super_minor == UnSet) {
- if (verbose >= 2 && array_list->devname)
- fprintf(stderr, Name
- ": %s doesn't have any identifying information.\n",
- array_list->devname);
- continue;
- }
- /* FIXME, should I check raid_disks and level too?? */
-
- if (match) {
- if (verbose >= 0) {
- if (match->devname && array_list->devname)
- fprintf(stderr, Name
- ": we match both %s and %s - cannot decide which to use.\n",
- match->devname, array_list->devname);
- else
- fprintf(stderr, Name
- ": multiple lines in mdadm.conf match\n");
- }
- *rvp = 2;
- match = NULL;
- break;
- }
- match = array_list;
- }
- return match;
+out_unlock:
+ map_unlock(&map);
+ goto out;
}
static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
@@ -944,8 +886,10 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
* to obtain minimum spare size */
struct supertype *st3 = dup_super(st2);
int mdfd = open_dev(mp->devnum);
- if (!mdfd)
+ if (mdfd < 0) {
+ free(st3);
goto next;
+ }
if (st3->ss->load_container &&
!st3->ss->load_container(st3, mdfd, mp->path)) {
component_size = st3->ss->min_acceptable_spare_size(st3);
@@ -1035,7 +979,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
close(dfd);
*dfdp = -1;
rv = Manage_subdevs(chosen->sys_name, mdfd, &devlist,
- -1, 0, NULL);
+ -1, 0, NULL, 0);
close(mdfd);
}
if (verbose > 0) {
@@ -1048,6 +992,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
}
sysfs_free(chosen);
}
+ map_unlock(&map);
return rv;
}
@@ -1184,6 +1129,8 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
close(fd);
}
+ closedir(dir);
+
if (!chosen)
return 1;
@@ -1384,6 +1331,7 @@ int IncrementalScan(int verbose)
strerror(errno));
rv = 1;
}
+ sysfs_free(sra);
}
}
return rv;
@@ -1416,7 +1364,7 @@ static char *container2devname(char *devname)
static int Incremental_container(struct supertype *st, char *devname,
char *homehost, int verbose,
- int runstop, int autof)
+ int runstop, int autof, int freeze_reshape)
{
/* Collect the contents of this container and for each
* array, choose a device name and assemble the array.
@@ -1433,6 +1381,8 @@ static int Incremental_container(struct supertype *st, char *devname,
struct map_ent *smp;
int suuid[4];
int sfd;
+ int ra_blocked = 0;
+ int ra_all = 0;
st->ss->getinfo_super(st, &info, NULL);
@@ -1445,7 +1395,7 @@ static int Incremental_container(struct supertype *st, char *devname,
return 0;
}
- match = search_mdstat(st, &info, devname, verbose, &rv);
+ match = conf_match(st, &info, devname, verbose, &rv);
if (match == NULL && rv == 2)
return rv;
@@ -1460,24 +1410,23 @@ static int Incremental_container(struct supertype *st, char *devname,
trustworthy = FOREIGN;
list = st->ss->container_content(st, NULL);
- if (map_lock(&map))
- fprintf(stderr, Name ": failed to get exclusive lock on "
- "mapfile\n");
- /* do not assemble arrays that might have bad blocks */
- if (list->array.state & (1<<MD_SB_BBM_ERRORS)) {
- fprintf(stderr, Name ": BBM log found in metadata. "
- "Cannot activate array(s).\n");
- /* free container data and exit */
- sysfs_free(list);
- return 2;
- }
-
+ /* when nothing to activate - quit */
+ if (list == NULL)
+ return 0;
for (ra = list ; ra ; ra = ra->next) {
int mdfd;
char chosen_name[1024];
struct map_ent *mp;
struct mddev_ident *match = NULL;
+ ra_all++;
+ /* do not activate arrays blocked by metadata handler */
+ if (ra->array.state & (1 << MD_SB_BLOCK_VOLUME)) {
+ fprintf(stderr, Name ": Cannot activate array %s in %s.\n",
+ ra->text_version, devname);
+ ra_blocked++;
+ continue;
+ }
mp = map_by_uuid(&map, ra->uuid);
if (mp) {
@@ -1551,10 +1500,16 @@ static int Incremental_container(struct supertype *st, char *devname,
}
assemble_container_content(st, mdfd, ra, runstop,
- chosen_name, verbose, NULL);
+ chosen_name, verbose, NULL,
+ freeze_reshape);
close(mdfd);
}
+ /* don't move spares to container with volume being activated
+ when all volumes are blocked */
+ if (ra_all == ra_blocked)
+ return 0;
+
/* Now move all suitable spares from spare container */
domains = domain_from_array(list, st->ss->name);
memcpy(suuid, uuid_zero, sizeof(int[4]));
@@ -1600,7 +1555,6 @@ static int Incremental_container(struct supertype *st, char *devname,
close(sfd);
}
domain_free(domains);
- map_unlock(&map);
return 0;
}
@@ -1666,15 +1620,15 @@ int IncrementalRemove(char *devname, char *id_path, int verbose)
if (subfd >= 0) {
Manage_subdevs(memb->dev, subfd,
&devlist, verbose, 0,
- NULL);
+ NULL, 0);
close(subfd);
}
}
free_mdstat(mdstat);
} else
- Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL);
+ Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL, 0);
devlist.disposition = 'r';
- rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL);
+ rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL, 0);
close(mdfd);
free_mdstat(ent);
return rv;
diff --git a/Kill.c b/Kill.c
index 29a43ea6..bac48440 100644
--- a/Kill.c
+++ b/Kill.c
@@ -59,10 +59,9 @@ int Kill(char *dev, struct supertype *st, int force, int quiet, int noexcl)
close(fd);
return 2;
}
+ st->ignore_hw_compat = 1;
rv = st->ss->load_super(st, fd, dev);
- if (force && rv >= 2)
- rv = 0; /* ignore bad data in superblock */
- if (rv== 0 || (force && rv >= 2)) {
+ if (rv == 0 || (force && rv >= 2)) {
st->ss->free_super(st);
st->ss->init_super(st, NULL, 0, "", NULL, NULL);
if (st->ss->store_super(st, fd)) {
diff --git a/Makefile b/Makefile
index 72087bea..b8d363f8 100644
--- a/Makefile
+++ b/Makefile
@@ -137,7 +137,7 @@ ASSEMBLE_FLAGS += -DMDASSEMBLE_AUTO
endif
all : mdadm mdmon
-man : mdadm.man md.man mdadm.conf.man mdmon.man
+man : mdadm.man md.man mdadm.conf.man mdmon.man raid6check.man
everything: all mdadm.static swap_super test_stripe \
mdassemble mdassemble.auto mdassemble.static mdassemble.man \
@@ -221,6 +221,9 @@ mdadm.conf.man : mdadm.conf.5
mdassemble.man : mdassemble.8
nroff -man mdassemble.8 > mdassemble.man
+raid6check.man : raid6check.8
+ nroff -man raid6check.8 > raid6check.man
+
$(OBJS) : $(INCL) mdmon.h
$(MON_OBJS) : $(INCL) mdmon.h
diff --git a/Manage.c b/Manage.c
index 66d69780..d9775ded 100644
--- a/Manage.c
+++ b/Manage.c
@@ -44,6 +44,7 @@ int Manage_ro(char *devname, int fd, int readonly)
#ifndef MDASSEMBLE
struct mdinfo *mdi;
#endif
+ int rv = 0;
if (md_get_version(fd) < 9000) {
fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
@@ -75,7 +76,8 @@ int Manage_ro(char *devname, int fd, int readonly)
vers[9] = mdi->text_version[0];
sysfs_set_str(mdi, NULL, "metadata_version", vers);
- return 1;
+ rv = 1;
+ goto out;
}
} else {
char *cp;
@@ -84,35 +86,43 @@ int Manage_ro(char *devname, int fd, int readonly)
sysfs_set_str(mdi, NULL, "metadata_version", vers);
cp = strchr(vers+10, '/');
- if (*cp)
+ if (cp)
*cp = 0;
ping_monitor(vers+10);
if (mdi->array.level <= 0)
sysfs_set_str(mdi, NULL, "array_state", "active");
}
- return 0;
+ goto out;
}
#endif
if (ioctl(fd, GET_ARRAY_INFO, &array)) {
fprintf(stderr, Name ": %s does not appear to be active.\n",
devname);
- return 1;
+ rv = 1;
+ goto out;
}
if (readonly>0) {
if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
fprintf(stderr, Name ": failed to set readonly for %s: %s\n",
devname, strerror(errno));
- return 1;
+ rv = 1;
+ goto out;
}
} else if (readonly < 0) {
if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
fprintf(stderr, Name ": failed to set writable for %s: %s\n",
devname, strerror(errno));
- return 1;
+ rv = 1;
+ goto out;
}
}
- return 0;
+out:
+#ifndef MDASSEMBLE
+ if (mdi)
+ sysfs_free(mdi);
+#endif
+ return rv;
}
#ifndef MDASSEMBLE
@@ -156,7 +166,7 @@ static void remove_devices(int devnum, char *path)
sprintf(pe, "%d", part);
}
n = readlink(path2, link, sizeof(link));
- if (n && (int)strlen(base) == n &&
+ if (n > 0 && (int)strlen(base) == n &&
strncmp(link, base, n) == 0)
unlink(path2);
}
@@ -173,6 +183,7 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
* quiet < 0 means we will try again if it fails.
*/
mdu_param_t param; /* unused */
+ int rv = 0;
if (runstop == -1 && md_get_version(fd) < 9000) {
if (ioctl(fd, STOP_MD, 0)) {
@@ -251,7 +262,8 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
fprintf(stderr, Name
": failed to stop array %s: %s\n",
devname, strerror(errno));
- return 1;
+ rv = 1;
+ goto out;
}
/* Give monitor a chance to act */
@@ -263,7 +275,8 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
": failed to completely stop %s"
": Device is busy\n",
devname);
- return 1;
+ rv = 1;
+ goto out;
}
} else if (mdi &&
mdi->array.major_version == -1 &&
@@ -291,9 +304,8 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
"member %s still active\n",
devname, m->dev);
free_mdstat(mds);
- if (mdi)
- sysfs_free(mdi);
- return 1;
+ rv = 1;
+ goto out;
}
}
@@ -318,9 +330,8 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
"process, mounted filesystem "
"or active volume group?\n");
}
- if (mdi)
- sysfs_free(mdi);
- return 1;
+ rv = 1;
+ goto out;
}
/* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
* was stopped, so We'll do it here just to be sure. Drop any
@@ -345,8 +356,11 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
map_lock(&map);
map_remove(&map, devnum);
map_unlock(&map);
+ out:
+ if (mdi)
+ sysfs_free(mdi);
}
- return 0;
+ return rv;
}
int Manage_resize(char *devname, int fd, long long size, int raid_disks)
@@ -371,7 +385,7 @@ int Manage_resize(char *devname, int fd, long long size, int raid_disks)
int Manage_subdevs(char *devname, int fd,
struct mddev_dev *devlist, int verbose, int test,
- char *update)
+ char *update, int force)
{
/* do something to each dev.
* devmode can be
@@ -623,19 +637,44 @@ int Manage_subdevs(char *devname, int fd,
if (add_dev == dv->devname) {
if (!get_dev_size(tfd, dv->devname, &ldsize)) {
+ st->ss->free_super(st);
close(tfd);
return 1;
}
} else if (!get_dev_size(tfd, NULL, &ldsize)) {
+ st->ss->free_super(st);
close(tfd);
tfd = -1;
continue;
}
+ if (tst->ss->validate_geometry(
+ tst, array.level, array.layout,
+ array.raid_disks, NULL,
+ ldsize >> 9, NULL, NULL, 0) == 0) {
+ if (!force) {
+ fprintf(stderr, Name
+ ": %s is larger than %s can "
+ "effectively use.\n"
+ " Add --force is you "
+ "really wan to add this device.\n",
+ add_dev, devname);
+ st->ss->free_super(st);
+ close(tfd);
+ return 1;
+ }
+ fprintf(stderr, Name
+ ": %s is larger than %s can "
+ "effectively use.\n"
+ " Adding anyway as --force "
+ "was given.\n",
+ add_dev, devname);
+ }
if (!tst->ss->external &&
array.major_version == 0 &&
md_get_version(fd)%100 < 2) {
close(tfd);
+ st->ss->free_super(st);
tfd = -1;
if (ioctl(fd, HOT_ADD_DISK,
(unsigned long)stb.st_rdev)==0) {
@@ -686,6 +725,7 @@ int Manage_subdevs(char *devname, int fd,
/* FIXME this is a bad test to be using */
if (!tst->sb) {
close(tfd);
+ st->ss->free_super(st);
fprintf(stderr, Name ": cannot load array metadata from %s\n", devname);
return 1;
}
@@ -695,6 +735,7 @@ int Manage_subdevs(char *devname, int fd,
array_size) {
close(tfd);
tfd = -1;
+ st->ss->free_super(st);
if (add_dev != dv->devname)
continue;
fprintf(stderr, Name ": %s not large enough to join array\n",
@@ -741,11 +782,25 @@ int Manage_subdevs(char *devname, int fd,
remove_partitions(tfd);
close(tfd);
tfd = -1;
- if (update) {
+ if (update || dv->writemostly > 0) {
int rv = -1;
tfd = dev_open(dv->devname, O_RDWR);
+ if (tfd < 0) {
+ fprintf(stderr, Name ": failed to open %s for"
+ " superblock update during re-add\n", dv->devname);
+ st->ss->free_super(st);
+ return 1;
+ }
- if (tfd >= 0)
+ if (dv->writemostly == 1)
+ rv = st->ss->update_super(
+ st, NULL, "writemostly",
+ devname, verbose, 0, NULL);
+ if (dv->writemostly == 2)
+ rv = st->ss->update_super(
+ st, NULL, "readwrite",
+ devname, verbose, 0, NULL);
+ if (update)
rv = st->ss->update_super(
st, NULL, update,
devname, verbose, 0, NULL);
@@ -756,6 +811,7 @@ int Manage_subdevs(char *devname, int fd,
if (rv != 0) {
fprintf(stderr, Name ": failed to update"
" superblock during re-add\n");
+ st->ss->free_super(st);
return 1;
}
}
@@ -765,11 +821,13 @@ int Manage_subdevs(char *devname, int fd,
if (verbose >= 0)
fprintf(stderr, Name ": re-added %s\n", add_dev);
count++;
+ st->ss->free_super(st);
continue;
}
if (errno == ENOMEM || errno == EROFS) {
fprintf(stderr, Name ": add new device failed for %s: %s\n",
add_dev, strerror(errno));
+ st->ss->free_super(st);
if (add_dev != dv->devname)
continue;
return 1;
@@ -1188,9 +1246,9 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
sprintf(devname, "%d:%d", major(devid), minor(devid));
devlist.disposition = 'r';
- if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL) == 0) {
+ if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
devlist.disposition = 'a';
- if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL) == 0) {
+ if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
/* make sure manager is aware of changes */
ping_manager(to_devname);
ping_manager(from_devname);
@@ -1198,7 +1256,7 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
close(fd2);
return 1;
}
- else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL);
+ else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
}
close(fd1);
close(fd2);
diff --git a/Monitor.c b/Monitor.c
index 101bca4c..8bc88247 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -124,6 +124,7 @@ int Monitor(struct mddev_dev *devlist,
*/
struct state *statelist = NULL;
+ struct state *st2;
int finished = 0;
struct mdstat_ent *mdstat = NULL;
char *mailfrom = NULL;
@@ -242,6 +243,11 @@ int Monitor(struct mddev_dev *devlist,
}
test = 0;
}
+ for (st2 = statelist; st2; st2 = statelist) {
+ statelist = st2->next;
+ free(st2);
+ }
+
if (pidfile)
unlink(pidfile);
return 0;
@@ -558,7 +564,8 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
sysfs_read(-1, st->devnum, GET_MISMATCH);
if (sra && sra->mismatch_cnt > 0) {
char cnt[40];
- sprintf(cnt, " mismatches found: %d", sra->mismatch_cnt);
+ sprintf(cnt, " mismatches found: %d (on raid level %d)",
+ sra->mismatch_cnt, array.level);
alert("RebuildFinished", dev, cnt, ainfo);
} else
alert("RebuildFinished", dev, NULL, ainfo);
diff --git a/ReadMe.c b/ReadMe.c
index b6588417..9aa798b7 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -24,7 +24,7 @@
#include "mdadm.h"
-char Version[] = Name " - v3.2.2 - 17th June 2011\n";
+char Version[] = Name " - v3.2.3 - 23rd December 2011\n";
/*
* File: ReadMe.c
@@ -153,6 +153,7 @@ struct option long_options[] = {
{"scan", 0, 0, 's'},
{"force", 0, 0, Force},
{"update", 1, 0, 'U'},
+ {"freeze-reshape", 0, 0, FreezeReshape},
/* Management */
{"add", 0, 0, Add},
@@ -190,6 +191,7 @@ struct option long_options[] = {
{"backup-file", 1,0, BackupFile},
{"invalid-backup",0,0,InvalidBackup},
{"array-size", 1, 0, 'Z'},
+ {"continue", 0, 0, Continue},
/* For Incremental */
{"rebuild-map", 0, 0, RebuildMapOpt},
diff --git a/bitmap.c b/bitmap.c
index 2e1ecdac..c27688c5 100644
--- a/bitmap.c
+++ b/bitmap.c
@@ -132,7 +132,7 @@ bitmap_info_t *bitmap_fd_read(int fd, int brief)
void *buf;
unsigned int n, skip;
- if (posix_memalign(&buf, 512, 8192) != 0) {
+ if (posix_memalign(&buf, 4096, 8192) != 0) {
fprintf(stderr, Name ": failed to allocate 8192 bytes\n");
return NULL;
}
@@ -147,6 +147,7 @@ bitmap_info_t *bitmap_fd_read(int fd, int brief)
fprintf(stderr, Name ": failed to allocate %zd bytes\n",
sizeof(*info));
#endif
+ free(buf);
return NULL;
}
@@ -154,6 +155,7 @@ bitmap_info_t *bitmap_fd_read(int fd, int brief)
fprintf(stderr, Name ": failed to read superblock of bitmap "
"file: %s\n", strerror(errno));
free(info);
+ free(buf);
return NULL;
}
memcpy(&info->sb, buf, sizeof(info->sb));
@@ -198,6 +200,7 @@ bitmap_info_t *bitmap_fd_read(int fd, int brief)
total_bits = read_bits;
}
out:
+ free(buf);
info->total_bits = total_bits;
info->dirty_bits = dirty_bits;
return info;
@@ -331,7 +334,7 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
goto free_info;
printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n",
info->total_bits, info->dirty_bits,
- 100.0 * info->dirty_bits / (info->total_bits + 1));
+ 100.0 * info->dirty_bits / (info->total_bits?:1));
free_info:
free(info);
return rv;
diff --git a/config.c b/config.c
index ad75411c..6a398cfa 100644
--- a/config.c
+++ b/config.c
@@ -707,6 +707,8 @@ void autoline(char *line)
for (i = 0; i < super_cnt; i++)
if (!seen[i])
policy_add(rule_policy, pol_auto, dflt, pol_metadata, superlist[i]->name, NULL);
+
+ free(seen);
}
int loaded = 0;
@@ -1017,11 +1019,12 @@ int conf_name_is_free(char *name)
return 1;
}
-struct mddev_ident *conf_match(struct mdinfo *info, struct supertype *st)
+struct mddev_ident *conf_match(struct supertype *st,
+ struct mdinfo *info,
+ char *devname,
+ int verbose, int *rvp)
{
struct mddev_ident *array_list, *match;
- int verbose = 0;
- char *devname = NULL;
array_list = conf_get_ident(NULL);
match = NULL;
for (; array_list; array_list = array_list->next) {
@@ -1042,7 +1045,7 @@ struct mddev_ident *conf_match(struct mdinfo *info, struct supertype *st)
array_list->devname);
continue;
}
- if (array_list->devices && devname &&
+ if (array_list->devices &&
!match_oneof(array_list->devices, devname)) {
if (verbose >= 2 && array_list->devname)
fprintf(stderr, Name
@@ -1064,7 +1067,8 @@ struct mddev_ident *conf_match(struct mdinfo *info, struct supertype *st)
array_list->super_minor == UnSet) {
if (verbose >= 2 && array_list->devname)
fprintf(stderr, Name
- ": %s doesn't have any identifying information.\n",
+ ": %s doesn't have any identifying"
+ " information.\n",
array_list->devname);
continue;
}
@@ -1074,15 +1078,54 @@ struct mddev_ident *conf_match(struct mdinfo *info, struct supertype *st)
if (verbose >= 0) {
if (match->devname && array_list->devname)
fprintf(stderr, Name
- ": we match both %s and %s - cannot decide which to use.\n",
- match->devname, array_list->devname);
+ ": we match both %s and %s - "
+ "cannot decide which to use.\n",
+ match->devname,
+ array_list->devname);
else
fprintf(stderr, Name
- ": multiple lines in mdadm.conf match\n");
+ ": multiple lines in mdadm.conf"
+ " match\n");
}
- return NULL;
+ if (rvp)
+ *rvp = 2;
+ match = NULL;
+ break;
}
match = array_list;
}
return match;
}
+
+int conf_verify_devnames(struct mddev_ident *array_list)
+{
+ struct mddev_ident *a1, *a2;
+ /* Compare each entry that has a devname against every later entry in the list, looking for a repeated devname. */
+ for (a1 = array_list; a1; a1 = a1->next) {
+ if (!a1->devname)
+ continue;
+ for (a2 = a1->next; a2; a2 = a2->next) {
+ if (!a2->devname)
+ continue;
+ if (strcmp(a1->devname, a2->devname) != 0)
+ continue;
+ /* a1 and a2 share a devname: report it on stderr and return 1 at this first clash. */
+ if (a1->uuid_set && a2->uuid_set) {
+ char nbuf[64];
+ __fname_from_uuid(a1->uuid, 0, nbuf, ':');
+ fprintf(stderr,
+ Name ": Devices %s and ",
+ nbuf);
+ __fname_from_uuid(a2->uuid, 0, nbuf, ':'); /* nbuf is passed again here, which is why the first UUID was already printed above */
+ fprintf(stderr,
+ "%s have the same name: %s\n",
+ nbuf, a1->devname);
+ } else
+ fprintf(stderr, Name ": Device %s given twice"
+ " in config file\n", a1->devname);
+ return 1;
+ }
+ }
+ /* No two entries with a devname compared equal. */
+ return 0;
+}
diff --git a/inventory b/inventory
index aa5cacb0..ff41fc4e 100755
--- a/inventory
+++ b/inventory
@@ -12,6 +12,7 @@ ANNOUNCE-3.1.5
ANNOUNCE-3.2
ANNOUNCE-3.2.1
ANNOUNCE-3.2.2
+ANNOUNCE-3.2.3
Assemble.c
bitmap.c
bitmap.h
@@ -79,6 +80,7 @@ probe_roms.h
pwgr.c
Query.c
raid5extend.c
+raid6check.8
raid6check.c
ReadMe.c
README.initramfs
diff --git a/makedist b/makedist
index 0f10f423..3c99e3a8 100755
--- a/makedist
+++ b/makedist
@@ -67,8 +67,18 @@ then
cp ChangeLog $target/ChangeLog
if [ " $arg" != " test" ]
then
- scp $target/$base master.kernel.org:/pub/linux/utils/raid/mdadm/mdadm-$version.tar.gz
- scp $target/ANNOUNCE $target/ChangeLog master.kernel.org:/pub/linux/utils/raid/mdadm/
+ echo -n "Confirm signing this release? "
+ read a
+ if [ " $a" != " y" ]; then echo OK - bye. ; exit 1; fi
+ if zcat $target/$base | gpg -ba > $target/$base.sign && gpg -ba $target/ANNOUNCE
+ then
+ kup put $target/$base $target/$base.sign \
+ /pub/linux/utils/raid/mdadm/mdadm-$version.tar.gz
+ kup put $target/ANNOUNCE $target/ANNOUNCE.asc /pub/linux/utils/raid/mdadm/ANNOUNCE
+ else
+ echo signing failed
+ exit 1
+ fi
fi
else
if [ ! -f $target/$base ]
diff --git a/managemon.c b/managemon.c
index 6662f675..cde0d8be 100644
--- a/managemon.c
+++ b/managemon.c
@@ -117,11 +117,16 @@ static void close_aa(struct active_array *aa)
close(d->state_fd);
}
- close(aa->action_fd);
- close(aa->info.state_fd);
- close(aa->resync_start_fd);
- close(aa->metadata_fd);
- close(aa->sync_completed_fd);
+ if (aa->action_fd >= 0)
+ close(aa->action_fd);
+ if (aa->info.state_fd >= 0)
+ close(aa->info.state_fd);
+ if (aa->resync_start_fd >= 0)
+ close(aa->resync_start_fd);
+ if (aa->metadata_fd >= 0)
+ close(aa->metadata_fd);
+ if (aa->sync_completed_fd >= 0)
+ close(aa->sync_completed_fd);
}
static void free_aa(struct active_array *aa)
@@ -409,7 +414,13 @@ static int disk_init_and_add(struct mdinfo *disk, struct mdinfo *clone,
*disk = *clone;
disk->recovery_fd = sysfs_open(aa->devnum, disk->sys_name, "recovery_start");
+ if (disk->recovery_fd < 0)
+ return -1;
disk->state_fd = sysfs_open(aa->devnum, disk->sys_name, "state");
+ if (disk->state_fd < 0) {
+ close(disk->recovery_fd);
+ return -1;
+ }
disk->prev_state = read_dev_state(disk->state_fd);
disk->curr_state = disk->prev_state;
disk->next = aa->info.devs;
@@ -461,7 +472,7 @@ static void manage_member(struct mdstat_ent *mdstat,
if (mdstat->level) {
int level = map_name(pers, mdstat->level);
if (level == 0 || level == LEVEL_LINEAR) {
- a->container = NULL;
+ a->to_remove = 1;
wakeup_monitor();
return;
}
@@ -498,7 +509,10 @@ static void manage_member(struct mdstat_ent *mdstat,
newa = duplicate_aa(a);
if (!newa)
goto out;
- /* Cool, we can add a device or several. */
+ /* prevent the kernel from activating the disk(s) before we
+ * finish adding them
+ */
+ sysfs_set_str(&a->info, NULL, "sync_action", "frozen");
/* Add device to array and set offset/size/slot.
* and open files for each newdev */
@@ -736,7 +750,7 @@ void manage(struct mdstat_ent *mdstat, struct supertype *container)
/* Looks like a member of this container */
for (a = container->arrays; a; a = a->next) {
if (mdstat->devnum == a->devnum) {
- if (a->container)
+ if (a->container && a->to_remove == 0)
manage_member(mdstat, a);
break;
}
diff --git a/mapfile.c b/mapfile.c
index ff1e9736..0bfecd05 100644
--- a/mapfile.c
+++ b/mapfile.c
@@ -159,6 +159,18 @@ void map_unlock(struct map_ent **melp)
lf = NULL;
}
+void map_fork(void)
+{
+ /* We are forking, so must close the lock file (lf) if one is open.
+ * Don't risk flushing anything though: the underlying descriptor is closed before fclose() is called on the stream.
+ */
+ if (lf) {
+ close(fileno(lf)); /* close the raw descriptor first */
+ fclose(lf); /* then release the FILE stream itself */
+ lf = NULL; /* mark the lock file as no longer open */
+ }
+}
+
void map_add(struct map_ent **melp,
int devnum, char *metadata, int uuid[4], char *path)
{
@@ -419,7 +431,9 @@ void RebuildMap(void)
* find a unique name based on metadata name.
*
*/
- struct mddev_ident *match = conf_match(info, st);
+ struct mddev_ident *match = conf_match(st, info,
+ NULL, 0,
+ NULL);
struct stat stb;
if (match && match->devname && match->devname[0] == '/') {
path = match->devname;
diff --git a/md.4 b/md.4
index 5e796393..99faad1a 100644
--- a/md.4
+++ b/md.4
@@ -120,7 +120,7 @@ a MULTIPATH array with no superblock makes sense.
RAID1
In some configurations it might be desired to create a raid1
configuration that does not use a superblock, and to maintain the state of
-the array elsewhere. While not encouraged for general us, it does
+the array elsewhere. While not encouraged for general use, it does
have special-purpose uses and is supported.
.SS ARRAYS WITH EXTERNAL METADATA
@@ -128,7 +128,7 @@ have special-purpose uses and is supported.
From release 2.6.28, the
.I md
driver supports arrays with externally managed metadata. That is,
-the metadata is not managed by the kernel by rather by a user-space
+the metadata is not managed by the kernel but rather by a user-space
program which is external to the kernel. This allows support for a
variety of metadata formats without cluttering the kernel with lots of
details.
@@ -136,7 +136,7 @@ details.
.I md
is able to communicate with the user-space program through various
sysfs attributes so that it can make appropriate changes to the
-metadata \- for example to make a device as faulty. When necessary,
+metadata \- for example to mark a device as faulty. When necessary,
.I md
will wait for the program to acknowledge the event by writing to a
sysfs attribute.
@@ -215,7 +215,7 @@ spindle. In theory, having an N-disk RAID1 will allow N sequential
threads to read from all disks.
Individual devices in a RAID1 can be marked as "write-mostly".
-This drives are excluded from the normal read balancing and will only
+These drives are excluded from the normal read balancing and will only
be read from when there is no other option. This can be useful for
devices connected over a slow link.
@@ -468,7 +468,7 @@ scrub starts and is incremented whenever a sector is
found that is a mismatch.
.I md
normally works in units much larger than a single sector and when it
-finds a mismatch, it does not determin exactly how many actual sectors were
+finds a mismatch, it does not determine exactly how many actual sectors were
affected but simply adds the number of sectors in the IO unit that was
used. So a value of 128 could simply mean that a single 64KB check
found an error (128 x 512bytes = 64KB).
@@ -661,7 +661,7 @@ to this file will cause the system-wide setting to have effect.
This is the partner of
.B md/sync_speed_min
and overrides
-.B /proc/sys/dev/raid/spool_limit_max
+.B /proc/sys/dev/raid/speed_limit_max
described below.
.TP
diff --git a/md_p.h b/md_p.h
index 6c79a3d1..517b2041 100644
--- a/md_p.h
+++ b/md_p.h
@@ -101,7 +101,9 @@ typedef struct mdp_device_descriptor_s {
#define MD_SB_CLEAN 0
#define MD_SB_ERRORS 1
#define MD_SB_BBM_ERRORS 2
-
+#define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */
+#define MD_SB_BLOCK_VOLUME 4 /* block activation of array, other arrays
+ * in container can be activated */
#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */
typedef struct mdp_superblock_s {
diff --git a/mdadm.8.in b/mdadm.8.in
index d2d7ef22..27be110f 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -5,7 +5,7 @@
.\" the Free Software Foundation; either version 2 of the License, or
.\" (at your option) any later version.
.\" See file COPYING in distribution for details.
-.TH MDADM 8 "" v3.2.2
+.TH MDADM 8 "" v3.2.3
.SH NAME
mdadm \- manage MD devices
.I aka
@@ -336,7 +336,8 @@ It can easily be moved between hosts with different endian-ness, and a
recovery operation can be checkpointed and restarted. The different
sub-versions store the superblock at different locations on the
device, either at the end (for 1.0), at the start (for 1.1) or 4K from
-the start (for 1.2). "1" is equivalent to "1.0".
+the start (for 1.2). "1" is equivalent to "1.2" (the commonly
+preferred 1.x format).
'if '{DEFAULT_METADATA}'1.2' "default" is equivalent to "1.2".
.IP ddf
Use the "Industry Standard" DDF (Disk Data Format) format defined by
@@ -418,11 +419,22 @@ issued.
A suffix of 'M' or 'G' can be given to indicate Megabytes or
Gigabytes respectively.
+Sometimes a replacement drive can be a little smaller than the
+original drives though this should be minimised by IDEMA standards.
+Such a replacement drive will be rejected by
+.IR md .
+To guard against this it can be useful to set the initial size
+slightly smaller than the smaller device with the aim that it will
+still be larger than any replacement.
+
This value can be set with
.B \-\-grow
-for RAID level 1/4/5/6. If the array was created with a size smaller
-than the currently active drives, the extra space can be accessed
-using
+for RAID level 1/4/5/6 though
+.B CONTAINER
+based arrays such as those with IMSM metadata may not be able to
+support this.
+If the array was created with a size smaller than the currently
+active drives, the extra space can be accessed using
.BR \-\-grow .
The size can be given as
.B max
@@ -440,9 +452,10 @@ problems the array can be made bigger again with no loss with another
.B "\-\-grow \-\-size="
command.
-This value can not be used with
+This value cannot be used when creating a
.B CONTAINER
-metadata such as DDF and IMSM.
+such as with DDF and IMSM metadata, though it is perfectly valid when
+creating an array inside a container.
.TP
.BR \-Z ", " \-\-array\-size=
@@ -706,7 +719,7 @@ facts the operator knows.
When an array is resized to a larger size with
.B "\-\-grow \-\-size="
the new space is normally resynced in that same way that the whole
-array is resynced at creation. From Linux version 2.6.40,
+array is resynced at creation. From Linux version 3.0,
.B \-\-assume\-clean
can be used with that command to avoid the automatic resync.
@@ -721,6 +734,31 @@ The file must be stored on a separate device, not on the RAID array
being reshaped.
.TP
+.BR \-\-continue
+This option is complementary to the
+.B \-\-freeze\-reshape
+option for assembly. It is needed when a
+.B \-\-grow
+operation is interrupted and is not restarted automatically due to
+.B \-\-freeze\-reshape
+usage during array assembly. This option is used together with
+.BR \-G
+, (
+.BR \-\-grow
+) command and device for a pending reshape to be continued.
+All parameters required for reshape continuation will be read from array metadata.
+If the initial
+.BR \-\-grow
+command required the
+.BR \-\-backup\-file=
+option to be set, continuing the reshape will require exactly the same
+backup file to be given as well.
+.IP
+Any other parameter passed together with
+.BR \-\-continue
+option will be ignored.
+
+.TP
.BR \-N ", " \-\-name=
Set a
.B name
@@ -811,6 +849,11 @@ number, and there is no entry in /dev for that number and with a
non-standard name. Names that are not in 'standard' format are only
allowed in "/dev/md/".
+This is meaningful with
+.B \-\-create
+or
+.BR \-\-build .
+
.ig XX
.\".TP
.\".BR \-\-symlink = no
@@ -835,6 +878,28 @@ allowed in "/dev/md/".
.\"
.XX
+.TP
+.BR \-a ", " "\-\-add"
+This option can be used in Grow mode in two cases.
+
+If the target array is a Linear array, then
+.B \-\-add
+can be used to add one or more devices to the array. They
+are simply catenated on to the end of the array. Once added, the
+devices cannot be removed.
+
+If the
+.B \-\-raid\-disks
+option is being used to increase the number of devices in an array,
+then
+.B \-\-add
+can be used to add some extra devices to be included in the array.
+In most cases this is not needed as the extra devices can be added as
+spares first, and then the number of raid-disks can be changed.
+However for RAID0, it is not possible to add spares. So to increase
+the number of devices in a RAID0, it is necessary to set the new
+number of devices, and to add the new devices, in the same command.
+
.SH For assemble:
.TP
@@ -912,28 +977,6 @@ not as reliable as you would like.
See this option under Create and Build options.
.TP
-.BR \-a ", " "\-\-add"
-This option can be used in Grow mode in two cases.
-
-If the target array is a Linear array, then
-.B \-\-add
-can be used to add one or more devices to the array. They
-are simply catenated on to the end of the array. Once added, the
-devices cannot be removed.
-
-If the
-.B \-\-raid\-disks
-option is being used to increase the number of devices in an array,
-then
-.B \-\-add
-can be used to add some extra devices to be included in the array.
-In most cases this is not needed as the extra devices can be added as
-spares first, and then the number of raid-disks can be changed.
-However for RAID0, it is not possible to add spares. So to increase
-the number of devices in a RAID0, it is necessary to set the new
-number of devices, and to add the new devices, in the same command.
-
-.TP
.BR \-b ", " \-\-bitmap=
Specify the bitmap file that was given when the array was created. If
an array has an
@@ -1078,6 +1121,18 @@ option can be used when an array has an internal bitmap which is
corrupt in some way so that assembling the array normally fails. It
will cause any internal bitmap to be ignored.
+.TP
+.BR \-\-freeze\-reshape
+This option is intended to be used in start-up scripts during the initrd boot phase.
+When an array under reshape is assembled during the initrd phase, this option
+stops the reshape after the reshape critical section has been restored. This happens
+before the file system pivot operation and avoids loss of file system context.
+Losing file system context would cause the reshape to be broken.
+
+Reshape can be continued later using the
+.B \-\-continue
+option for the grow command.
+
.SH For Manage mode:
.TP
@@ -1491,7 +1546,7 @@ the first device given is the md device.
In the second usage example, all devices listed are treated as md
devices and assembly is attempted.
In the third (where no devices are listed) all md devices that are
-listed in the configuration file are assembled. If not arrays are
+listed in the configuration file are assembled. If no arrays are
described by the configuration file, then any arrays that
can be found on unused devices will be assembled.
@@ -1600,7 +1655,7 @@ and no devices are listed,
will first attempt to assemble all the arrays listed in the config
file.
-In no array at listed in the config (other than those marked
+If no arrays are listed in the config (other than those marked
.BR <ignore> )
it will look through the available devices for possible arrays and
will try to assemble anything that it finds. Arrays which are tagged
@@ -2200,22 +2255,24 @@ change the "size" attribute for RAID1, RAID4, RAID5 and RAID6.
.IP \(bu 4
increase or decrease the "raid\-devices" attribute of RAID0, RAID1, RAID4,
RAID5, and RAID6.
-.IP \bu 4
+.IP \(bu 4
change the chunk-size and layout of RAID0, RAID4, RAID5 and RAID6.
-.IP \bu 4
+.IP \(bu 4
convert between RAID1 and RAID5, between RAID5 and RAID6, between
-RAID0, RAID5, and RAID5, and between RAID0 and RAID10 (in the near-2 mode).
+RAID0, RAID4, and RAID5, and between RAID0 and RAID10 (in the near-2 mode).
.IP \(bu 4
add a write-intent bitmap to any array which supports these bitmaps, or
remove a write-intent bitmap from such an array.
.PP
-Using GROW on containers is currently only support for Intel's IMSM
+Using GROW on containers is currently supported only for Intel's IMSM
container format. The number of devices in a container can be
increased - which affects all arrays in the container - or an array
in a container can be converted between levels where those levels are
supported by the container, and the conversion is on of those listed
-above.
+above. Resizing arrays in an IMSM container with
+.B "--grow --size"
+is not yet supported.
Grow functionality (e.g. expand a number of raid devices) for Intel's
IMSM container format has an experimental status. It is guarded by the
@@ -2250,7 +2307,7 @@ space to start being used. If the size is increased in this way, a
are synchronised.
Note that when an array changes size, any filesystem that may be
-stored in the array will not automatically grow for shrink to use or
+stored in the array will not automatically grow or shrink to use or
vacate the space. The
filesystem will need to be explicitly told to use the extra space
after growing, or to reduce its size
@@ -2259,7 +2316,7 @@ to shrinking the array.
Also the size of an array cannot be changed while it has an active
bitmap. If an array has a bitmap, it must be removed before the size
-can be changed. Once the change it complete a new bitmap can be created.
+can be changed. Once the change is complete a new bitmap can be created.
.SS RAID\-DEVICES CHANGES
@@ -2435,8 +2492,8 @@ must match one of the names or patterns in a
line.
.IP +
-Does the device have a valid md superblock. If a specific metadata
-version is request with
+Does the device have a valid md superblock? If a specific metadata
+version is requested with
.B \-\-metadata
or
.B \-e
@@ -2467,6 +2524,7 @@ is not able to positively identify the array as belonging to the
current host, the device will be rejected.
..
+.PP
.I mdadm
keeps a list of arrays that it has partially assembled in
.B /var/run/mdadm/map
@@ -2639,7 +2697,7 @@ can be started.
Any devices which are components of /dev/md4 will be marked as faulty
and then remove from the array.
-.B " mdadm --grow /dev/md4 --level=6 --backup-file=/root/backup-md4
+.B " mdadm --grow /dev/md4 --level=6 --backup-file=/root/backup-md4"
.br
The array
.B /dev/md4
@@ -2787,7 +2845,7 @@ configuration file at all.
For further information on mdadm usage, MD and the various levels of
RAID, see:
.IP
-.B http://linux\-raid.osdl.org/
+.B http://raid.wiki.kernel.org/
.PP
(based upon Jakob \(/Ostergaard's Software\-RAID.HOWTO)
.\".PP
diff --git a/mdadm.c b/mdadm.c
index fb510512..f07fac2a 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -74,6 +74,7 @@ int main(int argc, char *argv[])
int export = 0;
int assume_clean = 0;
char *symlinks = NULL;
+ int grow_continue = 0;
/* autof indicates whether and how to create device node.
* bottom 3 bits are style. Rest (when shifted) are number of parts
* 0 - unset
@@ -112,6 +113,8 @@ int main(int argc, char *argv[])
int mdfd = -1;
+ int freeze_reshape = 0;
+
srandom(time(0) ^ getpid());
ident.uuid_set=0;
@@ -209,13 +212,17 @@ int main(int argc, char *argv[])
case 'I': newmode = INCREMENTAL;
shortopt = short_bitmap_auto_options; break;
case AutoDetect:
- newmode = AUTODETECT; break;
+ newmode = AUTODETECT;
+ break;
case MiscOpt:
case 'D':
case 'E':
case 'X':
- case 'Q': newmode = MISC; break;
+ case 'Q':
+ newmode = MISC;
+ break;
+
case 'R':
case 'S':
case 'o':
@@ -226,17 +233,15 @@ int main(int argc, char *argv[])
case DetailPlatform:
case KillSubarray:
case UpdateSubarray:
- if (opt == KillSubarray || opt == UpdateSubarray) {
- if (subarray) {
- fprintf(stderr, Name ": subarray can only"
- " be specified once\n");
- exit(2);
- }
- subarray = optarg;
- }
case UdevRules:
- case 'K': if (!mode) newmode = MISC; break;
- case NoSharing: newmode = MONITOR; break;
+ case 'K':
+ if (!mode)
+ newmode = MISC;
+ break;
+
+ case NoSharing:
+ newmode = MONITOR;
+ break;
}
if (mode && newmode == mode) {
/* everybody happy ! */
@@ -609,10 +614,15 @@ int main(int argc, char *argv[])
case O(ASSEMBLE,Force): /* force assembly */
case O(MISC,'f'): /* force zero */
case O(MISC,Force): /* force zero */
+ case O(MANAGE,Force): /* add device which is too large */
force=1;
continue;
-
/* now for the Assemble options */
+ case O(ASSEMBLE, FreezeReshape): /* Freeze reshape during
+ * initrd phase */
+ case O(INCREMENTAL, FreezeReshape):
+ freeze_reshape = 1;
+ continue;
case O(CREATE,'u'): /* uuid of array */
case O(ASSEMBLE,'u'): /* uuid of array */
if (ident.uuid_set) {
@@ -840,6 +850,7 @@ int main(int argc, char *argv[])
continue;
case O(MONITOR,'1'): /* oneshot */
oneshot = 1;
+ spare_sharing = 0;
continue;
case O(MONITOR,'t'): /* test */
test = 1;
@@ -915,6 +926,14 @@ int main(int argc, char *argv[])
case O(MISC, DetailPlatform):
case O(MISC, KillSubarray):
case O(MISC, UpdateSubarray):
+ if (opt == KillSubarray || opt == UpdateSubarray) {
+ if (subarray) {
+ fprintf(stderr, Name ": subarray can only"
+ " be specified once\n");
+ exit(2);
+ }
+ subarray = optarg;
+ }
if (devmode && devmode != opt &&
(devmode == 'E' || (opt == 'E' && devmode != 'Q'))) {
fprintf(stderr, Name ": --examine/-E cannot be given with ");
@@ -988,6 +1007,11 @@ int main(int argc, char *argv[])
backup_file = optarg;
continue;
+ case O(GROW, Continue):
+ /* Continue interrupted grow
+ */
+ grow_continue = 1;
+ continue;
case O(ASSEMBLE, InvalidBackup):
/* Acknowledge that the backupfile is invalid, but ask
* to continue anyway
@@ -1185,7 +1209,8 @@ int main(int argc, char *argv[])
require_homehost = 0;
}
- if ((mode != MISC || devmode != 'E') &&
+ if (!((mode == MISC && devmode == 'E')
+ || (mode == MONITOR && spare_sharing == 0)) &&
geteuid() != 0) {
fprintf(stderr, Name ": must be super-user to perform this action\n");
exit(1);
@@ -1202,7 +1227,7 @@ int main(int argc, char *argv[])
if (!rv && devs_found>1)
rv = Manage_subdevs(devlist->devname, mdfd,
devlist->next, verbose-quiet, test,
- update);
+ update, force);
if (!rv && readonly < 0)
rv = Manage_ro(devlist->devname, mdfd, readonly);
if (!rv && runstop)
@@ -1226,14 +1251,16 @@ int main(int argc, char *argv[])
NULL, backup_file, invalid_backup,
readonly, runstop, update,
homehost, require_homehost,
- verbose-quiet, force);
+ verbose-quiet, force,
+ freeze_reshape);
}
} else if (!scan)
rv = Assemble(ss, devlist->devname, &ident,
devlist->next, backup_file, invalid_backup,
readonly, runstop, update,
homehost, require_homehost,
- verbose-quiet, force);
+ verbose-quiet, force,
+ freeze_reshape);
else if (devs_found>0) {
if (update && devs_found > 1) {
fprintf(stderr, Name ": can only update a single array at a time\n");
@@ -1257,13 +1284,22 @@ int main(int argc, char *argv[])
NULL, backup_file, invalid_backup,
readonly, runstop, update,
homehost, require_homehost,
- verbose-quiet, force);
+ verbose-quiet, force,
+ freeze_reshape);
}
} else {
struct mddev_ident *a, *array_list = conf_get_ident(NULL);
struct mddev_dev *devlist = conf_get_devs();
+ struct map_ent *map = NULL;
int cnt = 0;
int failures, successes;
+
+ if (conf_verify_devnames(array_list)) {
+ fprintf(stderr, Name
+ ": Duplicate MD device names in "
+ "conf file were found.\n");
+ exit(1);
+ }
if (devlist == NULL) {
fprintf(stderr, Name ": No devices listed in conf file were found.\n");
exit(1);
@@ -1281,6 +1317,10 @@ int main(int argc, char *argv[])
if (a->autof == 0)
a->autof = autof;
}
+ if (map_lock(&map))
+ fprintf(stderr, Name " %s: failed to get "
+ "exclusive lock on mapfile\n",
+ __func__);
do {
failures = 0;
successes = 0;
@@ -1298,7 +1338,8 @@ int main(int argc, char *argv[])
NULL, NULL, 0,
readonly, runstop, NULL,
homehost, require_homehost,
- verbose-quiet, force);
+ verbose-quiet, force,
+ freeze_reshape);
if (r == 0) {
a->assembled = 1;
successes++;
@@ -1323,9 +1364,13 @@ int main(int argc, char *argv[])
rv2 = Assemble(ss, NULL,
&ident,
devlist, NULL, 0,
- readonly, runstop, NULL,
- homehost, require_homehost,
- verbose-quiet, force);
+ readonly,
+ runstop, NULL,
+ homehost,
+ require_homehost,
+ verbose-quiet,
+ force,
+ freeze_reshape);
if (rv2==0) {
cnt++;
acnt++;
@@ -1342,6 +1387,7 @@ int main(int argc, char *argv[])
fprintf(stderr, Name ": No arrays found in config file\n");
rv = 1;
}
+ map_unlock(&map);
}
break;
case BUILD:
@@ -1633,7 +1679,11 @@ int main(int argc, char *argv[])
delay = DEFAULT_BITMAP_DELAY;
rv = Grow_addbitmap(devlist->devname, mdfd, bitmap_file,
bitmap_chunk, delay, write_behind, force);
- } else if (size >= 0 || raiddisks != 0 || layout_str != NULL
+ } else if (grow_continue)
+ rv = Grow_continue_command(devlist->devname,
+ mdfd, backup_file,
+ verbose);
+ else if (size >= 0 || raiddisks != 0 || layout_str != NULL
|| chunk != 0 || level != UnSet) {
rv = Grow_reshape(devlist->devname, mdfd, quiet, backup_file,
size, level, layout_str, chunk, raiddisks,
@@ -1679,7 +1729,8 @@ int main(int argc, char *argv[])
else
rv = Incremental(devlist->devname, verbose-quiet,
runstop, ss, homehost,
- require_homehost, autof);
+ require_homehost, autof,
+ freeze_reshape);
break;
case AUTODETECT:
autodetect();
diff --git a/mdadm.conf.5 b/mdadm.conf.5
index 9f31c734..400b10c3 100644
--- a/mdadm.conf.5
+++ b/mdadm.conf.5
@@ -439,7 +439,7 @@ A device may belong to several domains. The domain of an array is a union
of domains of all devices in that array. A spare can be automatically
moved from one array to another if the set of the destination array's
.I domains
-contains all the
+contains all the
.I domains
of the new disk or if both arrays have the same
.IR spare-group .
@@ -463,6 +463,7 @@ any arbitrary string
.B path=
file glob matching anything from
.B /dev/disk/by-path
+.TP
.B type=
either
.B disk
@@ -471,6 +472,8 @@ or
.TP
.B action=
include, re-add, spare, spare-same-slot, or force-spare
+.B auto=
+yes, no, or homehost.
.P
The
diff --git a/mdadm.h b/mdadm.h
index e075dd2a..3bcd0526 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -194,24 +194,30 @@ struct mdinfo {
unsigned long long custom_array_size; /* size for non-default sized
* arrays (in sectors)
*/
+#define NO_RESHAPE 0
+#define VOLUME_RESHAPE 1
+#define CONTAINER_RESHAPE 2
int reshape_active;
unsigned long long reshape_progress;
+ int recovery_blocked; /* for external metadata it
+ * indicates that there is
+ * reshape in progress in
+ * container,
+ * for native metadata it is
+ * reshape_active field mirror
+ */
union {
unsigned long long resync_start; /* per-array resync position */
unsigned long long recovery_start; /* per-device rebuild position */
#define MaxSector (~0ULL) /* resync/recovery complete position */
};
+ unsigned long bitmap_offset; /* 0 == none, 1 == a file */
unsigned long safe_mode_delay; /* ms delay to mark clean */
int new_level, delta_disks, new_layout, new_chunk;
int errors;
unsigned long cache_size; /* size of raid456 stripe cache*/
int mismatch_cnt;
char text_version[50];
- void *update_private; /* for passing metadata-format
- * specific update data
- * between successive calls to
- * update_super()
- */
int container_member; /* for assembling external-metatdata arrays
* This is to be used internally by metadata
@@ -313,6 +319,8 @@ enum special_options {
RebuildMapOpt,
InvalidBackup,
UdevRules,
+ FreezeReshape,
+ Continue,
};
/* structures read from config file */
@@ -427,6 +435,7 @@ extern void map_add(struct map_ent **melp,
int devnum, char *metadata, int uuid[4], char *path);
extern int map_lock(struct map_ent **melp);
extern void map_unlock(struct map_ent **melp);
+extern void map_fork(void);
/* various details can be requested */
enum sysfs_read_flags {
@@ -440,11 +449,13 @@ enum sysfs_read_flags {
GET_DISKS = (1 << 7),
GET_DEGRADED = (1 << 8),
GET_SAFEMODE = (1 << 9),
- GET_DEVS = (1 << 10), /* gets role, major, minor */
- GET_OFFSET = (1 << 11),
- GET_SIZE = (1 << 12),
- GET_STATE = (1 << 13),
- GET_ERROR = (1 << 14),
+ GET_BITMAP_LOCATION = (1 << 10),
+
+ GET_DEVS = (1 << 20), /* gets role, major, minor */
+ GET_OFFSET = (1 << 21),
+ GET_SIZE = (1 << 22),
+ GET_STATE = (1 << 23),
+ GET_ERROR = (1 << 24),
};
/* If fd >= 0, get the array it is open on,
@@ -646,6 +657,8 @@ extern struct superswitch {
* linear-grow-new - add a new device to a linear array, but don't
* change the size: so superblock still matches
* linear-grow-update - now change the size of the array.
+ * writemostly - set the WriteMostly1 bit in the superblock devflags
+ * readwrite - clear the WriteMostly1 bit in the superblock devflags
*/
int (*update_super)(struct supertype *st, struct mdinfo *info,
char *update,
@@ -700,6 +713,12 @@ extern struct superswitch {
* inter-device dependencies, it should record sufficient details
* so these can be validated.
* Both 'size' and '*freesize' are in sectors. chunk is KiB.
+ * Return value is:
+ * 1: everything is OK
+ * 0: not OK for some reason - if 'verbose', then error was reported.
+ * -1: st->sb was NULL, 'subdev' is a member of a container of this
+ * type, but array is not acceptable for some reason
+ * message was reported even if verbose is 0.
*/
int (*validate_geometry)(struct supertype *st, int level, int layout,
int raiddisks,
@@ -1010,7 +1029,7 @@ extern int Manage_runstop(char *devname, int fd, int runstop, int quiet);
extern int Manage_resize(char *devname, int fd, long long size, int raid_disks);
extern int Manage_subdevs(char *devname, int fd,
struct mddev_dev *devlist, int verbose, int test,
- char *update);
+ char *update, int force);
extern int autodetect(void);
extern int Grow_Add_device(char *devname, int fd, char *newdev);
extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force);
@@ -1022,7 +1041,17 @@ extern int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
extern int Grow_restart(struct supertype *st, struct mdinfo *info,
int *fdlist, int cnt, char *backup_file, int verbose);
extern int Grow_continue(int mdfd, struct supertype *st,
- struct mdinfo *info, char *backup_file);
+ struct mdinfo *info, char *backup_file,
+ int freeze_reshape);
+
+extern int restore_backup(struct supertype *st,
+ struct mdinfo *content,
+ int working_disks,
+ int spares,
+ char *backup_file,
+ int verbose);
+extern int Grow_continue_command(char *devname, int fd,
+ char *backup_file, int verbose);
extern int Assemble(struct supertype *st, char *mddev,
struct mddev_ident *ident,
@@ -1030,7 +1059,7 @@ extern int Assemble(struct supertype *st, char *mddev,
char *backup_file, int invalid_backup,
int readonly, int runstop,
char *update, char *homehost, int require_homehost,
- int verbose, int force);
+ int verbose, int force, int freeze_reshape);
extern int Build(char *mddev, int chunk, int level, int layout,
int raiddisks, struct mddev_dev *devlist, int assume_clean,
@@ -1064,7 +1093,7 @@ extern int WaitClean(char *dev, int sock, int verbose);
extern int Incremental(char *devname, int verbose, int runstop,
struct supertype *st, char *homehost, int require_homehost,
- int autof);
+ int autof, int freeze_reshape);
extern void RebuildMap(void);
extern int IncrementalScan(int verbose);
extern int IncrementalRemove(char *devname, char *path, int verbose);
@@ -1114,8 +1143,12 @@ extern char *conf_get_homehost(int *require_homehostp);
extern char *conf_line(FILE *file);
extern char *conf_word(FILE *file, int allow_key);
extern int conf_name_is_free(char *name);
+extern int conf_verify_devnames(struct mddev_ident *array_list);
extern int devname_matches(char *name, char *match);
-extern struct mddev_ident *conf_match(struct mdinfo *info, struct supertype *st);
+extern struct mddev_ident *conf_match(struct supertype *st,
+ struct mdinfo *info,
+ char *devname,
+ int verbose, int *rvp);
extern int experimental(void);
extern void free_line(char *line);
@@ -1136,6 +1169,7 @@ extern unsigned long long get_component_size(int fd);
extern void remove_partitions(int fd);
extern int test_partition(int fd);
extern int test_partition_from_id(dev_t id);
+extern int get_data_disks(int level, int layout, int raid_disks);
extern unsigned long long calc_array_size(int level, int raid_disks, int layout,
int chunksize, unsigned long long devsize);
extern int flush_metadata_updates(struct supertype *st);
@@ -1143,7 +1177,7 @@ extern void append_metadata_update(struct supertype *st, void *buf, int len);
extern int assemble_container_content(struct supertype *st, int mdfd,
struct mdinfo *content, int runstop,
char *chosen_name, int verbose,
- char *backup_file);
+ char *backup_file, int freeze_reshape);
extern struct mdinfo *container_choose_spares(struct supertype *st,
unsigned long long min_size,
struct domainlist *domlist,
@@ -1345,3 +1379,5 @@ static inline int xasprintf(char **strp, const char *fmt, ...) {
#define PATH_MAX 4096
#endif
+#define PROCESS_DELAYED -2
+#define PROCESS_PENDING -3
diff --git a/mdadm.spec b/mdadm.spec
index c5b8bda7..52c3d6a8 100644
--- a/mdadm.spec
+++ b/mdadm.spec
@@ -1,6 +1,6 @@
Summary: mdadm is used for controlling Linux md devices (aka RAID arrays)
Name: mdadm
-Version: 3.2.2
+Version: 3.2.3
Release: 1
Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tgz
URL: http://neil.brown.name/blog/mdadm
@@ -41,6 +41,8 @@ rm -rf $RPM_BUILD_ROOT
%defattr(-,root,root)
%doc TODO ChangeLog mdadm.conf-example COPYING
%{_sbindir}/mdadm
+%{_sbindir}/mdmon
+/lib/udev/rules.d/64-md-raid.rules
%config(noreplace,missingok)/%{_sysconfdir}/mdadm.conf
%{_mandir}/man*/md*
diff --git a/mdassemble.8 b/mdassemble.8
index 02105246..824d98b5 100644
--- a/mdassemble.8
+++ b/mdassemble.8
@@ -1,5 +1,5 @@
.\" -*- nroff -*-
-.TH MDASSEMBLE 8 "" v3.2.2
+.TH MDASSEMBLE 8 "" v3.2.3
.SH NAME
mdassemble \- assemble MD devices
.I aka
diff --git a/mdassemble.c b/mdassemble.c
index 66e480a8..f5bc7464 100644
--- a/mdassemble.c
+++ b/mdassemble.c
@@ -85,7 +85,7 @@ int main(int argc, char *argv[]) {
rv |= Assemble(array_list->st, array_list->devname,
array_list, NULL, NULL, 0,
readonly, runstop, NULL, NULL, 0,
- verbose, force);
+ verbose, force, 1);
}
return rv;
}
diff --git a/mdmon.8 b/mdmon.8
index 7939a99a..8c1ce5f2 100644
--- a/mdmon.8
+++ b/mdmon.8
@@ -1,5 +1,5 @@
.\" See file COPYING in distribution for details.
-.TH MDMON 8 "" v3.2.2
+.TH MDMON 8 "" v3.2.3
.SH NAME
mdmon \- monitor MD external metadata arrays
@@ -104,7 +104,7 @@ within those disks. MD metadata in comparison defines a 1:1
relationship between a set of block devices and a raid array. For
example to create 2 arrays at different raid levels on a single
set of disks, MD metadata requires the disks be partitioned and then
-each array can created be created with a subset of those partitions. The
+each array can be created with a subset of those partitions. The
supported external formats perform this disk carving internally.
.P
Container devices simply hold references to all member disks and allow
@@ -172,7 +172,7 @@ Note that
is automatically started by
.I mdadm
when needed and so does not need to be considered when working with
-RAID arrays. The only times it is run other that by
+RAID arrays. The only times it is run other than by
.I mdadm
is when the boot scripts need to restart it after mounting the new
root filesystem.
diff --git a/mdmon.c b/mdmon.c
index ee68e3c3..b6ae0e68 100644
--- a/mdmon.c
+++ b/mdmon.c
@@ -189,6 +189,9 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
kill(pid, SIGTERM);
+ if (sock < 0)
+ return;
+
/* Wait for monitor to exit by reading from the socket, after
* clearing the non-blocking flag */
fl = fcntl(sock, F_GETFL, 0);
@@ -468,6 +471,7 @@ static int mdmon(char *devname, int devnum, int must_fork, int takeover)
exit(3);
}
close(victim_sock);
+ victim_sock = -1;
}
if (container->ss->load_container(container, mdfd, devname)) {
fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
@@ -501,7 +505,8 @@ static int mdmon(char *devname, int devnum, int must_fork, int takeover)
if (victim > 0) {
try_kill_monitor(victim, container->devname, victim_sock);
- close(victim_sock);
+ if (victim_sock >= 0)
+ close(victim_sock);
}
setsid();
diff --git a/mdmon.h b/mdmon.h
index 6d1776f9..59e1b537 100644
--- a/mdmon.h
+++ b/mdmon.h
@@ -28,6 +28,7 @@ struct active_array {
struct mdinfo info;
struct supertype *container;
struct active_array *next, *replaces;
+ int to_remove;
int action_fd;
int resync_start_fd;
diff --git a/mdopen.c b/mdopen.c
index 0a174211..eac1c1fc 100644
--- a/mdopen.c
+++ b/mdopen.c
@@ -38,9 +38,9 @@ void make_parts(char *dev, int cnt)
* else that of dev
*/
struct stat stb;
- int major_num = major_num; /* quiet gcc -Os unitialized warning */
- int minor_num = minor_num; /* quiet gcc -Os unitialized warning */
- int odig = odig; /* quiet gcc -Os unitialized warning */
+ int major_num;
+ int minor_num;
+ int odig;
int i;
int nlen = strlen(dev) + 20;
char *name;
@@ -53,23 +53,26 @@ void make_parts(char *dev, int cnt)
if (lstat(dev, &stb)!= 0)
return;
- if (S_ISLNK(stb.st_mode)) {
+ if (S_ISBLK(stb.st_mode)) {
+ major_num = major(stb.st_rdev);
+ minor_num = minor(stb.st_rdev);
+ odig = -1;
+ } else if (S_ISLNK(stb.st_mode)) {
int len = readlink(dev, orig, sizeof(orig));
if (len < 0 || len > 1000)
return;
orig[len] = 0;
odig = isdigit(orig[len-1]);
- } else if (S_ISBLK(stb.st_mode)) {
- major_num = major(stb.st_rdev);
- minor_num = minor(stb.st_rdev);
+ major_num = -1;
+ minor_num = -1;
} else
- return;
+ return;
name = malloc(nlen);
for (i=1; i <= cnt ; i++) {
struct stat stb2;
snprintf(name, nlen, "%s%s%d", dev, dig?"p":"", i);
if (stat(name, &stb2)==0) {
- if (!S_ISBLK(stb2.st_mode))
+ if (!S_ISBLK(stb2.st_mode) || !S_ISBLK(stb.st_mode))
continue;
if (stb2.st_rdev == makedev(major_num, minor_num+i))
continue;
@@ -360,8 +363,12 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
if (lstat(chosen, &stb) == 0) {
char buf[300];
+ ssize_t link_len = readlink(chosen, buf, sizeof(buf)-1);
+ if (link_len >= 0)
+ buf[link_len] = '\0';
+
if ((stb.st_mode & S_IFMT) != S_IFLNK ||
- readlink(chosen, buf, 300) <0 ||
+ link_len < 0 ||
strcmp(buf, devname) != 0) {
fprintf(stderr, Name ": %s exists - ignoring\n",
chosen);
diff --git a/mdstat.c b/mdstat.c
index 3d2edadb..6ead24c4 100644
--- a/mdstat.c
+++ b/mdstat.c
@@ -131,10 +131,15 @@ struct mdstat_ent *mdstat_read(int hold, int start)
FILE *f;
struct mdstat_ent *all, *rv, **end, **insert_here;
char *line;
+ int fd;
if (hold && mdstat_fd != -1) {
lseek(mdstat_fd, 0L, 0);
- f = fdopen(dup(mdstat_fd), "r");
+ fd = dup(mdstat_fd);
+ if (fd >= 0)
+ f = fdopen(fd, "r");
+ else
+ return NULL;
} else
f = fopen("/proc/mdstat", "r");
if (f == NULL)
@@ -257,10 +262,10 @@ struct mdstat_ent *mdstat_read(int hold, int start)
if (strncmp(w, "check", 5)==0)
ent->resync = 3;
- if (l > 8 && strcmp(w+l-8, "=DELAYED"))
- ent->percent = 0;
- if (l > 8 && strcmp(w+l-8, "=PENDING"))
- ent->percent = 0;
+ if (l > 8 && strcmp(w+l-8, "=DELAYED") == 0)
+ ent->percent = PROCESS_DELAYED;
+ if (l > 8 && strcmp(w+l-8, "=PENDING") == 0)
+ ent->percent = PROCESS_PENDING;
} else if (ent->percent == -1 &&
w[0] >= '0' &&
w[0] <= '9' &&
diff --git a/monitor.c b/monitor.c
index 7ac59072..29bde18a 100644
--- a/monitor.c
+++ b/monitor.c
@@ -339,7 +339,8 @@ static int read_and_act(struct active_array *a)
a->container->ss->set_disk(a, mdi->disk.raid_disk,
mdi->curr_state);
check_degraded = 1;
- mdi->next_state |= DS_UNBLOCK;
+ if (mdi->curr_state & DS_BLOCKED)
+ mdi->next_state |= DS_UNBLOCK;
if (a->curr_state == read_auto) {
a->container->ss->set_array_state(a, 0);
a->next_state = active;
@@ -479,7 +480,7 @@ static void reconcile_failed(struct active_array *aa, struct mdinfo *failed)
struct mdinfo *victim;
for (a = aa; a; a = a->next) {
- if (!a->container)
+ if (!a->container || a->to_remove)
continue;
victim = find_device(a, failed->disk.major, failed->disk.minor);
if (!victim)
@@ -539,7 +540,7 @@ static int wait_and_act(struct supertype *container, int nowait)
/* once an array has been deactivated we want to
* ask the manager to discard it.
*/
- if (!a->container) {
+ if (!a->container || a->to_remove) {
if (discard_this) {
ap = &(*ap)->next;
continue;
@@ -642,7 +643,7 @@ static int wait_and_act(struct supertype *container, int nowait)
/* FIXME check if device->state_fd need to be cleared?*/
signal_manager();
}
- if (a->container) {
+ if (a->container && !a->to_remove) {
is_dirty = read_and_act(a);
rv |= 1;
dirty_arrays += is_dirty;
@@ -657,7 +658,7 @@ static int wait_and_act(struct supertype *container, int nowait)
/* propagate failures across container members */
for (a = *aap; a ; a = a->next) {
- if (!a->container)
+ if (!a->container || a->to_remove)
continue;
for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
if (mdi->curr_state & DS_FAULTY)
diff --git a/msg.c b/msg.c
index a10c930d..dc780b3e 100644
--- a/msg.c
+++ b/msg.c
@@ -207,9 +207,14 @@ int fping_monitor(int sfd)
int ping_monitor(char *devname)
{
int sfd = connect_monitor(devname);
- int err = fping_monitor(sfd);
+ int err;
+
+ if (sfd >= 0) {
+ err = fping_monitor(sfd);
+ close(sfd);
+ } else
+ err = -1;
- close(sfd);
return err;
}
@@ -281,6 +286,40 @@ int block_subarray(struct mdinfo *sra)
return rc;
}
+
+/* check mdmon version if it supports
+ * array blocking mechanism
+ */
+int check_mdmon_version(char *container)
+{
+ char *version = NULL;
+ int devnum = devname2devnum(container);
+
+ if (!mdmon_running(devnum)) {
+ /* if mdmon is not active we assume that any instance that is
+ * later started will match the current mdadm version, if this
+ * assumption is violated we may inadvertently rebuild an array
+ * that was meant for reshape, or start rebuild on a spare that
+ * was to be moved to another container
+ */
+ /* pass */;
+ } else {
+ int ver;
+
+ version = ping_monitor_version(container);
+ ver = version ? mdadm_version(version) : -1;
+ free(version);
+ if (ver < 3002000) {
+ fprintf(stderr, Name
+ ": mdmon instance for %s cannot be disabled\n",
+ container);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
/**
* block_monitor - prevent mdmon spare assignment
* @container - container to block
@@ -302,34 +341,13 @@ int block_subarray(struct mdinfo *sra)
*/
int block_monitor(char *container, const int freeze)
{
- int devnum = devname2devnum(container);
struct mdstat_ent *ent, *e, *e2;
struct mdinfo *sra = NULL;
- char *version = NULL;
char buf[64];
int rv = 0;
- if (!mdmon_running(devnum)) {
- /* if mdmon is not active we assume that any instance that is
- * later started will match the current mdadm version, if this
- * assumption is violated we may inadvertantly rebuild an array
- * that was meant for reshape, or start rebuild on a spare that
- * was to be moved to another container
- */
- /* pass */;
- } else {
- int ver;
-
- version = ping_monitor_version(container);
- ver = version ? mdadm_version(version) : -1;
- free(version);
- if (ver < 3002000) {
- fprintf(stderr, Name
- ": mdmon instance for %s cannot be disabled\n",
- container);
- return -1;
- }
- }
+ if (check_mdmon_version(container))
+ return -1;
ent = mdstat_read(0, 0);
if (!ent) {
@@ -430,6 +448,8 @@ void unblock_monitor(char *container, const int unfreeze)
continue;
sysfs_free(sra);
sra = sysfs_read(-1, e->devnum, GET_VERSION|GET_LEVEL);
+ if (!sra)
+ continue;
if (sra->array.level > 0)
to_ping++;
if (unblock_subarray(sra, unfreeze))
diff --git a/platform-intel.h b/platform-intel.h
index e24ae379..c997f1b8 100644
--- a/platform-intel.h
+++ b/platform-intel.h
@@ -124,10 +124,11 @@ static inline int imsm_orom_has_raid5(const struct imsm_orom *orom)
static inline int imsm_orom_has_chunk(const struct imsm_orom *orom, int chunk)
{
int fs = ffs(chunk);
-
if (!fs)
return 0;
fs--; /* bit num to bit index */
+ if (chunk & (chunk-1))
+ return 0; /* not a power of 2 */
return !!(orom->sss & (1 << (fs - 1)));
}
@@ -167,21 +168,6 @@ static inline int fls(int x)
return r;
}
-/**
- * imsm_orom_default_chunk - return the largest chunk size supported via orom
- * @orom: orom pointer from find_imsm_orom
- */
-static inline int imsm_orom_default_chunk(const struct imsm_orom *orom)
-{
- int fs = fls(orom->sss);
-
- if (!fs)
- return 0;
-
- return min(512, (1 << fs));
-}
-
-
enum sys_dev_type {
SYS_DEV_UNKNOWN = 0,
SYS_DEV_SAS,
@@ -189,7 +175,6 @@ enum sys_dev_type {
SYS_DEV_MAX
};
-
struct sys_dev {
enum sys_dev_type type;
char *path;
diff --git a/policy.c b/policy.c
index ebb14814..cd260c69 100644
--- a/policy.c
+++ b/policy.c
@@ -195,7 +195,9 @@ static char *disk_path(struct mdinfo *disk)
int prefix_len;
DIR *by_path;
char symlink[PATH_MAX] = "/dev/disk/by-path/";
+ char nm[PATH_MAX];
struct dirent *ent;
+ int rv;
by_path = opendir(symlink);
if (!by_path)
@@ -218,7 +220,17 @@ static char *disk_path(struct mdinfo *disk)
return strdup(ent->d_name);
}
closedir(by_path);
- return NULL;
+ /* A NULL path isn't really acceptable - use the devname.. */
+ sprintf(symlink, "/sys/dev/block/%d:%d", disk->disk.major, disk->disk.minor);
+ rv = readlink(symlink, nm, sizeof(nm)-1);
+ if (rv > 0) {
+ char *dname;
+ nm[rv] = 0;
+ dname = strrchr(nm, '/');
+ if (dname)
+ return strdup(dname + 1);
+ }
+ return strdup("unknown");
}
char type_part[] = "part";
@@ -245,13 +257,13 @@ static int pol_match(struct rule *rule, char *path, char *type)
if (rule->name == rule_path) {
if (pathok == 0)
pathok = -1;
- if (fnmatch(rule->value, path, 0) == 0)
+ if (path && fnmatch(rule->value, path, 0) == 0)
pathok = 1;
}
if (rule->name == rule_type) {
if (typeok == 0)
typeok = -1;
- if (strcmp(rule->value, type) == 0)
+ if (type && strcmp(rule->value, type) == 0)
typeok = 1;
}
rule = rule->next;
@@ -270,7 +282,8 @@ static void pol_merge(struct dev_policy **pol, struct rule *rule)
for (r = rule; r ; r = r->next)
if (r->name == pol_act ||
- r->name == pol_domain)
+ r->name == pol_domain ||
+ r->name == pol_auto)
pol_new(pol, r->name, r->value, metadata);
}
@@ -280,7 +293,10 @@ static int path_has_part(char *path, char **part)
* if it does, place a pointer to "-pathNN"
* in 'part'.
*/
- int l = strlen(path);
+ int l;
+ if (!path)
+ return 0;
+ l = strlen(path);
while (l > 1 && isdigit(path[l-1]))
l--;
if (l < 5 || strncmp(path+l-5, "-part", 5) != 0)
@@ -343,9 +359,6 @@ struct dev_policy *path_policy(char *path, char *type)
struct dev_policy *pol = NULL;
int i;
- if (!type)
- return NULL;
-
rules = config_rules;
while (rules) {
@@ -366,7 +379,7 @@ struct dev_policy *path_policy(char *path, char *type)
/* Now add any metadata-specific internal knowledge
* about this path
*/
- for (i=0; superlist[i]; i++)
+ for (i=0; path && superlist[i]; i++)
if (superlist[i]->get_disk_controller_domain) {
const char *d =
superlist[i]->get_disk_controller_domain(path);
@@ -399,12 +412,8 @@ struct dev_policy *disk_policy(struct mdinfo *disk)
char *type = disk_type(disk);
struct dev_policy *pol = NULL;
- if (!type)
- return NULL;
if (config_rules_has_path)
path = disk_path(disk);
- if (!path)
- return NULL;
pol = path_policy(path, type);
@@ -501,6 +510,7 @@ void policy_add(char *type, ...)
}
pr->next = config_rules;
config_rules = pr;
+ va_end(ap);
}
void policy_free(void)
@@ -678,6 +688,8 @@ struct domainlist *domain_from_array(struct mdinfo *mdi, const char *metadata)
{
struct domainlist *domlist = NULL;
+ if (!mdi)
+ return NULL;
for (mdi = mdi->devs ; mdi ; mdi = mdi->next)
domainlist_add_dev(&domlist, makedev(mdi->disk.major,
mdi->disk.minor),
@@ -755,8 +767,10 @@ int policy_check_path(struct mdinfo *disk, struct map_ent *array)
snprintf(path, PATH_MAX, FAILED_SLOTS_DIR "/%s", id_path);
f = fopen(path, "r");
- if (!f)
+ if (!f) {
+ free(id_path);
return 0;
+ }
rv = fscanf(f, " %s %x:%x:%x:%x\n",
array->metadata,
@@ -765,6 +779,7 @@ int policy_check_path(struct mdinfo *disk, struct map_ent *array)
array->uuid+2,
array->uuid+3);
fclose(f);
+ free(id_path);
return rv == 5;
}
@@ -868,7 +883,8 @@ int Write_rules(char *rule_name)
char udev_rule_file[PATH_MAX];
if (rule_name) {
- strcpy(udev_rule_file, rule_name);
+ strncpy(udev_rule_file, rule_name, sizeof(udev_rule_file) - 6);
+ udev_rule_file[sizeof(udev_rule_file) - 6] = '\0';
strcat(udev_rule_file, ".temp");
fd = creat(udev_rule_file,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
diff --git a/raid6check.8 b/raid6check.8
new file mode 100644
index 00000000..50033430
--- /dev/null
+++ b/raid6check.8
@@ -0,0 +1,96 @@
+.\" -*- nroff -*-
+.\" Copyright Piergiorgio Sartor and others.
+.\" This program is free software; you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation; either version 2 of the License, or
+.\" (at your option) any later version.
+.\" See file COPYING in distribution for details.
+.TH RAID6CHECK 8 "" v1.0.0
+.SH NAME
+raid6check \- check MD RAID6 device for errors
+.I aka
+Linux Software RAID
+
+.SH SYNOPSIS
+
+.BI raid6check " <raid6 device> <start stripe> <number of stripes>"
+
+.SH DESCRIPTION
+RAID6 devices in which one single component drive has errors can use
+the double parity in order to find out which component drive it is.
+The "raid6check" tool checks, for each stripe, the double parity
+consistency, reports mismatches and, if possible, which
+component drive has the mismatch.
+Since it works at stripe level, it can report different drives with
+mismatches at different stripes.
+
+"raid6check" requires a non-degraded RAID6 MD device as first
+parameter, a starting stripe (usually 0) and the number of stripes
+to be checked.
+If this third parameter is also 0, it will check the array up to
+the end.
+
+"raid6check" will start printing information about the RAID6, then
+for each stripe, it will report the parity rotation status.
+In case of parity mismatches, "raid6check" reports, if possible,
+which component drive could be responsible. Otherwise it reports
+that it is not possible to find the component drive.
+
+If the given MD device is not a RAID6, "raid6check" will, of
+course, not continue.
+
+If the RAID6 MD device is degraded, "raid6check" will report
+an error and it will not proceed further.
+
+No write operations are performed on the array or the components.
+Furthermore, the checked array can be online and in use during
+the operation of "raid6check".
+
+.SH EXAMPLES
+
+.B " raid6check /dev/md0 0 0"
+.br
+This will check /dev/md0 from start to end.
+
+.B " raid6check /dev/md3 0 1"
+.br
+This will check the first stripe of /dev/md3.
+
+.B " raid6check /dev/md1 1000 0"
+.br
+This will check /dev/md1 from stripe 1000 up to the end.
+
+.B " raid6check /dev/md127 128 256"
+.br
+This will check 256 stripes of /dev/md127 starting from stripe 128.
+
+.B " raid6check /dev/md0 0 0 | grep -i error > md0_err.log"
+.br
+This will check /dev/md0 completely and create a log file only
+with errors, if any.
+
+.SH FILES
+
+"raid6check" directly uses the component drives as found in /dev.
+Furthermore, the sysfs interface is needed in order to find out
+the RAID6 parameters.
+
+.SH BUGS
+Negative parameters can lead to unexpected results.
+
+It is not clear what will happen if the RAID6 MD device gets
+degraded during the check.
+
+.PP
+The latest version of
+.I raid6check
+should always be available from
+.IP
+.B http://www.kernel.org/pub/linux/utils/raid/mdadm/
+.PP
+Related man pages:
+.PP
+.IR mdadm (8),
+.IR mdmon (8),
+.IR mdadm.conf (5),
+.IR md (4).
diff --git a/restripe.c b/restripe.c
index 9c83e2eb..00e7a822 100644
--- a/restripe.c
+++ b/restripe.c
@@ -687,6 +687,7 @@ int restore_stripes(int *dest, unsigned long long *offsets,
char **stripes = malloc(raid_disks * sizeof(char*));
char **blocks = malloc(raid_disks * sizeof(char*));
int i;
+ int rv;
int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2);
@@ -704,11 +705,8 @@ int restore_stripes(int *dest, unsigned long long *offsets,
if (stripe_buf == NULL || stripes == NULL || blocks == NULL
|| zero == NULL) {
- free(stripe_buf);
- free(stripes);
- free(blocks);
- free(zero);
- return -2;
+ rv = -2;
+ goto abort;
}
for (i = 0; i < raid_disks; i++)
stripes[i] = stripe_buf + i * chunk_size;
@@ -717,20 +715,26 @@ int restore_stripes(int *dest, unsigned long long *offsets,
unsigned long long offset;
int disk, qdisk;
int syndrome_disks;
- if (length < len)
- return -3;
+ if (length < len) {
+ rv = -3;
+ goto abort;
+ }
for (i = 0; i < data_disks; i++) {
int disk = geo_map(i, start/chunk_size/data_disks,
raid_disks, level, layout);
if (src_buf == NULL) {
/* read from file */
- if (lseek64(source,
- read_offset, 0) != (off64_t)read_offset)
- return -1;
+ if (lseek64(source, read_offset, 0) !=
+ (off64_t)read_offset) {
+ rv = -1;
+ goto abort;
+ }
if (read(source,
stripes[disk],
- chunk_size) != chunk_size)
- return -1;
+ chunk_size) != chunk_size) {
+ rv = -1;
+ goto abort;
+ }
} else {
/* read from input buffer */
memcpy(stripes[disk],
@@ -782,15 +786,27 @@ int restore_stripes(int *dest, unsigned long long *offsets,
}
for (i=0; i < raid_disks ; i++)
if (dest[i] >= 0) {
- if (lseek64(dest[i], offsets[i]+offset, 0) < 0)
- return -1;
- if (write(dest[i], stripes[i], chunk_size) != chunk_size)
- return -1;
+ if (lseek64(dest[i],
+ offsets[i]+offset, 0) < 0) {
+ rv = -1;
+ goto abort;
+ }
+ if (write(dest[i], stripes[i],
+ chunk_size) != chunk_size) {
+ rv = -1;
+ goto abort;
+ }
}
length -= len;
start += len;
}
- return 0;
+ rv = 0;
+
+abort:
+ free(stripe_buf);
+ free(stripes);
+ free(blocks);
+ return rv;
}
#ifdef MAIN
diff --git a/super-ddf.c b/super-ddf.c
index 7312ba4b..b5b0b422 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -1374,6 +1374,7 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *m
info->recovery_start = MaxSector;
info->reshape_active = 0;
+ info->recovery_blocked = 0;
info->name[0] = 0;
info->array.major_version = -1;
@@ -1449,6 +1450,7 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, cha
info->recovery_start = MaxSector;
info->resync_start = 0;
info->reshape_active = 0;
+ info->recovery_blocked = 0;
if (!(ddf->virt->entries[info->container_member].state
& DDF_state_inconsistent) &&
(ddf->virt->entries[info->container_member].init_state
@@ -2555,7 +2557,7 @@ static int reserve_space(struct supertype *st, int raiddisks,
continue;
/* This is bigger than 'size', see if there are enough */
cnt = 0;
- for (dl2 = dl; dl2 ; dl2=dl2->next)
+ for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
if (dl2->esize >= dl->esize)
cnt++;
if (cnt >= raiddisks)
diff --git a/super-gpt.c b/super-gpt.c
index 6f852aa1..75269bf1 100644
--- a/super-gpt.c
+++ b/super-gpt.c
@@ -76,7 +76,7 @@ static int load_gpt(struct supertype *st, int fd, char *devname)
free_gpt(st);
- if (posix_memalign((void**)&super, 512, 32*512) != 0) {
+ if (posix_memalign((void**)&super, 4096, 32*512) != 0) {
fprintf(stderr, Name ": %s could not allocate superblock\n",
__func__);
return 1;
@@ -179,8 +179,10 @@ static struct supertype *match_metadata_desc(char *arg)
if (!st)
return st;
- if (strcmp(arg, "gpt") != 0)
+ if (strcmp(arg, "gpt") != 0) {
+ free(st);
return NULL;
+ }
st->ss = &gpt;
st->info = NULL;
diff --git a/super-intel.c b/super-intel.c
index 2ef2b3c6..0e9269f5 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -74,17 +74,21 @@
/* Define all supported attributes that have to be accepted by mdadm
*/
-#define MPB_ATTRIB_SUPPORTED MPB_ATTRIB_CHECKSUM_VERIFY | \
+#define MPB_ATTRIB_SUPPORTED (MPB_ATTRIB_CHECKSUM_VERIFY | \
MPB_ATTRIB_2TB | \
MPB_ATTRIB_2TB_DISK | \
MPB_ATTRIB_RAID0 | \
MPB_ATTRIB_RAID1 | \
MPB_ATTRIB_RAID10 | \
MPB_ATTRIB_RAID5 | \
- MPB_ATTRIB_EXP_STRIPE_SIZE
+ MPB_ATTRIB_EXP_STRIPE_SIZE)
+
+/* Define attributes that are unused but not harmful */
+#define MPB_ATTRIB_IGNORED (MPB_ATTRIB_NEVER_USE)
#define MPB_SECTOR_CNT 2210
#define IMSM_RESERVED_SECTORS 4096
+#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056
#define SECT_PER_MB_SHIFT 11
/* Disk configuration info. */
@@ -102,6 +106,12 @@ struct imsm_disk {
__u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
};
+/* map selector for map management
+ */
+#define MAP_0 0
+#define MAP_1 1
+#define MAP_X -1
+
/* RAID map configuration infos. */
struct imsm_map {
__u32 pba_of_lba0; /* start address of partition */
@@ -230,6 +240,12 @@ static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed"
#define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */
+#define MIGR_REC_BUF_SIZE 512 /* size of migr_record i/o buffer */
+#define MIGR_REC_POSITION 512 /* migr_record position offset on disk,
+ * MIGR_REC_BUF_SIZE <= MIGR_REC_POSITION
+ */
+
+
#define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must
* be recovered using srcMap */
#define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has
@@ -341,7 +357,7 @@ struct intel_super {
struct extent *e; /* for determining freespace @ create */
int raiddisk; /* slot to fill in autolayout */
enum action action;
- } *disks;
+ } *disks, *current_disk;
struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
active */
struct dl *missing; /* disks removed while we weren't looking */
@@ -658,21 +674,30 @@ struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
{
/* A device can have 2 maps if it is in the middle of a migration.
* If second_map is:
- * 0 - we return the first map
- * 1 - we return the second map if it exists, else NULL
- * -1 - we return the second map if it exists, else the first
+ * MAP_0 - we return the first map
+ * MAP_1 - we return the second map if it exists, else NULL
+ * MAP_X - we return the second map if it exists, else the first
*/
struct imsm_map *map = &dev->vol.map[0];
+ struct imsm_map *map2 = NULL;
- if (second_map == 1 && !dev->vol.migr_state)
- return NULL;
- else if (second_map == 1 ||
- (second_map < 0 && dev->vol.migr_state)) {
- void *ptr = map;
+ if (dev->vol.migr_state)
+ map2 = (void *)map + sizeof_imsm_map(map);
- return ptr + sizeof_imsm_map(map);
- } else
- return map;
+ switch (second_map) {
+ case MAP_0:
+ break;
+ case MAP_1:
+ map = map2;
+ break;
+ case MAP_X:
+ if (map2)
+ map = map2;
+ break;
+ default:
+ map = NULL;
+ }
+ return map;
}
@@ -682,13 +707,13 @@ struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
{
size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
- sizeof_imsm_map(get_imsm_map(dev, 0));
+ sizeof_imsm_map(get_imsm_map(dev, MAP_0));
/* migrating means an additional map */
if (dev->vol.migr_state)
- size += sizeof_imsm_map(get_imsm_map(dev, 1));
+ size += sizeof_imsm_map(get_imsm_map(dev, MAP_1));
else if (migr_state)
- size += sizeof_imsm_map(get_imsm_map(dev, 0));
+ size += sizeof_imsm_map(get_imsm_map(dev, MAP_0));
return size;
}
@@ -742,9 +767,9 @@ static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
/*
* for second_map:
- * == 0 get first map
- * == 1 get second map
- * == -1 than get map according to the current migr_state
+ * == MAP_0 get first map
+ * == MAP_1 get second map
+ * == MAP_X then get map according to the current migr_state
*/
static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
int slot,
@@ -815,7 +840,7 @@ static int count_memberships(struct dl *dl, struct intel_super *super)
for (i = 0; i < super->anchor->num_raid_devs; i++) {
struct imsm_dev *dev = get_imsm_dev(super, i);
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
if (get_imsm_disk_slot(map, dl->index) >= 0)
memberships++;
@@ -824,13 +849,24 @@ static int count_memberships(struct dl *dl, struct intel_super *super)
return memberships;
}
+static __u32 imsm_min_reserved_sectors(struct intel_super *super);
+
static struct extent *get_extents(struct intel_super *super, struct dl *dl)
{
/* find a list of used extents on the given physical device */
struct extent *rv, *e;
int i;
int memberships = count_memberships(dl, super);
- __u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
+ __u32 reservation;
+
+ /* trim the reserved area for spares, so they can join any array
+ * regardless of whether the OROM has assigned sectors from the
+ * IMSM_RESERVED_SECTORS region
+ */
+ if (dl->index == -1)
+ reservation = imsm_min_reserved_sectors(super);
+ else
+ reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
rv = malloc(sizeof(struct extent) * (memberships + 1));
if (!rv)
@@ -839,7 +875,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl)
for (i = 0; i < super->anchor->num_raid_devs; i++) {
struct imsm_dev *dev = get_imsm_dev(super, i);
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
if (get_imsm_disk_slot(map, dl->index) >= 0) {
e->start = __le32_to_cpu(map->pba_of_lba0);
@@ -921,6 +957,51 @@ static int is_failed(struct imsm_disk *disk)
return (disk->status & FAILED_DISK) == FAILED_DISK;
}
+/* try to determine how much space is reserved for metadata from
+ * the last get_extents() entry on the smallest active disk,
+ * otherwise fallback to the default
+ */
+static __u32 imsm_min_reserved_sectors(struct intel_super *super)
+{
+ struct extent *e;
+ int i;
+ __u32 min_active, remainder;
+ __u32 rv = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
+ struct dl *dl, *dl_min = NULL;
+
+ if (!super)
+ return rv;
+
+ min_active = 0;
+ for (dl = super->disks; dl; dl = dl->next) {
+ if (dl->index < 0)
+ continue;
+ if (dl->disk.total_blocks < min_active || min_active == 0) {
+ dl_min = dl;
+ min_active = dl->disk.total_blocks;
+ }
+ }
+ if (!dl_min)
+ return rv;
+
+ /* find last lba used by subarrays on the smallest active disk */
+ e = get_extents(super, dl_min);
+ if (!e)
+ return rv;
+ for (i = 0; e[i].size; i++)
+ continue;
+
+ remainder = min_active - e[i].start;
+ free(e);
+
+ /* to give priority to recovery we should not require full
+ IMSM_RESERVED_SECTORS from the spare */
+ rv = MPB_SECTOR_CNT + NUM_BLOCKS_DIRTY_STRIPE_REGION;
+
+ /* if real reservation is smaller use that value */
+ return (remainder < rv) ? remainder : rv;
+}
+
/* Return minimum size of a spare that can be used in this array*/
static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
{
@@ -947,11 +1028,15 @@ static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
if (i > 0)
rv = e[i-1].start + e[i-1].size;
free(e);
+
/* add the amount of space needed for metadata */
- rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
+ rv = rv + imsm_min_reserved_sectors(super);
+
return rv * 512;
}
+static int is_gen_migration(struct imsm_dev *dev);
+
#ifndef MDASSEMBLE
static __u64 blocks_per_migr_unit(struct intel_super *super,
struct imsm_dev *dev);
@@ -963,8 +1048,8 @@ static void print_imsm_dev(struct intel_super *super,
{
__u64 sz;
int slot, i;
- struct imsm_map *map = get_imsm_map(dev, 0);
- struct imsm_map *map2 = get_imsm_map(dev, 1);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
__u32 ord;
printf("\n");
@@ -980,14 +1065,14 @@ static void print_imsm_dev(struct intel_super *super,
printf("\n");
printf(" Slots : [");
for (i = 0; i < map->num_members; i++) {
- ord = get_imsm_ord_tbl_ent(dev, i, 0);
+ ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
}
printf("]");
if (map2) {
printf(" <-- [");
for (i = 0; i < map2->num_members; i++) {
- ord = get_imsm_ord_tbl_ent(dev, i, 1);
+ ord = get_imsm_ord_tbl_ent(dev, i, MAP_1);
printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
}
printf("]");
@@ -1001,7 +1086,7 @@ static void print_imsm_dev(struct intel_super *super,
printf("\n");
slot = get_imsm_disk_slot(map, disk_idx);
if (slot >= 0) {
- ord = get_imsm_ord_tbl_ent(dev, slot, -1);
+ ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
printf(" This Slot : %d%s\n", slot,
ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
} else
@@ -1045,29 +1130,35 @@ static void print_imsm_dev(struct intel_super *super,
printf("idle\n");
printf(" Map State : %s", map_state_str[map->map_state]);
if (dev->vol.migr_state) {
- struct imsm_map *map = get_imsm_map(dev, 1);
+ struct imsm_map *map = get_imsm_map(dev, MAP_1);
printf(" <-- %s", map_state_str[map->map_state]);
- printf("\n Checkpoint : %u (%llu)",
- __le32_to_cpu(dev->vol.curr_migr_unit),
- (unsigned long long)blocks_per_migr_unit(super, dev));
+ printf("\n Checkpoint : %u ",
+ __le32_to_cpu(dev->vol.curr_migr_unit));
+ if ((is_gen_migration(dev)) && ((slot > 1) || (slot < 0)))
+ printf("(N/A)");
+ else
+ printf("(%llu)", (unsigned long long)
+ blocks_per_migr_unit(super, dev));
}
printf("\n");
printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
}
-static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
+static void print_imsm_disk(struct imsm_disk *disk, int index, __u32 reserved)
{
- struct imsm_disk *disk = __get_imsm_disk(mpb, index);
char str[MAX_RAID_SERIAL_LEN + 1];
__u64 sz;
- if (index < 0 || !disk)
+ if (index < -1 || !disk)
return;
printf("\n");
snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
- printf(" Disk%02d Serial : %s\n", index, str);
+ if (index >= 0)
+ printf(" Disk%02d Serial : %s\n", index, str);
+ else
+ printf(" Disk Serial : %s\n", str);
printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
is_configured(disk) ? " active" : "",
is_failed(disk) ? " failed" : "");
@@ -1077,8 +1168,6 @@ static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
human_size(sz * 512));
}
-static int is_gen_migration(struct imsm_dev *dev);
-
void examine_migr_rec_imsm(struct intel_super *super)
{
struct migr_record *migr_rec = super->migr_rec;
@@ -1087,11 +1176,19 @@ void examine_migr_rec_imsm(struct intel_super *super)
for (i = 0; i < mpb->num_raid_devs; i++) {
struct imsm_dev *dev = __get_imsm_dev(mpb, i);
+ struct imsm_map *map;
+ int slot = -1;
+
if (is_gen_migration(dev) == 0)
continue;
printf("\nMigration Record Information:");
- if (super->disks->index > 1) {
+
+ /* first map under migration */
+ map = get_imsm_map(dev, MAP_0);
+ if (map)
+ slot = get_imsm_disk_slot(map, super->disks->index);
+ if ((map == NULL) || (slot > 1) || (slot < 0)) {
printf(" Empty\n ");
printf("Examine one of first two disks in array\n");
break;
@@ -1141,11 +1238,14 @@ void examine_migr_rec_imsm(struct intel_super *super)
static int imsm_check_attributes(__u32 attributes)
{
int ret_val = 1;
- __u32 not_supported = (MPB_ATTRIB_SUPPORTED)^0xffffffff;
+ __u32 not_supported = MPB_ATTRIB_SUPPORTED^0xffffffff;
+
+ not_supported &= ~MPB_ATTRIB_IGNORED;
not_supported &= attributes;
if (not_supported) {
- fprintf(stderr, Name "(IMSM): Unsupported attributes : %x\n", not_supported);
+ fprintf(stderr, Name "(IMSM): Unsupported attributes : %x\n",
+ (unsigned)__le32_to_cpu(not_supported));
if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY \n");
not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
@@ -1248,7 +1348,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
printf(" Disks : %d\n", mpb->num_disks);
printf(" RAID Devices : %d\n", mpb->num_raid_devs);
- print_imsm_disk(mpb, super->disks->index, reserved);
+ print_imsm_disk(__get_imsm_disk(mpb, super->disks->index), super->disks->index, reserved);
if (super->bbm_log) {
struct bbm_log *log = super->bbm_log;
@@ -1273,28 +1373,12 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
for (i = 0; i < mpb->num_disks; i++) {
if (i == super->disks->index)
continue;
- print_imsm_disk(mpb, i, reserved);
+ print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved);
}
- for (dl = super->disks ; dl; dl = dl->next) {
- struct imsm_disk *disk;
- char str[MAX_RAID_SERIAL_LEN + 1];
- __u64 sz;
- if (dl->index >= 0)
- continue;
-
- disk = &dl->disk;
- printf("\n");
- snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
- printf(" Disk Serial : %s\n", str);
- printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
- is_configured(disk) ? " active" : "",
- is_failed(disk) ? " failed" : "");
- printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
- sz = __le32_to_cpu(disk->total_blocks) - reserved;
- printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
- human_size(sz * 512));
- }
+ for (dl = super->disks; dl; dl = dl->next)
+ if (dl->index == -1)
+ print_imsm_disk(&dl->disk, -1, reserved);
examine_migr_rec_imsm(super);
}
@@ -1518,11 +1602,11 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
fd2devname(fd, buf);
printf(" Port%d : %s", port, buf);
if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
- printf(" (%s)\n", buf);
+ printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf);
else
- printf("()\n");
+ printf(" ()\n");
+ close(fd);
}
- close(fd);
free(path);
path = NULL;
}
@@ -1789,7 +1873,7 @@ get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
{
/* migr_strip_size when repairing or initializing parity */
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
__u32 chunk = __le32_to_cpu(map->blocks_per_strip);
switch (get_imsm_raid_level(map)) {
@@ -1807,7 +1891,7 @@ static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
* this is different than migr_strip_size_resync(), but it's good
* to be compatible
*/
- struct imsm_map *map = get_imsm_map(dev, 1);
+ struct imsm_map *map = get_imsm_map(dev, MAP_1);
__u32 chunk = __le32_to_cpu(map->blocks_per_strip);
switch (get_imsm_raid_level(map)) {
@@ -1826,8 +1910,8 @@ static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
{
- struct imsm_map *lo = get_imsm_map(dev, 0);
- struct imsm_map *hi = get_imsm_map(dev, 1);
+ struct imsm_map *lo = get_imsm_map(dev, MAP_0);
+ struct imsm_map *hi = get_imsm_map(dev, MAP_1);
__u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
__u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
@@ -1836,11 +1920,11 @@ static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
{
- struct imsm_map *lo = get_imsm_map(dev, 0);
+ struct imsm_map *lo = get_imsm_map(dev, MAP_0);
int level = get_imsm_raid_level(lo);
if (level == 1 || level == 10) {
- struct imsm_map *hi = get_imsm_map(dev, 1);
+ struct imsm_map *hi = get_imsm_map(dev, MAP_1);
return hi->num_domains;
} else
@@ -1869,7 +1953,7 @@ static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
static __u32 parity_segment_depth(struct imsm_dev *dev)
{
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
__u32 chunk = __le32_to_cpu(map->blocks_per_strip);
switch(get_imsm_raid_level(map)) {
@@ -1885,7 +1969,7 @@ static __u32 parity_segment_depth(struct imsm_dev *dev)
static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
{
- struct imsm_map *map = get_imsm_map(dev, 1);
+ struct imsm_map *map = get_imsm_map(dev, MAP_1);
__u32 chunk = __le32_to_cpu(map->blocks_per_strip);
__u32 strip = block / chunk;
@@ -1924,7 +2008,7 @@ static __u64 blocks_per_migr_unit(struct intel_super *super,
case MIGR_VERIFY:
case MIGR_REPAIR:
case MIGR_INIT: {
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
__u32 stripes_per_unit;
__u32 blocks_per_unit;
__u32 parity_depth;
@@ -1940,7 +2024,7 @@ static __u64 blocks_per_migr_unit(struct intel_super *super,
*/
stripes_per_unit = num_stripes_per_unit_resync(dev);
migr_chunk = migr_strip_blocks_resync(dev);
- disks = imsm_num_data_members(dev, 0);
+ disks = imsm_num_data_members(dev, MAP_0);
blocks_per_unit = stripes_per_unit * migr_chunk * disks;
stripe = __le16_to_cpu(map->blocks_per_strip) * disks;
segment = blocks_per_unit / stripe;
@@ -1994,13 +2078,14 @@ static int read_imsm_migr_rec(int fd, struct intel_super *super)
unsigned long long dsize;
get_dev_size(fd, NULL, &dsize);
- if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
+ if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) {
fprintf(stderr,
Name ": Cannot seek to anchor block: %s\n",
strerror(errno));
goto out;
}
- if (read(fd, super->migr_rec_buf, 512) != 512) {
+ if (read(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) !=
+ MIGR_REC_BUF_SIZE) {
fprintf(stderr,
Name ": Cannot read migr record block: %s\n",
strerror(errno));
@@ -2012,6 +2097,19 @@ out:
return ret_val;
}
+static struct imsm_dev *imsm_get_device_during_migration(
+ struct intel_super *super)
+{
+
+ struct intel_dev *dv;
+
+ for (dv = super->devlist; dv; dv = dv->next) {
+ if (is_gen_migration(dv->dev))
+ return dv->dev;
+ }
+ return NULL;
+}
+
/*******************************************************************************
* Function: load_imsm_migr_rec
* Description: Function reads imsm migration record (it is stored at the last
@@ -2022,6 +2120,7 @@ out:
* Returns:
* 0 : success
* -1 : fail
+ * -2 : no migration in progress
******************************************************************************/
static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
{
@@ -2030,13 +2129,31 @@ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
char nm[30];
int retval = -1;
int fd = -1;
+ struct imsm_dev *dev;
+ struct imsm_map *map = NULL;
+ int slot = -1;
+
+ /* find map under migration */
+ dev = imsm_get_device_during_migration(super);
+ /* nothing to load, no migration in progress?
+ */
+ if (dev == NULL)
+ return -2;
+ map = get_imsm_map(dev, MAP_0);
if (info) {
for (sd = info->devs ; sd ; sd = sd->next) {
+ /* skip spare and failed disks
+ */
+ if (sd->disk.raid_disk < 0)
+ continue;
/* read only from one of the first two slots */
- if ((sd->disk.raid_disk > 1) ||
- (sd->disk.raid_disk < 0))
+ if (map)
+ slot = get_imsm_disk_slot(map,
+ sd->disk.raid_disk);
+ if ((map == NULL) || (slot > 1) || (slot < 0))
continue;
+
sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
fd = dev_open(nm, O_RDONLY);
if (fd >= 0)
@@ -2045,8 +2162,14 @@ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
}
if (fd < 0) {
for (dl = super->disks; dl; dl = dl->next) {
+ /* skip spare and failed disks
+ */
+ if (dl->index < 0)
+ continue;
/* read only from one of the first two slots */
- if (dl->index > 1)
+ if (map)
+ slot = get_imsm_disk_slot(map, dl->index);
+ if ((map == NULL) || (slot > 1) || (slot < 0))
continue;
sprintf(nm, "%d:%d", dl->major, dl->minor);
fd = dev_open(nm, O_RDONLY);
@@ -2130,23 +2253,45 @@ static int write_imsm_migr_rec(struct supertype *st)
struct dl *sd;
int len;
struct imsm_update_general_migration_checkpoint *u;
+ struct imsm_dev *dev;
+ struct imsm_map *map = NULL;
+
+ /* find map under migration */
+ dev = imsm_get_device_during_migration(super);
+ /* if no migration, write buffer anyway to clear migr_record
+ * on disk based on first available device
+ */
+ if (dev == NULL)
+ dev = get_imsm_dev(super, super->current_vol < 0 ? 0 :
+ super->current_vol);
+
+ map = get_imsm_map(dev, MAP_0);
for (sd = super->disks ; sd ; sd = sd->next) {
+ int slot = -1;
+
+ /* skip failed and spare devices */
+ if (sd->index < 0)
+ continue;
/* write to 2 first slots only */
- if ((sd->index < 0) || (sd->index > 1))
+ if (map)
+ slot = get_imsm_disk_slot(map, sd->index);
+ if ((map == NULL) || (slot > 1) || (slot < 0))
continue;
+
sprintf(nm, "%d:%d", sd->major, sd->minor);
fd = dev_open(nm, O_RDWR);
if (fd < 0)
continue;
get_dev_size(fd, NULL, &dsize);
- if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
+ if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) {
fprintf(stderr,
Name ": Cannot seek to anchor block: %s\n",
strerror(errno));
goto out;
}
- if (write(fd, super->migr_rec_buf, 512) != 512) {
+ if (write(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) !=
+ MIGR_REC_BUF_SIZE) {
fprintf(stderr,
Name ": Cannot write migr record block: %s\n",
strerror(errno));
@@ -2184,13 +2329,37 @@ static int write_imsm_migr_rec(struct supertype *st)
}
#endif /* MDASSEMBLE */
+/* spare/missing disk activations are not allowed while an
+ * array/container performs a reshape operation, because
+ * all arrays in a container work on the same set of disks
+ */
+int imsm_reshape_blocks_arrays_changes(struct intel_super *super)
+{
+ int rv = 0;
+ struct intel_dev *i_dev;
+ struct imsm_dev *dev;
+
+ /* check whole container
+ */
+ for (i_dev = super->devlist; i_dev; i_dev = i_dev->next) {
+ dev = i_dev->dev;
+ if (is_gen_migration(dev)) {
+ /* No repair during any migration in container
+ */
+ rv = 1;
+ break;
+ }
+ }
+ return rv;
+}
+
static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
{
struct intel_super *super = st->sb;
struct migr_record *migr_rec = super->migr_rec;
struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
- struct imsm_map *map = get_imsm_map(dev, 0);
- struct imsm_map *prev_map = get_imsm_map(dev, 1);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ struct imsm_map *prev_map = get_imsm_map(dev, MAP_1);
struct imsm_map *map_to_analyse = map;
struct dl *dl;
char *devname;
@@ -2201,7 +2370,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
if (prev_map)
map_to_analyse = prev_map;
- dl = super->disks;
+ dl = super->current_disk;
info->container_member = super->current_vol;
info->array.raid_disks = map->num_members;
@@ -2216,7 +2385,9 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
info->custom_array_size = __le32_to_cpu(dev->size_high);
info->custom_array_size <<= 32;
info->custom_array_size |= __le32_to_cpu(dev->size_low);
- if (prev_map && map->map_state == prev_map->map_state) {
+ info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
+
+ if (is_gen_migration(dev)) {
info->reshape_active = 1;
info->new_level = get_imsm_raid_level(map);
info->new_layout = imsm_level_to_layout(info->new_level);
@@ -2226,7 +2397,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
/* this needs to be applied to every array
* in the container.
*/
- info->reshape_active = 2;
+ info->reshape_active = CONTAINER_RESHAPE;
}
/* We shape information that we give to md might have to be
* modify to cope with md's requirement for reshaping arrays.
@@ -2263,11 +2434,13 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
info->new_chunk = info->array.chunk_size;
info->delta_disks = 0;
}
- info->disk.major = 0;
- info->disk.minor = 0;
+
if (dl) {
info->disk.major = dl->major;
info->disk.minor = dl->minor;
+ info->disk.number = dl->index;
+ info->disk.raid_disk = get_imsm_disk_slot(map_to_analyse,
+ dl->index);
}
info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0);
@@ -2293,8 +2466,9 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
info->reshape_progress = 0;
info->resync_start = MaxSector;
- if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
- dev->vol.dirty) {
+ if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
+ dev->vol.dirty) &&
+ imsm_reshape_blocks_arrays_changes(super) == 0) {
info->resync_start = 0;
}
if (dev->vol.migr_state) {
@@ -2326,9 +2500,11 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
dprintf("IMSM: General Migration checkpoint : %llu "
"(%llu) -> read reshape progress : %llu\n",
- units, blocks_per_unit, info->reshape_progress);
+ (unsigned long long)units,
+ (unsigned long long)blocks_per_unit,
+ info->reshape_progress);
- used_disks = imsm_num_data_members(dev, 1);
+ used_disks = imsm_num_data_members(dev, MAP_1);
if (used_disks > 0) {
array_blocks = map->blocks_per_member *
used_disks;
@@ -2373,7 +2549,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
dmap[i] = 0;
if (i < info->array.raid_disks) {
struct imsm_disk *dsk;
- j = get_imsm_disk_idx(dev, i, -1);
+ j = get_imsm_disk_idx(dev, i, MAP_X);
dsk = get_imsm_disk(super, j);
if (dsk && (dsk->status & CONFIGURED_DISK))
dmap[i] = 1;
@@ -2382,8 +2558,30 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
}
}
-static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed);
-static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev);
+static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
+ int failed, int look_in_map);
+
+static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
+ int look_in_map);
+
+
+#ifndef MDASSEMBLE
+static void manage_second_map(struct intel_super *super, struct imsm_dev *dev)
+{
+ if (is_gen_migration(dev)) {
+ int failed;
+ __u8 map_state;
+ struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
+
+ failed = imsm_count_failed(super, dev, MAP_1);
+ map_state = imsm_check_degraded(super, dev, failed, MAP_1);
+ if (map2->map_state != map_state) {
+ map2->map_state = map_state;
+ super->updates_pending++;
+ }
+ }
+}
+#endif
static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
{
@@ -2433,6 +2631,7 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *
info->disk.state = 0;
info->name[0] = 0;
info->recovery_start = MaxSector;
+ info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
/* do we have the all the insync disks that we expect? */
mpb = super->anchor;
@@ -2443,15 +2642,15 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *
struct imsm_map *map;
__u8 state;
- failed = imsm_count_failed(super, dev);
- state = imsm_check_degraded(super, dev, failed);
- map = get_imsm_map(dev, dev->vol.migr_state);
+ failed = imsm_count_failed(super, dev, MAP_0);
+ state = imsm_check_degraded(super, dev, failed, MAP_0);
+ map = get_imsm_map(dev, MAP_0);
/* any newly missing disks?
* (catches single-degraded vs double-degraded)
*/
for (j = 0; j < map->num_members; j++) {
- __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
+ __u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0);
__u32 idx = ord_to_idx(ord);
if (!(ord & IMSM_ORD_REBUILD) &&
@@ -2592,25 +2791,30 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info,
mpb = super->anchor;
- if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
- rv = -1;
- else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) {
- mpb->orig_family_num = *((__u32 *) info->update_private);
- rv = 0;
- } else if (strcmp(update, "uuid") == 0) {
- __u32 *new_family = malloc(sizeof(*new_family));
-
- /* update orig_family_number with the incoming random
- * data, report the new effective uuid, and store the
- * new orig_family_num for future updates.
+ if (strcmp(update, "uuid") == 0) {
+ /* We take this to mean that the family_num should be updated.
+ * However that is much smaller than the uuid so we cannot really
+ * allow an explicit uuid to be given. And it is hard to reliably
+ * know if one was.
+ * So if !uuid_set we know the current uuid is random and just use
+ * the first 'int' and copy it to the other 3 positions.
+ * Otherwise we require the 4 'int's to be the same as would be the
+ * case if we are using a random uuid. So an explicit uuid will be
+ * accepted as long as all four ints are the same... which shouldn't hurt
*/
- if (new_family) {
- memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32));
- uuid_from_super_imsm(st, info->uuid);
- *new_family = mpb->orig_family_num;
- info->update_private = new_family;
+ if (!uuid_set) {
+ info->uuid[1] = info->uuid[2] = info->uuid[3] = info->uuid[0];
rv = 0;
+ } else {
+ if (info->uuid[0] != info->uuid[1] ||
+ info->uuid[1] != info->uuid[2] ||
+ info->uuid[2] != info->uuid[3])
+ rv = -1;
+ else
+ rv = 0;
}
+ if (rv == 0)
+ mpb->orig_family_num = info->uuid[0];
} else if (strcmp(update, "assemble") == 0)
rv = 0;
else
@@ -2778,14 +2982,16 @@ static void fd2devname(int fd, char *name)
sprintf(path, "/sys/dev/block/%d:%d",
major(st.st_rdev), minor(st.st_rdev));
- rv = readlink(path, dname, sizeof(dname));
+ rv = readlink(path, dname, sizeof(dname)-1);
if (rv <= 0)
return;
dname[rv] = '\0';
nm = strrchr(dname, '/');
- nm++;
- snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
+ if (nm) {
+ nm++;
+ snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
+ }
}
extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
@@ -2870,7 +3076,6 @@ static void serialcpy(__u8 *dest, __u8 *src)
strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
}
-#ifndef MDASSEMBLE
static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
{
struct dl *dl;
@@ -2881,7 +3086,6 @@ static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
return dl;
}
-#endif
static struct imsm_disk *
__serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
@@ -2978,12 +3182,12 @@ static void migrate(struct imsm_dev *dev, struct intel_super *super,
__u8 to_state, int migr_type)
{
struct imsm_map *dest;
- struct imsm_map *src = get_imsm_map(dev, 0);
+ struct imsm_map *src = get_imsm_map(dev, MAP_0);
dev->vol.migr_state = 1;
set_migr_type(dev, migr_type);
dev->vol.curr_migr_unit = 0;
- dest = get_imsm_map(dev, 1);
+ dest = get_imsm_map(dev, MAP_1);
/* duplicate and then set the target end state in map[0] */
memcpy(dest, src, sizeof_imsm_map(src));
@@ -3005,10 +3209,12 @@ static void migrate(struct imsm_dev *dev, struct intel_super *super,
src->map_state = to_state;
}
-static void end_migration(struct imsm_dev *dev, __u8 map_state)
+static void end_migration(struct imsm_dev *dev, struct intel_super *super,
+ __u8 map_state)
{
- struct imsm_map *map = get_imsm_map(dev, 0);
- struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state == 0 ?
+ MAP_0 : MAP_1);
int i, j;
/* merge any IMSM_ORD_REBUILD bits that were not successfully
@@ -3016,19 +3222,31 @@ static void end_migration(struct imsm_dev *dev, __u8 map_state)
*
* FIXME add support for raid-level-migration
*/
- for (i = 0; i < prev->num_members; i++)
- for (j = 0; j < map->num_members; j++)
- /* during online capacity expansion
- * disks position can be changed if takeover is used
- */
- if (ord_to_idx(map->disk_ord_tbl[j]) ==
- ord_to_idx(prev->disk_ord_tbl[i])) {
- map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i];
- break;
- }
+ if ((map_state != map->map_state) && (is_gen_migration(dev) == 0) &&
+ (prev->map_state != IMSM_T_STATE_UNINITIALIZED)) {
+ /* when final map state is other than expected
+ * merge maps (not for migration)
+ */
+ int failed;
+
+ for (i = 0; i < prev->num_members; i++)
+ for (j = 0; j < map->num_members; j++)
+ /* during online capacity expansion
+ * disks position can be changed
+ * if takeover is used
+ */
+ if (ord_to_idx(map->disk_ord_tbl[j]) ==
+ ord_to_idx(prev->disk_ord_tbl[i])) {
+ map->disk_ord_tbl[j] |=
+ prev->disk_ord_tbl[i];
+ break;
+ }
+ failed = imsm_count_failed(super, dev, MAP_0);
+ map_state = imsm_check_degraded(super, dev, failed, MAP_0);
+ }
dev->vol.migr_state = 0;
- dev->vol.migr_type = 0;
+ set_migr_type(dev, 0);
dev->vol.curr_migr_unit = 0;
map->map_state = map_state;
}
@@ -3128,8 +3346,8 @@ int check_mpb_migr_compatibility(struct intel_super *super)
dev_iter->vol.migr_state == 1 &&
dev_iter->vol.migr_type == MIGR_GEN_MIGR) {
/* This device is migrating */
- map0 = get_imsm_map(dev_iter, 0);
- map1 = get_imsm_map(dev_iter, 1);
+ map0 = get_imsm_map(dev_iter, MAP_0);
+ map1 = get_imsm_map(dev_iter, MAP_1);
if (map0->pba_of_lba0 != map1->pba_of_lba0)
/* migration optimization area was used */
return -1;
@@ -3215,7 +3433,7 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
sectors = mpb_sectors(anchor) - 1;
free(anchor);
- if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
+ if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) {
fprintf(stderr, Name
": %s could not allocate migr_rec buffer\n", __func__);
free(super->buf);
@@ -3439,7 +3657,6 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de
return 0;
}
-#ifndef MDASSEMBLE
/* find_missing - helper routine for load_super_imsm_all that identifies
* disks that have disappeared from the system. This routine relies on
* the mpb being uptodate, which it is at load time.
@@ -3475,6 +3692,7 @@ static int find_missing(struct intel_super *super)
return 0;
}
+#ifndef MDASSEMBLE
static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
{
struct intel_disk *idisk = disk_list;
@@ -3867,13 +4085,16 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
/* load migration record */
err = load_imsm_migr_rec(super, NULL);
- if (err) {
+ if (err == -1) {
+ /* migration is in progress,
+ * but migr_rec cannot be loaded,
+ */
err = 4;
goto error;
}
/* Check migration compatibility */
- if (check_mpb_migr_compatibility(super) != 0) {
+ if ((err == 0) && (check_mpb_migr_compatibility(super) != 0)) {
fprintf(stderr, Name ": Unsupported migration detected");
if (devname)
fprintf(stderr, " on %s\n", devname);
@@ -4016,7 +4237,7 @@ static void imsm_update_version_info(struct intel_super *super)
for (i = 0; i < mpb->num_raid_devs; i++) {
dev = get_imsm_dev(super, i);
- map = get_imsm_map(dev, 0);
+ map = get_imsm_map(dev, MAP_0);
if (__le32_to_cpu(dev->size_high) > 0)
mpb->attributes |= MPB_ATTRIB_2TB;
@@ -4112,12 +4333,14 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
fprintf(stderr, Name": could not allocate new mpb\n");
return 0;
}
- if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
+ if (posix_memalign(&super->migr_rec_buf, 512,
+ MIGR_REC_BUF_SIZE) != 0) {
fprintf(stderr, Name
": %s could not allocate migr_rec buffer\n",
__func__);
free(super->buf);
free(super);
+ free(mpb_new);
return 0;
}
memcpy(mpb_new, mpb, size_old);
@@ -4128,12 +4351,40 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
memset(mpb_new + size_old, 0, size_round - size_old);
}
super->current_vol = idx;
- /* when creating the first raid device in this container set num_disks
- * to zero, i.e. delete this spare and add raid member devices in
- * add_to_super_imsm_volume()
+
+ /* handle 'failed_disks' by either:
+ * a) create dummy disk entries in the table if this is the first
+ * volume in the array. We add them here as this is the only
+ * opportunity to add them. add_to_super_imsm_volume()
+ * handles the non-failed disks and continues incrementing
+ * mpb->num_disks.
+ * b) validate that 'failed_disks' matches the current number
+ * of missing disks if the container is populated
*/
- if (super->current_vol == 0)
+ if (super->current_vol == 0) {
mpb->num_disks = 0;
+ for (i = 0; i < info->failed_disks; i++) {
+ struct imsm_disk *disk;
+
+ mpb->num_disks++;
+ disk = __get_imsm_disk(mpb, i);
+ disk->status = CONFIGURED_DISK | FAILED_DISK;
+ disk->scsi_id = __cpu_to_le32(~(__u32)0);
+ snprintf((char *) disk->serial, MAX_RAID_SERIAL_LEN,
+ "missing:%d", i);
+ }
+ find_missing(super);
+ } else {
+ int missing = 0;
+ struct dl *d;
+
+ for (d = super->missing; d; d = d->next)
+ missing++;
+ if (info->failed_disks > missing) {
+ fprintf(stderr, Name": unable to add 'missing' disk to container\n");
+ return 0;
+ }
+ }
if (!check_name(super, name, 0))
return 0;
@@ -4165,15 +4416,18 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
vol = &dev->vol;
vol->migr_state = 0;
set_migr_type(dev, MIGR_INIT);
- vol->dirty = 0;
+ vol->dirty = !info->state;
vol->curr_migr_unit = 0;
- map = get_imsm_map(dev, 0);
+ map = get_imsm_map(dev, MAP_0);
map->pba_of_lba0 = __cpu_to_le32(super->create_offset);
map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
map->failed_disk_num = ~0;
- map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
- IMSM_T_STATE_NORMAL;
+ if (info->level > 0)
+ map->map_state = IMSM_T_STATE_UNINITIALIZED;
+ else
+ map->map_state = info->failed_disks ? IMSM_T_STATE_FAILED :
+ IMSM_T_STATE_NORMAL;
map->ddf = 1;
if (info->level == 1 && info->raid_disks > 2) {
@@ -4248,7 +4502,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
": %s could not allocate superblock\n", __func__);
return 0;
}
- if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
+ if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) {
fprintf(stderr, Name
": %s could not allocate migr_rec buffer\n", __func__);
free(super->buf);
@@ -4281,13 +4535,14 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
{
struct intel_super *super = st->sb;
struct imsm_super *mpb = super->anchor;
- struct dl *dl;
+ struct imsm_disk *_disk;
struct imsm_dev *dev;
struct imsm_map *map;
+ struct dl *dl, *df;
int slot;
dev = get_imsm_dev(super, super->current_vol);
- map = get_imsm_map(dev, 0);
+ map = get_imsm_map(dev, MAP_0);
if (! (dk->state & (1<<MD_DISK_SYNC))) {
fprintf(stderr, Name ": %s: Cannot add spare devices to IMSM volume\n",
@@ -4322,20 +4577,62 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
/* Check the device has not already been added */
slot = get_imsm_disk_slot(map, dl->index);
if (slot >= 0 &&
- (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) {
+ (get_imsm_ord_tbl_ent(dev, slot, MAP_X) & IMSM_ORD_REBUILD) == 0) {
fprintf(stderr, Name ": %s has been included in this array twice\n",
devname);
return 1;
}
- set_imsm_ord_tbl_ent(map, dk->number, dl->index);
+ set_imsm_ord_tbl_ent(map, dk->raid_disk, dl->index);
dl->disk.status = CONFIGURED_DISK;
+ /* update size of 'missing' disks to be at least as large as the
+ * largest active member (we only have dummy missing disks when
+ * creating the first volume)
+ */
+ if (super->current_vol == 0) {
+ for (df = super->missing; df; df = df->next) {
+ if (dl->disk.total_blocks > df->disk.total_blocks)
+ df->disk.total_blocks = dl->disk.total_blocks;
+ _disk = __get_imsm_disk(mpb, df->index);
+ *_disk = df->disk;
+ }
+ }
+
+ /* refresh unset/failed slots to point to valid 'missing' entries */
+ for (df = super->missing; df; df = df->next)
+ for (slot = 0; slot < mpb->num_disks; slot++) {
+ __u32 ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
+
+ if ((ord & IMSM_ORD_REBUILD) == 0)
+ continue;
+ set_imsm_ord_tbl_ent(map, slot, df->index | IMSM_ORD_REBUILD);
+ if (is_gen_migration(dev)) {
+ struct imsm_map *map2 = get_imsm_map(dev,
+ MAP_1);
+ int slot2 = get_imsm_disk_slot(map2, df->index);
+ if ((slot2 < map2->num_members) &&
+ (slot2 >= 0)) {
+ __u32 ord2 = get_imsm_ord_tbl_ent(dev,
+ slot2,
+ MAP_1);
+ if ((unsigned)df->index ==
+ ord_to_idx(ord2))
+ set_imsm_ord_tbl_ent(map2,
+ slot2,
+ df->index |
+ IMSM_ORD_REBUILD);
+ }
+ }
+ dprintf("set slot:%d to missing disk:%d\n", slot, df->index);
+ break;
+ }
+
/* if we are creating the first raid device update the family number */
if (super->current_vol == 0) {
__u32 sum;
struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
- struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);
+ _disk = __get_imsm_disk(mpb, dl->index);
if (!_dev || !_disk) {
fprintf(stderr, Name ": BUG mpb setup error\n");
return 1;
@@ -4347,10 +4644,41 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
mpb->family_num = __cpu_to_le32(sum);
mpb->orig_family_num = mpb->family_num;
}
-
+ super->current_disk = dl;
return 0;
}
+/* mark_spare()
+ * Function marks disk as spare and restores disk serial
+ * in case it was previously marked as failed by takeover operation
+ * returns:
+ * -1 : critical error
+ * 0 : disk is marked as spare but serial is not set
+ * 1 : success
+ */
+int mark_spare(struct dl *disk)
+{
+ __u8 serial[MAX_RAID_SERIAL_LEN];
+ int ret_val = -1;
+
+ if (!disk)
+ return ret_val;
+
+ ret_val = 0;
+ if (!imsm_read_serial(disk->fd, NULL, serial)) {
+ /* Restore disk serial number, because takeover marks disk
+ * as failed and adds to serial ':0' before it becomes
+ * a spare disk.
+ */
+ serialcpy(disk->serial, serial);
+ serialcpy(disk->disk.serial, serial);
+ ret_val = 1;
+ }
+ disk->disk.status = SPARE_DISK;
+ disk->index = -1;
+
+ return ret_val;
+}
static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
int fd, char *devname)
@@ -4388,7 +4716,6 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
memset(dd, 0, sizeof(*dd));
dd->major = major(stb.st_rdev);
dd->minor = minor(stb.st_rdev);
- dd->index = -1;
dd->devname = devname ? strdup(devname) : NULL;
dd->fd = fd;
dd->e = NULL;
@@ -4405,7 +4732,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
size /= 512;
serialcpy(dd->disk.serial, dd->serial);
dd->disk.total_blocks = __cpu_to_le32(size);
- dd->disk.status = SPARE_DISK;
+ mark_spare(dd);
if (sysfs_disk_to_scsi_id(fd, &id) == 0)
dd->disk.scsi_id = __cpu_to_le32(id);
else
@@ -4448,9 +4775,8 @@ static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
memset(dd, 0, sizeof(*dd));
dd->major = dk->major;
dd->minor = dk->minor;
- dd->index = -1;
dd->fd = -1;
- dd->disk.status = SPARE_DISK;
+ mark_spare(dd);
dd->action = DISK_REMOVE;
dd->next = super->disk_mgmt_list;
@@ -4570,11 +4896,11 @@ static int write_super_imsm(struct supertype *st, int doclose)
mpb->check_sum = __cpu_to_le32(sum);
if (clear_migration_record)
- memset(super->migr_rec_buf, 0, 512);
+ memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SIZE);
/* write the mpb for disks that compose raid devices */
for (d = super->disks; d ; d = d->next) {
- if (d->index < 0)
+ if (d->index < 0 || is_failed(&d->disk))
continue;
if (store_imsm_mpb(d->fd, mpb))
fprintf(stderr, "%s: failed for device %d:%d %s\n",
@@ -4584,7 +4910,8 @@ static int write_super_imsm(struct supertype *st, int doclose)
get_dev_size(d->fd, NULL, &dsize);
if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) {
- if (write(d->fd, super->migr_rec_buf, 512) != 512)
+ if (write(d->fd, super->migr_rec_buf,
+ MIGR_REC_BUF_SIZE) != MIGR_REC_BUF_SIZE)
perror("Write migr_rec failed");
}
}
@@ -4607,7 +4934,7 @@ static int create_array(struct supertype *st, int dev_idx)
struct imsm_update_create_array *u;
struct intel_super *super = st->sb;
struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
struct disk_info *inf;
struct imsm_disk *disk;
int i;
@@ -4626,7 +4953,7 @@ static int create_array(struct supertype *st, int dev_idx)
imsm_copy_dev(&u->dev, dev);
inf = get_disk_info(u);
for (i = 0; i < map->num_members; i++) {
- int idx = get_imsm_disk_idx(dev, i, -1);
+ int idx = get_imsm_disk_idx(dev, i, MAP_X);
disk = get_imsm_disk(super, idx);
serialcpy(inf[i].serial, disk->serial);
@@ -4912,43 +5239,44 @@ static int is_raid_level_supported(const struct imsm_orom *orom, int level, int
return 0;
}
+static int imsm_default_chunk(const struct imsm_orom *orom)
+{
+ /* up to 512 if the platform supports it, otherwise the platform max.
+ * 128 if no platform detected
+ */
+ int fs = max(7, orom ? fls(orom->sss) : 0);
+
+ return min(512, (1 << fs));
+}
#define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
-/*
- * validate volume parameters with OROM/EFI capabilities
- */
static int
validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
int raiddisks, int *chunk, int verbose)
{
-#if DEBUG
- verbose = 1;
-#endif
- /* validate container capabilities */
- if (super->orom && raiddisks > super->orom->tds) {
- if (verbose)
- fprintf(stderr, Name ": %d exceeds maximum number of"
- " platform supported disks: %d\n",
- raiddisks, super->orom->tds);
+ /* check/set platform and metadata limits/defaults */
+ if (super->orom && raiddisks > super->orom->dpa) {
+ pr_vrb(": platform supports a maximum of %d disks per array\n",
+ super->orom->dpa);
return 0;
}
/* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
- if (super->orom && (!is_raid_level_supported(super->orom, level,
- raiddisks))) {
+ if (!is_raid_level_supported(super->orom, level, raiddisks)) {
pr_vrb(": platform does not support raid%d with %d disk%s\n",
level, raiddisks, raiddisks > 1 ? "s" : "");
return 0;
}
- if (super->orom && level != 1) {
- if (chunk && (*chunk == 0 || *chunk == UnSet))
- *chunk = imsm_orom_default_chunk(super->orom);
- else if (chunk && !imsm_orom_has_chunk(super->orom, *chunk)) {
- pr_vrb(": platform does not support a chunk size of: "
- "%d\n", *chunk);
- return 0;
- }
+
+ if (chunk && (*chunk == 0 || *chunk == UnSet))
+ *chunk = imsm_default_chunk(super->orom);
+
+ if (super->orom && chunk && !imsm_orom_has_chunk(super->orom, *chunk)) {
+ pr_vrb(": platform does not support a chunk size of: "
+ "%d\n", *chunk);
+ return 0;
}
+
if (layout != imsm_level_to_layout(level)) {
if (level == 5)
pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
@@ -4973,7 +5301,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
{
struct stat stb;
struct intel_super *super = st->sb;
- struct imsm_super *mpb = super->anchor;
+ struct imsm_super *mpb;
struct dl *dl;
unsigned long long pos = 0;
unsigned long long maxsize;
@@ -4984,6 +5312,8 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
if (!super)
return 0;
+ mpb = super->anchor;
+
if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose)) {
fprintf(stderr, Name ": RAID gemetry validation failed. "
"Cannot proceed with the action(s).\n");
@@ -5062,6 +5392,11 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
fprintf(stderr, Name ": The option-rom requires all member"
" disks to be a member of all volumes\n");
return 0;
+ } else if (super->orom && mpb->num_raid_devs > 0 &&
+ mpb->num_disks != raiddisks) {
+ fprintf(stderr, Name ": The option-rom requires all member"
+ " disks to be a member of all volumes\n");
+ return 0;
}
/* retrieve the largest free space block */
@@ -5100,10 +5435,26 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
i += dl->extent_cnt;
maxsize = merge_extents(super, i);
+
+ if (!check_env("IMSM_NO_PLATFORM") &&
+ mpb->num_raid_devs > 0 && size && size != maxsize) {
+ fprintf(stderr, Name ": attempting to create a second "
+ "volume with size less then remaining space. "
+ "Aborting...\n");
+ return 0;
+ }
+
if (maxsize < size || maxsize == 0) {
- if (verbose)
- fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
- maxsize, size);
+ if (verbose) {
+ if (maxsize == 0)
+ fprintf(stderr, Name ": no free space"
+ " left on device. Aborting...\n");
+ else
+ fprintf(stderr, Name ": not enough space"
+ " to create volume of given size"
+ " (%llu < %llu). Aborting...\n",
+ maxsize, size);
+ }
return 0;
}
@@ -5209,7 +5560,11 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
}
if (!dev) {
- if (st->sb && freesize) {
+ if (st->sb) {
+ if (!validate_geometry_imsm_orom(st->sb, level, layout,
+ raiddisks, chunk,
+ verbose))
+ return 0;
/* we are being asked to automatically layout a
* new volume based on the current contents of
* the container. If the the parameters can be
@@ -5218,12 +5573,9 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
* created. add_to_super and getinfo_super
* detect when autolayout is in progress.
*/
- if (!validate_geometry_imsm_orom(st->sb, level, layout,
- raiddisks, chunk,
- verbose))
- return 0;
- return reserve_space(st, raiddisks, size,
- chunk?*chunk:0, freesize);
+ if (freesize)
+ return reserve_space(st, raiddisks, size,
+ chunk?*chunk:0, freesize);
}
return 1;
}
@@ -5277,7 +5629,8 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
return validate_geometry_imsm_volume(st, level, layout,
raiddisks, chunk,
size, dev,
- freesize, verbose);
+ freesize, 1)
+ ? 1 : -1;
}
}
@@ -5298,9 +5651,8 @@ static void default_geometry_imsm(struct supertype *st, int *level, int *layout,
if (level && layout && *layout == UnSet)
*layout = imsm_level_to_layout(*level);
- if (chunk && (*chunk == UnSet || *chunk == 0) &&
- super && super->orom)
- *chunk = imsm_orom_default_chunk(super->orom);
+ if (chunk && (*chunk == UnSet || *chunk == 0))
+ *chunk = imsm_default_chunk(super->orom);
}
static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
@@ -5368,10 +5720,8 @@ static int kill_subarray_imsm(struct supertype *st)
struct dl *d;
for (d = super->disks; d; d = d->next)
- if (d->index > -2) {
- d->index = -1;
- d->disk.status = SPARE_DISK;
- }
+ if (d->index > -2)
+ mark_spare(d);
}
super->updates_pending++;
@@ -5430,6 +5780,7 @@ static int update_subarray_imsm(struct supertype *st, char *subarray,
return 0;
}
+#endif /* MDASSEMBLE */
static int is_gen_migration(struct imsm_dev *dev)
{
@@ -5444,7 +5795,6 @@ static int is_gen_migration(struct imsm_dev *dev)
return 0;
}
-#endif /* MDASSEMBLE */
static int is_rebuilding(struct imsm_dev *dev)
{
@@ -5456,7 +5806,7 @@ static int is_rebuilding(struct imsm_dev *dev)
if (migr_type(dev) != MIGR_REBUILD)
return 0;
- migr_map = get_imsm_map(dev, 1);
+ migr_map = get_imsm_map(dev, MAP_1);
if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
return 1;
@@ -5464,6 +5814,26 @@ static int is_rebuilding(struct imsm_dev *dev)
return 0;
}
+#ifndef MDASSEMBLE
+static int is_initializing(struct imsm_dev *dev)
+{
+ struct imsm_map *migr_map;
+
+ if (!dev->vol.migr_state)
+ return 0;
+
+ if (migr_type(dev) != MIGR_INIT)
+ return 0;
+
+ migr_map = get_imsm_map(dev, MAP_1);
+
+ if (migr_map->map_state == IMSM_T_STATE_UNINITIALIZED)
+ return 1;
+
+ return 0;
+}
+#endif
+
static void update_recovery_start(struct intel_super *super,
struct imsm_dev *dev,
struct mdinfo *array)
@@ -5515,20 +5885,24 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
struct imsm_super *mpb = super->anchor;
struct mdinfo *rest = NULL;
unsigned int i;
- int bbm_errors = 0;
+ int sb_errors = 0;
struct dl *d;
int spare_disks = 0;
/* do not assemble arrays when not all attributes are supported */
if (imsm_check_attributes(mpb->attributes) == 0) {
- fprintf(stderr, Name ": IMSM metadata loading not allowed "
- "due to attributes incompatibility.\n");
- return NULL;
+ sb_errors = 1;
+ fprintf(stderr, Name ": Unsupported attributes in IMSM metadata."
+ "Arrays activation is blocked.\n");
}
/* check for bad blocks */
- if (imsm_bbm_log_size(super->anchor))
- bbm_errors = 1;
+ if (imsm_bbm_log_size(super->anchor)) {
+ fprintf(stderr, Name ": BBM log found in IMSM metadata."
+ "Arrays activation is blocked.\n");
+ sb_errors = 1;
+ }
+
/* count spare devices, not used in maps
*/
@@ -5541,7 +5915,10 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
struct imsm_map *map;
struct imsm_map *map2;
struct mdinfo *this;
- int slot, chunk;
+ int slot;
+#ifndef MDASSEMBLE
+ int chunk;
+#endif
char *ep;
if (subarray &&
@@ -5549,8 +5926,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
continue;
dev = get_imsm_dev(super, i);
- map = get_imsm_map(dev, 0);
- map2 = get_imsm_map(dev, 1);
+ map = get_imsm_map(dev, MAP_0);
+ map2 = get_imsm_map(dev, MAP_1);
/* do not publish arrays that are in the middle of an
* unsupported migration
@@ -5566,19 +5943,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
* OROM/EFI
*/
- chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
-#ifndef MDASSEMBLE
- if (!validate_geometry_imsm_orom(super,
- get_imsm_raid_level(map), /* RAID level */
- imsm_level_to_layout(get_imsm_raid_level(map)),
- map->num_members, /* raid disks */
- &chunk,
- 1 /* verbose */)) {
- fprintf(stderr, Name ": RAID gemetry validation failed. "
- "Cannot proceed with the action(s).\n");
- continue;
- }
-#endif /* MDASSEMBLE */
this = malloc(sizeof(*this));
if (!this) {
fprintf(stderr, Name ": failed to allocate %zu bytes\n",
@@ -5589,6 +5953,30 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
super->current_vol = i;
getinfo_super_imsm_volume(st, this, NULL);
this->next = rest;
+#ifndef MDASSEMBLE
+ chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
+ /* mdadm does not support all metadata features- set the bit in all arrays state */
+ if (!validate_geometry_imsm_orom(super,
+ get_imsm_raid_level(map), /* RAID level */
+ imsm_level_to_layout(get_imsm_raid_level(map)),
+ map->num_members, /* raid disks */
+ &chunk,
+ 1 /* verbose */)) {
+ fprintf(stderr, Name ": IMSM RAID geometry validation"
+ " failed. Array %s activation is blocked.\n",
+ dev->volume);
+ this->array.state |=
+ (1<<MD_SB_BLOCK_CONTAINER_RESHAPE) |
+ (1<<MD_SB_BLOCK_VOLUME);
+ }
+#endif
+
+ /* if array has bad blocks, set suitable bit in all arrays state */
+ if (sb_errors)
+ this->array.state |=
+ (1<<MD_SB_BLOCK_CONTAINER_RESHAPE) |
+ (1<<MD_SB_BLOCK_VOLUME);
+
for (slot = 0 ; slot < map->num_members; slot++) {
unsigned long long recovery_start;
struct mdinfo *info_d;
@@ -5598,8 +5986,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
__u32 ord;
skip = 0;
- idx = get_imsm_disk_idx(dev, slot, 0);
- ord = get_imsm_ord_tbl_ent(dev, slot, -1);
+ idx = get_imsm_disk_idx(dev, slot, MAP_0);
+ ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
for (d = super->disks; d ; d = d->next)
if (d->index == idx)
break;
@@ -5677,17 +6065,16 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
rest = this;
}
- /* if array has bad blocks, set suitable bit in array status */
- if (bbm_errors)
- rest->array.state |= (1<<MD_SB_BBM_ERRORS);
-
return rest;
}
-static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
+static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
+ int failed, int look_in_map)
{
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_map *map;
+
+ map = get_imsm_map(dev, look_in_map);
if (!failed)
return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
@@ -5715,7 +6102,7 @@ static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
int insync = insync;
for (i = 0; i < map->num_members; i++) {
- __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
+ __u32 ord = get_imsm_ord_tbl_ent(dev, i, MAP_X);
int idx = ord_to_idx(ord);
struct imsm_disk *disk;
@@ -5751,33 +6138,55 @@ static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
return map->map_state;
}
-static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
+static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
+ int look_in_map)
{
int i;
int failed = 0;
struct imsm_disk *disk;
- struct imsm_map *map = get_imsm_map(dev, 0);
- struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ struct imsm_map *prev = get_imsm_map(dev, MAP_1);
+ struct imsm_map *map_for_loop;
__u32 ord;
int idx;
+ int idx_1;
/* at the beginning of migration we set IMSM_ORD_REBUILD on
* disks that are being rebuilt. New failures are recorded to
* map[0]. So we look through all the disks we started with and
* see if any failures are still present, or if any new ones
* have arrived
- *
- * FIXME add support for online capacity expansion and
- * raid-level-migration
*/
- for (i = 0; i < prev->num_members; i++) {
- ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
- ord |= __le32_to_cpu(map->disk_ord_tbl[i]);
- idx = ord_to_idx(ord);
+ map_for_loop = map;
+ if (prev && (map->num_members < prev->num_members))
+ map_for_loop = prev;
- disk = get_imsm_disk(super, idx);
- if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
- failed++;
+ for (i = 0; i < map_for_loop->num_members; i++) {
+ idx_1 = -255;
+ /* when MAP_X is passed, failures in both maps are counted
+ */
+ if (prev &&
+ ((look_in_map == MAP_1) || (look_in_map == MAP_X)) &&
+ (i < prev->num_members)) {
+ ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
+ idx_1 = ord_to_idx(ord);
+
+ disk = get_imsm_disk(super, idx_1);
+ if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
+ failed++;
+ }
+ if (((look_in_map == MAP_0) || (look_in_map == MAP_X)) &&
+ (i < map->num_members)) {
+ ord = __le32_to_cpu(map->disk_ord_tbl[i]);
+ idx = ord_to_idx(ord);
+
+ if (idx != idx_1) {
+ disk = get_imsm_disk(super, idx);
+ if (!disk || is_failed(disk) ||
+ ord & IMSM_ORD_REBUILD)
+ failed++;
+ }
+ }
}
return failed;
@@ -5815,7 +6224,7 @@ static int is_resyncing(struct imsm_dev *dev)
if (migr_type(dev) == MIGR_GEN_MIGR)
return 0;
- migr_map = get_imsm_map(dev, 1);
+ migr_map = get_imsm_map(dev, MAP_1);
if ((migr_map->map_state == IMSM_T_STATE_NORMAL) &&
(dev->vol.migr_type != MIGR_GEN_MIGR))
@@ -5830,9 +6239,11 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
__u32 ord;
int slot;
struct imsm_map *map;
+ char buf[MAX_RAID_SERIAL_LEN+3];
+ unsigned int len, shift = 0;
/* new failures are always set in map[0] */
- map = get_imsm_map(dev, 0);
+ map = get_imsm_map(dev, MAP_0);
slot = get_imsm_disk_slot(map, idx);
if (slot < 0)
@@ -5842,8 +6253,28 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
return 0;
+ memcpy(buf, disk->serial, MAX_RAID_SERIAL_LEN);
+ buf[MAX_RAID_SERIAL_LEN] = '\000';
+ strcat(buf, ":0");
+ if ((len = strlen(buf)) >= MAX_RAID_SERIAL_LEN)
+ shift = len - MAX_RAID_SERIAL_LEN + 1;
+ strncpy((char *)disk->serial, &buf[shift], MAX_RAID_SERIAL_LEN);
+
disk->status |= FAILED_DISK;
set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
+ /* mark failures in second map if second map exists and this disk
+ * in this slot.
+ * This is valid for migration, initialization and rebuild
+ */
+ if (dev->vol.migr_state) {
+ struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
+ int slot2 = get_imsm_disk_slot(map2, idx);
+
+ if ((slot2 < map2->num_members) &&
+ (slot2 >= 0))
+ set_imsm_ord_tbl_ent(map2, slot2,
+ idx | IMSM_ORD_REBUILD);
+ }
if (map->failed_disk_num == 0xff)
map->failed_disk_num = slot;
return 1;
@@ -5862,17 +6293,23 @@ static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
{
- __u8 map_state;
struct dl *dl;
- int failed;
if (!super->missing)
return;
- failed = imsm_count_failed(super, dev);
- map_state = imsm_check_degraded(super, dev, failed);
dprintf("imsm: mark missing\n");
- end_migration(dev, map_state);
+ /* end process for initialization and rebuild only
+ */
+ if (is_gen_migration(dev) == 0) {
+ __u8 map_state;
+ int failed;
+
+ failed = imsm_count_failed(super, dev, MAP_0);
+ map_state = imsm_check_degraded(super, dev, failed, MAP_0);
+
+ end_migration(dev, super, map_state);
+ }
for (dl = super->missing; dl; dl = dl->next)
mark_missing(dev, &dl->disk, dl->index);
super->updates_pending++;
@@ -5880,7 +6317,7 @@ static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
static unsigned long long imsm_set_array_size(struct imsm_dev *dev)
{
- int used_disks = imsm_num_data_members(dev, 0);
+ int used_disks = imsm_num_data_members(dev, MAP_0);
unsigned long long array_blocks;
struct imsm_map *map;
@@ -5897,7 +6334,7 @@ static unsigned long long imsm_set_array_size(struct imsm_dev *dev)
/* set array size in metadata
*/
- map = get_imsm_map(dev, 0);
+ map = get_imsm_map(dev, MAP_0);
array_blocks = map->blocks_per_member * used_disks;
/* round array size down to closest MB
@@ -5925,7 +6362,7 @@ static void imsm_progress_container_reshape(struct intel_super *super)
for (i = 0; i < mpb->num_raid_devs; i++) {
struct imsm_dev *dev = get_imsm_dev(super, i);
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
struct imsm_map *map2;
int prev_num_members;
@@ -5946,11 +6383,11 @@ static void imsm_progress_container_reshape(struct intel_super *super)
map->num_members = prev_disks;
dev->vol.migr_state = 1;
dev->vol.curr_migr_unit = 0;
- dev->vol.migr_type = MIGR_GEN_MIGR;
+ set_migr_type(dev, MIGR_GEN_MIGR);
for (i = prev_num_members;
i < map->num_members; i++)
set_imsm_ord_tbl_ent(map, i, i);
- map2 = get_imsm_map(dev, 1);
+ map2 = get_imsm_map(dev, MAP_1);
/* Copy the current map */
memcpy(map2, map, copy_map_size);
map2->num_members = prev_num_members;
@@ -5970,9 +6407,9 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
int inst = a->info.container_member;
struct intel_super *super = a->container->sb;
struct imsm_dev *dev = get_imsm_dev(super, inst);
- struct imsm_map *map = get_imsm_map(dev, 0);
- int failed = imsm_count_failed(super, dev);
- __u8 map_state = imsm_check_degraded(super, dev, failed);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ int failed = imsm_count_failed(super, dev, MAP_0);
+ __u8 map_state = imsm_check_degraded(super, dev, failed, MAP_0);
__u32 blocks_per_unit;
if (dev->vol.migr_state &&
@@ -5994,12 +6431,14 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
* user action is required to recover process
*/
if (0) {
- struct imsm_map *map2 = get_imsm_map(dev, 1);
- dev->vol.migr_state = 0;
- dev->vol.migr_type = 0;
- dev->vol.curr_migr_unit = 0;
- memcpy(map, map2, sizeof_imsm_map(map2));
- super->updates_pending++;
+ struct imsm_map *map2 =
+ get_imsm_map(dev, MAP_1);
+ dev->vol.migr_state = 0;
+ set_migr_type(dev, 0);
+ dev->vol.curr_migr_unit = 0;
+ memcpy(map, map2,
+ sizeof_imsm_map(map2));
+ super->updates_pending++;
}
}
if (a->last_checkpoint >= a->info.component_size) {
@@ -6007,7 +6446,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
int used_disks;
struct mdinfo *mdi;
- used_disks = imsm_num_data_members(dev, 0);
+ used_disks = imsm_num_data_members(dev, MAP_0);
if (used_disks > 0) {
array_blocks =
map->blocks_per_member *
@@ -6052,11 +6491,12 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
*/
if (is_resyncing(dev)) {
dprintf("imsm: mark resync done\n");
- end_migration(dev, map_state);
+ end_migration(dev, super, map_state);
super->updates_pending++;
a->last_checkpoint = 0;
}
- } else if (!is_resyncing(dev) && !failed) {
+ } else if ((!is_resyncing(dev) && !failed) &&
+ (imsm_reshape_blocks_arrays_changes(super) == 0)) {
/* mark the start of the init process if nothing is failed */
dprintf("imsm: mark resync start\n");
if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
@@ -6113,7 +6553,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
int inst = a->info.container_member;
struct intel_super *super = a->container->sb;
struct imsm_dev *dev = get_imsm_dev(super, inst);
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
struct imsm_disk *disk;
int failed;
__u32 ord;
@@ -6128,7 +6568,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
dprintf("imsm: set_disk %d:%x\n", n, state);
- ord = get_imsm_ord_tbl_ent(dev, n, -1);
+ ord = get_imsm_ord_tbl_ent(dev, n, MAP_0);
disk = get_imsm_disk(super, ord_to_idx(ord));
/* check for new failures */
@@ -6139,53 +6579,99 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
/* check if in_sync */
if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
- struct imsm_map *migr_map = get_imsm_map(dev, 1);
+ struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
super->updates_pending++;
}
- failed = imsm_count_failed(super, dev);
- map_state = imsm_check_degraded(super, dev, failed);
+ failed = imsm_count_failed(super, dev, MAP_0);
+ map_state = imsm_check_degraded(super, dev, failed, MAP_0);
/* check if recovery complete, newly degraded, or failed */
- if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
- end_migration(dev, map_state);
- map = get_imsm_map(dev, 0);
- map->failed_disk_num = ~0;
- super->updates_pending++;
- a->last_checkpoint = 0;
- } else if (map_state == IMSM_T_STATE_DEGRADED &&
- map->map_state != map_state &&
- !dev->vol.migr_state) {
- dprintf("imsm: mark degraded\n");
- map->map_state = map_state;
- super->updates_pending++;
- a->last_checkpoint = 0;
- } else if (map_state == IMSM_T_STATE_FAILED &&
- map->map_state != map_state) {
- dprintf("imsm: mark failed\n");
- end_migration(dev, map_state);
- super->updates_pending++;
- a->last_checkpoint = 0;
- } else if (is_gen_migration(dev)) {
- dprintf("imsm: Detected General Migration in state: ");
- if (map_state == IMSM_T_STATE_NORMAL) {
- end_migration(dev, map_state);
- map = get_imsm_map(dev, 0);
+ dprintf("imsm: Detected transition to state ");
+ switch (map_state) {
+ case IMSM_T_STATE_NORMAL: /* transition to normal state */
+ dprintf("normal: ");
+ if (is_rebuilding(dev)) {
+ dprintf("while rebuilding");
+ end_migration(dev, super, map_state);
+ map = get_imsm_map(dev, MAP_0);
map->failed_disk_num = ~0;
- dprintf("normal\n");
- } else {
- if (map_state == IMSM_T_STATE_DEGRADED) {
- printf("degraded\n");
- end_migration(dev, map_state);
- } else {
- dprintf("failed\n");
+ super->updates_pending++;
+ a->last_checkpoint = 0;
+ break;
+ }
+ if (is_gen_migration(dev)) {
+ dprintf("while general migration");
+ if (a->last_checkpoint >= a->info.component_size)
+ end_migration(dev, super, map_state);
+ else
+ map->map_state = map_state;
+ map = get_imsm_map(dev, MAP_0);
+ map->failed_disk_num = ~0;
+ super->updates_pending++;
+ break;
+ }
+ break;
+ case IMSM_T_STATE_DEGRADED: /* transition to degraded state */
+ dprintf("degraded: ");
+ if ((map->map_state != map_state) &&
+ !dev->vol.migr_state) {
+ dprintf("mark degraded");
+ map->map_state = map_state;
+ super->updates_pending++;
+ a->last_checkpoint = 0;
+ break;
+ }
+ if (is_rebuilding(dev)) {
+ dprintf("while rebuilding.");
+ if (map->map_state != map_state) {
+ dprintf(" Map state change");
+ end_migration(dev, super, map_state);
+ super->updates_pending++;
}
+ break;
+ }
+ if (is_gen_migration(dev)) {
+ dprintf("while general migration");
+ if (a->last_checkpoint >= a->info.component_size)
+ end_migration(dev, super, map_state);
+ else {
+ map->map_state = map_state;
+ manage_second_map(super, dev);
+ }
+ super->updates_pending++;
+ break;
+ }
+ if (is_initializing(dev)) {
+ dprintf("while initialization.");
map->map_state = map_state;
+ super->updates_pending++;
+ break;
}
- super->updates_pending++;
+ break;
+ case IMSM_T_STATE_FAILED: /* transition to failed state */
+ dprintf("failed: ");
+ if (is_gen_migration(dev)) {
+ dprintf("while general migration");
+ map->map_state = map_state;
+ super->updates_pending++;
+ break;
+ }
+ if (map->map_state != map_state) {
+ dprintf("mark failed");
+ end_migration(dev, super, map_state);
+ super->updates_pending++;
+ a->last_checkpoint = 0;
+ break;
+ }
+ break;
+ default:
+ dprintf("state %i\n", map_state);
}
+ dprintf("\n");
+
}
static int store_imsm_mpb(int fd, struct imsm_super *mpb)
@@ -6236,7 +6722,7 @@ static void imsm_sync_metadata(struct supertype *container)
static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
{
struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
- int i = get_imsm_disk_idx(dev, idx, -1);
+ int i = get_imsm_disk_idx(dev, idx, MAP_X);
struct dl *dl;
for (dl = super->disks; dl; dl = dl->next)
@@ -6257,7 +6743,7 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot,
struct mdinfo *additional_test_list)
{
struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
- int idx = get_imsm_disk_idx(dev, slot, -1);
+ int idx = get_imsm_disk_idx(dev, slot, MAP_X);
struct imsm_super *mpb = super->anchor;
struct imsm_map *map;
unsigned long long pos;
@@ -6319,7 +6805,7 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot,
}
for (i = 0; i < mpb->num_raid_devs; i++) {
dev = get_imsm_dev(super, i);
- map = get_imsm_map(dev, 0);
+ map = get_imsm_map(dev, MAP_0);
/* check if this disk is already a member of
* this array
@@ -6375,9 +6861,9 @@ static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
dev2 = get_imsm_dev(cont->sb, dev_idx);
if (dev2) {
- state = imsm_check_degraded(cont->sb, dev2, failed);
+ state = imsm_check_degraded(cont->sb, dev2, failed, MAP_0);
if (state == IMSM_T_STATE_FAILED) {
- map = get_imsm_map(dev2, 0);
+ map = get_imsm_map(dev2, MAP_0);
if (!map)
return 1;
for (slot = 0; slot < map->num_members; slot++) {
@@ -6385,7 +6871,7 @@ static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
* Check if failed disks are deleted from intel
* disk list or are marked to be deleted
*/
- idx = get_imsm_disk_idx(dev2, slot, -1);
+ idx = get_imsm_disk_idx(dev2, slot, MAP_X);
idisk = get_imsm_dl_disk(cont->sb, idx);
/*
* Do not rebuild the array if failed disks
@@ -6419,7 +6905,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
struct intel_super *super = a->container->sb;
int inst = a->info.container_member;
struct imsm_dev *dev = get_imsm_dev(super, inst);
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
int failed = a->info.array.raid_disks;
struct mdinfo *rv = NULL;
struct mdinfo *d;
@@ -6443,10 +6929,16 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
inst, failed, a->info.array.raid_disks, a->info.array.level);
- if (dev->vol.migr_state &&
- dev->vol.migr_type == MIGR_GEN_MIGR)
- /* No repair during migration */
+ if (imsm_reshape_blocks_arrays_changes(super))
+ return NULL;
+
+ /* Cannot activate another spare if rebuild is in progress already
+ */
+ if (is_rebuilding(dev)) {
+ dprintf("imsm: No spare activation allowed. "
+ "Rebuild in progress already.\n");
return NULL;
+ }
if (a->info.array.level == 4)
/* No repair for takeovered array
@@ -6454,7 +6946,8 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
*/
return NULL;
- if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
+ if (imsm_check_degraded(super, dev, failed, MAP_0) !=
+ IMSM_T_STATE_DEGRADED)
return NULL;
/*
@@ -6463,9 +6956,9 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
* are removed from container.
*/
if (failed) {
- dprintf("found failed disks in %s, check if there another"
+ dprintf("found failed disks in %.*s, check if there another"
"failed sub-array.\n",
- dev->volume);
+ MAX_RAID_SERIAL_LEN, dev->volume);
/* check if states of the other volumes allow for rebuild */
for (i = 0; i < super->anchor->num_raid_devs; i++) {
if (i != inst) {
@@ -6495,9 +6988,9 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
*/
dl = imsm_readd(super, i, a);
if (!dl)
- dl = imsm_add_spare(super, i, a, 0, NULL);
+ dl = imsm_add_spare(super, i, a, 0, rv);
if (!dl)
- dl = imsm_add_spare(super, i, a, 1, NULL);
+ dl = imsm_add_spare(super, i, a, 1, rv);
if (!dl)
continue;
@@ -6534,8 +7027,6 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
num_spares++;
dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
i, di->data_offset);
-
- break;
}
if (!rv)
@@ -6587,15 +7078,15 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
{
struct imsm_dev *dev = get_imsm_dev(super, idx);
- struct imsm_map *map = get_imsm_map(dev, 0);
- struct imsm_map *new_map = get_imsm_map(&u->dev, 0);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ struct imsm_map *new_map = get_imsm_map(&u->dev, MAP_0);
struct disk_info *inf = get_disk_info(u);
struct imsm_disk *disk;
int i;
int j;
for (i = 0; i < map->num_members; i++) {
- disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1));
+ disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, MAP_X));
for (j = 0; j < new_map->num_members; j++)
if (serialcmp(disk->serial, inf[j].serial) == 0)
return 1;
@@ -6708,7 +7199,7 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration *
struct imsm_map *map;
struct imsm_dev *new_dev =
(struct imsm_dev *)*space_list;
- struct imsm_map *migr_map = get_imsm_map(dev, 1);
+ struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
int to_state;
struct dl *new_disk;
@@ -6716,7 +7207,7 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration *
return ret_val;
*space_list = **space_list;
memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0));
- map = get_imsm_map(new_dev, 0);
+ map = get_imsm_map(new_dev, MAP_0);
if (migr_map) {
dprintf("imsm: Error: migration in progress");
return ret_val;
@@ -6736,7 +7227,7 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration *
migrate(new_dev, super, to_state, MIGR_GEN_MIGR);
if (u->new_level > -1)
map->raid_level = u->new_level;
- migr_map = get_imsm_map(new_dev, 1);
+ migr_map = get_imsm_map(new_dev, MAP_1);
if ((u->new_level == 5) &&
(migr_map->raid_level == 0)) {
int ord = map->num_members - 1;
@@ -6805,6 +7296,120 @@ error_disk_add:
return ret_val;
}
+static int apply_update_activate_spare(struct imsm_update_activate_spare *u,
+ struct intel_super *super,
+ struct active_array *active_array)
+{
+ struct imsm_super *mpb = super->anchor;
+ struct imsm_dev *dev = get_imsm_dev(super, u->array);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ struct imsm_map *migr_map;
+ struct active_array *a;
+ struct imsm_disk *disk;
+ __u8 to_state;
+ struct dl *dl;
+ unsigned int found;
+ int failed;
+ int victim;
+ int i;
+ int second_map_created = 0;
+ /* walk the u->next chain: return 0 as soon as one update is rejected, 1 after all were applied */
+ for (; u; u = u->next) {
+ victim = get_imsm_disk_idx(dev, u->slot, MAP_X);
+ /* a negative victim index rejects the whole chain */
+ if (victim < 0)
+ return 0;
+ /* u->dl must be one of the entries on super->disks */
+ for (dl = super->disks; dl; dl = dl->next)
+ if (dl == u->dl)
+ break;
+
+ if (!dl) {
+ fprintf(stderr, "error: imsm_activate_spare passed "
+ "an unknown disk (index: %d)\n",
+ u->dl->index);
+ return 0;
+ }
+
+ /* count failures (excluding rebuilds and the victim)
+ * to determine map[0] state
+ */
+ failed = 0;
+ for (i = 0; i < map->num_members; i++) {
+ if (i == u->slot)
+ continue;
+ disk = get_imsm_disk(super,
+ get_imsm_disk_idx(dev, i, MAP_X));
+ if (!disk || is_failed(disk))
+ failed++;
+ }
+
+ /* adding a pristine spare, assign a new index */
+ if (dl->index < 0) {
+ dl->index = super->anchor->num_disks;
+ super->anchor->num_disks++;
+ }
+ disk = &dl->disk;
+ disk->status |= CONFIGURED_DISK;
+ disk->status &= ~SPARE_DISK;
+ /* migrate() is called for the first update only; later updates just store to_state in map[0] */
+ /* mark rebuild */
+ to_state = imsm_check_degraded(super, dev, failed, MAP_0);
+ if (!second_map_created) {
+ second_map_created = 1;
+ map->map_state = IMSM_T_STATE_DEGRADED;
+ migrate(dev, super, to_state, MIGR_REBUILD);
+ } else
+ map->map_state = to_state;
+ migr_map = get_imsm_map(dev, MAP_1);
+ set_imsm_ord_tbl_ent(map, u->slot, dl->index);
+ set_imsm_ord_tbl_ent(migr_map, u->slot,
+ dl->index | IMSM_ORD_REBUILD);
+
+ /* update the family_num to mark a new container
+ * generation, being careful to record the existing
+ * family_num in orig_family_num to clean up after
+ * earlier mdadm versions that neglected to set it.
+ */
+ if (mpb->orig_family_num == 0)
+ mpb->orig_family_num = mpb->family_num;
+ mpb->family_num += super->random;
+ /* NOTE(review): the loop below reassigns dev and map, which later iterations of the outer loop reuse - confirm */
+ /* count arrays using the victim in the metadata */
+ found = 0;
+ for (a = active_array; a ; a = a->next) {
+ dev = get_imsm_dev(super, a->info.container_member);
+ map = get_imsm_map(dev, MAP_0);
+
+ if (get_imsm_disk_slot(map, victim) >= 0)
+ found++;
+ }
+
+ /* delete the victim if it is no longer being
+ * utilized anywhere
+ */
+ if (!found) {
+ struct dl **dlp;
+
+ /* We know that 'manager' isn't touching anything,
+ * so it is safe to delete
+ */
+ for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
+ if ((*dlp)->index == victim)
+ break;
+
+ /* victim may be on the missing list */
+ if (!*dlp)
+ for (dlp = &super->missing; *dlp;
+ dlp = &(*dlp)->next)
+ if ((*dlp)->index == victim)
+ break;
+ imsm_delete(super, dlp, victim);
+ }
+ }
+ /* every update in the chain was applied */
+ return 1;
+}
static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
struct intel_super *super,
@@ -6866,8 +7471,8 @@ static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
newdev = (void*)sp;
/* Copy the dev, but not (all of) the map */
memcpy(newdev, id->dev, sizeof(*newdev));
- oldmap = get_imsm_map(id->dev, 0);
- newmap = get_imsm_map(newdev, 0);
+ oldmap = get_imsm_map(id->dev, MAP_0);
+ newmap = get_imsm_map(newdev, MAP_0);
/* Copy the current map */
memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
/* update one device only
@@ -6878,7 +7483,7 @@ static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
devices_to_reshape--;
newdev->vol.migr_state = 1;
newdev->vol.curr_migr_unit = 0;
- newdev->vol.migr_type = MIGR_GEN_MIGR;
+ set_migr_type(newdev, MIGR_GEN_MIGR);
newmap->num_members = u->new_raid_disks;
for (i = 0; i < delta_disks; i++) {
set_imsm_ord_tbl_ent(newmap,
@@ -6887,7 +7492,7 @@ static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
}
/* New map is correct, now need to save old map
*/
- newmap = get_imsm_map(newdev, 1);
+ newmap = get_imsm_map(newdev, MAP_1);
memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
imsm_set_array_size(newdev);
@@ -6930,11 +7535,12 @@ static int apply_takeover_update(struct imsm_update_takeover *u,
if (dev == NULL)
return 0;
- map = get_imsm_map(dev, 0);
+ map = get_imsm_map(dev, MAP_0);
if (u->direction == R10_TO_R0) {
/* Number of failed disks must be half of initial disk number */
- if (imsm_count_failed(super, dev) != (map->num_members / 2))
+ if (imsm_count_failed(super, dev, MAP_0) !=
+ (map->num_members / 2))
return 0;
/* iterate through devices to mark removed disks as spare */
@@ -6948,8 +7554,7 @@ static int apply_takeover_update(struct imsm_update_takeover *u,
if (du->index > idx)
du->index--;
/* mark as spare disk */
- dm->disk.status = SPARE_DISK;
- dm->index = -1;
+ mark_spare(dm);
}
}
/* update map */
@@ -6994,7 +7599,7 @@ static int apply_takeover_update(struct imsm_update_takeover *u,
dev_new = (void *)space;
memcpy(dev_new, dev, sizeof(*dev));
/* update new map */
- map = get_imsm_map(dev_new, 0);
+ map = get_imsm_map(dev_new, MAP_0);
map->num_members = map->num_members * 2;
map->map_state = IMSM_T_STATE_DEGRADED;
map->num_domains = 2;
@@ -7100,99 +7705,8 @@ static void imsm_process_update(struct supertype *st,
}
case update_activate_spare: {
struct imsm_update_activate_spare *u = (void *) update->buf;
- struct imsm_dev *dev = get_imsm_dev(super, u->array);
- struct imsm_map *map = get_imsm_map(dev, 0);
- struct imsm_map *migr_map;
- struct active_array *a;
- struct imsm_disk *disk;
- __u8 to_state;
- struct dl *dl;
- unsigned int found;
- int failed;
- int victim = get_imsm_disk_idx(dev, u->slot, -1);
- int i;
-
- for (dl = super->disks; dl; dl = dl->next)
- if (dl == u->dl)
- break;
-
- if (!dl) {
- fprintf(stderr, "error: imsm_activate_spare passed "
- "an unknown disk (index: %d)\n",
- u->dl->index);
- return;
- }
-
- super->updates_pending++;
- /* count failures (excluding rebuilds and the victim)
- * to determine map[0] state
- */
- failed = 0;
- for (i = 0; i < map->num_members; i++) {
- if (i == u->slot)
- continue;
- disk = get_imsm_disk(super,
- get_imsm_disk_idx(dev, i, -1));
- if (!disk || is_failed(disk))
- failed++;
- }
-
- /* adding a pristine spare, assign a new index */
- if (dl->index < 0) {
- dl->index = super->anchor->num_disks;
- super->anchor->num_disks++;
- }
- disk = &dl->disk;
- disk->status |= CONFIGURED_DISK;
- disk->status &= ~SPARE_DISK;
-
- /* mark rebuild */
- to_state = imsm_check_degraded(super, dev, failed);
- map->map_state = IMSM_T_STATE_DEGRADED;
- migrate(dev, super, to_state, MIGR_REBUILD);
- migr_map = get_imsm_map(dev, 1);
- set_imsm_ord_tbl_ent(map, u->slot, dl->index);
- set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);
-
- /* update the family_num to mark a new container
- * generation, being careful to record the existing
- * family_num in orig_family_num to clean up after
- * earlier mdadm versions that neglected to set it.
- */
- if (mpb->orig_family_num == 0)
- mpb->orig_family_num = mpb->family_num;
- mpb->family_num += super->random;
-
- /* count arrays using the victim in the metadata */
- found = 0;
- for (a = st->arrays; a ; a = a->next) {
- dev = get_imsm_dev(super, a->info.container_member);
- map = get_imsm_map(dev, 0);
-
- if (get_imsm_disk_slot(map, victim) >= 0)
- found++;
- }
-
- /* delete the victim if it is no longer being
- * utilized anywhere
- */
- if (!found) {
- struct dl **dlp;
-
- /* We know that 'manager' isn't touching anything,
- * so it is safe to delete
- */
- for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
- if ((*dlp)->index == victim)
- break;
-
- /* victim may be on the missing list */
- if (!*dlp)
- for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
- if ((*dlp)->index == victim)
- break;
- imsm_delete(super, dlp, victim);
- }
+ if (apply_update_activate_spare(u, super, st->arrays))
+ super->updates_pending++;
break;
}
case update_create_array: {
@@ -7229,7 +7743,7 @@ static void imsm_process_update(struct supertype *st,
goto create_error;
}
- new_map = get_imsm_map(&u->dev, 0);
+ new_map = get_imsm_map(&u->dev, MAP_0);
new_start = __le32_to_cpu(new_map->pba_of_lba0);
new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
inf = get_disk_info(u);
@@ -7240,7 +7754,7 @@ static void imsm_process_update(struct supertype *st,
*/
for (i = 0; i < mpb->num_raid_devs; i++) {
dev = get_imsm_dev(super, i);
- map = get_imsm_map(dev, 0);
+ map = get_imsm_map(dev, MAP_0);
start = __le32_to_cpu(map->pba_of_lba0);
end = start + __le32_to_cpu(map->blocks_per_member);
if ((new_start >= start && new_start <= end) ||
@@ -7421,7 +7935,7 @@ static void imsm_prepare_update(struct supertype *st,
if (u->direction == R0_TO_R10) {
void **tail = (void **)&update->space_list;
struct imsm_dev *dev = get_imsm_dev(super, u->subarray);
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
int num_members = map->num_members;
void *space;
int size, i;
@@ -7553,7 +8067,7 @@ static void imsm_prepare_update(struct supertype *st,
struct imsm_map *map;
dev = get_imsm_dev(super, u->subdev);
- map = get_imsm_map(dev, 0);
+ map = get_imsm_map(dev, MAP_0);
current_level = map->raid_level;
break;
}
@@ -7587,7 +8101,7 @@ static void imsm_prepare_update(struct supertype *st,
struct imsm_update_create_array *u = (void *) update->buf;
struct intel_dev *dv;
struct imsm_dev *dev = &u->dev;
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
struct dl *dl;
struct disk_info *inf;
int i;
@@ -7672,20 +8186,20 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
for (i = 0; i < mpb->num_raid_devs; i++) {
dev = get_imsm_dev(super, i);
- map = get_imsm_map(dev, 0);
+ map = get_imsm_map(dev, MAP_0);
num_members = map->num_members;
for (j = 0; j < num_members; j++) {
/* update ord entries being careful not to propagate
* ord-flags to the first map
*/
- ord = get_imsm_ord_tbl_ent(dev, j, -1);
+ ord = get_imsm_ord_tbl_ent(dev, j, MAP_X);
if (ord_to_idx(ord) <= index)
continue;
- map = get_imsm_map(dev, 0);
+ map = get_imsm_map(dev, MAP_0);
set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
- map = get_imsm_map(dev, 1);
+ map = get_imsm_map(dev, MAP_1);
if (map)
set_imsm_ord_tbl_ent(map, j, ord - 1);
}
@@ -7701,6 +8215,75 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
}
}
#endif /* MDASSEMBLE */
+/* Close every descriptor >= 0 in targets[0..new_disks-1] and reset its entry to -1. */
+static void close_targets(int *targets, int new_disks)
+{
+ int i;
+ /* nothing to do when no table was passed */
+ if (!targets)
+ return;
+ /* negative entries are left untouched */
+ for (i = 0; i < new_disks; i++) {
+ if (targets[i] >= 0) {
+ close(targets[i]);
+ targets[i] = -1;
+ }
+ }
+}
+/* Allowed degradation by level: 10 -> raid_disks/2 (0 if a checked pair is all NULL), 5 -> 1, 6 -> 2, else 0. */
+static int imsm_get_allowed_degradation(int level, int raid_disks,
+ struct intel_super *super,
+ struct imsm_dev *dev)
+{
+ switch (level) {
+ case 10:{
+ int ret_val = 0;
+ struct imsm_map *map;
+ int i;
+ /* start from half of raid_disks */
+ ret_val = raid_disks/2;
+ /* return 0 if both lookups of any checked pair give NULL
+ * NOTE(review): 'map' is not consulted by the loop; lookups
+ * use raw indices i and i + 1 - confirm this is intended */
+ map = get_imsm_map(dev, MAP_0);
+ for (i = 0; i < ret_val; i++) {
+ int degradation = 0;
+ if (get_imsm_disk(super, i) == NULL)
+ degradation++;
+ if (get_imsm_disk(super, i + 1) == NULL)
+ degradation++;
+ if (degradation == 2)
+ return 0;
+ }
+ map = get_imsm_map(dev, MAP_1);
+ /* if there is no second map
+ * result can be returned
+ */
+ if (map == NULL)
+ return ret_val;
+ /* NOTE(review): same lookups as the first loop; the second
+ * map is only tested for NULL above - confirm intent */
+ for (i = 0; i < ret_val; i++) {
+ int degradation = 0;
+ if (get_imsm_disk(super, i) == NULL)
+ degradation++;
+ if (get_imsm_disk(super, i + 1) == NULL)
+ degradation++;
+ if (degradation == 2)
+ return 0;
+ }
+ return ret_val;
+ }
+ case 5:
+ return 1;
+ case 6:
+ return 2;
+ default:
+ return 0;
+ }
+}
+
+
/*******************************************************************************
* Function: open_backup_targets
* Description: Function opens file descriptors for all devices given in
@@ -7709,13 +8292,21 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
* info : general array info
* raid_disks : number of disks
* raid_fds : table of device's file descriptors
+ * super : intel super for raid10 degradation check
+ * dev : intel device for raid10 degradation check
* Returns:
* 0 : success
* -1 : fail
******************************************************************************/
-int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds)
+int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds,
+ struct intel_super *super, struct imsm_dev *dev)
{
struct mdinfo *sd;
+ int i;
+ int opened = 0;
+
+ for (i = 0; i < raid_disks; i++)
+ raid_fds[i] = -1;
for (sd = info->devs ; sd ; sd = sd->next) {
char *dn;
@@ -7734,8 +8325,19 @@ int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds)
raid_fds[sd->disk.raid_disk] = dev_open(dn, O_RDWR);
if (raid_fds[sd->disk.raid_disk] < 0) {
fprintf(stderr, "cannot open component\n");
- return -1;
+ continue;
}
+ opened++;
+ }
+ /* check if maximum array degradation level is not exceeded
+ */
+ if ((raid_disks - opened) >
+ imsm_get_allowed_degradation(info->new_level,
+ raid_disks,
+ super, dev)) {
+ fprintf(stderr, "Not enough disks can be opened.\n");
+ close_targets(raid_fds, raid_disks);
+ return -2;
}
return 0;
}
@@ -7762,8 +8364,8 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
struct mdinfo *sd;
char nm[30];
int fd;
- struct imsm_map *map_dest = get_imsm_map(dev, 0);
- struct imsm_map *map_src = get_imsm_map(dev, 1);
+ struct imsm_map *map_dest = get_imsm_map(dev, MAP_0);
+ struct imsm_map *map_src = get_imsm_map(dev, MAP_1);
unsigned long long num_migr_units;
unsigned long long array_blocks;
@@ -7776,7 +8378,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
migr_rec->dest_depth_per_unit = GEN_MIGR_AREA_SIZE /
max(map_dest->blocks_per_strip, map_src->blocks_per_strip);
migr_rec->dest_depth_per_unit *= map_dest->blocks_per_strip;
- new_data_disks = imsm_num_data_members(dev, 0);
+ new_data_disks = imsm_num_data_members(dev, MAP_0);
migr_rec->blocks_per_unit =
__cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks);
migr_rec->dest_depth_per_unit =
@@ -7840,12 +8442,12 @@ int save_backup_imsm(struct supertype *st,
unsigned long long *target_offsets = NULL;
int *targets = NULL;
int i;
- struct imsm_map *map_dest = get_imsm_map(dev, 0);
+ struct imsm_map *map_dest = get_imsm_map(dev, MAP_0);
int new_disks = map_dest->num_members;
int dest_layout = 0;
int dest_chunk;
unsigned long long start;
- int data_disks = imsm_num_data_members(dev, 0);
+ int data_disks = imsm_num_data_members(dev, MAP_0);
targets = malloc(new_disks * sizeof(int));
if (!targets)
@@ -7868,7 +8470,8 @@ int save_backup_imsm(struct supertype *st,
target_offsets[i] -= start/data_disks;
}
- if (open_backup_targets(info, new_disks, targets))
+ if (open_backup_targets(info, new_disks, targets,
+ super, dev))
goto abort;
dest_layout = imsm_level_to_layout(map_dest->raid_level);
@@ -7894,9 +8497,7 @@ int save_backup_imsm(struct supertype *st,
abort:
if (targets) {
- for (i = 0; i < new_disks; i++)
- if (targets[i] >= 0)
- close(targets[i]);
+ close_targets(targets, new_disks);
free(targets);
}
free(target_offsets);
@@ -7985,7 +8586,6 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
char buffer[20];
int skipped_disks = 0;
- int max_degradation;
err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20);
if (err < 1)
@@ -8007,9 +8607,8 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
if (id == NULL)
return 1;
- map_dest = get_imsm_map(id->dev, 0);
+ map_dest = get_imsm_map(id->dev, MAP_0);
new_disks = map_dest->num_members;
- max_degradation = new_disks - imsm_num_data_members(id->dev, 0);
read_offset = (unsigned long long)
__le32_to_cpu(migr_rec->ckpt_area_pba) * 512;
@@ -8025,7 +8624,11 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
if (!targets)
goto abort;
- open_backup_targets(info, new_disks, targets);
+ if (open_backup_targets(info, new_disks, targets, super, id->dev)) {
+ fprintf(stderr,
+ Name ": Cannot open some devices belonging to array.\n");
+ goto abort;
+ }
for (i = 0; i < new_disks; i++) {
if (targets[i] < 0) {
@@ -8036,29 +8639,36 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
fprintf(stderr,
Name ": Cannot seek to block: %s\n",
strerror(errno));
- goto abort;
+ skipped_disks++;
+ continue;
}
if ((unsigned)read(targets[i], buf, unit_len) != unit_len) {
fprintf(stderr,
Name ": Cannot read copy area block: %s\n",
strerror(errno));
- goto abort;
+ skipped_disks++;
+ continue;
}
if (lseek64(targets[i], write_offset, SEEK_SET) < 0) {
fprintf(stderr,
Name ": Cannot seek to block: %s\n",
strerror(errno));
- goto abort;
+ skipped_disks++;
+ continue;
}
if ((unsigned)write(targets[i], buf, unit_len) != unit_len) {
fprintf(stderr,
Name ": Cannot restore block: %s\n",
strerror(errno));
- goto abort;
+ skipped_disks++;
+ continue;
}
}
- if (skipped_disks > max_degradation) {
+ if (skipped_disks > imsm_get_allowed_degradation(info->new_level,
+ new_disks,
+ super,
+ id->dev)) {
fprintf(stderr,
Name ": Cannot restore data from backup."
" Too many failed disks\n");
@@ -8382,7 +8992,7 @@ static int imsm_create_metadata_update_for_migration(
if (dev) {
struct imsm_map *map;
- map = get_imsm_map(dev, 0);
+ map = get_imsm_map(dev, MAP_0);
if (map) {
int current_chunk_size =
__le16_to_cpu(map->blocks_per_strip) / 2;
@@ -8451,6 +9061,10 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
int change = -1;
int check_devs = 0;
int chunk;
+ /* number of added/removed disks in operation result */
+ int devNumChange = 0;
+ /* imsm compatible layout value for array geometry verification */
+ int imsm_layout = -1;
getinfo_super_imsm_volume(st, &info, NULL);
if ((geo->level != info.array.level) &&
@@ -8468,23 +9082,23 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
change = -1;
goto analyse_change_exit;
}
+ imsm_layout = geo->layout;
check_devs = 1;
- }
- if (geo->level == 10) {
+ devNumChange = 1; /* parity disk added */
+ } else if (geo->level == 10) {
change = CH_TAKEOVER;
check_devs = 1;
+ devNumChange = 2; /* two mirrors added */
+ imsm_layout = 0x102; /* imsm supported layout */
}
break;
case 1:
- if (geo->level == 0) {
- change = CH_TAKEOVER;
- check_devs = 1;
- }
- break;
case 10:
if (geo->level == 0) {
change = CH_TAKEOVER;
check_devs = 1;
+ devNumChange = -(geo->raid_disks/2);
+ imsm_layout = 0; /* imsm raid0 layout */
}
break;
}
@@ -8519,8 +9133,11 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
change = -1;
goto analyse_change_exit;
}
- } else
+ } else {
geo->layout = info.array.layout;
+ if (imsm_layout == -1)
+ imsm_layout = info.array.layout;
+ }
if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
&& (geo->chunksize != info.array.chunk_size))
@@ -8531,8 +9148,8 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
chunk = geo->chunksize / 1024;
if (!validate_geometry_imsm(st,
geo->level,
- geo->layout,
- geo->raid_disks,
+ imsm_layout,
+ geo->raid_disks + devNumChange,
&chunk,
geo->size,
0, 0, 1))
@@ -8661,8 +9278,9 @@ static int imsm_reshape_super(struct supertype *st, long long size, int level,
dprintf("imsm: info: Volume operation\n");
/* find requested device */
while (dev) {
- imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum);
- if (devnum == geo.dev_id)
+ if (imsm_find_array_minor_by_subdev(
+ dev->index, st->container_dev, &devnum) == 0
+ && devnum == geo.dev_id)
break;
dev = dev->next;
}
@@ -8891,12 +9509,12 @@ static int imsm_manage_reshape(
goto abort;
}
- map_src = get_imsm_map(dev, 1);
+ map_src = get_imsm_map(dev, MAP_1);
if (map_src == NULL)
goto abort;
- ndata = imsm_num_data_members(dev, 0);
- odata = imsm_num_data_members(dev, 1);
+ ndata = imsm_num_data_members(dev, MAP_0);
+ odata = imsm_num_data_members(dev, MAP_1);
chunk = __le16_to_cpu(map_src->blocks_per_strip) * 512;
old_data_stripe_length = odata * chunk;
diff --git a/super-mbr.c b/super-mbr.c
index 5eefdf69..64999637 100644
--- a/super-mbr.c
+++ b/super-mbr.c
@@ -169,13 +169,14 @@ static void getinfo_mbr(struct supertype *st, struct mdinfo *info, char *map)
static struct supertype *match_metadata_desc(char *arg)
{
- struct supertype *st = malloc(sizeof(*st));
+ struct supertype *st;
- if (!st)
- return st;
if (strcmp(arg, "mbr") != 0)
return NULL;
+ st = malloc(sizeof(*st));
+ if (!st)
+ return st;
st->ss = &mbr;
st->info = NULL;
st->minor_version = 0;
diff --git a/super0.c b/super0.c
index 4a165f9b..dab85db8 100644
--- a/super0.c
+++ b/super0.c
@@ -360,6 +360,9 @@ static void getinfo_super0(struct supertype *st, struct mdinfo *info, char *map)
info->array.state = sb->state;
info->component_size = sb->size*2;
+ if (sb->state & (1<<MD_SB_BITMAP_PRESENT))
+ info->bitmap_offset = 8;
+
info->disk.state = sb->this_disk.state;
info->disk.major = sb->this_disk.major;
info->disk.minor = sb->this_disk.minor;
@@ -387,6 +390,8 @@ static void getinfo_super0(struct supertype *st, struct mdinfo *info, char *map)
} else
info->reshape_active = 0;
+ info->recovery_blocked = info->reshape_active;
+
sprintf(info->name, "%d", sb->md_minor);
/* work_disks is calculated rather than read directly */
for (i=0; i < MD_SB_DISKS; i++)
@@ -570,6 +575,10 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
sb->state &= ~(1<<MD_SB_BITMAP_PRESENT);
} else if (strcmp(update, "_reshape_progress")==0)
sb->reshape_position = info->reshape_progress;
+ else if (strcmp(update, "writemostly")==0)
+ sb->state |= (1<<MD_DISK_WRITEMOSTLY);
+ else if (strcmp(update, "readwrite")==0)
+ sb->state &= ~(1<<MD_DISK_WRITEMOSTLY);
else
rv = -1;
@@ -688,6 +697,8 @@ static int add_to_super0(struct supertype *st, mdu_disk_info_t *dinfo,
dk->minor = dinfo->minor;
dk->raid_disk = dinfo->raid_disk;
dk->state = dinfo->state;
+ /* In case our source disk was writemostly, don't copy that bit */
+ dk->state &= ~(1<<MD_DISK_WRITEMOSTLY);
sb->this_disk = sb->disks[dinfo->number];
sb->sb_csum = calc_sb0_csum(sb);
@@ -1115,6 +1126,13 @@ static int validate_geometry0(struct supertype *st, int level,
{
unsigned long long ldsize;
int fd;
+ unsigned int tbmax = 4;
+
+ /* prior to linux 3.1, a bug limits usable device size to 2TB.
+ * It was introduced in 2.6.29, but we won't worry about that detail
+ */
+ if (get_linux_version() < 3001000)
+ tbmax = 2;
if (level == LEVEL_CONTAINER) {
if (verbose)
@@ -1127,9 +1145,10 @@ static int validate_geometry0(struct supertype *st, int level,
MD_SB_DISKS);
return 0;
}
- if (size > (0x7fffffffULL<<9)) {
+ if (size >= tbmax * 2ULL*1024*1024*1024) {
if (verbose)
- fprintf(stderr, Name ": 0.90 metadata supports at most 2 terrabytes per device\n");
+ fprintf(stderr, Name ": 0.90 metadata supports at most "
+ "%d terabytes per device\n", tbmax);
return 0;
}
if (chunk && *chunk == UnSet)
@@ -1154,8 +1173,6 @@ static int validate_geometry0(struct supertype *st, int level,
if (ldsize < MD_RESERVED_SECTORS * 512)
return 0;
- if (size > (0x7fffffffULL<<9))
- return 0;
*freesize = MD_NEW_SIZE_SECTORS(ldsize >> 9);
return 1;
}
diff --git a/super1.c b/super1.c
index 09be351e..867aa58c 100644
--- a/super1.c
+++ b/super1.c
@@ -313,7 +313,7 @@ static void examine_super1(struct supertype *st, char *homehost)
printf("\n");
}
if (sb->devflags) {
- printf(" Flags :");
+ printf(" Flags :");
if (sb->devflags & WriteMostly1)
printf(" write-mostly");
printf("\n");
@@ -578,6 +578,8 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
info->data_offset = __le64_to_cpu(sb->data_offset);
info->component_size = __le64_to_cpu(sb->size);
+ if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET))
+ info->bitmap_offset = __le32_to_cpu(sb->bitmap_offset);
info->disk.major = 0;
info->disk.minor = 0;
@@ -600,6 +602,8 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
info->disk.state = 6; /* active and in sync */
info->disk.raid_disk = role;
}
+ if (sb->devflags & WriteMostly1)
+ info->disk.state |= (1 << MD_DISK_WRITEMOSTLY);
info->events = __le64_to_cpu(sb->events);
sprintf(info->text_version, "1.%d", st->minor_version);
info->safe_mode_delay = 200;
@@ -626,6 +630,8 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
} else
info->reshape_active = 0;
+ info->recovery_blocked = info->reshape_active;
+
if (map)
for (i=0; i<map_disks; i++)
map[i] = 0;
@@ -803,6 +809,10 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
__le64_to_cpu(sb->data_size));
} else if (strcmp(update, "_reshape_progress")==0)
sb->reshape_position = __cpu_to_le64(info->reshape_progress);
+ else if (strcmp(update, "writemostly")==0)
+ sb->devflags |= WriteMostly1;
+ else if (strcmp(update, "readwrite")==0)
+ sb->devflags &= ~WriteMostly1;
else
rv = -1;
@@ -923,6 +933,7 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
sb->max_dev = __cpu_to_le32(dk->number+1);
sb->dev_number = __cpu_to_le32(dk->number);
+ sb->devflags = 0; /* don't copy another disks flags */
sb->sb_csum = calc_sb_1_csum(sb);
dip = (struct devinfo **)&st->info;
@@ -1055,7 +1066,9 @@ static int write_init_super1(struct supertype *st)
sb->dev_number = __cpu_to_le32(di->disk.number);
if (di->disk.state & (1<<MD_DISK_WRITEMOSTLY))
- sb->devflags |= __cpu_to_le32(WriteMostly1);
+ sb->devflags |= WriteMostly1;
+ else
+ sb->devflags &= ~WriteMostly1;
if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
read(rfd, sb->device_uuid, 16) != 16) {
@@ -1373,7 +1386,8 @@ static int load_super1(struct supertype *st, int fd, char *devname)
return 0;
no_bitmap:
- super->feature_map = __cpu_to_le32(__le32_to_cpu(super->feature_map) & ~1);
+ super->feature_map = __cpu_to_le32(__le32_to_cpu(super->feature_map)
+ & ~MD_FEATURE_BITMAP_OFFSET);
return 0;
}
@@ -1475,12 +1489,10 @@ add_internal_bitmap1(struct supertype *st,
int may_change, int major)
{
/*
- * If not may_change, then this is a 'Grow', and the bitmap
- * must fit after the superblock.
- * If may_change, then this is create, and we can put the bitmap
- * before the superblock if we like, or may move the start.
- * If !may_change, the bitmap MUST live at offset of 1K, until
- * we get a sysfs interface.
+ * If not may_change, then this is a 'Grow' without sysfs support for
+ * bitmaps, and the bitmap must fit after the superblock at 1K offset.
+ * If may_change, then this is create or a Grow with sysfs support,
+ * and we can put the bitmap wherever we like.
*
* size is in sectors, chunk is in bytes !!!
*/
@@ -1491,16 +1503,20 @@ add_internal_bitmap1(struct supertype *st,
long offset;
unsigned long long chunk = *chunkp;
int room = 0;
+ int creating = 0;
struct mdp_superblock_1 *sb = st->sb;
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + 1024);
int uuid[4];
+ if (__le64_to_cpu(sb->data_size) == 0)
+ /* Must be creating the array, else data_size would be non-zero */
+ creating = 1;
switch(st->minor_version) {
case 0:
/* either 3K after the superblock (when hot-add),
* or some amount of space before.
*/
- if (may_change) {
+ if (creating) {
/* We are creating array, so we *know* how much room has
* been left.
*/
@@ -1510,8 +1526,8 @@ add_internal_bitmap1(struct supertype *st,
room = __le64_to_cpu(sb->super_offset)
- __le64_to_cpu(sb->data_offset)
- __le64_to_cpu(sb->data_size);
- /* remove '1 ||' when we can set offset via sysfs */
- if (1 || (room < 3*2 &&
+
+ if (!may_change || (room < 3*2 &&
__le32_to_cpu(sb->max_dev) <= 384)) {
room = 3*2;
offset = 1*2;
@@ -1522,17 +1538,17 @@ add_internal_bitmap1(struct supertype *st,
break;
case 1:
case 2: /* between superblock and data */
- if (may_change) {
+ if (creating) {
offset = 4*2;
room = choose_bm_space(__le64_to_cpu(sb->size));
} else {
room = __le64_to_cpu(sb->data_offset)
- __le64_to_cpu(sb->super_offset);
- if (1 || __le32_to_cpu(sb->max_dev) <= 384) {
- room -= 2;
+ if (!may_change) {
+ room -= 2; /* Leave 1K for superblock */
offset = 2;
} else {
- room -= 4*2;
+ room -= 4*2; /* leave 4K for superblock */
offset = 4*2;
}
}
@@ -1577,7 +1593,8 @@ add_internal_bitmap1(struct supertype *st,
sb->bitmap_offset = __cpu_to_le32(offset);
- sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map) | 1);
+ sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map)
+ | MD_FEATURE_BITMAP_OFFSET);
memset(bms, 0, sizeof(*bms));
bms->magic = __cpu_to_le32(BITMAP_MAGIC);
bms->version = __cpu_to_le32(major);
@@ -1592,7 +1609,6 @@ add_internal_bitmap1(struct supertype *st,
return 1;
}
-
static void locate_bitmap1(struct supertype *st, int fd)
{
unsigned long long offset;
@@ -1620,7 +1636,7 @@ static int write_bitmap1(struct supertype *st, int fd)
int rv = 0;
int towrite, n;
- char *buf = (char*)(((long)(abuf+4096))&~4095UL);
+ char buf[4096];
locate_bitmap1(st, fd);
@@ -1635,7 +1651,7 @@ static int write_bitmap1(struct supertype *st, int fd)
n = towrite;
if (n > 4096)
n = 4096;
- n = write(fd, buf, n);
+ n = awrite(fd, buf, n);
if (n > 0)
towrite -= n;
else
diff --git a/sysfs.c b/sysfs.c
index 44314baf..e32ececa 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -217,6 +217,19 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
msec = (msec * 1000) / scale;
sra->safe_mode_delay = msec;
}
+ if (options & GET_BITMAP_LOCATION) {
+ strcpy(base, "bitmap/location");
+ if (load_sys(fname, buf))
+ goto abort;
+ if (strncmp(buf, "file", 4) == 0)
+ sra->bitmap_offset = 1;
+ else if (strncmp(buf, "none", 4) == 0)
+ sra->bitmap_offset = 0;
+ else if (buf[0] == '+')
+ sra->bitmap_offset = strtoul(buf+1, NULL, 10);
+ else
+ goto abort;
+ }
if (! (options & GET_DEVS))
return sra;
@@ -379,7 +392,7 @@ unsigned long long get_component_size(int fd)
return 0;
n = read(fd, fname, sizeof(fname));
close(fd);
- if (n == sizeof(fname))
+ if (n < 0 || n == sizeof(fname))
return 0;
fname[n] = 0;
return strtoull(fname, NULL, 10) * 2;
@@ -470,7 +483,7 @@ int sysfs_fd_get_ll(int fd, unsigned long long *val)
lseek(fd, 0, 0);
n = read(fd, buf, sizeof(buf));
if (n <= 0)
- return -1;
+ return -2;
buf[n] = 0;
*val = strtoull(buf, &ep, 0);
if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))
@@ -541,8 +554,21 @@ int sysfs_set_array(struct mdinfo *info, int vers)
ver[0] = 0;
if (info->array.major_version == -1 &&
info->array.minor_version == -2) {
+ char buf[1024];
+
strcat(strcpy(ver, "external:"), info->text_version);
+ /* meta version might already be set if we are setting
+ * new geometry for a reshape. In that case we don't
+ * want to over-write the 'readonly' flag that is
+ * stored in the metadata version. So read the current
+ * version first, and preserve the flag
+ */
+ if (sysfs_get_str(info, NULL, "metadata_version",
+ buf, 1024) > 0)
+ if (strlen(buf) >= 9 && buf[9] == '-')
+ ver[9] = '-';
+
if ((vers % 100) < 2 ||
sysfs_set_str(info, NULL, "metadata_version",
ver) < 0) {
@@ -606,7 +632,7 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
memset(nm, 0, sizeof(nm));
sprintf(dv, "/sys/dev/block/%d:%d", sd->disk.major, sd->disk.minor);
- rv = readlink(dv, nm, sizeof(nm));
+ rv = readlink(dv, nm, sizeof(nm)-1);
if (rv <= 0)
return -1;
nm[rv] = '\0';
@@ -709,9 +735,9 @@ int sysfs_disk_to_scsi_id(int fd, __u32 *id)
/* from an open block device, try to retrieve it scsi_id */
struct stat st;
char path[256];
- char *c1, *c2;
DIR *dir;
struct dirent *de;
+ int host, bus, target, lun;
if (fstat(fd, &st))
return 1;
@@ -723,32 +749,22 @@ int sysfs_disk_to_scsi_id(int fd, __u32 *id)
if (!dir)
return 1;
- de = readdir(dir);
- while (de) {
- if (strchr(de->d_name, ':'))
+ for (de = readdir(dir); de; de = readdir(dir)) {
+ int count;
+
+ if (de->d_type != DT_DIR)
+ continue;
+
+ count = sscanf(de->d_name, "%d:%d:%d:%d", &host, &bus, &target, &lun);
+ if (count == 4)
break;
- de = readdir(dir);
}
closedir(dir);
if (!de)
return 1;
- c1 = de->d_name;
- c2 = strchr(c1, ':');
- *c2 = '\0';
- *id = strtol(c1, NULL, 10) << 24; /* host */
- c1 = c2 + 1;
- c2 = strchr(c1, ':');
- *c2 = '\0';
- *id |= strtol(c1, NULL, 10) << 16; /* bus */
- c1 = c2 + 1;
- c2 = strchr(c1, ':');
- *c2 = '\0';
- *id |= strtol(c1, NULL, 10) << 8; /* target */
- c1 = c2 + 1;
- *id |= strtol(c1, NULL, 10); /* lun */
-
+ *id = (host << 24) | (bus << 16) | (target << 8) | (lun << 0);
return 0;
}
@@ -793,6 +809,8 @@ int sysfs_unique_holder(int devnum, long rdev)
}
n = read(fd, buf, sizeof(buf)-1);
close(fd);
+ if (n < 0)
+ continue;
buf[n] = 0;
if (sscanf(buf, "%d:%d%c", &mj, &mn, &c) != 3 ||
c != '\n') {
diff --git a/tests/03r5assemV1 b/tests/03r5assemV1
index 5238edee..52b11077 100644
--- a/tests/03r5assemV1
+++ b/tests/03r5assemV1
@@ -58,6 +58,9 @@ mdadm --assemble --scan --config=$conf $md1
eval $tst
### Now with a missing device
+# We don't want the recovery to complete while we are
+# messing about here.
+echo 1000 > /proc/sys/dev/raid/speed_limit_max
mdadm -AR $md1 $dev0 $dev2 $dev3 $dev4 #
check state U_U
@@ -120,3 +123,4 @@ mdadm -I -c $conf $dev0
mdadm -I -c $conf $dev1
mdadm -I -c $conf $dev2
eval $tst
+echo 2000 > /proc/sys/dev/raid/speed_limit_max
diff --git a/udev-md-raid.rules b/udev-md-raid.rules
index 1d898332..f564f70a 100644
--- a/udev-md-raid.rules
+++ b/udev-md-raid.rules
@@ -2,11 +2,19 @@
SUBSYSTEM!="block", GOTO="md_end"
-# handle potential components of arrays
-ENV{ID_FS_TYPE}=="linux_raid_member", ACTION=="remove", RUN+="/sbin/mdadm -If $name --path $env{ID_PATH}"
-ENV{ID_FS_TYPE}=="linux_raid_member", ACTION=="add", RUN+="/sbin/mdadm --incremental $env{DEVNAME}"
-ENV{ID_FS_TYPE}=="isw_raid_member", ACTION=="remove", RUN+="/sbin/mdadm -If $name --path $env{ID_PATH}"
-ENV{ID_FS_TYPE}=="isw_raid_member", ACTION=="add", RUN+="/sbin/mdadm --incremental $env{DEVNAME}"
+# handle potential components of arrays (the ones supported by md)
+ENV{ID_FS_TYPE}=="ddf_raid_member|isw_raid_member|linux_raid_member", GOTO="md_inc"
+GOTO="md_inc_skip"
+
+LABEL="md_inc"
+
+# remember you can limit what gets auto/incrementally assembled by
+# mdadm.conf(5)'s 'AUTO' and selectively whitelist using 'ARRAY'
+ACTION=="add", RUN+="/sbin/mdadm --incremental $tempnode"
+ACTION=="remove", ENV{ID_PATH}=="?*", RUN+="/sbin/mdadm -If $name --path $env{ID_PATH}"
+ACTION=="remove", ENV{ID_PATH}!="?*", RUN+="/sbin/mdadm -If $name"
+
+LABEL="md_inc_skip"
# handle md arrays
ACTION!="add|change", GOTO="md_end"
diff --git a/util.c b/util.c
index 10bbe56a..6985a707 100644
--- a/util.c
+++ b/util.c
@@ -146,16 +146,16 @@ int get_linux_version()
{
struct utsname name;
char *cp;
- int a,b,c;
+ int a = 0, b = 0,c = 0;
if (uname(&name) <0)
return -1;
cp = name.release;
a = strtoul(cp, &cp, 10);
- if (*cp != '.') return -1;
- b = strtoul(cp+1, &cp, 10);
- if (*cp != '.') return -1;
- c = strtoul(cp+1, NULL, 10);
+ if (*cp == '.')
+ b = strtoul(cp+1, &cp, 10);
+ if (*cp == '.')
+ c = strtoul(cp+1, &cp, 10);
return (a*1000000)+(b*1000)+c;
}
@@ -363,7 +363,7 @@ int enough_fd(int fd)
struct mdu_array_info_s array;
struct mdu_disk_info_s disk;
int avail_disks = 0;
- int i;
+ int i, rv;
char *avail;
if (ioctl(fd, GET_ARRAY_INFO, &array) != 0 ||
@@ -386,9 +386,10 @@ int enough_fd(int fd)
avail[disk.raid_disk] = 1;
}
/* This is used on an active array, so assume it is clean */
- return enough(array.level, array.raid_disks, array.layout,
- 1,
- avail, avail_disks);
+ rv = enough(array.level, array.raid_disks, array.layout,
+ 1, avail, avail_disks);
+ free(avail);
+ return rv;
}
@@ -535,6 +536,7 @@ int check_raid(int fd, char *name)
struct supertype *st = guess_super(fd);
if (!st) return 0;
+ st->ignore_hw_compat = 1;
st->ss->load_super(st, fd, name);
/* Looks like a raid array .. */
fprintf(stderr, Name ": %s appears to be part of a raid array:\n",
@@ -639,7 +641,7 @@ char *human_size(long long bytes)
* We allow upto 2048Megabytes before converting to
* gigabytes, as that shows more precision and isn't
* too large a number.
- * Terrabytes are not yet handled.
+ * Terabytes are not yet handled.
*/
if (bytes < 5000*1024)
@@ -702,6 +704,12 @@ void print_r10_layout(int layout)
unsigned long long calc_array_size(int level, int raid_disks, int layout,
int chunksize, unsigned long long devsize)
{
+ devsize &= ~(unsigned long long)((chunksize>>9)-1);
+ return get_data_disks(level, layout, raid_disks) * devsize;
+}
+
+int get_data_disks(int level, int layout, int raid_disks)
+{
int data_disks = 0;
switch (level) {
case 0: data_disks = raid_disks; break;
@@ -712,8 +720,8 @@ unsigned long long calc_array_size(int level, int raid_disks, int layout,
case 10: data_disks = raid_disks / (layout & 255) / ((layout>>8)&255);
break;
}
- devsize &= ~(unsigned long long)((chunksize>>9)-1);
- return data_disks * devsize;
+
+ return data_disks;
}
#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
@@ -1120,7 +1128,8 @@ static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart)
{
struct GPT gpt;
unsigned char empty_gpt_entry[16]= {0};
- struct GPT_part_entry part;
+ struct GPT_part_entry *part;
+ char buf[512];
unsigned long long curr_part_end;
unsigned all_partitions, entry_size;
unsigned part_nr;
@@ -1144,18 +1153,20 @@ static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart)
/* sanity checks */
if (all_partitions > 1024 ||
- entry_size > 512)
+ entry_size > sizeof(buf))
return -1;
+ part = (struct GPT_part_entry *)buf;
+
for (part_nr=0; part_nr < all_partitions; part_nr++) {
/* read partition entry */
- if (read(fd, &part, entry_size) != (ssize_t)entry_size)
+ if (read(fd, buf, entry_size) != (ssize_t)entry_size)
return 0;
/* is this valid partition? */
- if (memcmp(part.type_guid, empty_gpt_entry, 16) != 0) {
+ if (memcmp(part->type_guid, empty_gpt_entry, 16) != 0) {
/* check the last lba for the current partition */
- curr_part_end = __le64_to_cpu(part.ending_lba);
+ curr_part_end = __le64_to_cpu(part->ending_lba);
if (curr_part_end > *endofpart)
*endofpart = curr_part_end;
}
@@ -1369,7 +1380,7 @@ int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet)
if (!quiet)
fprintf(stderr, Name ": Couldn't open %s, aborting\n",
dev);
- return 2;
+ return -1;
}
st->devnum = fd2devnum(fd);
@@ -1572,7 +1583,7 @@ int mdmon_running(int devnum)
int start_mdmon(int devnum)
{
- int i;
+ int i, skipped;
int len;
pid_t pid;
int status;
@@ -1587,7 +1598,7 @@ int start_mdmon(int devnum)
if (check_env("MDADM_NO_MDMON"))
return 0;
- len = readlink("/proc/self/exe", pathbuf, sizeof(pathbuf));
+ len = readlink("/proc/self/exe", pathbuf, sizeof(pathbuf)-1);
if (len > 0) {
char *sl;
pathbuf[len] = 0;
@@ -1603,8 +1614,13 @@ int start_mdmon(int devnum)
switch(fork()) {
case 0:
/* FIXME yuk. CLOSE_EXEC?? */
- for (i=3; i < 100; i++)
- close(i);
+ skipped = 0;
+ for (i=3; skipped < 20; i++)
+ if (close(i) < 0)
+ skipped++;
+ else
+ skipped = 0;
+
for (i=0; paths[i]; i++)
if (paths[i][0])
execl(paths[i], "mdmon",
@@ -1697,7 +1713,8 @@ int experimental(void)
if (check_env("MDADM_EXPERIMENTAL"))
return 1;
else {
- fprintf(stderr, Name ": To use this feature MDADM_EXPERIMENTAL enviroment variable has to defined.\n");
+ fprintf(stderr, Name ": To use this feature MDADM_EXPERIMENTAL"
+ " environment variable has to be defined.\n");
return 0;
}
}