From 70c55e36b73827579fcb2dadbb6359ef605191ff Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Tue, 23 Oct 2012 16:27:15 +1100
Subject: Add support for --replace and --with

--replace can be used to replace a device without completely failing it.
Once the replacement completes the device will be failed.

--with can indicate which of several spares to use.

Signed-off-by: NeilBrown
---
Note: compared with the original commit, Manage_replace() and Manage_with()
below release the sysfs data on every return path, and the "Failed to set"
message has its missing verb.  Line counts are adjusted accordingly; the
blob ids on the Manage.c "index" line are not.

 Manage.c   | 185 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 ReadMe.c   |   7 +++
 mdadm.8.in |  24 +++++++++-
 mdadm.c    |  14 ++++++
 mdadm.h    |   4 +-
 5 files changed, 231 insertions(+), 3 deletions(-)

diff --git a/Manage.c b/Manage.c
index 296feeb7..832e84ce 100644
--- a/Manage.c
+++ b/Manage.c
@@ -954,6 +954,149 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
 	return 1;
 }
 
+/*
+ * Manage_replace - mark one member of an array as wanting replacement.
+ *
+ * tst:     metadata handler; arrays with tst->ss->external set are rejected.
+ * fd:      descriptor handed to sysfs_read() to load the array's devices.
+ * dv:      list entry for the device being replaced.  Entries from here on
+ *          are searched for an unpaired 'W' (--with) device.
+ * rdev:    device number of the member to mark.
+ * verbose: a confirmation is printed unless this is negative.
+ * devname: name of the array, used only in messages.
+ *
+ * "want_replacement" is written to the member's sysfs "state" attribute.
+ * If a 'W' entry follows, its disposition becomes 'w' and its 'used' field
+ * records the raid_disk number of the member being replaced.
+ *
+ * Returns 1 on success, -1 (after printing a message) on failure.
+ */
+int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
+		   unsigned long rdev, int verbose, char *devname)
+{
+	struct mdinfo *mdi, *di;
+	int rv = -1;
+
+	if (tst->ss->external) {
+		pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
+		return -1;
+	}
+	/* Need to find the device in sysfs and add 'want_replacement' to the
+	 * status.
+	 */
+	mdi = sysfs_read(fd, -1, GET_DEVS);
+	if (!mdi || !mdi->devs) {
+		pr_err("Cannot find status of %s to enable replacement - strange\n",
+		       devname);
+		goto out;
+	}
+	for (di = mdi->devs; di; di = di->next)
+		if (di->disk.major == (int)major(rdev) &&
+		    di->disk.minor == (int)minor(rdev))
+			break;
+	if (!di) {
+		pr_err("%s not found in %s so cannot --replace it\n",
+		       dv->devname, devname);
+		goto out;
+	}
+	if (di->disk.raid_disk < 0) {
+		pr_err("%s is not active and so cannot be replaced.\n",
+		       dv->devname);
+		goto out;
+	}
+	if (sysfs_set_str(mdi, di, "state", "want_replacement")) {
+		pr_err("Failed to request replacement for %s\n",
+		       dv->devname);
+		goto out;
+	}
+	if (verbose >= 0)
+		pr_err("Marked %s (device %d in %s) for replacement\n",
+		       dv->devname, di->disk.raid_disk, devname);
+	/* If there is a matching 'with', we need to tell it which
+	 * raid disk
+	 */
+	while (dv && dv->disposition != 'W')
+		dv = dv->next;
+	if (dv) {
+		dv->disposition = 'w';
+		dv->used = di->disk.raid_disk;
+	}
+	rv = 1;
+out:
+	/* Release the sysfs snapshot on every path, success included.
+	 * mdi is NULL only when sysfs_read() itself returned NULL.
+	 */
+	if (mdi)
+		sysfs_free(mdi);
+	return rv;
+}
+
+/*
+ * Manage_with - nominate a spare as the preferred replacement device.
+ *
+ * tst:     not used by this function.
+ * fd:      descriptor handed to sysfs_read() to load the array's devices.
+ * dv:      list entry for the spare; dv->used holds the slot to request.
+ * rdev:    device number of the spare.
+ * verbose: a confirmation is printed unless this is negative.
+ * devname: name of the array, used only in messages.
+ *
+ * The device must be known to the array, must not be faulty and must not
+ * already hold a slot (raid_disk >= 0).  dv->used is then written to the
+ * device's sysfs "slot" attribute.
+ *
+ * Returns 1 on success, -1 (after printing a message) on failure.
+ */
+int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
+		unsigned long rdev, int verbose, char *devname)
+{
+	struct mdinfo *mdi, *di;
+	int rv = -1;
+
+	/* try to set 'slot' for 'rdev' in 'fd' to 'dv->used' */
+	mdi = sysfs_read(fd, -1, GET_DEVS|GET_STATE);
+	if (!mdi || !mdi->devs) {
+		pr_err("Cannot find status of %s to enable replacement - strange\n",
+		       devname);
+		goto out;
+	}
+	for (di = mdi->devs; di; di = di->next)
+		if (di->disk.major == (int)major(rdev) &&
+		    di->disk.minor == (int)minor(rdev))
+			break;
+	if (!di) {
+		pr_err("%s not found in %s so cannot make it preferred replacement\n",
+		       dv->devname, devname);
+		goto out;
+	}
+	if (di->disk.state & (1<<MD_DISK_FAULTY)) {
+		pr_err("%s is faulty and cannot be a replacement\n",
+		       dv->devname);
+		goto out;
+	}
+	if (di->disk.raid_disk >= 0) {
+		pr_err("%s is active and cannot be a replacement\n",
+		       dv->devname);
+		goto out;
+	}
+	if (sysfs_set_num(mdi, di, "slot", dv->used)) {
+		pr_err("Failed to set %s as preferred replacement.\n",
+		       dv->devname);
+		goto out;
+	}
+	if (verbose >= 0)
+		pr_err("Marked %s in %s as replacement for device %d\n",
+		       dv->devname, devname, dv->used);
+	rv = 1;
+out:
+	/* Release the sysfs snapshot on every path, success included.
+	 * mdi is NULL only when sysfs_read() itself returned NULL.
+	 */
+	if (mdi)
+		sysfs_free(mdi);
+	return rv;
+}
+
 int Manage_subdevs(char *devname, int fd,
 		   struct mddev_dev *devlist, int verbose, int test,
 		   char *update, int force)
@@ -970,6 +1113,16 @@ int Manage_subdevs(char *devname, int fd,
 	 * 'f' - set the device faulty SET_DISK_FAULTY
 	 *       device can be 'detached' in which case any device that
 	 *       is inaccessible will be marked faulty.
+	 * 'R' - mark this device as wanting replacement.
+	 * 'W' - this device is added if necessary and activated as
+	 *       a replacement for a previous 'R' device.
+	 * -----
+	 * 'w' - 'W' will be changed to 'w' when it is paired with
+	 *       a 'R' device.  If a 'W' is found while walking the list
+	 *       it must be unpaired, and is an error.
+	 * 'M' - this is created by a 'missing' target.  It is a slight
+	 *       variant on 'A'
+	 *
 	 * For 'f' and 'r', the device can also be a kernel-internal
 	 * name such as 'sdb'.
*/ @@ -1209,6 +1324,38 @@ int Manage_subdevs(char *devname, int fd, pr_err("set %s faulty in %s\n", dv->devname, devname); break; + case 'R': /* Mark as replaceable */ + if (subarray) { + pr_err("Cannot replace disks in a" + " \'member\' array, perform this" + " operation on the parent container\n"); + rv = -1; + } else { + if (!frozen) { + if (sysfs_freeze_array(&info) == 1) + frozen = 1; + else + frozen = -1; + } + rv = Manage_replace(tst, fd, dv, + stb.st_rdev, verbose, + devname); + } + if (rv < 0) + goto abort; + if (rv > 0) + count++; + break; + case 'W': /* --with device that doesn't match */ + pr_err("No matching --replace device for --with %s\n", + dv->devname); + goto abort; + case 'w': /* --with device which was matched */ + rv = Manage_with(tst, fd, dv, + stb.st_rdev, verbose, devname); + if (rv < 0) + goto abort; + break; } } if (frozen > 0) diff --git a/ReadMe.c b/ReadMe.c index 2fde2ac5..0aa8cbd7 100644 --- a/ReadMe.c +++ b/ReadMe.c @@ -146,6 +146,8 @@ struct option long_options[] = { {"remove", 0, 0, Remove}, {"fail", 0, 0, Fail}, {"set-faulty",0, 0, Fail}, + {"replace", 0, 0, Replace}, + {"with", 0, 0, With}, {"run", 0, 0, 'R'}, {"stop", 0, 0, 'S'}, {"readonly", 0, 0, 'o'}, @@ -309,6 +311,7 @@ char OptionHelp[] = " --remove -r : remove subsequent devices\n" " --fail -f : mark subsequent devices as faulty\n" " --set-faulty : same as --fail\n" +" --replace : mark a device for replacement\n" " --run -R : start a partially built array\n" " --stop -S : deactivate array, releasing all resources\n" " --readonly -o : mark array as readonly\n" @@ -462,6 +465,10 @@ char Help_manage[] = " --remove -r : remove subsequent devices, which must not be active\n" " --fail -f : mark subsequent devices a faulty\n" " --set-faulty : same as --fail\n" +" --replace : mark device(s) to be replaced by spares. 
Once\n" +" : replacement completes, device will be marked faulty\n" +" --with : Indicate which spare a previous '--replace' should\n" +" : prefer to use\n" " --run -R : start a partially built array\n" " --stop -S : deactivate array, releasing all resources\n" " --readonly -o : mark array as readonly\n" diff --git a/mdadm.8.in b/mdadm.8.in index d30963b6..23e31228 100644 --- a/mdadm.8.in +++ b/mdadm.8.in @@ -216,8 +216,9 @@ to detect and assemble arrays \(em possibly in an If a device is given before any options, or if the first option is .BR \-\-add , .BR \-\-fail , -or .BR \-\-remove , +or +.BR \-\-replace , then the MANAGE mode is assumed. Anything other than these will cause the .B Misc @@ -1283,7 +1284,7 @@ have already been marked as failed. .TP .BR \-f ", " \-\-fail -mark listed devices as faulty. +Mark listed devices as faulty. As well as the name of a device file, the word .B detached can be given. This will cause any device that has been detached from @@ -1294,6 +1295,25 @@ the system to be marked as failed. It can then be removed. same as .BR \-\-fail . +.TP +.B \-\-replace +Mark listed devices as requiring replacement. As soon as a spare is +available, it will be rebuilt and will replace the marked device. +This is similar to marking a device as faulty, but the device remains +in service during the recovery process to increase resilience against +multiple failures. When the replacement process finishes, the +replaced device will be marked as faulty. + +.TP +.B \-\-with +This can follow a list of +.B \-\-replace +devices. The devices listed after +.B \-\-with +will be preferentially used to replace the devices listed after +.BR \-\-replace . +These devices must already be spare devices in the array. 
+ .TP .BR \-\-write\-mostly Subsequent devices that are added or re\-added will have the 'write-mostly' diff --git a/mdadm.c b/mdadm.c index 42544f1a..11016e7b 100644 --- a/mdadm.c +++ b/mdadm.c @@ -195,6 +195,8 @@ int main(int argc, char *argv[]) case Add: case 'r': case Remove: + case Replace: + case With: case 'f': case Fail: case ReAdd: /* re-add */ @@ -928,6 +930,18 @@ int main(int argc, char *argv[]) * remove the device */ devmode = 'f'; continue; + case O(MANAGE,Replace): + /* Mark these devices for replacement */ + devmode = 'R'; + continue; + case O(MANAGE,With): + /* These are the replacements to use */ + if (devmode != 'R') { + pr_err("--with must follow --replace\n"); + exit(2); + } + devmode = 'W'; + continue; case O(INCREMENTAL,'R'): case O(MANAGE,'R'): case O(ASSEMBLE,'R'): diff --git a/mdadm.h b/mdadm.h index ac051178..f1352e37 100644 --- a/mdadm.h +++ b/mdadm.h @@ -312,6 +312,8 @@ enum special_options { Add, Remove, Fail, + Replace, + With, MiscOpt, WaitOpt, ConfigFile, @@ -432,7 +434,7 @@ struct mddev_dev { * Not set for names read from .config */ char writemostly; /* 1 for 'set writemostly', 2 for 'clear writemostly' */ - char used; /* set when used */ + int used; /* set when used */ long long data_offset; struct mddev_dev *next; }; -- cgit v1.2.3