summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2012-10-23 16:27:15 +1100
committerNeilBrown <neilb@suse.de>2012-10-23 16:27:15 +1100
commit70c55e36b73827579fcb2dadbb6359ef605191ff (patch)
treeaf568358a6d2ab5c760820177fe121ab3ca4d0fa
parent1dc837e4edc805819a1ff40add3d0beecfeecdbb (diff)
Add support for --replace and --with
--replace can be used to replace a device without completely failing it. Once the replacement completes the device will be failed. --with can indicate which of several spares to use. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--Manage.c147
-rw-r--r--ReadMe.c7
-rw-r--r--mdadm.8.in24
-rw-r--r--mdadm.c14
-rw-r--r--mdadm.h4
5 files changed, 193 insertions, 3 deletions
diff --git a/Manage.c b/Manage.c
index 296feeb7..832e84ce 100644
--- a/Manage.c
+++ b/Manage.c
@@ -954,6 +954,111 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
return 1;
}
+/*
+ * Manage_replace - mark one active member of an array as wanting replacement.
+ *
+ * @tst:     metadata handler for the array; arrays using external metadata
+ *           are rejected.
+ * @fd:      open file descriptor of the array, passed to sysfs_read().
+ * @dv:      device-list entry naming the device to replace.  If a later
+ *           entry in the same list has disposition 'W' (an unpaired --with),
+ *           it is changed to 'w' and its 'used' field is set to the raid
+ *           disk number of the device being replaced.
+ * @rdev:    device number (major/minor) of the device to replace.
+ * @verbose: a confirmation message is printed when this is >= 0.
+ * @devname: name of the array, used only in messages.
+ *
+ * Returns 1 when the device was marked, -1 on any failure.
+ */
+int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
+		   unsigned long rdev, int verbose, char *devname)
+{
+	struct mdinfo *mdi, *di;
+	int rv = -1;
+
+	if (tst->ss->external) {
+		pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
+		return -1;
+	}
+	/* Need to find the device in sysfs and add 'want_replacement' to the
+	 * status.
+	 */
+	mdi = sysfs_read(fd, -1, GET_DEVS);
+	if (!mdi || !mdi->devs) {
+		pr_err("Cannot find status of %s to enable replacement - strange\n",
+		       devname);
+		goto out;
+	}
+	for (di = mdi->devs; di; di = di->next)
+		if (di->disk.major == (int)major(rdev) &&
+		    di->disk.minor == (int)minor(rdev))
+			break;
+	if (!di) {
+		pr_err("%s not found in %s so cannot --replace it\n",
+		       dv->devname, devname);
+		goto out;
+	}
+	if (di->disk.raid_disk < 0) {
+		pr_err("%s is not active and so cannot be replaced.\n",
+		       dv->devname);
+		goto out;
+	}
+	if (sysfs_set_str(mdi, di, "state", "want_replacement")) {
+		pr_err("Failed to request replacement for %s\n",
+		       dv->devname);
+		goto out;
+	}
+	if (verbose >= 0)
+		pr_err("Marked %s (device %d in %s) for replacement\n",
+		       dv->devname, di->disk.raid_disk, devname);
+	/* If there is a matching 'with', we need to tell it which
+	 * raid disk
+	 */
+	while (dv && dv->disposition != 'W')
+		dv = dv->next;
+	if (dv) {
+		dv->disposition = 'w';
+		dv->used = di->disk.raid_disk;
+	}
+	rv = 1;
+out:
+	/* 'di' points into 'mdi', so the tree is released only here, after
+	 * its last use; this also covers the success path and the
+	 * "no member devices" path.
+	 */
+	if (mdi)
+		sysfs_free(mdi);
+	return rv;
+}
+
+/*
+ * Manage_with - nominate a device as the preferred replacement for a slot.
+ *
+ * @tst:     metadata handler for the array (not used here).
+ * @fd:      open file descriptor of the array, passed to sysfs_read().
+ * @dv:      device-list entry for the replacement; dv->used holds the raid
+ *           disk number recorded when this entry was paired with a
+ *           --replace device.
+ * @rdev:    device number (major/minor) of the replacement device.
+ * @verbose: a confirmation message is printed when this is >= 0.
+ * @devname: name of the array, used only in messages.
+ *
+ * The device must already be listed for the array, must not be faulty and
+ * must not currently hold a raid disk number.  Its sysfs "slot" attribute
+ * is then set to dv->used.
+ *
+ * Returns 1 on success, -1 on any failure.
+ */
+int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
+		unsigned long rdev, int verbose, char *devname)
+{
+	struct mdinfo *mdi, *di;
+	int rv = -1;
+
+	/* try to set 'slot' for 'rdev' in 'fd' to 'dv->used' */
+	mdi = sysfs_read(fd, -1, GET_DEVS|GET_STATE);
+	if (!mdi || !mdi->devs) {
+		pr_err("Cannot find status of %s to enable replacement - strange\n",
+		       devname);
+		goto out;
+	}
+	for (di = mdi->devs; di; di = di->next)
+		if (di->disk.major == (int)major(rdev) &&
+		    di->disk.minor == (int)minor(rdev))
+			break;
+	if (!di) {
+		pr_err("%s not found in %s so cannot make it preferred replacement\n",
+		       dv->devname, devname);
+		goto out;
+	}
+	if (di->disk.state & (1<<MD_DISK_FAULTY)) {
+		pr_err("%s is faulty and cannot be a replacement\n",
+		       dv->devname);
+		goto out;
+	}
+	if (di->disk.raid_disk >= 0) {
+		pr_err("%s is active and cannot be a replacement\n",
+		       dv->devname);
+		goto out;
+	}
+	if (sysfs_set_num(mdi, di, "slot", dv->used)) {
+		pr_err("Failed to set %s as preferred replacement.\n",
+		       dv->devname);
+		goto out;
+	}
+	if (verbose >= 0)
+		pr_err("Marked %s in %s as replacement for device %d\n",
+		       dv->devname, devname, dv->used);
+	rv = 1;
+out:
+	/* Single exit so the sysfs snapshot is released on every path,
+	 * including success and the "no member devices" case.
+	 */
+	if (mdi)
+		sysfs_free(mdi);
+	return rv;
+}
+
int Manage_subdevs(char *devname, int fd,
struct mddev_dev *devlist, int verbose, int test,
char *update, int force)
@@ -970,6 +1075,16 @@ int Manage_subdevs(char *devname, int fd,
* 'f' - set the device faulty SET_DISK_FAULTY
* device can be 'detached' in which case any device that
* is inaccessible will be marked faulty.
+ * 'R' - mark this device as wanting replacement.
+ * 'W' - this device is added if necessary and activated as
+ * a replacement for a previous 'R' device.
+ * -----
+ * 'w' - 'W' will be changed to 'w' when it is paired with
+ * a 'R' device. If a 'W' is found while walking the list
+ * it must be unpaired, and is an error.
+ * 'M' - this is created by a 'missing' target. It is a slight
+ * variant on 'A'
+ *
* For 'f' and 'r', the device can also be a kernel-internal
* name such as 'sdb'.
*/
@@ -1209,6 +1324,38 @@ int Manage_subdevs(char *devname, int fd,
pr_err("set %s faulty in %s\n",
dv->devname, devname);
break;
+ case 'R': /* Mark as replaceable */
+ if (subarray) {
+ pr_err("Cannot replace disks in a"
+ " \'member\' array, perform this"
+ " operation on the parent container\n");
+ rv = -1;
+ } else {
+ if (!frozen) {
+ if (sysfs_freeze_array(&info) == 1)
+ frozen = 1;
+ else
+ frozen = -1;
+ }
+ rv = Manage_replace(tst, fd, dv,
+ stb.st_rdev, verbose,
+ devname);
+ }
+ if (rv < 0)
+ goto abort;
+ if (rv > 0)
+ count++;
+ break;
+ case 'W': /* --with device that doesn't match */
+ pr_err("No matching --replace device for --with %s\n",
+ dv->devname);
+ goto abort;
+ case 'w': /* --with device which was matched */
+ rv = Manage_with(tst, fd, dv,
+ stb.st_rdev, verbose, devname);
+ if (rv < 0)
+ goto abort;
+ break;
}
}
if (frozen > 0)
diff --git a/ReadMe.c b/ReadMe.c
index 2fde2ac5..0aa8cbd7 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -146,6 +146,8 @@ struct option long_options[] = {
{"remove", 0, 0, Remove},
{"fail", 0, 0, Fail},
{"set-faulty",0, 0, Fail},
+ {"replace", 0, 0, Replace},
+ {"with", 0, 0, With},
{"run", 0, 0, 'R'},
{"stop", 0, 0, 'S'},
{"readonly", 0, 0, 'o'},
@@ -309,6 +311,7 @@ char OptionHelp[] =
" --remove -r : remove subsequent devices\n"
" --fail -f : mark subsequent devices as faulty\n"
" --set-faulty : same as --fail\n"
+" --replace : mark a device for replacement\n"
" --run -R : start a partially built array\n"
" --stop -S : deactivate array, releasing all resources\n"
" --readonly -o : mark array as readonly\n"
@@ -462,6 +465,10 @@ char Help_manage[] =
" --remove -r : remove subsequent devices, which must not be active\n"
" --fail -f : mark subsequent devices a faulty\n"
" --set-faulty : same as --fail\n"
+" --replace : mark device(s) to be replaced by spares. Once\n"
+" : replacement completes, device will be marked faulty\n"
+" --with : Indicate which spare a previous '--replace' should\n"
+" : prefer to use\n"
" --run -R : start a partially built array\n"
" --stop -S : deactivate array, releasing all resources\n"
" --readonly -o : mark array as readonly\n"
diff --git a/mdadm.8.in b/mdadm.8.in
index d30963b6..23e31228 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -216,8 +216,9 @@ to detect and assemble arrays \(em possibly in an
If a device is given before any options, or if the first option is
.BR \-\-add ,
.BR \-\-fail ,
-or
.BR \-\-remove ,
+or
+.BR \-\-replace ,
then the MANAGE mode is assumed.
Anything other than these will cause the
.B Misc
@@ -1283,7 +1284,7 @@ have already been marked as failed.
.TP
.BR \-f ", " \-\-fail
-mark listed devices as faulty.
+Mark listed devices as faulty.
As well as the name of a device file, the word
.B detached
can be given. This will cause any device that has been detached from
@@ -1295,6 +1296,25 @@ same as
.BR \-\-fail .
.TP
+.B \-\-replace
+Mark listed devices as requiring replacement. As soon as a spare is
+available, it will be rebuilt and will replace the marked device.
+This is similar to marking a device as faulty, but the device remains
+in service during the recovery process to increase resilience against
+multiple failures. When the replacement process finishes, the
+replaced device will be marked as faulty.
+
+.TP
+.B \-\-with
+This can follow a list of
+.B \-\-replace
+devices. The devices listed after
+.B \-\-with
+will be preferentially used to replace the devices listed after
+.BR \-\-replace .
+These devices must already be spare devices in the array.
+
+.TP
.BR \-\-write\-mostly
Subsequent devices that are added or re\-added will have the 'write-mostly'
flag set. This is only valid for RAID1 and means that the 'md' driver
diff --git a/mdadm.c b/mdadm.c
index 42544f1a..11016e7b 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -195,6 +195,8 @@ int main(int argc, char *argv[])
case Add:
case 'r':
case Remove:
+ case Replace:
+ case With:
case 'f':
case Fail:
case ReAdd: /* re-add */
@@ -928,6 +930,18 @@ int main(int argc, char *argv[])
* remove the device */
devmode = 'f';
continue;
+ case O(MANAGE,Replace):
+ /* Mark these devices for replacement */
+ devmode = 'R';
+ continue;
+ case O(MANAGE,With):
+ /* These are the replacements to use */
+ if (devmode != 'R') {
+ pr_err("--with must follow --replace\n");
+ exit(2);
+ }
+ devmode = 'W';
+ continue;
case O(INCREMENTAL,'R'):
case O(MANAGE,'R'):
case O(ASSEMBLE,'R'):
diff --git a/mdadm.h b/mdadm.h
index ac051178..f1352e37 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -312,6 +312,8 @@ enum special_options {
Add,
Remove,
Fail,
+ Replace,
+ With,
MiscOpt,
WaitOpt,
ConfigFile,
@@ -432,7 +434,7 @@ struct mddev_dev {
* Not set for names read from .config
*/
char writemostly; /* 1 for 'set writemostly', 2 for 'clear writemostly' */
- char used; /* set when used */
+ int used; /* set when used */
long long data_offset;
struct mddev_dev *next;
};