summaryrefslogtreecommitdiff
path: root/Manage.c
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de>, 2012-08-13 08:00:21 +1000
committer: NeilBrown <neilb@suse.de>, 2012-08-13 08:00:21 +1000
commit: 38aeaf3af6828fbefb19b6da1c12479578915391 (patch)
tree: b2f79ebdef182402f4d27059f2d8682830cceb2a /Manage.c
parent: abe94694dadc1a889e208bfafdfce7d742e9cbd4 (diff)
Manage_subdevs: split most of 'add' handling into Manage_add.
This makes Manage_subdevs smaller, and makes the error-path handling for Manage_add much cleaner and probably less buggy.

Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'Manage.c')
-rw-r--r-- Manage.c 670
1 files changed, 330 insertions, 340 deletions
diff --git a/Manage.c b/Manage.c
index c80738c5..c1123bd8 100644
--- a/Manage.c
+++ b/Manage.c
@@ -529,6 +529,317 @@ skip_re_add:
return 0;
}
+int Manage_add(int fd, int tfd, struct mddev_dev *dv,
+ struct supertype *tst, mdu_array_info_t *array,
+ int force, int verbose, char *devname,
+ char *update, unsigned long rdev, unsigned long long array_size)
+{ /* Add or re-add dv (already open as tfd) to the array open on fd. Returns positive when the device was added, 0 when it was skipped, negative on error. */
+ unsigned long long ldsize;
+ struct supertype *dev_st;
+ int j;
+ mdu_disk_info_t disc;
+
+ if (!get_dev_size(tfd, dv->devname, &ldsize)) {
+ if (dv->disposition == 'M')
+ return 0; /* disposition M: report a skip to the caller instead of an error */
+ else
+ return -1;
+ }
+
+ if (tst->ss->validate_geometry(
+ tst, array->level, array->layout,
+ array->raid_disks, NULL,
+ ldsize >> 9, NULL, NULL, 0) == 0) {
+ if (!force) {
+ pr_err("%s is larger than %s can "
+ "effectively use.\n"
+ " Add --force is you "
+ "really want to add this device.\n",
+ dv->devname, devname);
+ return -1;
+ }
+ pr_err("%s is larger than %s can "
+ "effectively use.\n"
+ " Adding anyway as --force "
+ "was given.\n",
+ dv->devname, devname);
+ }
+ if (!tst->ss->external &&
+ array->major_version == 0 &&
+ md_get_version(fd)%100 < 2) {
+ if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
+ if (verbose >= 0)
+ pr_err("hot added %s\n",
+ dv->devname);
+ return 1;
+ }
+
+ pr_err("hot add failed for %s: %s\n",
+ dv->devname, strerror(errno));
+ return -1;
+ }
+
+ if (array->not_persistent == 0 || tst->ss->external) {
+
+ /* need to find a sample superblock to copy, and
+ * a spare slot to use.
+ * For 'external' array (well, container based),
+ * We can just load the metadata for the array.
+ */
+ int array_failed;
+ if (tst->sb)
+ /* already loaded */;
+ else if (tst->ss->external) {
+ tst->ss->load_container(tst, fd, NULL);
+ } else for (j = 0; j < tst->max_devs; j++) {
+ char *dev;
+ int dfd;
+ disc.number = j;
+ if (ioctl(fd, GET_DISK_INFO, &disc))
+ continue;
+ if (disc.major==0 && disc.minor==0)
+ continue;
+ if ((disc.state & 4)==0) /* sync */
+ continue;
+ /* Looks like a good device to try */
+ dev = map_dev(disc.major, disc.minor, 1);
+ if (!dev)
+ continue;
+ dfd = dev_open(dev, O_RDONLY);
+ if (dfd < 0)
+ continue;
+ if (tst->ss->load_super(tst, dfd,
+ NULL)) {
+ close(dfd);
+ continue;
+ }
+ close(dfd);
+ break;
+ }
+ /* FIXME this is a bad test to be using */
+ if (!tst->sb && dv->disposition != 'a') {
+ /* we are re-adding a device to a
+ * completely dead array - have to depend
+ * on kernel to check
+ */
+ } else if (!tst->sb) {
+ pr_err("cannot load array metadata from %s\n", devname);
+ return -1;
+ }
+
+ /* Make sure device is large enough */
+ if (tst->ss->avail_size(tst, ldsize/512) <
+ array_size) {
+ if (dv->disposition == 'M')
+ return 0;
+ pr_err("%s not large enough to join array\n",
+ dv->devname);
+ return -1;
+ }
+
+ /* Possibly this device was recently part of
+ * the array and was temporarily removed, and
+ * is now being re-added. If so, we can
+ * simply re-add it.
+ */
+
+ if (array->not_persistent==0) {
+ dev_st = dup_super(tst);
+ dev_st->ss->load_super(dev_st, tfd, NULL);
+ }
+ if (dev_st && dev_st->sb) { /* NOTE(review): dev_st is assigned only when not_persistent==0; on the external non-persistent path it is read here uninitialized */
+ int rv = attempt_re_add(fd, tfd, dv,
+ dev_st, tst,
+ rdev,
+ update, devname,
+ verbose,
+ array);
+ dev_st->ss->free_super(dev_st);
+ if (rv)
+ return rv;
+ }
+ if (dv->disposition == 'M') {
+ if (verbose > 0)
+ pr_err("--re-add for %s to %s is not possible\n",
+ dv->devname, devname);
+ return 0;
+ }
+ if (dv->disposition == 'A') {
+ pr_err("--re-add for %s to %s is not possible\n",
+ dv->devname, devname);
+ return -1;
+ }
+ if (array->active_disks < array->raid_disks) {
+ char *avail = xcalloc(array->raid_disks, 1); /* NOTE(review): no matching free(avail) anywhere in this function */
+ int d;
+ int found = 0;
+
+ for (d = 0; d < MAX_DISKS && found < array->active_disks; d++) {
+ disc.number = d;
+ if (ioctl(fd, GET_DISK_INFO, &disc))
+ continue;
+ if (disc.major == 0 && disc.minor == 0)
+ continue;
+ if (!(disc.state & (1<<MD_DISK_SYNC)))
+ continue;
+ avail[disc.raid_disk] = 1;
+ found++;
+ }
+ array_failed = !enough(array->level, array->raid_disks,
+ array->layout, 1, avail);
+ } else
+ array_failed = 0;
+ if (array_failed) {
+ pr_err("%s has failed so using --add cannot work and might destroy\n",
+ devname);
+ pr_err("data on %s. You should stop the array and re-assemble it.\n",
+ dv->devname);
+ return -1;
+ }
+ } else {
+ /* non-persistent. Must ensure that new drive
+ * is at least array_size big.
+ */
+ if (ldsize/512 < array_size) {
+ pr_err("%s not large enough to join array\n",
+ dv->devname);
+ return -1;
+ }
+ }
+ /* committed to really trying this device now */
+ remove_partitions(tfd);
+
+ /* in 2.6.17 and earlier, version-1 superblocks won't
+ * use the number we write, but will choose a free number.
+ * we must choose the same free number, which requires
+ * starting at 'raid_disks' and counting up
+ */
+ for (j = array->raid_disks; j < tst->max_devs; j++) {
+ disc.number = j;
+ if (ioctl(fd, GET_DISK_INFO, &disc))
+ break;
+ if (disc.major==0 && disc.minor==0)
+ break;
+ if (disc.state & 8) /* removed */
+ break;
+ }
+ disc.major = major(rdev);
+ disc.minor = minor(rdev);
+ disc.number =j;
+ disc.state = 0;
+ if (array->not_persistent==0) {
+ int dfd;
+ if (dv->writemostly == 1)
+ disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+ dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT); /* NOTE(review): dfd is neither checked nor closed in this function - confirm add_to_super/free_super take ownership */
+ if (tst->ss->add_to_super(tst, &disc, dfd,
+ dv->devname))
+ return -1;
+ if (tst->ss->write_init_super(tst))
+ return -1;
+ } else if (dv->disposition == 'A') {
+ /* this had better be raid1.
+ * As we are "--re-add"ing we must find a spare slot
+ * to fill.
+ */
+ char *used = xcalloc(array->raid_disks, 1);
+ for (j = 0; j < tst->max_devs; j++) {
+ mdu_disk_info_t disc2;
+ disc2.number = j;
+ if (ioctl(fd, GET_DISK_INFO, &disc2))
+ continue;
+ if (disc2.major==0 && disc2.minor==0)
+ continue;
+ if (disc2.state & 8) /* removed */
+ continue;
+ if (disc2.raid_disk < 0)
+ continue;
+ if (disc2.raid_disk > array->raid_disks) /* NOTE(review): this bound lets raid_disk == raid_disks through, which indexes one past the end of used[] below */
+ continue;
+ used[disc2.raid_disk] = 1;
+ }
+ for (j = 0 ; j < array->raid_disks; j++)
+ if (!used[j]) {
+ disc.raid_disk = j;
+ disc.state |= (1<<MD_DISK_SYNC);
+ break;
+ }
+ free(used);
+ }
+ if (dv->writemostly == 1)
+ disc.state |= (1 << MD_DISK_WRITEMOSTLY);
+ if (tst->ss->external) {
+ /* add a disk
+ * to an external metadata container */
+ struct mdinfo new_mdi;
+ struct mdinfo *sra;
+ int container_fd;
+ int devnum = fd2devnum(fd);
+ int dfd;
+
+ container_fd = open_dev_excl(devnum);
+ if (container_fd < 0) {
+ pr_err("add failed for %s:"
+ " could not get exclusive access to container\n",
+ dv->devname);
+ tst->ss->free_super(tst);
+ return -1;
+ }
+
+ dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+ if (mdmon_running(tst->container_dev))
+ tst->update_tail = &tst->updates;
+ if (tst->ss->add_to_super(tst, &disc, dfd,
+ dv->devname)) {
+ close(dfd);
+ close(container_fd);
+ return -1;
+ }
+ if (tst->update_tail)
+ flush_metadata_updates(tst);
+ else
+ tst->ss->sync_metadata(tst);
+
+ sra = sysfs_read(container_fd, -1, 0);
+ if (!sra) {
+ pr_err("add failed for %s: sysfs_read failed\n",
+ dv->devname);
+ close(container_fd);
+ tst->ss->free_super(tst);
+ return -1;
+ }
+ sra->array.level = LEVEL_CONTAINER;
+ /* Need to set data_offset and component_size */
+ tst->ss->getinfo_super(tst, &new_mdi, NULL);
+ new_mdi.disk.major = disc.major;
+ new_mdi.disk.minor = disc.minor;
+ new_mdi.recovery_start = 0;
+ /* Make sure fds are closed as they are O_EXCL which
+ * would block add_disk */
+ tst->ss->free_super(tst);
+ if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
+ pr_err("add new device to external metadata"
+ " failed for %s\n", dv->devname);
+ close(container_fd);
+ sysfs_free(sra);
+ return -1;
+ }
+ ping_monitor_by_id(devnum);
+ sysfs_free(sra);
+ close(container_fd);
+ } else {
+ tst->ss->free_super(tst);
+ if (ioctl(fd, ADD_NEW_DISK, &disc)) {
+ pr_err("add new device failed for %s as %d: %s\n",
+ dv->devname, j, strerror(errno));
+ return -1;
+ }
+ }
+ if (verbose >= 0)
+ pr_err("added %s\n", dv->devname);
+ return 1;
+}
+
int Manage_subdevs(char *devname, int fd,
struct mddev_dev *devlist, int verbose, int test,
char *update, int force)
@@ -549,13 +860,11 @@ int Manage_subdevs(char *devname, int fd,
* name such as 'sdb'.
*/
mdu_array_info_t array;
- mdu_disk_info_t disc;
unsigned long long array_size;
struct mddev_dev *dv;
struct stat stb;
- int j;
int tfd = -1;
- struct supertype *dev_st, *tst;
+ struct supertype *tst;
char *subarray = NULL;
int lfd = -1;
int sysfd = -1;
@@ -586,10 +895,9 @@ int Manage_subdevs(char *devname, int fd,
}
stb.st_rdev = 0;
- for (dv = devlist, j=0 ; dv; dv = dv->next) {
- unsigned long long ldsize;
+ for (dv = devlist; dv; dv = dv->next) {
int err;
- int array_failed;
+ int rv;
if (strcmp(dv->devname, "failed") == 0 ||
strcmp(dv->devname, "faulty") == 0) {
@@ -670,8 +978,6 @@ int Manage_subdevs(char *devname, int fd,
}
}
} else {
- j = 0;
-
tfd = dev_open(dv->devname, O_RDONLY);
if (tfd < 0 && dv->disposition == 'r' &&
lstat(dv->devname, &stb) == 0)
@@ -720,6 +1026,14 @@ int Manage_subdevs(char *devname, int fd,
}
/* Make sure it isn't in use (in 2.6 or later) */
tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
+ if (tfd >= 0) {
+ /* We know no-one else is using it. We'll
+ * need non-exclusive access to add it, so
+ * do that now.
+ */
+ close(tfd);
+ tfd = dev_open(dv->devname, O_RDONLY);
+ }
if (tfd < 0) {
if (dv->disposition == 'M')
continue;
@@ -733,339 +1047,15 @@ int Manage_subdevs(char *devname, int fd,
else
frozen = -1;
}
- if (!get_dev_size(tfd, dv->devname, &ldsize)) {
- close(tfd);
- tfd = -1;
- if (dv->disposition == 'M')
- continue;
- else
- goto abort;
- }
-
- if (tst->ss->validate_geometry(
- tst, array.level, array.layout,
- array.raid_disks, NULL,
- ldsize >> 9, NULL, NULL, 0) == 0) {
- if (!force) {
- pr_err("%s is larger than %s can "
- "effectively use.\n"
- " Add --force is you "
- "really want to add this device.\n",
- dv->devname, devname);
- close(tfd);
- goto abort;
- }
- pr_err("%s is larger than %s can "
- "effectively use.\n"
- " Adding anyway as --force "
- "was given.\n",
- dv->devname, devname);
- }
- if (!tst->ss->external &&
- array.major_version == 0 &&
- md_get_version(fd)%100 < 2) {
- close(tfd);
- tfd = -1;
- if (ioctl(fd, HOT_ADD_DISK,
- (unsigned long)stb.st_rdev)==0) {
- if (verbose >= 0)
- pr_err("hot added %s\n",
- dv->devname);
- continue;
- }
-
- pr_err("hot add failed for %s: %s\n",
- dv->devname, strerror(errno));
+ rv = Manage_add(fd, tfd, dv, tst, &array,
+ force, verbose, devname, update,
+ stb.st_rdev, array_size);
+ close(tfd);
+ tfd = -1;
+ if (rv < 0)
goto abort;
- }
-
- if (array.not_persistent == 0 || tst->ss->external) {
-
- /* need to find a sample superblock to copy, and
- * a spare slot to use.
- * For 'external' array (well, container based),
- * We can just load the metadata for the array.
- */
- if (tst->sb)
- /* already loaded */;
- else if (tst->ss->external) {
- tst->ss->load_container(tst, fd, NULL);
- } else for (j = 0; j < tst->max_devs; j++) {
- char *dev;
- int dfd;
- disc.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc))
- continue;
- if (disc.major==0 && disc.minor==0)
- continue;
- if ((disc.state & 4)==0) /* sync */
- continue;
- /* Looks like a good device to try */
- dev = map_dev(disc.major, disc.minor, 1);
- if (!dev)
- continue;
- dfd = dev_open(dev, O_RDONLY);
- if (dfd < 0)
- continue;
- if (tst->ss->load_super(tst, dfd,
- NULL)) {
- close(dfd);
- continue;
- }
- close(dfd);
- break;
- }
- /* FIXME this is a bad test to be using */
- if (!tst->sb && dv->disposition != 'a') {
- /* we are re-adding a device to a
- * completely dead array - have to depend
- * on kernel to check
- */
- } else if (!tst->sb) {
- close(tfd);
- pr_err("cannot load array metadata from %s\n", devname);
- goto abort;
- }
-
- /* Make sure device is large enough */
- if (tst->ss->avail_size(tst, ldsize/512) <
- array_size) {
- close(tfd);
- tfd = -1;
- if (dv->disposition == 'M')
- continue;
- pr_err("%s not large enough to join array\n",
- dv->devname);
- goto abort;
- }
-
- /* Possibly this device was recently part of
- * the array and was temporarily removed, and
- * is now being re-added. If so, we can
- * simply re-add it.
- */
-
- if (array.not_persistent==0) {
- dev_st = dup_super(tst);
- dev_st->ss->load_super(dev_st, tfd, NULL);
- }
- if (dev_st && dev_st->sb) {
- int rv = attempt_re_add(fd, tfd, dv,
- dev_st, tst,
- stb.st_rdev,
- update, devname,
- verbose,
- &array);
- dev_st->ss->free_super(dev_st);
- if (rv < 0) {
- /* Bad failure */
- close(tfd);
- goto abort;
- }
- if (rv > 0) {
- /* success! */
- close(tfd);
- tfd = -1;
- count++;
- continue;
- }
- }
- if (dv->disposition == 'M') {
- if (verbose > 0)
- pr_err("--re-add for %s to %s is not possible\n",
- dv->devname, devname);
- if (tfd >= 0) {
- close(tfd);
- tfd = -1;
- }
- continue;
- }
- if (dv->disposition == 'A') {
- if (tfd >= 0)
- close(tfd);
- pr_err("--re-add for %s to %s is not possible\n",
- dv->devname, devname);
- goto abort;
- }
- if (array.active_disks < array.raid_disks) {
- char *avail = xcalloc(array.raid_disks, 1);
- int d;
- int found = 0;
-
- for (d = 0; d < MAX_DISKS && found < array.active_disks; d++) {
- disc.number = d;
- if (ioctl(fd, GET_DISK_INFO, &disc))
- continue;
- if (disc.major == 0 && disc.minor == 0)
- continue;
- if (!(disc.state & (1<<MD_DISK_SYNC)))
- continue;
- avail[disc.raid_disk] = 1;
- found++;
- }
- array_failed = !enough(array.level, array.raid_disks,
- array.layout, 1, avail);
- } else
- array_failed = 0;
- if (array_failed) {
- pr_err("%s has failed so using --add cannot work and might destroy\n",
- devname);
- pr_err("data on %s. You should stop the array and re-assemble it.\n",
- dv->devname);
- if (tfd >= 0)
- close(tfd);
- goto abort;
- }
- } else {
- /* non-persistent. Must ensure that new drive
- * is at least array.size big.
- */
- if (ldsize/512 < array_size) {
- pr_err("%s not large enough to join array\n",
- dv->devname);
- if (tfd >= 0)
- close(tfd);
- goto abort;
- }
- }
- /* committed to really trying this device now*/
- if (tfd >= 0) {
- remove_partitions(tfd);
- close(tfd);
- tfd = -1;
- }
- /* in 2.6.17 and earlier, version-1 superblocks won't
- * use the number we write, but will choose a free number.
- * we must choose the same free number, which requires
- * starting at 'raid_disks' and counting up
- */
- for (j = array.raid_disks; j < tst->max_devs; j++) {
- disc.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc))
- break;
- if (disc.major==0 && disc.minor==0)
- break;
- if (disc.state & 8) /* removed */
- break;
- }
- disc.major = major(stb.st_rdev);
- disc.minor = minor(stb.st_rdev);
- disc.number =j;
- disc.state = 0;
- if (array.not_persistent==0) {
- int dfd;
- if (dv->writemostly == 1)
- disc.state |= 1 << MD_DISK_WRITEMOSTLY;
- dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
- if (tst->ss->add_to_super(tst, &disc, dfd,
- dv->devname)) {
- close(dfd);
- goto abort;
- }
- if (tst->ss->write_init_super(tst)) {
- close(dfd);
- goto abort;
- }
- } else if (dv->disposition == 'A') {
- /* this had better be raid1.
- * As we are "--re-add"ing we must find a spare slot
- * to fill.
- */
- char *used = xcalloc(array.raid_disks, 1);
- for (j = 0; j < tst->max_devs; j++) {
- mdu_disk_info_t disc2;
- disc2.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc2))
- continue;
- if (disc2.major==0 && disc2.minor==0)
- continue;
- if (disc2.state & 8) /* removed */
- continue;
- if (disc2.raid_disk < 0)
- continue;
- if (disc2.raid_disk > array.raid_disks)
- continue;
- used[disc2.raid_disk] = 1;
- }
- for (j = 0 ; j < array.raid_disks; j++)
- if (!used[j]) {
- disc.raid_disk = j;
- disc.state |= (1<<MD_DISK_SYNC);
- break;
- }
- free(used);
- }
- if (dv->writemostly == 1)
- disc.state |= (1 << MD_DISK_WRITEMOSTLY);
- if (tst->ss->external) {
- /* add a disk
- * to an external metadata container */
- struct mdinfo new_mdi;
- struct mdinfo *sra;
- int container_fd;
- int devnum = fd2devnum(fd);
- int dfd;
-
- container_fd = open_dev_excl(devnum);
- if (container_fd < 0) {
- pr_err("add failed for %s:"
- " could not get exclusive access to container\n",
- dv->devname);
- tst->ss->free_super(tst);
- goto abort;
- }
-
- dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
- if (mdmon_running(tst->container_dev))
- tst->update_tail = &tst->updates;
- if (tst->ss->add_to_super(tst, &disc, dfd,
- dv->devname)) {
- close(dfd);
- close(container_fd);
- goto abort;
- }
- if (tst->update_tail)
- flush_metadata_updates(tst);
- else
- tst->ss->sync_metadata(tst);
-
- sra = sysfs_read(container_fd, -1, 0);
- if (!sra) {
- pr_err("add failed for %s: sysfs_read failed\n",
- dv->devname);
- close(container_fd);
- tst->ss->free_super(tst);
- goto abort;
- }
- sra->array.level = LEVEL_CONTAINER;
- /* Need to set data_offset and component_size */
- tst->ss->getinfo_super(tst, &new_mdi, NULL);
- new_mdi.disk.major = disc.major;
- new_mdi.disk.minor = disc.minor;
- new_mdi.recovery_start = 0;
- /* Make sure fds are closed as they are O_EXCL which
- * would block add_disk */
- tst->ss->free_super(tst);
- if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
- pr_err("add new device to external metadata"
- " failed for %s\n", dv->devname);
- close(container_fd);
- sysfs_free(sra);
- goto abort;
- }
- ping_monitor_by_id(devnum);
- sysfs_free(sra);
- close(container_fd);
- } else {
- tst->ss->free_super(tst);
- if (ioctl(fd, ADD_NEW_DISK, &disc)) {
- pr_err("add new device failed for %s as %d: %s\n",
- dv->devname, j, strerror(errno));
- goto abort;
- }
- }
- if (verbose >= 0)
- pr_err("added %s\n", dv->devname);
+ if (rv > 0)
+ count++;
break;
case 'r':