summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ANNOUNCE-4.1-rc115
-rw-r--r--Assemble.c356
-rw-r--r--Build.c219
-rw-r--r--Create.c169
-rw-r--r--Detail.c446
-rw-r--r--Examine.c75
-rw-r--r--[-rwxr-xr-x]Grow.c892
-rw-r--r--Incremental.c226
-rw-r--r--Kill.c2
-rw-r--r--Makefile72
-rw-r--r--Manage.c147
-rw-r--r--Monitor.c395
-rw-r--r--Query.c67
-rw-r--r--README.initramfs9
-rw-r--r--ReadMe.c105
-rw-r--r--bitmap.c7
-rw-r--r--bitmap.h4
-rw-r--r--clustermd_tests/00r10_Create50
-rw-r--r--clustermd_tests/00r1_Create50
-rw-r--r--clustermd_tests/01r10_Grow_bitmap-switch51
-rw-r--r--clustermd_tests/01r10_Grow_resize38
-rw-r--r--clustermd_tests/01r1_Grow_add68
-rw-r--r--clustermd_tests/01r1_Grow_bitmap-switch51
-rw-r--r--clustermd_tests/01r1_Grow_resize23
-rw-r--r--clustermd_tests/02r10_Manage_add33
-rw-r--r--clustermd_tests/02r10_Manage_add-spare30
-rw-r--r--clustermd_tests/02r10_Manage_re-add18
-rw-r--r--clustermd_tests/02r1_Manage_add33
-rw-r--r--clustermd_tests/02r1_Manage_add-spare30
-rw-r--r--clustermd_tests/02r1_Manage_re-add18
-rw-r--r--clustermd_tests/03r10_switch-recovery21
-rw-r--r--clustermd_tests/03r10_switch-resync18
-rw-r--r--clustermd_tests/03r1_switch-recovery21
-rw-r--r--clustermd_tests/03r1_switch-resync18
-rw-r--r--clustermd_tests/cluster_conf43
-rw-r--r--clustermd_tests/func.sh332
-rw-r--r--config.c4
-rw-r--r--debian/changelog6
-rwxr-xr-xinventory35
-rw-r--r--kernel-patch-2.6.1835
-rw-r--r--kernel-patch-2.6.18.635
-rw-r--r--kernel-patch-2.6.1934
-rw-r--r--kernel-patch-2.6.25199
-rw-r--r--kernel-patch-2.6.2736
-rw-r--r--lib.c34
-rwxr-xr-xmakedist6
-rw-r--r--managemon.c66
-rw-r--r--mapfile.c4
-rw-r--r--maps.c40
-rw-r--r--md.41
-rw-r--r--md_p.h25
-rw-r--r--md_u.h7
-rw-r--r--mdadm.8.in142
-rw-r--r--mdadm.c225
-rw-r--r--mdadm.conf.52
-rw-r--r--mdadm.h103
-rw-r--r--mdadm.spec4
-rw-r--r--mdassemble.865
-rw-r--r--mdassemble.c80
-rw-r--r--mdmon.82
-rw-r--r--mdmon.c9
-rw-r--r--mdmon.h1
-rw-r--r--mdopen.c92
-rw-r--r--mdstat.c49
-rw-r--r--misc/mdcheck11
-rw-r--r--monitor.c4
-rw-r--r--msg.c2
-rw-r--r--platform-intel.c4
-rw-r--r--policy.c9
-rw-r--r--raid6check.c2
-rw-r--r--restripe.c12
-rw-r--r--sg_io.c3
-rw-r--r--sha1.h8
-rw-r--r--super-ddf.c126
-rw-r--r--super-gpt.c8
-rw-r--r--super-intel.c850
-rw-r--r--super-mbr.c10
-rw-r--r--super0.c33
-rw-r--r--super1.c666
-rw-r--r--sysfs.c92
-rw-r--r--systemd/mdadm-last-resort@.service4
-rw-r--r--systemd/mdadm-last-resort@.timer2
-rw-r--r--systemd/mdmonitor.service5
-rwxr-xr-xtest651
-rw-r--r--tests/00raid14
-rw-r--r--tests/00readonly22
-rw-r--r--tests/02r5grow17
-rw-r--r--tests/check35
-rw-r--r--tests/func.sh360
-rw-r--r--tests/testdev13
-rw-r--r--udev-md-clustered-confirm-device.rules21
-rw-r--r--udev-md-raid-arrays.rules2
-rw-r--r--udev-md-raid-assembly.rules5
-rw-r--r--udev-md-raid-creating.rules7
-rw-r--r--udev-md-raid-safe-timeouts.rules61
-rw-r--r--util.c497
96 files changed, 5621 insertions, 3418 deletions
diff --git a/ANNOUNCE-4.1-rc1 b/ANNOUNCE-4.1-rc1
new file mode 100644
index 0000000..54e2beb
--- /dev/null
+++ b/ANNOUNCE-4.1-rc1
@@ -0,0 +1,15 @@
+Subject: ANNOUNCE: mdadm 4.1 - A tool for managing md Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 4.1
+
+It is available at the usual places:
+ http://www.kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://git.kernel.org/pub/scm/utils/mdadm/mdadm.git
+ http://git.kernel.org/cgit/utils/mdadm/
+
+This update comprises more than one year of enhancements and bug fixes,
+including IMSM RAID, Partial Parity Log, and clustered RAID support.
+
+Jes Sorensen, 2018-xx-xx
diff --git a/Assemble.c b/Assemble.c
index 3da0903..e83d550 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -149,6 +149,7 @@ static int select_devices(struct mddev_dev *devlist,
struct mdinfo *content = NULL;
int report_mismatch = ((inargv && c->verbose >= 0) || c->verbose > 0);
struct domainlist *domains = NULL;
+ dev_t rdev;
tmpdev = devlist; num_devs = 0;
while (tmpdev) {
@@ -169,7 +170,6 @@ static int select_devices(struct mddev_dev *devlist,
tmpdev = tmpdev ? tmpdev->next : NULL) {
char *devname = tmpdev->devname;
int dfd;
- struct stat stb;
struct supertype *tst;
struct dev_policy *pol = NULL;
int found_container = 0;
@@ -204,14 +204,7 @@ static int select_devices(struct mddev_dev *devlist,
pr_err("cannot open device %s: %s\n",
devname, strerror(errno));
tmpdev->used = 2;
- } else if (fstat(dfd, &stb)< 0) {
- /* Impossible! */
- pr_err("fstat failed for %s: %s\n",
- devname, strerror(errno));
- tmpdev->used = 2;
- } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
- pr_err("%s is not a block device.\n",
- devname);
+ } else if (!fstat_is_blkdev(dfd, devname, &rdev)) {
tmpdev->used = 2;
} else if (must_be_container(dfd)) {
if (st) {
@@ -222,21 +215,20 @@ static int select_devices(struct mddev_dev *devlist,
pr_err("%s is a container, but we are looking for components\n",
devname);
tmpdev->used = 2;
-#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
} if (!tst && (tst = super_by_fd(dfd, NULL)) == NULL) {
if (report_mismatch)
pr_err("not a recognisable container: %s\n",
devname);
tmpdev->used = 2;
-#endif
- } else if (!tst->ss->load_container
- || tst->ss->load_container(tst, dfd, NULL)) {
+ } else if (!tst->ss->load_container ||
+ tst->ss->load_container(tst, dfd, NULL)) {
if (report_mismatch)
pr_err("no correct container type: %s\n",
devname);
tmpdev->used = 2;
} else if (auto_assem &&
- !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)),
+ !conf_test_metadata(tst->ss->name,
+ (pol = devid_policy(rdev)),
tst->ss->match_home(tst, c->homehost) == 1)) {
if (report_mismatch)
pr_err("%s has metadata type %s for which auto-assembly is disabled\n",
@@ -263,7 +255,8 @@ static int select_devices(struct mddev_dev *devlist,
tst->ss->name, devname);
tmpdev->used = 2;
} else if (auto_assem && st == NULL &&
- !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)),
+ !conf_test_metadata(tst->ss->name,
+ (pol = devid_policy(rdev)),
tst->ss->match_home(tst, c->homehost) == 1)) {
if (report_mismatch)
pr_err("%s has metadata type %s for which auto-assembly is disabled\n",
@@ -486,7 +479,7 @@ static int select_devices(struct mddev_dev *devlist,
/* Collect domain information from members only */
if (tmpdev && tmpdev->used == 1) {
if (!pol)
- pol = devid_policy(stb.st_rdev);
+ pol = devid_policy(rdev);
domain_merge(&domains, pol, tst?tst->ss->name:NULL);
}
dev_policy_free(pol);
@@ -519,15 +512,12 @@ static int select_devices(struct mddev_dev *devlist,
/* Now reject spares that don't match domains of identified members */
for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
- struct stat stb;
if (tmpdev->used != 3)
continue;
- if (stat(tmpdev->devname, &stb)< 0) {
- pr_err("fstat failed for %s: %s\n",
- tmpdev->devname, strerror(errno));
+ if (!stat_is_blkdev(tmpdev->devname, &rdev)) {
tmpdev->used = 2;
} else {
- struct dev_policy *pol = devid_policy(stb.st_rdev);
+ struct dev_policy *pol = devid_policy(rdev);
int dt = domain_test(domains, pol, NULL);
if (inargv && dt != 0)
/* take this spare as domains match
@@ -574,9 +564,7 @@ static int load_devices(struct devs *devices, char *devmap,
struct mddev_dev *tmpdev;
int devcnt = 0;
int nextspare = 0;
-#ifndef MDASSEMBLE
int bitmap_done = 0;
-#endif
int most_recent = -1;
int bestcnt = 0;
int *best = *bestp;
@@ -592,7 +580,6 @@ static int load_devices(struct devs *devices, char *devmap,
if (tmpdev->used != 1)
continue;
/* looks like a good enough match to update the super block if needed */
-#ifndef MDASSEMBLE
if (c->update) {
/* prepare useful information in info structures */
struct stat stb2;
@@ -602,6 +589,12 @@ static int load_devices(struct devs *devices, char *devmap,
if (strcmp(c->update, "uuid") == 0 && !ident->uuid_set)
random_uuid((__u8 *)ident->uuid);
+ if (strcmp(c->update, "ppl") == 0 &&
+ ident->bitmap_fd >= 0) {
+ pr_err("PPL is not compatible with bitmap\n");
+ return -1;
+ }
+
dfd = dev_open(devname,
tmpdev->disposition == 'I'
? O_RDWR : (O_RDWR|O_EXCL));
@@ -677,9 +670,7 @@ static int load_devices(struct devs *devices, char *devmap,
else
bitmap_done = 1;
}
- } else
-#endif
- {
+ } else {
dfd = dev_open(devname,
tmpdev->disposition == 'I'
? O_RDWR : (O_RDWR|O_EXCL));
@@ -762,12 +753,12 @@ static int load_devices(struct devs *devices, char *devmap,
bestcnt = newbestcnt;
}
if (best[i] >=0 &&
- devices[best[i]].i.events
- == devices[devcnt].i.events
- && (devices[best[i]].i.disk.minor
- != devices[devcnt].i.disk.minor)
- && st->ss == &super0
- && content->array.level != LEVEL_MULTIPATH) {
+ devices[best[i]].i.events ==
+ devices[devcnt].i.events &&
+ (devices[best[i]].i.disk.minor !=
+ devices[devcnt].i.disk.minor) &&
+ st->ss == &super0 &&
+ content->array.level != LEVEL_MULTIPATH) {
/* two different devices with identical superblock.
* Could be a mis-detection caused by overlapping
* partitions. fail-safe.
@@ -785,9 +776,8 @@ static int load_devices(struct devs *devices, char *devmap,
*stp = st;
return -1;
}
- if (best[i] == -1
- || (devices[best[i]].i.events
- < devices[devcnt].i.events))
+ if (best[i] == -1 || (devices[best[i]].i.events
+ < devices[devcnt].i.events))
best[i] = devcnt;
}
devcnt++;
@@ -810,14 +800,11 @@ static int force_array(struct mdinfo *content,
int okcnt = 0;
while (!enough(content->array.level, content->array.raid_disks,
content->array.layout, 1,
- avail)
- ||
+ avail) ||
(content->reshape_active && content->delta_disks > 0 &&
!enough(content->array.level, (content->array.raid_disks
- content->delta_disks),
- content->new_layout, 1,
- avail)
- )) {
+ content->new_layout, 1, avail))) {
/* Choose the newest best drive which is
* not up-to-date, update the superblock
* and add it.
@@ -859,7 +846,19 @@ static int force_array(struct mdinfo *content,
/* OK */;
else
continue;
- }
+ } else if (devices[j].i.reshape_active !=
+ content->reshape_active ||
+ (devices[j].i.reshape_active &&
+ devices[j].i.reshape_progress !=
+ content->reshape_progress))
+ /* Here, it may be a source of data. If two
+ * devices claim different reshape progress, it
+ * means that the reshape boundaries differ
+ * between those devices. The kernel will only
+ * treat the first one as the reshape progress and
+ * go on. That may cause disaster, so avoid it.
+ */
+ continue;
if (chosen_drive < 0 ||
devices[j].i.events
> devices[chosen_drive].i.events)
@@ -921,7 +920,13 @@ static int force_array(struct mdinfo *content,
if (j >= 0 &&
!devices[j].uptodate &&
devices[j].i.recovery_start == MaxSector &&
- devices[j].i.events == current_events) {
+ devices[j].i.events == current_events &&
+ ((!devices[j].i.reshape_active &&
+ !content->reshape_active) ||
+ (devices[j].i.reshape_active ==
+ content->reshape_active &&
+ devices[j].i.reshape_progress ==
+ content->reshape_progress))) {
chosen_drive = j;
goto add_another;
}
@@ -962,6 +967,9 @@ static int start_array(int mdfd,
c->readonly = 1;
}
+ if (content->consistency_policy == CONSISTENCY_POLICY_PPL)
+ clean = 1;
+
rv = set_array_info(mdfd, st, content);
if (rv && !err_ok) {
pr_err("failed to set array info for %s: %s\n",
@@ -990,7 +998,7 @@ static int start_array(int mdfd,
}
/* First, add the raid disks, but add the chosen one last */
- for (i=0; i<= bestcnt; i++) {
+ for (i = 0; i <= bestcnt; i++) {
int j;
if (i < bestcnt) {
j = best[i];
@@ -1000,8 +1008,9 @@ static int start_array(int mdfd,
j = chosen_drive;
if (j >= 0 && !devices[j].included) {
- int dfd = dev_open(devices[j].devname,
- O_RDWR|O_EXCL);
+ int dfd;
+
+ dfd = dev_open(devices[j].devname, O_RDWR|O_EXCL);
if (dfd >= 0) {
remove_partitions(dfd);
close(dfd);
@@ -1010,28 +1019,30 @@ static int start_array(int mdfd,
if (rv) {
pr_err("failed to add %s to %s: %s\n",
- devices[j].devname,
- mddev,
+ devices[j].devname, mddev,
strerror(errno));
- if (i < content->array.raid_disks * 2
- || i == bestcnt)
+ if (i < content->array.raid_disks * 2 ||
+ i == bestcnt)
okcnt--;
else
sparecnt--;
- } else if (c->verbose > 0)
+ } else if (c->verbose > 0) {
pr_err("added %s to %s as %d%s%s\n",
devices[j].devname, mddev,
devices[j].i.disk.raid_disk,
devices[j].uptodate?"":
" (possibly out of date)",
- (devices[j].i.disk.state & (1<<MD_DISK_REPLACEMENT))?" replacement":"");
+ (devices[j].i.disk.state &
+ (1<<MD_DISK_REPLACEMENT)) ?
+ " replacement":"");
+ }
} else if (j >= 0) {
if (c->verbose > 0)
pr_err("%s is already in %s as %d\n",
devices[j].devname, mddev,
devices[j].i.disk.raid_disk);
- } else if (c->verbose > 0 && i < content->array.raid_disks*2
- && (i&1) == 0)
+ } else if (c->verbose > 0 &&
+ i < content->array.raid_disks * 2 && (i & 1) == 0)
pr_err("no uptodate device for slot %d of %s\n",
i/2, mddev);
}
@@ -1039,8 +1050,8 @@ static int start_array(int mdfd,
if (content->array.level == LEVEL_CONTAINER) {
if (c->verbose >= 0) {
pr_err("Container %s has been assembled with %d drive%s",
- mddev, okcnt+sparecnt+journalcnt,
- okcnt+sparecnt+journalcnt==1?"":"s");
+ mddev, okcnt + sparecnt + journalcnt,
+ okcnt + sparecnt + journalcnt == 1 ? "" : "s");
if (okcnt < (unsigned)content->array.raid_disks)
fprintf(stderr, " (out of %d)",
content->array.raid_disks);
@@ -1049,10 +1060,13 @@ static int start_array(int mdfd,
if (st->ss->validate_container) {
struct mdinfo *devices_list;
- struct mdinfo *info_devices = xmalloc(sizeof(struct mdinfo)*(okcnt+sparecnt));
+ struct mdinfo *info_devices;
unsigned int count;
+
devices_list = NULL;
- for (count = 0; count < okcnt+sparecnt; count++) {
+ info_devices = xmalloc(sizeof(struct mdinfo) *
+ (okcnt + sparecnt));
+ for (count = 0; count < okcnt + sparecnt; count++) {
info_devices[count] = devices[count].i;
info_devices[count].next = devices_list;
devices_list = &info_devices[count];
@@ -1078,17 +1092,16 @@ static int start_array(int mdfd,
if (c->runstop == 1 ||
(c->runstop <= 0 &&
- ( enough(content->array.level, content->array.raid_disks,
- content->array.layout, clean, avail) &&
- (okcnt + rebuilding_cnt >= req_cnt || start_partial_ok)
- ))) {
+ (enough(content->array.level, content->array.raid_disks,
+ content->array.layout, clean, avail) &&
+ (okcnt + rebuilding_cnt >= req_cnt || start_partial_ok)))) {
/* This array is good-to-go.
* If a reshape is in progress then we might need to
* continue monitoring it. In that case we start
* it read-only and let the grow code make it writable.
*/
int rv;
-#ifndef MDASSEMBLE
+
if (content->reshape_active &&
!(content->reshape_active & RESHAPE_NO_BACKUP) &&
content->delta_disks <= 0) {
@@ -1108,12 +1121,11 @@ static int start_array(int mdfd,
c->backup_file, 0,
c->freeze_reshape);
} else if (c->readonly &&
- sysfs_attribute_available(
- content, NULL, "array_state")) {
+ sysfs_attribute_available(content, NULL,
+ "array_state")) {
rv = sysfs_set_str(content, NULL,
"array_state", "readonly");
} else
-#endif
rv = ioctl(mdfd, RUN_ARRAY, NULL);
reopen_mddev(mdfd); /* drop O_EXCL */
if (rv == 0) {
@@ -1121,13 +1133,19 @@ static int start_array(int mdfd,
pr_err("%s has been started with %d drive%s",
mddev, okcnt, okcnt==1?"":"s");
if (okcnt < (unsigned)content->array.raid_disks)
- fprintf(stderr, " (out of %d)", content->array.raid_disks);
+ fprintf(stderr, " (out of %d)",
+ content->array.raid_disks);
if (rebuilding_cnt)
- fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt);
+ fprintf(stderr, "%s %d rebuilding",
+ sparecnt?",":" and",
+ rebuilding_cnt);
if (sparecnt)
- fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
+ fprintf(stderr, " and %d spare%s",
+ sparecnt,
+ sparecnt == 1 ? "" : "s");
if (content->journal_clean)
- fprintf(stderr, " and %d journal", journalcnt);
+ fprintf(stderr, " and %d journal",
+ journalcnt);
fprintf(stderr, ".\n");
}
if (content->reshape_active &&
@@ -1137,11 +1155,14 @@ static int start_array(int mdfd,
* of the stripe cache - default is 256
*/
int chunk_size = content->array.chunk_size;
+
if (content->reshape_active &&
content->new_chunk > chunk_size)
chunk_size = content->new_chunk;
if (256 < 4 * ((chunk_size+4065)/4096)) {
- struct mdinfo *sra = sysfs_read(mdfd, NULL, 0);
+ struct mdinfo *sra;
+
+ sra = sysfs_read(mdfd, NULL, 0);
if (sra)
sysfs_set_num(sra, NULL,
"stripe_cache_size",
@@ -1174,7 +1195,9 @@ static int start_array(int mdfd,
if (content->array.level == 6 &&
okcnt + 1 == (unsigned)content->array.raid_disks &&
was_forced) {
- struct mdinfo *sra = sysfs_read(mdfd, NULL, 0);
+ struct mdinfo *sra;
+
+ sra = sysfs_read(mdfd, NULL, 0);
if (sra)
sysfs_set_str(sra, NULL,
"sync_action", "repair");
@@ -1182,45 +1205,47 @@ static int start_array(int mdfd,
}
return 0;
}
- pr_err("failed to RUN_ARRAY %s: %s\n",
- mddev, strerror(errno));
+ pr_err("failed to RUN_ARRAY %s: %s\n", mddev, strerror(errno));
if (!enough(content->array.level, content->array.raid_disks,
content->array.layout, 1, avail))
pr_err("Not enough devices to start the array.\n");
else if (!enough(content->array.level,
content->array.raid_disks,
- content->array.layout, clean,
- avail))
+ content->array.layout, clean, avail))
pr_err("Not enough devices to start the array while not clean - consider --force.\n");
return 1;
}
if (c->runstop == -1) {
pr_err("%s assembled from %d drive%s",
- mddev, okcnt, okcnt==1?"":"s");
+ mddev, okcnt, okcnt == 1 ? "" : "s");
if (okcnt != (unsigned)content->array.raid_disks)
- fprintf(stderr, " (out of %d)", content->array.raid_disks);
+ fprintf(stderr, " (out of %d)",
+ content->array.raid_disks);
fprintf(stderr, ", but not started.\n");
return 2;
}
if (c->verbose >= -1) {
- pr_err("%s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s");
+ pr_err("%s assembled from %d drive%s",
+ mddev, okcnt, okcnt == 1 ? "" : "s");
if (rebuilding_cnt)
- fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt);
+ fprintf(stderr, "%s %d rebuilding",
+ sparecnt ? "," : " and", rebuilding_cnt);
if (sparecnt)
- fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
+ fprintf(stderr, " and %d spare%s", sparecnt,
+ sparecnt == 1 ? "" : "s");
if (!enough(content->array.level, content->array.raid_disks,
content->array.layout, 1, avail))
fprintf(stderr, " - not enough to start the array.\n");
else if (!enough(content->array.level,
content->array.raid_disks,
- content->array.layout, clean,
- avail))
+ content->array.layout, clean, avail))
fprintf(stderr, " - not enough to start the array while not clean - consider --force.\n");
else {
if (req_cnt == (unsigned)content->array.raid_disks)
- fprintf(stderr, " - need all %d to start it", req_cnt);
+ fprintf(stderr, " - need all %d to start it",
+ req_cnt);
else
fprintf(stderr, " - need %d to start", req_cnt);
fprintf(stderr, " (use --run to insist).\n");
@@ -1288,13 +1313,13 @@ int Assemble(struct supertype *st, char *mddev,
* START_ARRAY
*
*/
- int rv;
- int mdfd;
+ int rv = -1;
+ int mdfd = -1;
int clean;
int auto_assem = (mddev == NULL && !ident->uuid_set &&
- ident->super_minor == UnSet && ident->name[0] == 0
- && (ident->container == NULL || ident->member == NULL));
- struct devs *devices;
+ ident->super_minor == UnSet && ident->name[0] == 0 &&
+ (ident->container == NULL || ident->member == NULL));
+ struct devs *devices = NULL;
char *devmap;
int *best = NULL; /* indexed by raid_disk */
int bestcnt = 0;
@@ -1319,6 +1344,9 @@ int Assemble(struct supertype *st, char *mddev,
char chosen_name[1024];
struct map_ent *map = NULL;
struct map_ent *mp;
+ int locked = 0;
+ struct mdp_superblock_1 *sb;
+ bitmap_super_t *bms;
/*
* If any subdevs are listed, then any that don't
@@ -1349,6 +1377,12 @@ try_again:
* set of devices failed. Those are now marked as ->used==2 and
* we ignore them and try again
*/
+ if (locked)
+ /*
+ * If we come back here via try_again, we need to unlock first
+ * and lock again, since the metadata is re-read.
+ */
+ cluster_release_dlmlock();
if (!st && ident->st)
st = ident->st;
if (c->verbose>0)
@@ -1366,6 +1400,14 @@ try_again:
if (!st || !st->sb || !content)
return 2;
+ sb = st->sb;
+ bms = (bitmap_super_t*)(((char*)sb) + 4096);
+ if (sb && bms->version == BITMAP_MAJOR_CLUSTERED) {
+ locked = cluster_get_dlmlock();
+ if (locked != 1)
+ return 1;
+ }
+
/* We have a full set of devices - we now need to find the
* array device.
* However there is a risk that we are racing with "mdadm -I"
@@ -1392,7 +1434,7 @@ try_again:
pr_err("Found some drive for an array that is already active: %s\n",
mp->path);
pr_err("giving up.\n");
- return 1;
+ goto out;
}
for (dv = pre_exist->devs; dv; dv = dv->next) {
/* We want to add this device to our list,
@@ -1459,23 +1501,15 @@ try_again:
name = strchr(name, ':')+1;
mdfd = create_mddev(mddev, name, ident->autof, trustworthy,
- chosen_name);
+ chosen_name, 0);
}
if (mdfd < 0) {
st->ss->free_super(st);
if (auto_assem)
goto try_again;
- return 1;
+ goto out;
}
mddev = chosen_name;
- if (get_linux_version() < 2004000 ||
- md_get_version(mdfd) < 9000) {
- pr_err("Assemble requires Linux 2.4 or later, and\n"
- " md driver version 0.90.0 or later.\n"
- " Upgrade your kernel or try --build\n");
- close(mdfd);
- return 1;
- }
if (pre_exist == NULL) {
if (mddev_busy(fd2devnm(mdfd))) {
pr_err("%s already active, cannot restart it!\n",
@@ -1492,40 +1526,41 @@ try_again:
st->ss->free_super(st);
if (auto_assem)
goto try_again;
- return 1;
+ goto out;
}
/* just incase it was started but has no content */
ioctl(mdfd, STOP_ARRAY, NULL);
}
-#ifndef MDASSEMBLE
if (content != &info) {
/* This is a member of a container. Try starting the array. */
int err;
err = assemble_container_content(st, mdfd, content, c,
chosen_name, NULL);
close(mdfd);
+ if (locked == 1)
+ cluster_release_dlmlock();
return err;
}
-#endif
+
/* Ok, no bad inconsistancy, we can try updating etc */
devices = xcalloc(num_devs, sizeof(*devices));
devmap = xcalloc(num_devs, content->array.raid_disks);
devcnt = load_devices(devices, devmap, ident, &st, devlist,
c, content, mdfd, mddev,
&most_recent, &bestcnt, &best, inargv);
- if (devcnt < 0)
- return 1;
+ if (devcnt < 0) {
+ mdfd = -3;
+ goto out;
+ }
if (devcnt == 0) {
pr_err("no devices found for %s\n",
mddev);
if (st)
st->ss->free_super(st);
- close(mdfd);
- free(devices);
free(devmap);
- return 1;
+ goto out;
}
if (c->update && strcmp(c->update, "byteorder")==0)
@@ -1639,36 +1674,34 @@ try_again:
: (O_RDONLY|O_EXCL)))< 0) {
pr_err("Cannot open %s: %s\n",
devices[j].devname, strerror(errno));
- close(mdfd);
- free(devices);
- return 1;
+ goto out;
}
if (st->ss->load_super(st,fd, NULL)) {
close(fd);
pr_err("RAID superblock has disappeared from %s\n",
devices[j].devname);
- close(mdfd);
- free(devices);
- return 1;
+ goto out;
}
close(fd);
}
if (st->sb == NULL) {
pr_err("No suitable drives found for %s\n", mddev);
- close(mdfd);
- free(devices);
- return 1;
+ goto out;
}
st->ss->getinfo_super(st, content, NULL);
-#ifndef MDASSEMBLE
- sysfs_init(content, mdfd, NULL);
-#endif
+ if (sysfs_init(content, mdfd, NULL)) {
+ pr_err("Unable to initialize sysfs\n");
+ goto out;
+ }
+
/* after reload context, store journal_clean in context */
content->journal_clean = journal_clean;
for (i=0; i<bestcnt; i++) {
int j = best[i];
unsigned int desired_state;
+ if (j < 0)
+ continue;
if (devices[j].i.disk.raid_disk == MD_DISK_ROLE_JOURNAL)
desired_state = (1<<MD_DISK_JOURNAL);
else if (i >= content->array.raid_disks * 2)
@@ -1678,8 +1711,6 @@ try_again:
else
desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
- if (j<0)
- continue;
if (!devices[j].uptodate)
continue;
@@ -1726,17 +1757,13 @@ try_again:
if (fd < 0) {
pr_err("Could not open %s for write - cannot Assemble array.\n",
devices[chosen_drive].devname);
- close(mdfd);
- free(devices);
- return 1;
+ goto out;
}
if (st->ss->store_super(st, fd)) {
close(fd);
pr_err("Could not re-write superblock on %s\n",
devices[chosen_drive].devname);
- close(mdfd);
- free(devices);
- return 1;
+ goto out;
}
if (c->verbose >= 0)
pr_err("Marking array %s as 'clean'\n",
@@ -1748,7 +1775,6 @@ try_again:
* that was moved aside due to the reshape overwriting live data
* The code of doing this lives in Grow.c
*/
-#ifndef MDASSEMBLE
if (content->reshape_active &&
!(content->reshape_active & RESHAPE_NO_BACKUP)) {
int err = 0;
@@ -1795,12 +1821,9 @@ try_again:
pr_err("Failed to restore critical section for reshape, sorry.\n");
if (c->backup_file == NULL)
cont_err("Possibly you needed to specify the --backup-file\n");
- close(mdfd);
- free(devices);
- return err;
+ goto out;
}
}
-#endif
/* Almost ready to actually *do* something */
/* First, fill in the map, so that udev can find our name
@@ -1827,6 +1850,7 @@ try_again:
ioctl(mdfd, STOP_ARRAY, NULL);
free(devices);
map_unlock(&map);
+out:
if (rv == 0) {
wait_for(chosen_name, mdfd);
close(mdfd);
@@ -1856,14 +1880,19 @@ try_again:
usecs <<= 1;
}
}
- } else
+ } else if (mdfd >= 0)
close(mdfd);
/* '2' means 'OK, but not started yet' */
+ if (locked == 1)
+ cluster_release_dlmlock();
+ if (rv == -1) {
+ free(devices);
+ return 1;
+ }
return rv == 2 ? 0 : rv;
}
-#ifndef MDASSEMBLE
int assemble_container_content(struct supertype *st, int mdfd,
struct mdinfo *content, struct context *c,
char *chosen_name, int *result)
@@ -1876,7 +1905,10 @@ int assemble_container_content(struct supertype *st, int mdfd,
char *avail;
int err;
- sysfs_init(content, mdfd, NULL);
+ if (sysfs_init(content, mdfd, NULL)) {
+ pr_err("Unable to initialize sysfs\n");
+ return 1;
+ }
sra = sysfs_read(mdfd, NULL, GET_VERSION|GET_DEVS);
if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0) {
@@ -1885,7 +1917,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
c->readonly &&
content->text_version[0] == '/')
content->text_version[0] = '-';
- if (sysfs_set_array(content, md_get_version(mdfd)) != 0) {
+ if (sysfs_set_array(content, 9003) != 0) {
sysfs_free(sra);
return 1;
}
@@ -1942,6 +1974,55 @@ int assemble_container_content(struct supertype *st, int mdfd,
map_update(NULL, fd2devnm(mdfd), content->text_version,
content->uuid, chosen_name);
+ if (content->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ st->ss->validate_ppl) {
+ content->array.state |= 1;
+ err = 0;
+
+ for (dev = content->devs; dev; dev = dev->next) {
+ int dfd;
+ char *devpath;
+ int ret;
+
+ ret = st->ss->validate_ppl(st, content, dev);
+ if (ret == 0)
+ continue;
+
+ if (ret < 0) {
+ err = 1;
+ break;
+ }
+
+ if (!c->force) {
+ pr_err("%s contains invalid PPL - consider --force or --update-subarray with --update=no-ppl\n",
+ chosen_name);
+ content->array.state &= ~1;
+ avail[dev->disk.raid_disk] = 0;
+ break;
+ }
+
+ /* have --force - overwrite the invalid ppl */
+ devpath = map_dev(dev->disk.major, dev->disk.minor, 0);
+ dfd = dev_open(devpath, O_RDWR);
+ if (dfd < 0) {
+ pr_err("Failed to open %s\n", devpath);
+ err = 1;
+ break;
+ }
+
+ err = st->ss->write_init_ppl(st, content, dfd);
+ close(dfd);
+
+ if (err)
+ break;
+ }
+
+ if (err) {
+ free(avail);
+ return err;
+ }
+ }
+
if (enough(content->array.level, content->array.raid_disks,
content->array.layout, content->array.state & 1, avail) == 0) {
if (c->export && result)
@@ -2054,4 +2135,3 @@ int assemble_container_content(struct supertype *st, int mdfd,
return err;
/* FIXME should have an O_EXCL and wait for read-auto */
}
-#endif
diff --git a/Build.c b/Build.c
index 74a440e..962c2e3 100644
--- a/Build.c
+++ b/Build.c
@@ -24,10 +24,6 @@
#include "mdadm.h"
-#define REGISTER_DEV _IO (MD_MAJOR, 1)
-#define START_MD _IO (MD_MAJOR, 2)
-#define STOP_MD _IO (MD_MAJOR, 3)
-
int Build(char *mddev, struct mddev_dev *devlist,
struct shape *s, struct context *c)
{
@@ -39,14 +35,9 @@ int Build(char *mddev, struct mddev_dev *devlist,
* geometry is 0xpp00cc
* where pp is personality: 1==linear, 2=raid0
* cc = chunk size factor: 0==4k, 1==8k etc.
- *
- * For md_version >= 0.90.0 we call
- * SET_ARRAY_INFO, ADD_NEW_DISK, RUN_ARRAY
- *
*/
int i;
- int vers;
- struct stat stb;
+ dev_t rdev;
int subdevs = 0, missing_disks = 0;
struct mddev_dev *dv;
int bitmap_fd;
@@ -55,7 +46,13 @@ int Build(char *mddev, struct mddev_dev *devlist,
char chosen_name[1024];
int uuid[4] = {0,0,0,0};
struct map_ent *map = NULL;
+ mdu_array_info_t array;
+ mdu_param_t param; /* not used by syscall */
+ if (s->level == UnSet) {
+ pr_err("a RAID level is needed to Build an array.\n");
+ return 1;
+ }
/* scan all devices, make sure they really are block devices */
for (dv = devlist; dv; dv=dv->next) {
subdevs++;
@@ -63,16 +60,8 @@ int Build(char *mddev, struct mddev_dev *devlist,
missing_disks++;
continue;
}
- if (stat(dv->devname, &stb)) {
- pr_err("Cannot find %s: %s\n",
- dv->devname, strerror(errno));
+ if (!stat_is_blkdev(dv->devname, NULL))
return 1;
- }
- if ((stb.st_mode & S_IFMT) != S_IFBLK) {
- pr_err("%s is not a block device.\n",
- dv->devname);
- return 1;
- }
}
if (s->raiddisks != subdevs) {
@@ -108,7 +97,7 @@ int Build(char *mddev, struct mddev_dev *devlist,
/* We need to create the device. It can have no name. */
map_lock(&map);
mdfd = create_mddev(mddev, NULL, c->autof, LOCAL,
- chosen_name);
+ chosen_name, 0);
if (mdfd < 0) {
map_unlock(&map);
return 1;
@@ -118,39 +107,30 @@ int Build(char *mddev, struct mddev_dev *devlist,
map_update(&map, fd2devnm(mdfd), "none", uuid, chosen_name);
map_unlock(&map);
- vers = md_get_version(mdfd);
-
- /* looks Ok, go for it */
- if (vers >= 9000) {
- mdu_array_info_t array;
- array.level = s->level;
- if (s->size == MAX_SIZE)
- s->size = 0;
- array.size = s->size;
- array.nr_disks = s->raiddisks;
- array.raid_disks = s->raiddisks;
- array.md_minor = 0;
- if (fstat(mdfd, &stb)==0)
- array.md_minor = minor(stb.st_rdev);
- array.not_persistent = 1;
- array.state = 0; /* not clean, but no errors */
- if (s->assume_clean)
- array.state |= 1;
- array.active_disks = s->raiddisks - missing_disks;
- array.working_disks = s->raiddisks - missing_disks;
- array.spare_disks = 0;
- array.failed_disks = missing_disks;
- if (s->chunk == 0 && (s->level==0 || s->level==LEVEL_LINEAR))
- s->chunk = 64;
- array.chunk_size = s->chunk*1024;
- array.layout = s->layout;
- if (ioctl(mdfd, SET_ARRAY_INFO, &array)) {
- pr_err("SET_ARRAY_INFO failed for %s: %s\n",
- mddev, strerror(errno));
- goto abort;
- }
- } else if (s->bitmap_file) {
- pr_err("bitmaps not supported with this kernel\n");
+ array.level = s->level;
+ if (s->size == MAX_SIZE)
+ s->size = 0;
+ array.size = s->size;
+ array.nr_disks = s->raiddisks;
+ array.raid_disks = s->raiddisks;
+ array.md_minor = 0;
+ if (fstat_is_blkdev(mdfd, mddev, &rdev))
+ array.md_minor = minor(rdev);
+ array.not_persistent = 1;
+ array.state = 0; /* not clean, but no errors */
+ if (s->assume_clean)
+ array.state |= 1;
+ array.active_disks = s->raiddisks - missing_disks;
+ array.working_disks = s->raiddisks - missing_disks;
+ array.spare_disks = 0;
+ array.failed_disks = missing_disks;
+ if (s->chunk == 0 && (s->level==0 || s->level==LEVEL_LINEAR))
+ s->chunk = 64;
+ array.chunk_size = s->chunk*1024;
+ array.layout = s->layout;
+ if (md_set_array_info(mdfd, &array)) {
+ pr_err("md_set_array_info() failed for %s: %s\n",
+ mddev, strerror(errno));
goto abort;
}
@@ -163,20 +143,14 @@ int Build(char *mddev, struct mddev_dev *devlist,
}
/* now add the devices */
for ((i=0), (dv = devlist) ; dv ; i++, dv=dv->next) {
+ mdu_disk_info_t disk;
unsigned long long dsize;
int fd;
+
if (strcmp("missing", dv->devname) == 0)
continue;
- if (stat(dv->devname, &stb)) {
- pr_err("Weird: %s has disappeared.\n",
- dv->devname);
+ if (!stat_is_blkdev(dv->devname, &rdev))
goto abort;
- }
- if ((stb.st_mode & S_IFMT)!= S_IFBLK) {
- pr_err("Weird: %s is no longer a block device.\n",
- dv->devname);
- goto abort;
- }
fd = open(dv->devname, O_RDONLY|O_EXCL);
if (fd < 0) {
pr_err("Cannot open %s: %s\n",
@@ -187,94 +161,58 @@ int Build(char *mddev, struct mddev_dev *devlist,
(s->size == 0 || s->size == MAX_SIZE || dsize < s->size))
s->size = dsize;
close(fd);
- if (vers >= 9000) {
- mdu_disk_info_t disk;
- disk.number = i;
- disk.raid_disk = i;
- disk.state = (1<<MD_DISK_SYNC) | (1<<MD_DISK_ACTIVE);
- if (dv->writemostly == FlagSet)
- disk.state |= 1<<MD_DISK_WRITEMOSTLY;
- disk.major = major(stb.st_rdev);
- disk.minor = minor(stb.st_rdev);
- if (ioctl(mdfd, ADD_NEW_DISK, &disk)) {
- pr_err("ADD_NEW_DISK failed for %s: %s\n",
- dv->devname, strerror(errno));
- goto abort;
- }
- } else {
- if (ioctl(mdfd, REGISTER_DEV, &stb.st_rdev)) {
- pr_err("REGISTER_DEV failed for %s: %s.\n",
- dv->devname, strerror(errno));
- goto abort;
- }
+ disk.number = i;
+ disk.raid_disk = i;
+ disk.state = (1<<MD_DISK_SYNC) | (1<<MD_DISK_ACTIVE);
+ if (dv->writemostly == FlagSet)
+ disk.state |= 1<<MD_DISK_WRITEMOSTLY;
+ disk.major = major(rdev);
+ disk.minor = minor(rdev);
+ if (ioctl(mdfd, ADD_NEW_DISK, &disk)) {
+ pr_err("ADD_NEW_DISK failed for %s: %s\n",
+ dv->devname, strerror(errno));
+ goto abort;
}
}
/* now to start it */
- if (vers >= 9000) {
- mdu_param_t param; /* not used by syscall */
- if (s->bitmap_file) {
- bitmap_fd = open(s->bitmap_file, O_RDWR);
- if (bitmap_fd < 0) {
- int major = BITMAP_MAJOR_HI;
+ if (s->bitmap_file) {
+ bitmap_fd = open(s->bitmap_file, O_RDWR);
+ if (bitmap_fd < 0) {
+ int major = BITMAP_MAJOR_HI;
#if 0
- if (s->bitmap_chunk == UnSet) {
- pr_err("%s cannot be openned.",
- s->bitmap_file);
- goto abort;
- }
-#endif
- if (vers < 9003) {
- major = BITMAP_MAJOR_HOSTENDIAN;
-#ifdef __BIG_ENDIAN
- pr_err("Warning - bitmaps created on this kernel are not portable\n"
- " between different architectures. Consider upgrading the Linux kernel.\n");
+ if (s->bitmap_chunk == UnSet) {
+ pr_err("%s cannot be opened.\n", s->bitmap_file);
+ goto abort;
+ }
#endif
- }
- bitmapsize = s->size>>9; /* FIXME wrong for RAID10 */
- if (CreateBitmap(s->bitmap_file, 1, NULL, s->bitmap_chunk,
- c->delay, s->write_behind, bitmapsize, major)) {
- goto abort;
- }
- bitmap_fd = open(s->bitmap_file, O_RDWR);
- if (bitmap_fd < 0) {
- pr_err("%s cannot be openned.",
- s->bitmap_file);
- goto abort;
- }
+ bitmapsize = s->size >> 9; /* FIXME wrong for RAID10 */
+ if (CreateBitmap(s->bitmap_file, 1, NULL,
+ s->bitmap_chunk, c->delay,
+ s->write_behind, bitmapsize, major)) {
+ goto abort;
}
- if (bitmap_fd >= 0) {
- if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
- pr_err("Cannot set bitmap file for %s: %s\n",
- mddev, strerror(errno));
- goto abort;
- }
+ bitmap_fd = open(s->bitmap_file, O_RDWR);
+ if (bitmap_fd < 0) {
+ pr_err("%s cannot be opened.\n", s->bitmap_file);
+ goto abort;
}
}
- if (ioctl(mdfd, RUN_ARRAY, &param)) {
- pr_err("RUN_ARRAY failed: %s\n",
- strerror(errno));
- if (s->chunk & (s->chunk-1)) {
- cont_err("Problem may be that chunk size is not a power of 2\n");
+ if (bitmap_fd >= 0) {
+ if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
+ pr_err("Cannot set bitmap file for %s: %s\n",
+ mddev, strerror(errno));
+ goto abort;
}
- goto abort;
- }
- } else {
- unsigned long arg;
- arg=0;
- while (s->chunk > 4096) {
- arg++;
- s->chunk >>= 1;
}
- if (s->level == 0)
- arg |= 0x20000;
- else
- arg |= 0x10000;
- if (ioctl(mdfd, START_MD, arg)) {
- pr_err("START_MD failed: %s\n",
- strerror(errno));
- goto abort;
+ }
+ if (ioctl(mdfd, RUN_ARRAY, &param)) {
+ pr_err("RUN_ARRAY failed: %s\n", strerror(errno));
+ if (s->chunk & (s->chunk - 1)) {
+ cont_err("Problem may be that chunk size is not a power of 2\n");
}
+ goto abort;
}
+
if (c->verbose >= 0)
pr_err("array %s built and started.\n",
mddev);
@@ -283,10 +221,7 @@ int Build(char *mddev, struct mddev_dev *devlist,
return 0;
abort:
- if (vers >= 9000)
- ioctl(mdfd, STOP_ARRAY, 0);
- else
- ioctl(mdfd, STOP_MD, 0);
+ ioctl(mdfd, STOP_ARRAY, 0);
close(mdfd);
return 1;
}
diff --git a/Create.c b/Create.c
index 2721884..50142d8 100644
--- a/Create.c
+++ b/Create.c
@@ -84,20 +84,19 @@ int Create(struct supertype *st, char *mddev,
* RUN_ARRAY
*/
int mdfd;
- unsigned long long minsize=0, maxsize=0;
+ unsigned long long minsize = 0, maxsize = 0;
char *mindisc = NULL;
char *maxdisc = NULL;
int dnum, raid_disk_num;
struct mddev_dev *dv;
- int fail=0, warn=0;
- struct stat stb;
+ dev_t rdev;
+ int fail = 0, warn = 0;
int first_missing = subdevs * 2;
int second_missing = subdevs * 2;
int missing_disks = 0;
int insert_point = subdevs * 2; /* where to insert a missing drive */
int total_slots;
int pass;
- int vers;
int rv;
int bitmap_fd;
int have_container = 0;
@@ -112,6 +111,7 @@ int Create(struct supertype *st, char *mddev,
char chosen_name[1024];
struct map_ent *map = NULL;
unsigned long long newsize;
+ mdu_array_info_t inf;
int major_num = BITMAP_MAJOR_HI;
if (s->bitmap_file && strcmp(s->bitmap_file, "clustered") == 0) {
@@ -150,14 +150,12 @@ int Create(struct supertype *st, char *mddev,
/* If given a single device, it might be a container, and we can
* extract a device list from there
*/
- mdu_array_info_t inf;
int fd;
memset(&inf, 0, sizeof(inf));
fd = open(devlist->devname, O_RDONLY);
if (fd >= 0 &&
- ioctl(fd, GET_ARRAY_INFO, &inf) == 0 &&
- inf.raid_disks == 0) {
+ md_get_array_info(fd, &inf) == 0 && inf.raid_disks == 0) {
/* yep, looks like a container */
if (st) {
rv = st->ss->load_container(st, fd,
@@ -259,7 +257,8 @@ int Create(struct supertype *st, char *mddev,
if (st && ! st->ss->validate_geometry(st, s->level, s->layout, s->raiddisks,
&s->chunk, s->size*2,
data_offset, NULL,
- &newsize, c->verbose>=0))
+ &newsize, s->consistency_policy,
+ c->verbose >= 0))
return 1;
if (s->chunk && s->chunk != UnSet) {
@@ -290,7 +289,7 @@ int Create(struct supertype *st, char *mddev,
info.array.active_disks = 0;
info.array.working_disks = 0;
dnum = 0;
- for (dv = devlist; dv ; dv = dv->next)
+ for (dv = devlist; dv; dv = dv->next)
if (data_offset == VARIABLE_OFFSET)
dv->data_offset = INVALID_SECTORS;
else
@@ -302,7 +301,7 @@ int Create(struct supertype *st, char *mddev,
int dfd;
char *doff;
- if (strcasecmp(dname, "missing")==0) {
+ if (strcasecmp(dname, "missing") == 0) {
if (first_missing > dnum)
first_missing = dnum;
if (second_missing > dnum && dnum > first_missing)
@@ -326,11 +325,8 @@ int Create(struct supertype *st, char *mddev,
dname, strerror(errno));
exit(2);
}
- if (fstat(dfd, &stb) != 0 ||
- (stb.st_mode & S_IFMT) != S_IFBLK) {
+ if (!fstat_is_blkdev(dfd, dname, NULL)) {
close(dfd);
- pr_err("%s is not a block device\n",
- dname);
exit(2);
}
close(dfd);
@@ -348,7 +344,7 @@ int Create(struct supertype *st, char *mddev,
*/
int i;
char *name = "default";
- for(i=0; !st && superlist[i]; i++) {
+ for(i = 0; !st && superlist[i]; i++) {
st = superlist[i]->match_metadata_desc(name);
if (!st)
continue;
@@ -358,7 +354,8 @@ int Create(struct supertype *st, char *mddev,
st, s->level, s->layout, s->raiddisks,
&s->chunk, s->size*2,
dv->data_offset, dname,
- &freesize, c->verbose > 0)) {
+ &freesize, s->consistency_policy,
+ c->verbose > 0)) {
case -1: /* Not valid, message printed, and not
* worth checking any further */
exit(2);
@@ -395,6 +392,7 @@ int Create(struct supertype *st, char *mddev,
&s->chunk, s->size*2,
dv->data_offset,
dname, &freesize,
+ s->consistency_policy,
c->verbose >= 0)) {
pr_err("%s is not suitable for this array.\n",
@@ -442,10 +440,10 @@ int Create(struct supertype *st, char *mddev,
skip_size_check:
if (c->runstop != 1 || c->verbose >= 0) {
int fd = open(dname, O_RDONLY);
- if (fd <0 ) {
+ if (fd < 0) {
pr_err("Cannot open %s: %s\n",
dname, strerror(errno));
- fail=1;
+ fail = 1;
continue;
}
warn |= check_ext2(fd, dname);
@@ -455,8 +453,8 @@ int Create(struct supertype *st, char *mddev,
st->minor_version >= 1)
/* metadata at front */
warn |= check_partitions(fd, dname, 0, 0);
- else if (s->level == 1 || s->level == LEVEL_CONTAINER
- || (s->level == 0 && s->raiddisks == 1))
+ else if (s->level == 1 || s->level == LEVEL_CONTAINER ||
+ (s->level == 0 && s->raiddisks == 1))
/* partitions could be meaningful */
warn |= check_partitions(fd, dname, freesize*2, s->size*2);
else
@@ -477,6 +475,10 @@ int Create(struct supertype *st, char *mddev,
close(fd);
}
}
+ if (missing_disks == dnum) {
+ pr_err("Subdevs can't be all missing\n");
+ return 1;
+ }
if (s->raiddisks + s->sparedisks > st->max_devs) {
pr_err("Too many devices: %s metadata only supports %d\n",
st->ss->name, st->max_devs);
@@ -493,15 +495,15 @@ int Create(struct supertype *st, char *mddev,
pr_err("no size and no drives given - aborting create.\n");
return 1;
}
- if (s->level > 0 || s->level == LEVEL_MULTIPATH
- || s->level == LEVEL_FAULTY
- || st->ss->external ) {
+ if (s->level > 0 || s->level == LEVEL_MULTIPATH ||
+ s->level == LEVEL_FAULTY || st->ss->external) {
/* size is meaningful */
if (!st->ss->validate_geometry(st, s->level, s->layout,
s->raiddisks,
&s->chunk, minsize*2,
data_offset,
- NULL, NULL, 0)) {
+ NULL, NULL,
+ s->consistency_policy, 0)) {
pr_err("devices too large for RAID level %d\n", s->level);
return 1;
}
@@ -520,6 +522,8 @@ int Create(struct supertype *st, char *mddev,
if (!s->bitmap_file &&
s->level >= 1 &&
st->ss->add_internal_bitmap &&
+ (s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
+ s->consistency_policy != CONSISTENCY_POLICY_PPL) &&
(s->write_behind || s->size > 100*1024*1024ULL)) {
if (c->verbose > 0)
pr_err("automatically enabling write-intent bitmap on large array\n");
@@ -528,6 +532,12 @@ int Create(struct supertype *st, char *mddev,
if (s->bitmap_file && strcmp(s->bitmap_file, "none") == 0)
s->bitmap_file = NULL;
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ !st->ss->write_init_ppl) {
+ pr_err("%s metadata does not support PPL\n", st->ss->name);
+ return 1;
+ }
+
if (!have_container && s->level > 0 && ((maxsize-s->size)*100 > maxsize)) {
if (c->runstop != 1 || c->verbose >= 0)
pr_err("largest drive (%s) exceeds size (%lluK) by more than 1%%\n",
@@ -561,9 +571,9 @@ int Create(struct supertype *st, char *mddev,
* as missing, so that a reconstruct happens (faster than re-parity)
* FIX: Can we do this for raid6 as well?
*/
- if (st->ss->external == 0 &&
- s->assume_clean==0 && c->force == 0 && first_missing >= s->raiddisks) {
- switch ( s->level ) {
+ if (st->ss->external == 0 && s->assume_clean == 0 &&
+ c->force == 0 && first_missing >= s->raiddisks) {
+ switch (s->level) {
case 4:
case 5:
insert_point = s->raiddisks-1;
@@ -596,7 +606,7 @@ int Create(struct supertype *st, char *mddev,
/* We need to create the device */
map_lock(&map);
- mdfd = create_mddev(mddev, name, c->autof, LOCAL, chosen_name);
+ mdfd = create_mddev(mddev, name, c->autof, LOCAL, chosen_name, 1);
if (mdfd < 0) {
map_unlock(&map);
return 1;
@@ -605,28 +615,22 @@ int Create(struct supertype *st, char *mddev,
* it could be in conflict with already existing device
* e.g. container, array
*/
- if (strncmp(chosen_name, "/dev/md/", 8) == 0
- && map_by_name(&map, chosen_name+8) != NULL) {
+ if (strncmp(chosen_name, "/dev/md/", 8) == 0 &&
+ map_by_name(&map, chosen_name+8) != NULL) {
pr_err("Array name %s is in use already.\n",
chosen_name);
close(mdfd);
map_unlock(&map);
+ udev_unblock();
return 1;
}
mddev = chosen_name;
- vers = md_get_version(mdfd);
- if (vers < 9000) {
- pr_err("Create requires md driver version 0.90.0 or later\n");
+ memset(&inf, 0, sizeof(inf));
+ md_get_array_info(mdfd, &inf);
+ if (inf.working_disks != 0) {
+ pr_err("another array by this name is already running.\n");
goto abort_locked;
- } else {
- mdu_array_info_t inf;
- memset(&inf, 0, sizeof(inf));
- ioctl(mdfd, GET_ARRAY_INFO, &inf);
- if (inf.working_disks != 0) {
- pr_err("another array by this name is already running.\n");
- goto abort_locked;
- }
}
/* Ok, lets try some ioctls */
@@ -638,20 +642,15 @@ int Create(struct supertype *st, char *mddev,
* with, but it chooses to trust me instead. Sigh
*/
info.array.md_minor = 0;
- if (fstat(mdfd, &stb)==0)
- info.array.md_minor = minor(stb.st_rdev);
+ if (fstat_is_blkdev(mdfd, mddev, &rdev))
+ info.array.md_minor = minor(rdev);
info.array.not_persistent = 0;
- if ( ( (s->level == 4 || s->level == 5) &&
- (insert_point < s->raiddisks || first_missing < s->raiddisks) )
- ||
- ( s->level == 6 && (insert_point < s->raiddisks
- || second_missing < s->raiddisks))
- ||
- ( s->level <= 0 )
- ||
- s->assume_clean
- ) {
+ if (((s->level == 4 || s->level == 5) &&
+ (insert_point < s->raiddisks || first_missing < s->raiddisks)) ||
+ (s->level == 6 && (insert_point < s->raiddisks ||
+ second_missing < s->raiddisks)) ||
+ (s->level <= 0) || s->assume_clean) {
info.array.state = 1; /* clean, but one+ drive will be missing*/
info.resync_start = MaxSector;
} else {
@@ -709,24 +708,25 @@ int Create(struct supertype *st, char *mddev,
name = strrchr(mddev, '/');
if (name) {
name++;
- if (strncmp(name, "md_", 3)==0 &&
- strlen(name) > 3 &&
- (name-mddev) == 5 /* /dev/ */)
+ if (strncmp(name, "md_", 3) == 0 &&
+ strlen(name) > 3 && (name-mddev) == 5 /* /dev/ */)
name += 3;
- else if (strncmp(name, "md", 2)==0 &&
- strlen(name) > 2 &&
- isdigit(name[2]) &&
+ else if (strncmp(name, "md", 2) == 0 &&
+ strlen(name) > 2 && isdigit(name[2]) &&
(name-mddev) == 5 /* /dev/ */)
name += 2;
}
}
- if (!st->ss->init_super(st, &info.array, s->size, name, c->homehost, uuid,
+ if (!st->ss->init_super(st, &info.array, s, name, c->homehost, uuid,
data_offset))
goto abort_locked;
total_slots = info.array.nr_disks;
st->ss->getinfo_super(st, &info, NULL);
- sysfs_init(&info, mdfd, NULL);
+ if (sysfs_init(&info, mdfd, NULL)) {
+ pr_err("unable to initialize sysfs\n");
+ goto abort_locked;
+ }
if (did_default && c->verbose >= 0) {
if (is_subarray(info.text_version)) {
@@ -755,20 +755,8 @@ int Create(struct supertype *st, char *mddev,
* to stop another mdadm from finding and using those devices.
*/
- if (s->bitmap_file && vers < 9003) {
- major_num = BITMAP_MAJOR_HOSTENDIAN;
-#ifdef __BIG_ENDIAN
- pr_err("Warning - bitmaps created on this kernel are not portable\n"
- " between different architectured. Consider upgrading the Linux kernel.\n");
-#endif
- }
-
- if (s->bitmap_file && (strcmp(s->bitmap_file, "internal")==0 ||
- strcmp(s->bitmap_file, "clustered")==0)) {
- if ((vers%100) < 2) {
- pr_err("internal bitmaps not supported by this kernel.\n");
- goto abort_locked;
- }
+ if (s->bitmap_file && (strcmp(s->bitmap_file, "internal") == 0 ||
+ strcmp(s->bitmap_file, "clustered") == 0)) {
if (!st->ss->add_internal_bitmap) {
pr_err("internal bitmaps not supported with %s metadata\n",
st->ss->name);
@@ -783,7 +771,10 @@ int Create(struct supertype *st, char *mddev,
s->bitmap_file = NULL;
}
- sysfs_init(&info, mdfd, NULL);
+ if (sysfs_init(&info, mdfd, NULL)) {
+ pr_err("unable to initialize sysfs\n");
+ goto abort_locked;
+ }
if (st->ss->external && st->container_devnm[0]) {
/* member */
@@ -845,13 +836,12 @@ int Create(struct supertype *st, char *mddev,
infos = xmalloc(sizeof(*infos) * total_slots);
enable_fds(total_slots);
- for (pass=1; pass <=2 ; pass++) {
+ for (pass = 1; pass <= 2; pass++) {
struct mddev_dev *moved_disk = NULL; /* the disk that was moved out of the insert point */
- for (dnum=0, raid_disk_num=0, dv = devlist ; dv ;
- dv=(dv->next)?(dv->next):moved_disk, dnum++) {
+ for (dnum = 0, raid_disk_num = 0, dv = devlist; dv;
+ dv = (dv->next) ? (dv->next) : moved_disk, dnum++) {
int fd;
- struct stat stb;
struct mdinfo *inf = &infos[dnum];
if (dnum >= total_slots)
@@ -861,7 +851,7 @@ int Create(struct supertype *st, char *mddev,
moved_disk = dv;
continue;
}
- if (strcasecmp(dv->devname, "missing")==0) {
+ if (strcasecmp(dv->devname, "missing") == 0) {
raid_disk_num += 1;
continue;
}
@@ -907,9 +897,10 @@ int Create(struct supertype *st, char *mddev,
dv->devname);
goto abort_locked;
}
- fstat(fd, &stb);
- inf->disk.major = major(stb.st_rdev);
- inf->disk.minor = minor(stb.st_rdev);
+ if (!fstat_is_blkdev(fd, dv->devname, &rdev))
+ return 1;
+ inf->disk.major = major(rdev);
+ inf->disk.minor = minor(rdev);
}
if (fd >= 0)
remove_partitions(fd);
@@ -930,8 +921,8 @@ int Create(struct supertype *st, char *mddev,
if (!have_container) {
/* getinfo_super might have lost these ... */
- inf->disk.major = major(stb.st_rdev);
- inf->disk.minor = minor(stb.st_rdev);
+ inf->disk.major = major(rdev);
+ inf->disk.minor = minor(rdev);
}
break;
case 2:
@@ -1064,9 +1055,15 @@ int Create(struct supertype *st, char *mddev,
pr_err("not starting array - not enough devices.\n");
}
close(mdfd);
+ /* Give udev a moment to process the Change event caused
+ * by the close.
+ */
+ usleep(100*1000);
+ udev_unblock();
return 0;
abort:
+ udev_unblock();
map_lock(&map);
abort_locked:
map_remove(&map, fd2devnm(mdfd));
diff --git a/Detail.c b/Detail.c
index 509b0d4..4dcf81d 100644
--- a/Detail.c
+++ b/Detail.c
@@ -25,6 +25,7 @@
#include "mdadm.h"
#include "md_p.h"
#include "md_u.h"
+#include <ctype.h>
#include <dirent.h>
static int cmpstringp(const void *p1, const void *p2)
@@ -51,12 +52,9 @@ static int add_device(const char *dev, char ***p_devices,
int Detail(char *dev, struct context *c)
{
/*
- * Print out details for an md array by using
- * GET_ARRAY_INFO and GET_DISK_INFO ioctl calls
+ * Print out details for an md array
*/
-
int fd = open(dev, O_RDONLY);
- int vers;
mdu_array_info_t array;
mdu_disk_info_t *disks;
int next;
@@ -67,8 +65,6 @@ int Detail(char *dev, struct context *c)
int max_devices = 0, n_devices = 0;
int spares = 0;
struct stat stb;
- int is_26 = get_linux_version() >= 2006000;
- int is_rebuilding = 0;
int failed = 0;
struct supertype *st;
char *subarray = NULL;
@@ -84,41 +80,48 @@ int Detail(char *dev, struct context *c)
char *avail = NULL;
int external;
int inactive;
+ int is_container = 0;
if (fd < 0) {
pr_err("cannot open %s: %s\n",
dev, strerror(errno));
return rv;
}
- vers = md_get_version(fd);
- if (vers < 0) {
- pr_err("%s does not appear to be an md device\n",
- dev);
- close(fd);
- return rv;
- }
- if (vers < 9000) {
- pr_err("cannot get detail for md device %s: driver version too old.\n",
- dev);
- close(fd);
- return rv;
+ sra = sysfs_read(fd, NULL, GET_VERSION | GET_DEVS |
+ GET_ARRAY_STATE | GET_STATE);
+ if (!sra) {
+ if (md_get_array_info(fd, &array)) {
+ pr_err("%s does not appear to be an md device\n", dev);
+ close(fd);
+ return rv;
+ }
}
- sra = sysfs_read(fd, NULL, GET_VERSION|GET_DEVS);
- external = (sra != NULL && sra->array.major_version == -1
- && sra->array.minor_version == -2);
+ external = (sra != NULL && sra->array.major_version == -1 &&
+ sra->array.minor_version == -2);
+ inactive = (sra != NULL && !md_array_is_active(sra));
st = super_by_fd(fd, &subarray);
- if (ioctl(fd, GET_ARRAY_INFO, &array) == 0) {
- inactive = 0;
- } else if (errno == ENODEV && sra) {
- array = sra->array;
- inactive = 1;
- } else {
- pr_err("cannot get array detail for %s: %s\n",
- dev, strerror(errno));
- close(fd);
- return rv;
+ if (md_get_array_info(fd, &array)) {
+ if (errno == ENODEV) {
+ if (sra->array.major_version == -1 &&
+ sra->array.minor_version == -1 &&
+ sra->devs == NULL) {
+ pr_err("Array associated with md device %s does not exist.\n",
+ dev);
+ close(fd);
+ sysfs_free(sra);
+ return rv;
+ }
+ array = sra->array;
+ } else {
+ pr_err("cannot get array detail for %s: %s\n",
+ dev, strerror(errno));
+ close(fd);
+ return rv;
+ }
}
+ if (array.raid_disks == 0 && external)
+ is_container = 1;
if (fstat(fd, &stb) != 0 && !S_ISBLK(stb.st_mode))
stb.st_rdev = 0;
rv = 0;
@@ -146,22 +149,23 @@ int Detail(char *dev, struct context *c)
}
/* try to load a superblock. Try sra->devs first, then try ioctl */
- if (st && !info) for (d = 0, subdev = sra ? sra->devs : NULL;
- d < max_disks || subdev;
- subdev ? (void)(subdev = subdev->next) : (void)(d++)){
+ if (st && !info)
+ for (d = 0, subdev = sra ? sra->devs : NULL;
+ d < max_disks || subdev;
+ subdev ? (void)(subdev = subdev->next) : (void)(d++)){
mdu_disk_info_t disk;
char *dv;
int fd2;
int err;
+
if (subdev)
disk = subdev->disk;
else {
disk.number = d;
- if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ if (md_get_disk_info(fd, &disk) < 0)
continue;
if (d >= array.raid_disks &&
- disk.major == 0 &&
- disk.minor == 0)
+ disk.major == 0 && disk.minor == 0)
continue;
}
@@ -227,7 +231,7 @@ int Detail(char *dev, struct context *c)
printf("MD_LEVEL=%s\n", str);
printf("MD_DEVICES=%d\n", array.raid_disks);
} else {
- if (!inactive)
+ if (is_container)
printf("MD_LEVEL=container\n");
printf("MD_DEVICES=%d\n", array.nr_disks);
}
@@ -239,7 +243,8 @@ int Detail(char *dev, struct context *c)
printf("MD_METADATA=%s\n", sra->text_version);
else
printf("MD_METADATA=%d.%d\n",
- array.major_version, array.minor_version);
+ array.major_version,
+ array.minor_version);
}
if (st && st->sb && info) {
@@ -247,12 +252,12 @@ int Detail(char *dev, struct context *c)
struct map_ent *mp, *map = NULL;
fname_from_uuid(st, info, nbuf, ':');
- printf("MD_UUID=%s\n", nbuf+5);
+ printf("MD_UUID=%s\n", nbuf + 5);
mp = map_by_uuid(&map, info->uuid);
if (mp && mp->path &&
strncmp(mp->path, "/dev/md/", 8) == 0) {
printf("MD_DEVNAME=");
- print_escape(mp->path+8);
+ print_escape(mp->path + 8);
putchar('\n');
}
@@ -276,20 +281,26 @@ int Detail(char *dev, struct context *c)
if (sra) {
struct mdinfo *mdi;
for (mdi = sra->devs; mdi; mdi = mdi->next) {
- char *path =
- map_dev(mdi->disk.major,
- mdi->disk.minor, 0);
+ char *path;
+ char *sysdev = xstrdup(mdi->sys_name + 1);
+ char *cp;
+
+ path = map_dev(mdi->disk.major,
+ mdi->disk.minor, 0);
+ for (cp = sysdev; *cp; cp++)
+ if (!isalnum(*cp))
+ *cp = '_';
if (mdi->disk.raid_disk >= 0)
printf("MD_DEVICE_%s_ROLE=%d\n",
- mdi->sys_name+4,
+ sysdev,
mdi->disk.raid_disk);
else
printf("MD_DEVICE_%s_ROLE=spare\n",
- mdi->sys_name+4);
+ sysdev);
if (path)
printf("MD_DEVICE_%s_DEV=%s\n",
- mdi->sys_name+4, path);
+ sysdev, path);
}
}
goto out;
@@ -297,24 +308,23 @@ int Detail(char *dev, struct context *c)
disks = xmalloc(max_disks * 2 * sizeof(mdu_disk_info_t));
for (d = 0; d < max_disks * 2; d++) {
- disks[d].state = (1<<MD_DISK_REMOVED);
+ disks[d].state = (1 << MD_DISK_REMOVED);
disks[d].major = disks[d].minor = 0;
disks[d].number = -1;
- disks[d].raid_disk = d/2;
+ disks[d].raid_disk = d / 2;
}
- next = array.raid_disks*2;
+ next = array.raid_disks * 2;
if (inactive) {
struct mdinfo *mdi;
- if (sra != NULL)
- for (mdi = sra->devs; mdi; mdi = mdi->next) {
- disks[next++] = mdi->disk;
- disks[next-1].number = -1;
- }
+ for (mdi = sra->devs; mdi; mdi = mdi->next) {
+ disks[next++] = mdi->disk;
+ disks[next - 1].number = -1;
+ }
} else for (d = 0; d < max_disks; d++) {
mdu_disk_info_t disk;
disk.number = d;
- if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
+ if (md_get_disk_info(fd, &disk) < 0) {
if (d < array.raid_disks)
pr_err("cannot get device detail for device %d: %s\n",
d, strerror(errno));
@@ -322,21 +332,23 @@ int Detail(char *dev, struct context *c)
}
if (disk.major == 0 && disk.minor == 0)
continue;
- if (disk.raid_disk >= 0 && disk.raid_disk < array.raid_disks
- && disks[disk.raid_disk*2].state == (1<<MD_DISK_REMOVED)
- && ((disk.state & (1<<MD_DISK_JOURNAL)) == 0))
- disks[disk.raid_disk*2] = disk;
- else if (disk.raid_disk >= 0 && disk.raid_disk < array.raid_disks
- && disks[disk.raid_disk*2+1].state == (1<<MD_DISK_REMOVED)
- && !(disk.state & (1<<MD_DISK_JOURNAL)))
- disks[disk.raid_disk*2+1] = disk;
- else if (next < max_disks*2)
+ if (disk.raid_disk >= 0 && disk.raid_disk < array.raid_disks &&
+ disks[disk.raid_disk * 2].state == (1 << MD_DISK_REMOVED) &&
+ ((disk.state & (1 << MD_DISK_JOURNAL)) == 0))
+ disks[disk.raid_disk * 2] = disk;
+ else if (disk.raid_disk >= 0 &&
+ disk.raid_disk < array.raid_disks &&
+ disks[disk.raid_disk * 2 + 1].state ==
+ (1 << MD_DISK_REMOVED) &&
+ !(disk.state & (1 << MD_DISK_JOURNAL)))
+ disks[disk.raid_disk * 2 + 1] = disk;
+ else if (next < max_disks * 2)
disks[next++] = disk;
}
avail = xcalloc(array.raid_disks, 1);
- for (d= 0; d < array.raid_disks; d++) {
+ for (d = 0; d < array.raid_disks; d++) {
if ((disks[d*2].state & (1<<MD_DISK_SYNC)) ||
(disks[d*2+1].state & (1<<MD_DISK_SYNC))) {
@@ -348,13 +360,16 @@ int Detail(char *dev, struct context *c)
if (c->brief) {
mdu_bitmap_file_t bmf;
- printf("%sARRAY %s", inactive ? "INACTIVE-":"", dev);
+ if (inactive && !is_container)
+ printf("INACTIVE-ARRAY %s", dev);
+ else
+ printf("ARRAY %s", dev);
if (c->verbose > 0) {
if (array.raid_disks)
printf(" level=%s num-devices=%d",
- str?str:"-unknown-",
- array.raid_disks );
- else if (!inactive)
+ str ? str : "-unknown-",
+ array.raid_disks);
+ else if (is_container)
printf(" level=container num-devices=%d",
array.nr_disks);
else
@@ -367,14 +382,12 @@ int Detail(char *dev, struct context *c)
if (sra && sra->array.major_version < 0)
printf(" metadata=%s", sra->text_version);
else
- printf(" metadata=%d.%d",
- array.major_version, array.minor_version);
+ printf(" metadata=%d.%d", array.major_version,
+ array.minor_version);
}
/* Only try GET_BITMAP_FILE for 0.90.01 and later */
- if (vers >= 9001 &&
- ioctl(fd, GET_BITMAP_FILE, &bmf) == 0 &&
- bmf.pathname[0]) {
+ if (ioctl(fd, GET_BITMAP_FILE, &bmf) == 0 && bmf.pathname[0]) {
printf(" bitmap=%s", bmf.pathname);
}
} else {
@@ -385,7 +398,7 @@ int Detail(char *dev, struct context *c)
char *devnm;
devnm = stat2devnm(&stb);
- for (e=ms; e; e=e->next)
+ for (e = ms; e; e = e->next)
if (strcmp(e->devnm, devnm) == 0)
break;
if (!get_dev_size(fd, NULL, &larray_size))
@@ -394,64 +407,69 @@ int Detail(char *dev, struct context *c)
printf("%s:\n", dev);
if (container)
- printf(" Container : %s, member %s\n", container, member);
+ printf(" Container : %s, member %s\n",
+ container, member);
else {
- if (sra && sra->array.major_version < 0)
- printf(" Version : %s\n", sra->text_version);
- else
- printf(" Version : %d.%d\n",
- array.major_version, array.minor_version);
+ if (sra && sra->array.major_version < 0)
+ printf(" Version : %s\n",
+ sra->text_version);
+ else
+ printf(" Version : %d.%d\n",
+ array.major_version,
+ array.minor_version);
}
atime = array.ctime;
if (atime)
- printf(" Creation Time : %.24s\n", ctime(&atime));
- if (array.raid_disks == 0 && external)
+ printf(" Creation Time : %.24s\n", ctime(&atime));
+ if (is_container)
str = "container";
if (str)
- printf(" Raid Level : %s\n", str);
+ printf(" Raid Level : %s\n", str);
if (larray_size)
- printf(" Array Size : %llu%s\n", (larray_size>>10),
+ printf(" Array Size : %llu%s\n",
+ (larray_size >> 10),
human_size(larray_size));
if (array.level >= 1) {
if (sra)
array.major_version = sra->array.major_version;
if (array.major_version != 0 &&
(larray_size >= 0xFFFFFFFFULL|| array.size == 0)) {
- unsigned long long dsize = get_component_size(fd);
+ unsigned long long dsize;
+
+ dsize = get_component_size(fd);
if (dsize > 0)
- printf(" Used Dev Size : %llu%s\n",
+ printf(" Used Dev Size : %llu%s\n",
dsize/2,
human_size((long long)dsize<<9));
else
- printf(" Used Dev Size : unknown\n");
+ printf(" Used Dev Size : unknown\n");
} else
- printf(" Used Dev Size : %lu%s\n",
+ printf(" Used Dev Size : %lu%s\n",
(unsigned long)array.size,
- human_size((unsigned long long)array.size<<10));
+ human_size((unsigned long long)
+ array.size << 10));
}
if (array.raid_disks)
- printf(" Raid Devices : %d\n", array.raid_disks);
- printf(" Total Devices : %d\n", array.nr_disks);
+ printf(" Raid Devices : %d\n", array.raid_disks);
+ printf(" Total Devices : %d\n", array.nr_disks);
if (!container &&
((sra == NULL && array.major_version == 0) ||
(sra && sra->array.major_version == 0)))
- printf("Preferred Minor : %d\n", array.md_minor);
+ printf(" Preferred Minor : %d\n", array.md_minor);
if (sra == NULL || sra->array.major_version >= 0)
- printf(" Persistence : Superblock is %spersistent\n",
- array.not_persistent?"not ":"");
+ printf(" Persistence : Superblock is %spersistent\n",
+ array.not_persistent ? "not " : "");
printf("\n");
/* Only try GET_BITMAP_FILE for 0.90.01 and later */
- if (vers >= 9001 &&
- ioctl(fd, GET_BITMAP_FILE, &bmf) == 0 &&
- bmf.pathname[0]) {
- printf(" Intent Bitmap : %s\n", bmf.pathname);
+ if (ioctl(fd, GET_BITMAP_FILE, &bmf) == 0 && bmf.pathname[0]) {
+ printf(" Intent Bitmap : %s\n", bmf.pathname);
printf("\n");
} else if (array.state & (1<<MD_SB_BITMAP_PRESENT))
- printf(" Intent Bitmap : Internal\n\n");
+ printf(" Intent Bitmap : Internal\n\n");
atime = array.utime;
if (atime)
- printf(" Update Time : %.24s\n", ctime(&atime));
+ printf(" Update Time : %.24s\n", ctime(&atime));
if (array.raid_disks) {
static char *sync_action[] = {
", recovering", ", resyncing",
@@ -465,35 +483,43 @@ int Detail(char *dev, struct context *c)
else
st = ", degraded";
- printf(" State : %s%s%s%s%s%s \n",
- (array.state&(1<<MD_SB_CLEAN))?"clean":"active", st,
- (!e || (e->percent < 0 && e->percent != RESYNC_PENDING &&
- e->percent != RESYNC_DELAYED)) ? "" : sync_action[e->resync],
+ printf(" State : %s%s%s%s%s%s \n",
+ (array.state & (1 << MD_SB_CLEAN)) ?
+ "clean" : "active", st,
+ (!e || (e->percent < 0 &&
+ e->percent != RESYNC_PENDING &&
+ e->percent != RESYNC_DELAYED)) ?
+ "" : sync_action[e->resync],
larray_size ? "": ", Not Started",
- (e && e->percent == RESYNC_DELAYED) ? " (DELAYED)": "",
- (e && e->percent == RESYNC_PENDING) ? " (PENDING)": "");
- } else if (inactive) {
- printf(" State : inactive\n");
+ (e && e->percent == RESYNC_DELAYED) ?
+ " (DELAYED)": "",
+ (e && e->percent == RESYNC_PENDING) ?
+ " (PENDING)": "");
+ } else if (inactive && !is_container) {
+ printf(" State : inactive\n");
}
if (array.raid_disks)
- printf(" Active Devices : %d\n", array.active_disks);
+ printf(" Active Devices : %d\n", array.active_disks);
if (array.working_disks > 0)
- printf("Working Devices : %d\n", array.working_disks);
+ printf(" Working Devices : %d\n",
+ array.working_disks);
if (array.raid_disks) {
- printf(" Failed Devices : %d\n", array.failed_disks);
- printf(" Spare Devices : %d\n", array.spare_disks);
+ printf(" Failed Devices : %d\n", array.failed_disks);
+ printf(" Spare Devices : %d\n", array.spare_disks);
}
printf("\n");
if (array.level == 5) {
str = map_num(r5layout, array.layout);
- printf(" Layout : %s\n", str?str:"-unknown-");
+ printf(" Layout : %s\n",
+ str ? str : "-unknown-");
}
if (array.level == 6) {
str = map_num(r6layout, array.layout);
- printf(" Layout : %s\n", str?str:"-unknown-");
+ printf(" Layout : %s\n",
+ str ? str : "-unknown-");
}
if (array.level == 10) {
- printf(" Layout :");
+ printf(" Layout :");
print_r10_layout(array.layout);
printf("\n");
}
@@ -504,96 +530,122 @@ int Detail(char *dev, struct context *c)
case 10:
case 6:
if (array.chunk_size)
- printf(" Chunk Size : %dK\n\n",
+ printf(" Chunk Size : %dK\n\n",
array.chunk_size/1024);
break;
case -1:
- printf(" Rounding : %dK\n\n", array.chunk_size/1024);
+ printf(" Rounding : %dK\n\n",
+ array.chunk_size/1024);
+ break;
+ default:
break;
- default: break;
+ }
+
+ if (array.raid_disks) {
+ struct mdinfo *mdi;
+
+ mdi = sysfs_read(fd, NULL, GET_CONSISTENCY_POLICY);
+ if (mdi) {
+ char *policy = map_num(consistency_policies,
+ mdi->consistency_policy);
+ sysfs_free(mdi);
+ if (policy)
+ printf("Consistency Policy : %s\n\n",
+ policy);
+ }
}
if (e && e->percent >= 0) {
static char *sync_action[] = {
- "Rebuild", "Resync",
- "Reshape", "Check"};
- printf(" %7s Status : %d%% complete\n", sync_action[e->resync], e->percent);
- is_rebuilding = 1;
+ "Rebuild", "Resync", "Reshape", "Check"};
+ printf(" %7s Status : %d%% complete\n",
+ sync_action[e->resync], e->percent);
}
free_mdstat(ms);
if ((st && st->sb) && (info && info->reshape_active)) {
#if 0
This is pretty boring
- printf(" Reshape pos'n : %llu%s\n", (unsigned long long) info->reshape_progress<<9,
- human_size((unsigned long long)info->reshape_progress<<9));
+ printf(" Reshape pos'n : %llu%s\n",
+ (unsigned long long) info->reshape_progress << 9,
+ human_size((unsigned long long)
+ info->reshape_progress << 9));
#endif
if (info->delta_disks != 0)
- printf(" Delta Devices : %d, (%d->%d)\n",
+ printf(" Delta Devices : %d, (%d->%d)\n",
info->delta_disks,
array.raid_disks - info->delta_disks,
array.raid_disks);
if (info->new_level != array.level) {
str = map_num(pers, info->new_level);
- printf(" New Level : %s\n", str?str:"-unknown-");
+ printf(" New Level : %s\n",
+ str ? str : "-unknown-");
}
if (info->new_level != array.level ||
info->new_layout != array.layout) {
if (info->new_level == 5) {
- str = map_num(r5layout, info->new_layout);
- printf(" New Layout : %s\n",
- str?str:"-unknown-");
+ str = map_num(r5layout,
+ info->new_layout);
+ printf(" New Layout : %s\n",
+ str ? str : "-unknown-");
}
if (info->new_level == 6) {
- str = map_num(r6layout, info->new_layout);
- printf(" New Layout : %s\n",
- str?str:"-unknown-");
+ str = map_num(r6layout,
+ info->new_layout);
+ printf(" New Layout : %s\n",
+ str ? str : "-unknown-");
}
if (info->new_level == 10) {
- printf(" New Layout : near=%d, %s=%d\n",
- info->new_layout&255,
- (info->new_layout&0x10000)?"offset":"far",
- (info->new_layout>>8)&255);
+ printf(" New Layout : near=%d, %s=%d\n",
+ info->new_layout & 255,
+ (info->new_layout & 0x10000) ?
+ "offset" : "far",
+ (info->new_layout >> 8) & 255);
}
}
if (info->new_chunk != array.chunk_size)
- printf(" New Chunksize : %dK\n", info->new_chunk/1024);
+ printf(" New Chunksize : %dK\n",
+ info->new_chunk/1024);
printf("\n");
} else if (e && e->percent >= 0)
printf("\n");
if (st && st->sb)
st->ss->detail_super(st, c->homehost);
- if (array.raid_disks == 0 && sra && sra->array.major_version == -1
- && sra->array.minor_version == -2 && sra->text_version[0] != '/') {
+ if (array.raid_disks == 0 && sra &&
+ sra->array.major_version == -1 &&
+ sra->array.minor_version == -2 &&
+ sra->text_version[0] != '/') {
/* This looks like a container. Find any active arrays
* That claim to be a member.
*/
DIR *dir = opendir("/sys/block");
struct dirent *de;
- printf(" Member Arrays :");
+ printf(" Member Arrays :");
while (dir && (de = readdir(dir)) != NULL) {
- char path[200];
+ char path[287];
char vbuf[1024];
int nlen = strlen(sra->sys_name);
dev_t devid;
if (de->d_name[0] == '.')
continue;
- sprintf(path, "/sys/block/%s/md/metadata_version",
+ sprintf(path,
+ "/sys/block/%s/md/metadata_version",
de->d_name);
if (load_sys(path, vbuf, sizeof(vbuf)) < 0)
continue;
- if (strncmp(vbuf, "external:", 9) != 0 ||
- !is_subarray(vbuf+9) ||
- strncmp(vbuf+10, sra->sys_name, nlen) != 0 ||
- vbuf[10+nlen] != '/')
+ if (strncmp(vbuf, "external:", 9) ||
+ !is_subarray(vbuf + 9) ||
+ strncmp(vbuf + 10, sra->sys_name, nlen) ||
+ vbuf[10 + nlen] != '/')
continue;
devid = devnm2devid(de->d_name);
- printf(" %s", map_dev_preferred(
- major(devid),
- minor(devid), 1, c->prefer));
+ printf(" %s",
+ map_dev_preferred(major(devid),
+ minor(devid), 1,
+ c->prefer));
}
if (dir)
closedir(dir);
@@ -607,24 +659,23 @@ This is pretty boring
}
free(info);
- for (d= 0; d < max_disks * 2; d++) {
+ for (d = 0; d < max_disks * 2; d++) {
char *dv;
mdu_disk_info_t disk = disks[d];
- if (d >= array.raid_disks*2 &&
- disk.major == 0 &&
- disk.minor == 0)
+ if (d >= array.raid_disks * 2 &&
+ disk.major == 0 && disk.minor == 0)
continue;
- if ((d & 1) &&
- disk.major == 0 &&
- disk.minor == 0)
+ if ((d & 1) && disk.major == 0 && disk.minor == 0)
continue;
if (!c->brief) {
- if (d == array.raid_disks*2) printf("\n");
+ if (d == array.raid_disks*2)
+ printf("\n");
if (disk.number < 0 && disk.raid_disk < 0)
printf(" - %5d %5d - ",
disk.major, disk.minor);
- else if (disk.raid_disk < 0 || disk.state & (1<<MD_DISK_JOURNAL))
+ else if (disk.raid_disk < 0 ||
+ disk.state & (1 << MD_DISK_JOURNAL))
printf(" %5d %5d %5d - ",
disk.number, disk.major, disk.minor);
else if (disk.number < 0)
@@ -632,67 +683,69 @@ This is pretty boring
disk.major, disk.minor, disk.raid_disk);
else
printf(" %5d %5d %5d %5d ",
- disk.number, disk.major, disk.minor, disk.raid_disk);
+ disk.number, disk.major, disk.minor,
+ disk.raid_disk);
}
if (!c->brief && array.raid_disks) {
-
- if (disk.state & (1<<MD_DISK_FAULTY)) {
+ if (disk.state & (1 << MD_DISK_FAULTY)) {
printf(" faulty");
if (disk.raid_disk < array.raid_disks &&
disk.raid_disk >= 0)
failed++;
}
- if (disk.state & (1<<MD_DISK_ACTIVE)) printf(" active");
- if (disk.state & (1<<MD_DISK_SYNC)) {
+ if (disk.state & (1 << MD_DISK_ACTIVE))
+ printf(" active");
+ if (disk.state & (1 << MD_DISK_SYNC)) {
printf(" sync");
- if (array.level == 10 && (array.layout & ~0x1FFFF) == 0) {
+ if (array.level == 10 &&
+ (array.layout & ~0x1FFFF) == 0) {
int nc = array.layout & 0xff;
int fc = (array.layout >> 8) & 0xff;
int copies = nc*fc;
- if (fc == 1 && array.raid_disks % copies == 0 && copies <= 26) {
- /* We can divide the devices into 'sets' */
- int set = disk.raid_disk % copies;
+ if (fc == 1 &&
+ array.raid_disks % copies == 0 &&
+ copies <= 26) {
+ /* We can divide the devices
+ into 'sets' */
+ int set;
+ set = disk.raid_disk % copies;
printf(" set-%c", set + 'A');
}
}
}
- if (disk.state & (1<<MD_DISK_REMOVED)) printf(" removed");
- if (disk.state & (1<<MD_DISK_WRITEMOSTLY)) printf(" writemostly");
- if (disk.state & (1<<MD_DISK_FAILFAST)) printf(" failfast");
- if (disk.state & (1<<MD_DISK_JOURNAL)) printf(" journal");
+ if (disk.state & (1 << MD_DISK_REMOVED))
+ printf(" removed");
+ if (disk.state & (1 << MD_DISK_WRITEMOSTLY))
+ printf(" writemostly");
+ if (disk.state & (1 << MD_DISK_FAILFAST))
+ printf(" failfast");
+ if (disk.state & (1 << MD_DISK_JOURNAL))
+ printf(" journal");
if ((disk.state &
- ((1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC)
- |(1<<MD_DISK_REMOVED)|(1<<MD_DISK_FAULTY)|(1<<MD_DISK_JOURNAL)))
- == 0) {
+ ((1 << MD_DISK_ACTIVE) | (1 << MD_DISK_SYNC) |
+ (1 << MD_DISK_REMOVED) | (1 << MD_DISK_FAULTY) |
+ (1 << MD_DISK_JOURNAL))) == 0) {
printf(" spare");
- if (is_26) {
- if (disk.raid_disk < array.raid_disks && disk.raid_disk >= 0)
- printf(" rebuilding");
- } else if (is_rebuilding && failed) {
- /* Taking a bit of a risk here, we remove the
- * device from the array, and then put it back.
- * If this fails, we are rebuilding
- */
- int err = ioctl(fd, HOT_REMOVE_DISK, makedev(disk.major, disk.minor));
- if (err == 0) ioctl(fd, HOT_ADD_DISK, makedev(disk.major, disk.minor));
- if (err && errno == EBUSY)
- printf(" rebuilding");
- }
+ if (disk.raid_disk < array.raid_disks &&
+ disk.raid_disk >= 0)
+ printf(" rebuilding");
}
}
- if (disk.state == 0) spares++;
- dv=map_dev_preferred(disk.major, disk.minor, 0, c->prefer);
+ if (disk.state == 0)
+ spares++;
+ dv = map_dev_preferred(disk.major, disk.minor, 0, c->prefer);
if (dv != NULL) {
if (c->brief)
n_devices = add_device(dv, &devices,
- &max_devices,
- n_devices);
+ &max_devices, n_devices);
else
printf(" %s", dv);
}
- if (!c->brief) printf("\n");
+ if (!c->brief)
+ printf("\n");
}
- if (spares && c->brief && array.raid_disks) printf(" spares=%d", spares);
+ if (spares && c->brief && array.raid_disks)
+ printf(" spares=%d", spares);
if (c->brief && st && st->sb)
st->ss->brief_detail_super(st);
if (st)
@@ -707,8 +760,7 @@ This is pretty boring
if (c->brief)
printf("\n");
if (c->test &&
- !enough(array.level, array.raid_disks, array.layout,
- 1, avail))
+ !enough(array.level, array.raid_disks, array.layout, 1, avail))
rv = 2;
free(disks);
diff --git a/Examine.c b/Examine.c
index 953b8ee..7013480 100644
--- a/Examine.c
+++ b/Examine.c
@@ -53,7 +53,6 @@ int Examine(struct mddev_dev *devlist,
*/
int fd;
int rv = 0;
- int err = 0;
struct array {
struct supertype *st;
@@ -66,6 +65,8 @@ int Examine(struct mddev_dev *devlist,
for (; devlist ; devlist = devlist->next) {
struct supertype *st;
int have_container = 0;
+ int err = 0;
+ int container = 0;
fd = dev_open(devlist->devname, O_RDONLY);
if (fd < 0) {
@@ -74,44 +75,46 @@ int Examine(struct mddev_dev *devlist,
devlist->devname, strerror(errno));
rv = 1;
}
- err = 1;
+ continue;
}
- else {
- int container = 0;
- if (forcest)
- st = dup_super(forcest);
- else if (must_be_container(fd)) {
- /* might be a container */
- st = super_by_fd(fd, NULL);
- container = 1;
- } else
- st = guess_super(fd);
- if (st) {
- err = 1;
- st->ignore_hw_compat = 1;
- if (!container)
- err = st->ss->load_super(st, fd,
- (c->brief||c->scan) ? NULL
- :devlist->devname);
- if (err && st->ss->load_container) {
- err = st->ss->load_container(st, fd,
- (c->brief||c->scan) ? NULL
- :devlist->devname);
- if (!err)
- have_container = 1;
- }
- st->ignore_hw_compat = 0;
- } else {
- if (!c->brief) {
- pr_err("No md superblock detected on %s.\n", devlist->devname);
- rv = 1;
- }
- err = 1;
+
+ if (forcest)
+ st = dup_super(forcest);
+ else if (must_be_container(fd)) {
+ /* might be a container */
+ st = super_by_fd(fd, NULL);
+ container = 1;
+ } else
+ st = guess_super(fd);
+ if (st) {
+ err = 1;
+ st->ignore_hw_compat = 1;
+ if (!container)
+ err = st->ss->load_super(st, fd,
+ (c->brief||c->scan) ? NULL
+ :devlist->devname);
+ if (err && st->ss->load_container) {
+ err = st->ss->load_container(st, fd,
+ (c->brief||c->scan) ? NULL
+ :devlist->devname);
+ if (!err)
+ have_container = 1;
}
- close(fd);
+ st->ignore_hw_compat = 0;
+ } else {
+ if (!c->brief) {
+ pr_err("No md superblock detected on %s.\n", devlist->devname);
+ rv = 1;
+ }
+ err = 1;
}
- if (err)
+ close(fd);
+
+ if (err) {
+ if (st)
+ st->ss->free_super(st);
continue;
+ }
if (c->SparcAdjust)
st->ss->update_super(st, NULL, "sparc2.2",
@@ -121,7 +124,7 @@ int Examine(struct mddev_dev *devlist,
if (c->brief && st->ss->brief_examine_super == NULL) {
if (!c->scan)
pr_err("No brief listing for %s on %s\n",
- st->ss->name, devlist->devname);
+ st->ss->name, devlist->devname);
} else if (c->brief) {
struct array *ap;
char *d;
diff --git a/Grow.c b/Grow.c
index 455c5f9..8722969 100755..100644
--- a/Grow.c
+++ b/Grow.c
@@ -58,9 +58,8 @@ int restore_backup(struct supertype *st,
for (dev = content->devs; dev; dev = dev->next) {
char buf[22];
int fd;
- sprintf(buf, "%d:%d",
- dev->disk.major,
- dev->disk.minor);
+
+ sprintf(buf, "%d:%d", dev->disk.major, dev->disk.minor);
fd = dev_open(buf, O_RDWR);
if (dev->disk.raid_disk >= 0)
@@ -109,13 +108,13 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
*/
struct mdinfo info;
- struct stat stb;
+ dev_t rdev;
int nfd, fd2;
int d, nd;
struct supertype *st = NULL;
char *subarray = NULL;
- if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
+ if (md_get_array_info(fd, &info.array) < 0) {
pr_err("cannot get array info for %s\n", devname);
return 1;
}
@@ -145,9 +144,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
free(st);
return 1;
}
- fstat(nfd, &stb);
- if ((stb.st_mode & S_IFMT) != S_IFBLK) {
- pr_err("%s is not a block device!\n", newdev);
+ if (!fstat_is_blkdev(nfd, newdev, &rdev)) {
close(nfd);
free(st);
return 1;
@@ -161,17 +158,15 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
st->ss->free_super(st);
disk.number = d;
- if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
- pr_err("cannot get device detail for device %d\n",
- d);
+ if (md_get_disk_info(fd, &disk) < 0) {
+ pr_err("cannot get device detail for device %d\n", d);
close(nfd);
free(st);
return 1;
}
dv = map_dev(disk.major, disk.minor, 1);
if (!dv) {
- pr_err("cannot find device file for device %d\n",
- d);
+ pr_err("cannot find device file for device %d\n", d);
close(nfd);
free(st);
return 1;
@@ -198,16 +193,14 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
*/
info.disk.number = d;
- info.disk.major = major(stb.st_rdev);
- info.disk.minor = minor(stb.st_rdev);
+ info.disk.major = major(rdev);
+ info.disk.minor = minor(rdev);
info.disk.raid_disk = d;
info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
- st->ss->update_super(st, &info, "linear-grow-new", newdev,
- 0, 0, NULL);
+ st->ss->update_super(st, &info, "linear-grow-new", newdev, 0, 0, NULL);
if (st->ss->store_super(st, nfd)) {
- pr_err("Cannot store new superblock on %s\n",
- newdev);
+ pr_err("Cannot store new superblock on %s\n", newdev);
close(nfd);
return 1;
}
@@ -221,7 +214,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
* Now go through and update all superblocks
*/
- if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
+ if (md_get_array_info(fd, &info.array) < 0) {
pr_err("cannot get array info for %s\n", devname);
return 1;
}
@@ -232,15 +225,13 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
char *dv;
disk.number = d;
- if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
- pr_err("cannot get device detail for device %d\n",
- d);
+ if (md_get_disk_info(fd, &disk) < 0) {
+ pr_err("cannot get device detail for device %d\n", d);
return 1;
}
dv = map_dev(disk.major, disk.minor, 1);
if (!dv) {
- pr_err("cannot find device file for device %d\n",
- d);
+ pr_err("cannot find device file for device %d\n", d);
return 1;
}
fd2 = dev_open(dv, O_RDWR);
@@ -288,14 +279,8 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
struct supertype *st;
char *subarray = NULL;
int major = BITMAP_MAJOR_HI;
- int vers = md_get_version(fd);
unsigned long long bitmapsize, array_size;
-
- if (vers < 9003) {
- major = BITMAP_MAJOR_HOSTENDIAN;
- pr_err("Warning - bitmaps created on this kernel are not portable\n"
- " between different architectures. Consider upgrading the Linux kernel.\n");
- }
+ struct mdinfo *mdi;
/*
* We only ever get called if s->bitmap_file is != NULL, so this check
@@ -323,18 +308,17 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
}
return 0;
}
- pr_err("%s already has a bitmap (%s)\n",
- devname, bmf.pathname);
+ pr_err("%s already has a bitmap (%s)\n", devname, bmf.pathname);
return 1;
}
- if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+ if (md_get_array_info(fd, &array) != 0) {
pr_err("cannot get array status for %s\n", devname);
return 1;
}
if (array.state & (1 << MD_SB_BITMAP_PRESENT)) {
if (strcmp(s->bitmap_file, "none")==0) {
array.state &= ~(1 << MD_SB_BITMAP_PRESENT);
- if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ if (md_set_array_info(fd, &array) != 0) {
if (array.state & (1 << MD_SB_CLUSTERED))
pr_err("failed to remove clustered bitmap.\n");
else
@@ -375,6 +359,12 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
ncopies = (array.layout & 255) * ((array.layout >> 8) & 255);
bitmapsize = bitmapsize * array.raid_disks / ncopies;
+
+ if (strcmp(s->bitmap_file, "clustered") == 0 &&
+ !is_near_layout_10(array.layout)) {
+ pr_err("only near layout is supported with clustered raid10\n");
+ return 1;
+ }
}
st = super_by_fd(fd, &subarray);
@@ -389,12 +379,23 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
free(st);
return 1;
}
+
+ mdi = sysfs_read(fd, NULL, GET_CONSISTENCY_POLICY);
+ if (mdi) {
+ if (mdi->consistency_policy == CONSISTENCY_POLICY_PPL) {
+ pr_err("Cannot add bitmap to array with PPL\n");
+ free(mdi);
+ free(st);
+ return 1;
+ }
+ free(mdi);
+ }
+
if (strcmp(s->bitmap_file, "internal") == 0 ||
strcmp(s->bitmap_file, "clustered") == 0) {
int rv;
int d;
int offset_setable = 0;
- struct mdinfo *mdi;
if (st->ss->add_internal_bitmap == NULL) {
pr_err("Internal bitmaps not supported with %s metadata\n", st->ss->name);
return 1;
@@ -410,7 +411,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
int fd2;
disk.number = d;
- if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ if (md_get_disk_info(fd, &disk) < 0)
continue;
if (disk.major == 0 && disk.minor == 0)
continue;
@@ -443,14 +444,18 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
}
if (offset_setable) {
st->ss->getinfo_super(st, mdi, NULL);
- sysfs_init(mdi, fd, NULL);
+ if (sysfs_init(mdi, fd, NULL)) {
+ pr_err("failed to intialize sysfs.\n");
+ free(mdi);
+ }
rv = sysfs_set_num_signed(mdi, NULL, "bitmap/location",
mdi->bitmap_offset);
+ free(mdi);
} else {
if (strcmp(s->bitmap_file, "clustered") == 0)
array.state |= (1 << MD_SB_CLUSTERED);
array.state |= (1 << MD_SB_BITMAP_PRESENT);
- rv = ioctl(fd, SET_ARRAY_INFO, &array);
+ rv = md_set_array_info(fd, &array);
}
if (rv < 0) {
if (errno == EBUSY)
@@ -470,7 +475,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
char *dv;
int fd2;
disk.number = d;
- if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ if (md_get_disk_info(fd, &disk) < 0)
continue;
if ((disk.major==0 && disk.minor == 0) ||
(disk.state & (1 << MD_DISK_REMOVED)))
@@ -515,6 +520,190 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
return 0;
}
+int Grow_consistency_policy(char *devname, int fd, struct context *c, struct shape *s)
+{
+ struct supertype *st;
+ struct mdinfo *sra;
+ struct mdinfo *sd;
+ char *subarray = NULL;
+ int ret = 0;
+ char container_dev[PATH_MAX];
+ char buf[20];
+
+ if (s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
+ s->consistency_policy != CONSISTENCY_POLICY_PPL) {
+ pr_err("Operation not supported for consistency policy %s\n",
+ map_num(consistency_policies, s->consistency_policy));
+ return 1;
+ }
+
+ st = super_by_fd(fd, &subarray);
+ if (!st)
+ return 1;
+
+ sra = sysfs_read(fd, NULL, GET_CONSISTENCY_POLICY|GET_LEVEL|
+ GET_DEVS|GET_STATE);
+ if (!sra) {
+ ret = 1;
+ goto free_st;
+ }
+
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ !st->ss->write_init_ppl) {
+ pr_err("%s metadata does not support PPL\n", st->ss->name);
+ ret = 1;
+ goto free_info;
+ }
+
+ if (sra->array.level != 5) {
+ pr_err("Operation not supported for array level %d\n",
+ sra->array.level);
+ ret = 1;
+ goto free_info;
+ }
+
+ if (sra->consistency_policy == (unsigned)s->consistency_policy) {
+ pr_err("Consistency policy is already %s\n",
+ map_num(consistency_policies, s->consistency_policy));
+ ret = 1;
+ goto free_info;
+ } else if (sra->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
+ sra->consistency_policy != CONSISTENCY_POLICY_PPL) {
+ pr_err("Current consistency policy is %s, cannot change to %s\n",
+ map_num(consistency_policies, sra->consistency_policy),
+ map_num(consistency_policies, s->consistency_policy));
+ ret = 1;
+ goto free_info;
+ }
+
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
+ if (sysfs_get_str(sra, NULL, "sync_action", buf, 20) <= 0) {
+ ret = 1;
+ goto free_info;
+ } else if (strcmp(buf, "reshape\n") == 0) {
+ pr_err("PPL cannot be enabled when reshape is in progress\n");
+ ret = 1;
+ goto free_info;
+ }
+ }
+
+ if (subarray) {
+ char *update;
+
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL)
+ update = "ppl";
+ else
+ update = "no-ppl";
+
+ sprintf(container_dev, "/dev/%s", st->container_devnm);
+
+ ret = Update_subarray(container_dev, subarray, update, NULL,
+ c->verbose);
+ if (ret)
+ goto free_info;
+ }
+
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
+ struct mdinfo info;
+
+ if (subarray) {
+ struct mdinfo *mdi;
+ int cfd;
+
+ cfd = open(container_dev, O_RDWR|O_EXCL);
+ if (cfd < 0) {
+ pr_err("Failed to open %s\n", container_dev);
+ ret = 1;
+ goto free_info;
+ }
+
+ ret = st->ss->load_container(st, cfd, st->container_devnm);
+ close(cfd);
+
+ if (ret) {
+ pr_err("Cannot read superblock for %s\n",
+ container_dev);
+ goto free_info;
+ }
+
+ mdi = st->ss->container_content(st, subarray);
+ info = *mdi;
+ free(mdi);
+ }
+
+ for (sd = sra->devs; sd; sd = sd->next) {
+ int dfd;
+ char *devpath;
+
+ devpath = map_dev(sd->disk.major, sd->disk.minor, 0);
+ dfd = dev_open(devpath, O_RDWR);
+ if (dfd < 0) {
+ pr_err("Failed to open %s\n", devpath);
+ ret = 1;
+ goto free_info;
+ }
+
+ if (!subarray) {
+ ret = st->ss->load_super(st, dfd, NULL);
+ if (ret) {
+ pr_err("Failed to load super-block.\n");
+ close(dfd);
+ goto free_info;
+ }
+
+ ret = st->ss->update_super(st, sra, "ppl",
+ devname,
+ c->verbose, 0, NULL);
+ if (ret) {
+ close(dfd);
+ st->ss->free_super(st);
+ goto free_info;
+ }
+ st->ss->getinfo_super(st, &info, NULL);
+ }
+
+ ret |= sysfs_set_num(sra, sd, "ppl_sector",
+ info.ppl_sector);
+ ret |= sysfs_set_num(sra, sd, "ppl_size",
+ info.ppl_size);
+
+ if (ret) {
+ pr_err("Failed to set PPL attributes for %s\n",
+ sd->sys_name);
+ close(dfd);
+ st->ss->free_super(st);
+ goto free_info;
+ }
+
+ ret = st->ss->write_init_ppl(st, &info, dfd);
+ if (ret)
+ pr_err("Failed to write PPL\n");
+
+ close(dfd);
+
+ if (!subarray)
+ st->ss->free_super(st);
+
+ if (ret)
+ goto free_info;
+ }
+ }
+
+ ret = sysfs_set_str(sra, NULL, "consistency_policy",
+ map_num(consistency_policies,
+ s->consistency_policy));
+ if (ret)
+ pr_err("Failed to change array consistency policy\n");
+
+free_info:
+ sysfs_free(sra);
+free_st:
+ free(st);
+ free(subarray);
+
+ return ret;
+}
+
/*
* When reshaping an array we might need to backup some data.
* This is written to all spares with a 'super_block' describing it.
@@ -634,8 +823,8 @@ static void unfreeze(struct supertype *st)
char buf[20];
if (sra &&
- sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0
- && strcmp(buf, "frozen\n") == 0)
+ sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0 &&
+ strcmp(buf, "frozen\n") == 0)
sysfs_set_str(sra, NULL, "sync_action", "idle");
sysfs_free(sra);
}
@@ -663,8 +852,7 @@ static int reshape_super(struct supertype *st, unsigned long long size,
/* nothing extra to check in the native case */
if (!st->ss->external)
return 0;
- if (!st->ss->reshape_super ||
- !st->ss->manage_reshape) {
+ if (!st->ss->reshape_super || !st->ss->manage_reshape) {
pr_err("%s metadata does not support reshape\n",
st->ss->name);
return 1;
@@ -742,7 +930,8 @@ int start_reshape(struct mdinfo *sra, int already_running,
if (!already_running && err == 0) {
int cnt = 5;
do {
- err = sysfs_set_str(sra, NULL, "sync_action", "reshape");
+ err = sysfs_set_str(sra, NULL, "sync_action",
+ "reshape");
if (err)
sleep(1);
} while (err && errno == EBUSY && cnt-- > 0);
@@ -901,10 +1090,8 @@ int reshape_prepare_fdlist(char *devname,
continue;
if (sd->disk.state & (1<<MD_DISK_SYNC) &&
sd->disk.raid_disk < raid_disks) {
- char *dn = map_dev(sd->disk.major,
- sd->disk.minor, 1);
- fdlist[sd->disk.raid_disk]
- = dev_open(dn, O_RDONLY);
+ char *dn = map_dev(sd->disk.major, sd->disk.minor, 1);
+ fdlist[sd->disk.raid_disk] = dev_open(dn, O_RDONLY);
offsets[sd->disk.raid_disk] = sd->data_offset*512;
if (fdlist[sd->disk.raid_disk] < 0) {
pr_err("%s: cannot open component %s\n",
@@ -914,8 +1101,7 @@ int reshape_prepare_fdlist(char *devname,
}
} else if (backup_file == NULL) {
/* spare */
- char *dn = map_dev(sd->disk.major,
- sd->disk.minor, 1);
+ char *dn = map_dev(sd->disk.major, sd->disk.minor, 1);
fdlist[d] = dev_open(dn, O_RDWR);
offsets[d] = (sd->data_offset + sra->component_size - blocks - 8)*512;
if (fdlist[d] < 0) {
@@ -1084,8 +1270,7 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
* raid5 with 2 disks, or
* raid0 with 1 disk
*/
- if (info->new_level > 1 &&
- (info->component_size & 7))
+ if (info->new_level > 1 && (info->component_size & 7))
return "Cannot convert RAID1 of this size - reduce size to multiple of 4K first.";
if (info->new_level == 0) {
if (info->delta_disks != UnSet &&
@@ -1103,12 +1288,9 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
re->level = 1;
return NULL;
}
- if (info->array.raid_disks != 2 &&
- info->new_level == 5)
+ if (info->array.raid_disks != 2 && info->new_level == 5)
return "Can only convert a 2-device array to RAID5";
- if (info->array.raid_disks == 2 &&
- info->new_level == 5) {
-
+ if (info->array.raid_disks == 2 && info->new_level == 5) {
re->level = 5;
re->before.data_disks = 1;
if (info->delta_disks != UnSet &&
@@ -1138,17 +1320,22 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
case 0:
if ((info->array.layout & ~0xff) != 0x100)
return "Cannot Grow RAID10 with far/offset layout";
- /* number of devices must be multiple of number of copies */
- if (info->array.raid_disks % (info->array.layout & 0xff))
+ /*
+ * number of devices must be multiple of
+ * number of copies
+ */
+ if (info->array.raid_disks %
+ (info->array.layout & 0xff))
return "RAID10 layout too complex for Grow operation";
- new_disks = (info->array.raid_disks
- / (info->array.layout & 0xff));
+ new_disks = (info->array.raid_disks /
+ (info->array.layout & 0xff));
if (info->delta_disks == UnSet)
info->delta_disks = (new_disks
- info->array.raid_disks);
- if (info->delta_disks != new_disks - info->array.raid_disks)
+ if (info->delta_disks !=
+ new_disks - info->array.raid_disks)
return "New number of raid-devices impossible for RAID10";
if (info->new_chunk &&
info->new_chunk != info->array.chunk_size)
@@ -1219,7 +1406,8 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
case 0:
/* RAID0 can be converted to RAID10, or to RAID456 */
if (info->new_level == 10) {
- if (info->new_layout == UnSet && info->delta_disks == UnSet) {
+ if (info->new_layout == UnSet &&
+ info->delta_disks == UnSet) {
/* Assume near=2 layout */
info->new_layout = 0x102;
info->delta_disks = info->array.raid_disks;
@@ -1227,8 +1415,8 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
if (info->new_layout == UnSet) {
int copies = 1 + (info->delta_disks
/ info->array.raid_disks);
- if (info->array.raid_disks * (copies-1)
- != info->delta_disks)
+ if (info->array.raid_disks * (copies-1) !=
+ info->delta_disks)
return "Impossible number of devices for RAID0->RAID10";
info->new_layout = 0x100 + copies;
}
@@ -1432,7 +1620,8 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
* can leave it unchanged, else we must
* fail
*/
- ls = map_num(r6layout, info->new_layout);
+ ls = map_num(r6layout,
+ info->new_layout);
if (!ls ||
strcmp(ls+strlen(ls)-2, "-6") != 0)
return "Please specify new layout";
@@ -1458,16 +1647,19 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
if (info->delta_disks == UnSet)
info->delta_disks = delta_parity;
- re->after.data_disks = (re->before.data_disks
- + info->delta_disks
- - delta_parity);
+ re->after.data_disks =
+ (re->before.data_disks + info->delta_disks - delta_parity);
+
switch (re->level) {
- case 6: re->parity = 2;
+ case 6:
+ re->parity = 2;
break;
case 4:
- case 5: re->parity = 1;
+ case 5:
+ re->parity = 1;
break;
- default: re->parity = 0;
+ default:
+ re->parity = 0;
break;
}
/* So we have a restripe operation, we need to calculate the number
@@ -1501,8 +1693,7 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
re->backup_blocks = compute_backup_blocks(
info->new_chunk, info->array.chunk_size,
- re->after.data_disks,
- re->before.data_disks);
+ re->after.data_disks, re->before.data_disks);
re->min_offset_change = re->backup_blocks / re->before.data_disks;
re->new_size = info->component_size * re->after.data_disks;
@@ -1521,12 +1712,11 @@ static int set_array_size(struct supertype *st, struct mdinfo *sra,
if (text_version == NULL)
text_version = sra->text_version;
- subarray = strchr(text_version+1, '/')+1;
+ subarray = strchr(text_version + 1, '/')+1;
info = st->ss->container_content(st, subarray);
if (info) {
unsigned long long current_size = 0;
- unsigned long long new_size =
- info->custom_array_size/2;
+ unsigned long long new_size = info->custom_array_size/2;
if (sysfs_get_ll(sra, NULL, "array_size", &current_size) == 0 &&
new_size > current_size) {
@@ -1599,13 +1789,18 @@ int Grow_reshape(char *devname, int fd,
struct mdinfo info;
struct mdinfo *sra;
- if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
+ if (md_get_array_info(fd, &array) < 0) {
pr_err("%s is not an active md array - aborting\n",
devname);
return 1;
}
- if (data_offset != INVALID_SECTORS && array.level != 10
- && (array.level < 4 || array.level > 6)) {
+ if (s->level != UnSet && s->chunk) {
+ pr_err("Cannot change array level in the same operation as changing chunk size.\n");
+ return 1;
+ }
+
+ if (data_offset != INVALID_SECTORS && array.level != 10 &&
+ (array.level < 4 || array.level > 6)) {
pr_err("--grow --data-offset not yet supported\n");
return 1;
}
@@ -1617,14 +1812,20 @@ int Grow_reshape(char *devname, int fd,
return 1;
}
- if (s->raiddisks && s->raiddisks < array.raid_disks && array.level > 1 &&
- get_linux_version() < 2006032 &&
+ if (s->raiddisks && s->raiddisks < array.raid_disks &&
+ array.level > 1 && get_linux_version() < 2006032 &&
!check_env("MDADM_FORCE_FEWER")) {
pr_err("reducing the number of devices is not safe before Linux 2.6.32\n"
" Please use a newer kernel\n");
return 1;
}
+ if (array.level > 1 && s->size > 1 &&
+ (array.chunk_size / 1024) > (int)s->size) {
+ pr_err("component size must be larger than chunk size.\n");
+ return 1;
+ }
+
st = super_by_fd(fd, &subarray);
if (!st) {
pr_err("Unable to determine metadata format for %s\n", devname);
@@ -1638,7 +1839,7 @@ int Grow_reshape(char *devname, int fd,
(array.state & (1<<MD_SB_BITMAP_PRESENT)) &&
!(array.state & (1<<MD_SB_CLUSTERED))) {
array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
- if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
+ if (md_set_array_info(fd, &array)!= 0) {
pr_err("failed to remove internal bitmap.\n");
return 1;
}
@@ -1649,7 +1850,7 @@ int Grow_reshape(char *devname, int fd,
* pre-requisite spare devices (mdmon owns final validation)
*/
if (st->ss->external) {
- int rv;
+ int retval;
if (subarray) {
container = st->container_devnm;
@@ -1661,17 +1862,15 @@ int Grow_reshape(char *devname, int fd,
fd = cfd;
}
if (cfd < 0) {
- pr_err("Unable to open container for %s\n",
- devname);
+ pr_err("Unable to open container for %s\n", devname);
free(subarray);
return 1;
}
- rv = st->ss->load_container(st, cfd, NULL);
+ retval = st->ss->load_container(st, cfd, NULL);
- if (rv) {
- pr_err("Cannot read superblock for %s\n",
- devname);
+ if (retval) {
+ pr_err("Cannot read superblock for %s\n", devname);
free(subarray);
return 1;
}
@@ -1688,10 +1887,11 @@ int Grow_reshape(char *devname, int fd,
/* check if reshape is allowed based on metadata
* indications stored in content.array.status
*/
- if (content->array.state & (1<<MD_SB_BLOCK_VOLUME))
+ if (content->array.state &
+ (1 << MD_SB_BLOCK_VOLUME))
allow_reshape = 0;
- if (content->array.state
- & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE))
+ if (content->array.state &
+ (1 << MD_SB_BLOCK_CONTAINER_RESHAPE))
allow_reshape = 0;
if (!allow_reshape) {
pr_err("cannot reshape arrays in container with unsupported metadata: %s(%s)\n",
@@ -1700,6 +1900,13 @@ int Grow_reshape(char *devname, int fd,
free(subarray);
return 1;
}
+ if (content->consistency_policy ==
+ CONSISTENCY_POLICY_PPL) {
+ pr_err("Operation not supported when ppl consistency policy is enabled\n");
+ sysfs_free(cc);
+ free(subarray);
+ return 1;
+ }
}
sysfs_free(cc);
}
@@ -1711,7 +1918,8 @@ int Grow_reshape(char *devname, int fd,
for (dv = devlist; dv; dv = dv->next)
added_disks++;
if (s->raiddisks > array.raid_disks &&
- array.spare_disks +added_disks < (s->raiddisks - array.raid_disks) &&
+ array.spare_disks + added_disks <
+ (s->raiddisks - array.raid_disks) &&
!c->force) {
pr_err("Need %d spare%s to avoid degraded array, and only have %d.\n"
" Use --force to over-ride this check.\n",
@@ -1721,8 +1929,8 @@ int Grow_reshape(char *devname, int fd,
return 1;
}
- sra = sysfs_read(fd, NULL, GET_LEVEL | GET_DISKS | GET_DEVS
- | GET_STATE | GET_VERSION);
+ sra = sysfs_read(fd, NULL, GET_LEVEL | GET_DISKS | GET_DEVS |
+ GET_STATE | GET_VERSION);
if (sra) {
if (st->ss->external && subarray == NULL) {
array.level = LEVEL_CONTAINER;
@@ -1745,7 +1953,8 @@ int Grow_reshape(char *devname, int fd,
}
/* ========= set size =============== */
- if (s->size > 0 && (s->size == MAX_SIZE || s->size != (unsigned)array.size)) {
+ if (s->size > 0 &&
+ (s->size == MAX_SIZE || s->size != (unsigned)array.size)) {
unsigned long long orig_size = get_component_size(fd)/2;
unsigned long long min_csize;
struct mdinfo *mdi;
@@ -1761,7 +1970,8 @@ int Grow_reshape(char *devname, int fd,
}
if (reshape_super(st, s->size, UnSet, UnSet, 0, 0, UnSet, NULL,
- devname, APPLY_METADATA_CHANGES, c->verbose > 0)) {
+ devname, APPLY_METADATA_CHANGES,
+ c->verbose > 0)) {
rv = 1;
goto release;
}
@@ -1780,7 +1990,8 @@ int Grow_reshape(char *devname, int fd,
sizeinfo->array.layout,
sizeinfo->array.raid_disks);
new_size /= data_disks;
- dprintf("Metadata size correction from %llu to %llu (%llu)\n", orig_size, new_size,
+ dprintf("Metadata size correction from %llu to %llu (%llu)\n",
+ orig_size, new_size,
new_size * data_disks);
s->size = new_size;
sysfs_free(sizeinfo);
@@ -1793,33 +2004,24 @@ int Grow_reshape(char *devname, int fd,
* understands '0' to mean 'max'.
*/
min_csize = 0;
- rv = 0;
for (mdi = sra->devs; mdi; mdi = mdi->next) {
- if (sysfs_set_num(sra, mdi, "size",
- s->size == MAX_SIZE ? 0 : s->size) < 0) {
- /* Probably kernel refusing to let us
- * reduce the size - not an error.
- */
- break;
- }
+ sysfs_set_num(sra, mdi, "size",
+ s->size == MAX_SIZE ? 0 : s->size);
if (array.not_persistent == 0 &&
array.major_version == 0 &&
get_linux_version() < 3001000) {
/* Dangerous to allow size to exceed 2TB */
unsigned long long csize;
- if (sysfs_get_ll(sra, mdi, "size", &csize) == 0) {
+ if (sysfs_get_ll(sra, mdi, "size",
+ &csize) == 0) {
if (csize >= 2ULL*1024*1024*1024)
csize = 2ULL*1024*1024*1024;
- if ((min_csize == 0 || (min_csize
- > csize)))
+ if ((min_csize == 0 ||
+ (min_csize > csize)))
min_csize = csize;
}
}
}
- if (rv) {
- pr_err("Cannot set size on array members.\n");
- goto size_change_error;
- }
if (min_csize && s->size > min_csize) {
pr_err("Cannot safely make this array use more than 2TB per device on this kernel.\n");
rv = 1;
@@ -1834,14 +2036,13 @@ int Grow_reshape(char *devname, int fd,
}
if (st->ss->external) {
if (sra->array.level == 0) {
- rv = sysfs_set_str(sra, NULL, "level",
- "raid5");
+ rv = sysfs_set_str(sra, NULL, "level", "raid5");
if (!rv) {
raid0_takeover = 1;
/* get array parameters after takeover
* to change one parameter at time only
*/
- rv = ioctl(fd, GET_ARRAY_INFO, &array);
+ rv = md_get_array_info(fd, &array);
}
}
/* make sure mdmon is
@@ -1867,7 +2068,7 @@ int Grow_reshape(char *devname, int fd,
else
rv = -1;
} else {
- rv = ioctl(fd, SET_ARRAY_INFO, &array);
+ rv = md_set_array_info(fd, &array);
/* manage array size when it is managed externally
*/
@@ -1883,7 +2084,7 @@ int Grow_reshape(char *devname, int fd,
/* go back to raid0, drop parity disk
*/
sysfs_set_str(sra, NULL, "level", "raid0");
- ioctl(fd, GET_ARRAY_INFO, &array);
+ md_get_array_info(fd, &array);
}
size_change_error:
@@ -1909,10 +2110,11 @@ size_change_error:
* a backport has been arranged.
*/
if (sra == NULL ||
- sysfs_set_str(sra, NULL, "resync_start", "none") < 0)
+ sysfs_set_str(sra, NULL, "resync_start",
+ "none") < 0)
pr_err("--assume-clean not supported with --grow on this kernel\n");
}
- ioctl(fd, GET_ARRAY_INFO, &array);
+ md_get_array_info(fd, &array);
s->size = get_component_size(fd)/2;
if (s->size == 0)
s->size = array.size;
@@ -1939,8 +2141,7 @@ size_change_error:
(s->raiddisks == 0 || s->raiddisks == array.raid_disks)) {
/* Nothing more to do */
if (!changed && c->verbose >= 0)
- pr_err("%s: no change requested\n",
- devname);
+ pr_err("%s: no change requested\n", devname);
goto release;
}
@@ -1954,6 +2155,7 @@ size_change_error:
array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) ||
(s->level == 0 && array.level == 1 && sra)) {
int err;
+
err = remove_disks_for_takeover(st, sra, array.layout);
if (err) {
dprintf("Array cannot be reshaped\n");
@@ -1972,7 +2174,11 @@ size_change_error:
memset(&info, 0, sizeof(info));
info.array = array;
- sysfs_init(&info, fd, NULL);
+ if (sysfs_init(&info, fd, NULL)) {
+ pr_err("failed to intialize sysfs.\n");
+ rv = 1;
+ goto release;
+ }
strcpy(info.text_version, sra->text_version);
info.component_size = s->size*2;
info.new_level = s->level;
@@ -2078,12 +2284,12 @@ size_change_error:
rv =1 ;
}
if (s->layout_str) {
- if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+ if (md_get_array_info(fd, &array) != 0) {
dprintf("Cannot get array information.\n");
goto release;
}
array.layout = info.new_layout;
- if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ if (md_set_array_info(fd, &array) != 0) {
pr_err("failed to set new layout\n");
rv = 1;
} else if (c->verbose >= 0)
@@ -2104,7 +2310,8 @@ size_change_error:
fd = -1;
}
rv = reshape_container(container, devname, -1, st, &info,
- c->force, c->backup_file, c->verbose, 0, 0, 0);
+ c->force, c->backup_file, c->verbose,
+ 0, 0, 0);
frozen = 0;
} else {
/* get spare devices from external metadata
@@ -2126,15 +2333,15 @@ size_change_error:
if (reshape_super(st, 0, info.new_level,
info.new_layout, info.new_chunk,
info.array.raid_disks, info.delta_disks,
- c->backup_file, devname, APPLY_METADATA_CHANGES,
- c->verbose)) {
+ c->backup_file, devname,
+ APPLY_METADATA_CHANGES, c->verbose)) {
rv = 1;
goto release;
}
sync_metadata(st);
rv = reshape_array(container, fd, devname, st, &info, c->force,
- devlist, data_offset, c->backup_file, c->verbose,
- 0, 0, 0);
+ devlist, data_offset, c->backup_file,
+ c->verbose, 0, 0, 0);
frozen = 0;
}
release:
@@ -2305,8 +2512,10 @@ static int set_new_data_offset(struct mdinfo *sra, struct supertype *st,
dir = -1;
else
dir = 1;
- } else if ((data_offset <= info2.data_offset && dir == 1) ||
- (data_offset >= info2.data_offset && dir == -1)) {
+ } else if ((data_offset <= info2.data_offset &&
+ dir == 1) ||
+ (data_offset >= info2.data_offset &&
+ dir == -1)) {
pr_err("%s: differing data offsets on devices make this --data-offset setting impossible\n",
dn);
goto release;
@@ -2431,8 +2640,7 @@ static int set_new_data_offset(struct mdinfo *sra, struct supertype *st,
}
if (err < 0) {
if (errno == E2BIG && data_offset != INVALID_SECTORS) {
- pr_err("data-offset is too big for %s\n",
- dn);
+ pr_err("data-offset is too big for %s\n", dn);
goto release;
}
if (sd == sra->devs &&
@@ -2442,8 +2650,7 @@ static int set_new_data_offset(struct mdinfo *sra, struct supertype *st,
* For RAID5/6 this is not fatal
*/
return 1;
- pr_err("Cannot set new_offset for %s\n",
- dn);
+ pr_err("Cannot set new_offset for %s\n", dn);
break;
}
}
@@ -2489,8 +2696,7 @@ static int raid10_reshape(char *container, int fd, char *devname,
GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK
);
if (!sra) {
- pr_err("%s: Cannot get array details from sysfs\n",
- devname);
+ pr_err("%s: Cannot get array details from sysfs\n", devname);
goto release;
}
min = reshape->min_offset_change;
@@ -2498,18 +2704,16 @@ static int raid10_reshape(char *container, int fd, char *devname,
if (info->delta_disks)
sysfs_set_str(sra, NULL, "reshape_direction",
info->delta_disks < 0 ? "backwards" : "forwards");
- if (info->delta_disks < 0 &&
- info->space_after < min) {
+ if (info->delta_disks < 0 && info->space_after < min) {
int rv = sysfs_set_num(sra, NULL, "component_size",
- (sra->component_size -
- min)/2);
+ (sra->component_size - min)/2);
if (rv) {
pr_err("cannot reduce component size\n");
goto release;
}
}
- err = set_new_data_offset(sra, st, devname, info->delta_disks, data_offset,
- min, 0);
+ err = set_new_data_offset(sra, st, devname, info->delta_disks,
+ data_offset, min, 0);
if (err == 1) {
pr_err("Cannot set new_data_offset: RAID10 reshape not\n");
cont_err("supported on this kernel\n");
@@ -2520,10 +2724,12 @@ static int raid10_reshape(char *container, int fd, char *devname,
if (!err && sysfs_set_num(sra, NULL, "chunk_size", info->new_chunk) < 0)
err = errno;
- if (!err && sysfs_set_num(sra, NULL, "layout", reshape->after.layout) < 0)
+ if (!err && sysfs_set_num(sra, NULL, "layout",
+ reshape->after.layout) < 0)
err = errno;
- if (!err && sysfs_set_num(sra, NULL, "raid_disks",
- info->array.raid_disks + info->delta_disks) < 0)
+ if (!err &&
+ sysfs_set_num(sra, NULL, "raid_disks",
+ info->array.raid_disks + info->delta_disks) < 0)
err = errno;
if (!err && sysfs_set_str(sra, NULL, "sync_action", "reshape") < 0)
err = errno;
@@ -2641,14 +2847,13 @@ static int impose_reshape(struct mdinfo *sra,
* reshape->after.data_disks);
}
- ioctl(fd, GET_ARRAY_INFO, &array);
+ md_get_array_info(fd, &array);
if (info->array.chunk_size == info->new_chunk &&
reshape->before.layout == reshape->after.layout &&
st->ss->external == 0) {
/* use SET_ARRAY_INFO but only if reshape hasn't started */
array.raid_disks = reshape->after.data_disks + reshape->parity;
- if (!restart &&
- ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ if (!restart && md_set_array_info(fd, &array) != 0) {
int err = errno;
pr_err("Cannot set device shape for %s: %s\n",
@@ -2675,8 +2880,7 @@ static int impose_reshape(struct mdinfo *sra,
reshape->parity) < 0)
err = errno;
if (err) {
- pr_err("Cannot set device shape for %s\n",
- devname);
+ pr_err("Cannot set device shape for %s\n", devname);
if (err == EBUSY &&
(array.state & (1<<MD_SB_BITMAP_PRESENT)))
@@ -2694,11 +2898,14 @@ static int impose_level(int fd, int level, char *devname, int verbose)
char *c;
struct mdu_array_info_s array;
struct mdinfo info;
- sysfs_init(&info, fd, NULL);
- ioctl(fd, GET_ARRAY_INFO, &array);
- if (level == 0 &&
- (array.level >= 4 && array.level <= 6)) {
+ if (sysfs_init(&info, fd, NULL)) {
+ pr_err("failed to intialize sysfs.\n");
+ return 1;
+ }
+
+ md_get_array_info(fd, &array);
+ if (level == 0 && (array.level >= 4 && array.level <= 6)) {
/* To convert to RAID0 we need to fail and
* remove any non-data devices. */
int found = 0;
@@ -2706,57 +2913,46 @@ static int impose_level(int fd, int level, char *devname, int verbose)
int data_disks = array.raid_disks - 1;
if (array.level == 6)
data_disks -= 1;
- if (array.level == 5 &&
- array.layout != ALGORITHM_PARITY_N)
+ if (array.level == 5 && array.layout != ALGORITHM_PARITY_N)
return -1;
- if (array.level == 6 &&
- array.layout != ALGORITHM_PARITY_N_6)
+ if (array.level == 6 && array.layout != ALGORITHM_PARITY_N_6)
return -1;
sysfs_set_str(&info, NULL,"sync_action", "idle");
/* First remove any spares so no recovery starts */
for (d = 0, found = 0;
- d < MAX_DISKS && found < array.nr_disks;
- d++) {
+ d < MAX_DISKS && found < array.nr_disks; d++) {
mdu_disk_info_t disk;
disk.number = d;
- if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ if (md_get_disk_info(fd, &disk) < 0)
continue;
if (disk.major == 0 && disk.minor == 0)
continue;
found++;
- if ((disk.state & (1 << MD_DISK_ACTIVE))
- && disk.raid_disk < data_disks)
+ if ((disk.state & (1 << MD_DISK_ACTIVE)) &&
+ disk.raid_disk < data_disks)
/* keep this */
continue;
ioctl(fd, HOT_REMOVE_DISK,
makedev(disk.major, disk.minor));
}
/* Now fail anything left */
- ioctl(fd, GET_ARRAY_INFO, &array);
+ md_get_array_info(fd, &array);
for (d = 0, found = 0;
- d < MAX_DISKS && found < array.nr_disks;
- d++) {
- int cnt;
+ d < MAX_DISKS && found < array.nr_disks; d++) {
mdu_disk_info_t disk;
disk.number = d;
- if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ if (md_get_disk_info(fd, &disk) < 0)
continue;
if (disk.major == 0 && disk.minor == 0)
continue;
found++;
- if ((disk.state & (1 << MD_DISK_ACTIVE))
- && disk.raid_disk < data_disks)
+ if ((disk.state & (1 << MD_DISK_ACTIVE)) &&
+ disk.raid_disk < data_disks)
/* keep this */
continue;
ioctl(fd, SET_DISK_FAULTY,
makedev(disk.major, disk.minor));
- cnt = 5;
- while (ioctl(fd, HOT_REMOVE_DISK,
- makedev(disk.major, disk.minor)) < 0
- && errno == EBUSY
- && cnt--) {
- usleep(10000);
- }
+ hot_remove_disk(fd, makedev(disk.major, disk.minor), 1);
}
}
c = map_num(pers, level);
@@ -2772,8 +2968,7 @@ static int impose_level(int fd, int level, char *devname, int verbose)
return err;
}
if (verbose >= 0)
- pr_err("level of %s changed to %s\n",
- devname, c);
+ pr_err("level of %s changed to %s\n", devname, c);
}
return 0;
}
@@ -2808,12 +3003,11 @@ static int continue_via_systemd(char *devnm)
*/
close(2);
open("/dev/null", O_WRONLY);
- snprintf(pathbuf, sizeof(pathbuf), "mdadm-grow-continue@%s.service",
- devnm);
- status = execl("/usr/bin/systemctl", "systemctl",
- "start",
+ snprintf(pathbuf, sizeof(pathbuf),
+ "mdadm-grow-continue@%s.service", devnm);
+ status = execl("/usr/bin/systemctl", "systemctl", "restart",
pathbuf, NULL);
- status = execl("/bin/systemctl", "systemctl", "start",
+ status = execl("/bin/systemctl", "systemctl", "restart",
pathbuf, NULL);
exit(1);
case -1: /* Just do it ourselves. */
@@ -2860,7 +3054,7 @@ static int reshape_array(char *container, int fd, char *devname,
/* when reshaping a RAID0, the component_size might be zero.
* So try to fix that up.
*/
- if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+ if (md_get_array_info(fd, &array) != 0) {
dprintf("Cannot get array information.\n");
goto release;
}
@@ -2893,17 +3087,17 @@ static int reshape_array(char *container, int fd, char *devname,
pr_err("%s\n", msg);
goto release;
}
- if (restart &&
- (reshape.level != info->array.level ||
- reshape.before.layout != info->array.layout ||
- reshape.before.data_disks + reshape.parity
- != info->array.raid_disks - max(0, info->delta_disks))) {
+ if (restart && (reshape.level != info->array.level ||
+ reshape.before.layout != info->array.layout ||
+ reshape.before.data_disks + reshape.parity !=
+ info->array.raid_disks - max(0, info->delta_disks))) {
pr_err("reshape info is not in native format - cannot continue.\n");
goto release;
}
if (st->ss->external && restart && (info->reshape_progress == 0) &&
- !((sysfs_get_str(info, NULL, "sync_action", buf, sizeof(buf)) > 0) &&
+ !((sysfs_get_str(info, NULL, "sync_action",
+ buf, sizeof(buf)) > 0) &&
(strncmp(buf, "reshape", 7) == 0))) {
/* When reshape is restarted from '0', very begin of array
* it is possible that for external metadata reshape and array
@@ -2917,7 +3111,10 @@ static int reshape_array(char *container, int fd, char *devname,
restart = 0;
}
if (restart) {
- /* reshape already started. just skip to monitoring the reshape */
+ /*
+ * reshape already started. just skip to monitoring
+ * the reshape
+ */
if (reshape.backup_blocks == 0)
return 0;
if (restart & RESHAPE_NO_BACKUP)
@@ -2925,8 +3122,8 @@ static int reshape_array(char *container, int fd, char *devname,
/* Need 'sra' down at 'started:' */
sra = sysfs_read(fd, NULL,
- GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK|
- GET_CACHE);
+ GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|
+ GET_CHUNK|GET_CACHE);
if (!sra) {
pr_err("%s: Cannot get array details from sysfs\n",
devname);
@@ -2949,11 +3146,10 @@ static int reshape_array(char *container, int fd, char *devname,
for (dv = devlist; dv ; dv=dv->next)
added_disks++;
spares_needed = max(reshape.before.data_disks,
- reshape.after.data_disks)
- + reshape.parity - array.raid_disks;
+ reshape.after.data_disks) +
+ reshape.parity - array.raid_disks;
- if (!force &&
- info->new_level > 1 && info->array.level > 1 &&
+ if (!force && info->new_level > 1 && info->array.level > 1 &&
spares_needed > info->array.spare_disks + added_disks) {
pr_err("Need %d spare%s to avoid degraded array, and only have %d.\n"
" Use --force to over-ride this check.\n",
@@ -2969,8 +3165,7 @@ static int reshape_array(char *container, int fd, char *devname,
if ((info->new_level > 1 || info->new_level == 0) &&
spares_needed > info->array.spare_disks +added_disks) {
pr_err("Need %d spare%s to create working array, and only have %d.\n",
- spares_needed,
- spares_needed == 1 ? "" : "s",
+ spares_needed, spares_needed == 1 ? "" : "s",
info->array.spare_disks + added_disks);
goto release;
}
@@ -2992,8 +3187,7 @@ static int reshape_array(char *container, int fd, char *devname,
if (!mdmon_running(container))
start_mdmon(container);
ping_monitor(container);
- if (mdmon_running(container) &&
- st->update_tail == NULL)
+ if (mdmon_running(container) && st->update_tail == NULL)
st->update_tail = &st->updates;
}
}
@@ -3009,13 +3203,19 @@ static int reshape_array(char *container, int fd, char *devname,
struct mdinfo *d;
if (info2) {
- sysfs_init(info2, fd, st->devnm);
+ if (sysfs_init(info2, fd, st->devnm)) {
+ pr_err("unable to initialize sysfs for %s\n",
+ st->devnm);
+ free(info2);
+ goto release;
+ }
/* When increasing number of devices, we need to set
* new raid_disks before adding these, or they might
* be rejected.
*/
if (reshape.backup_blocks &&
- reshape.after.data_disks > reshape.before.data_disks)
+ reshape.after.data_disks >
+ reshape.before.data_disks)
subarray_set_num(container, info2, "raid_disks",
reshape.after.data_disks +
reshape.parity);
@@ -3036,8 +3236,7 @@ static int reshape_array(char *container, int fd, char *devname,
* level and frozen, we can safely add them.
*/
if (devlist) {
- if (Manage_subdevs(devname, fd, devlist, verbose,
- 0, NULL, 0))
+ if (Manage_subdevs(devname, fd, devlist, verbose, 0, NULL, 0))
goto release;
}
@@ -3048,7 +3247,7 @@ static int reshape_array(char *container, int fd, char *devname,
* some more changes: layout, raid_disks, chunk_size
*/
/* read current array info */
- if (ioctl(fd, GET_ARRAY_INFO, &array) != 0) {
+ if (md_get_array_info(fd, &array) != 0) {
dprintf("Cannot get array information.\n");
goto release;
}
@@ -3057,18 +3256,18 @@ static int reshape_array(char *container, int fd, char *devname,
if (info->new_layout != UnSet &&
info->new_layout != array.layout) {
array.layout = info->new_layout;
- if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ if (md_set_array_info(fd, &array) != 0) {
pr_err("failed to set new layout\n");
goto release;
} else if (verbose >= 0)
printf("layout for %s set to %d\n",
devname, array.layout);
}
- if (info->delta_disks != UnSet &&
- info->delta_disks != 0 &&
- array.raid_disks != (info->array.raid_disks + info->delta_disks)) {
+ if (info->delta_disks != UnSet && info->delta_disks != 0 &&
+ array.raid_disks !=
+ (info->array.raid_disks + info->delta_disks)) {
array.raid_disks += info->delta_disks;
- if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ if (md_set_array_info(fd, &array) != 0) {
pr_err("failed to set raid disks\n");
goto release;
} else if (verbose >= 0) {
@@ -3143,8 +3342,7 @@ static int reshape_array(char *container, int fd, char *devname,
/* Nothing to do. */
return 0;
return raid10_reshape(container, fd, devname, st, info,
- &reshape, data_offset,
- force, verbose);
+ &reshape, data_offset, force, verbose);
}
sra = sysfs_read(fd, NULL,
GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE|GET_CHUNK|
@@ -3228,15 +3426,13 @@ started:
* not so big that we reject it below.
* Try for 16 megabytes
*/
- while (blocks * 32 < sra->component_size &&
- blocks < 16*1024*2)
+ while (blocks * 32 < sra->component_size && blocks < 16*1024*2)
blocks *= 2;
} else
pr_err("Need to backup %luK of critical section..\n", blocks/2);
if (blocks >= sra->component_size/2) {
- pr_err("%s: Something wrong - reshape aborted\n",
- devname);
+ pr_err("%s: Something wrong - reshape aborted\n", devname);
goto release;
}
@@ -3249,9 +3445,8 @@ started:
offsets = xcalloc((1+nrdisks), sizeof(offsets[0]));
odisks = reshape.before.data_disks + reshape.parity;
- d = reshape_prepare_fdlist(devname, sra, odisks,
- nrdisks, blocks, backup_file,
- fdlist, offsets);
+ d = reshape_prepare_fdlist(devname, sra, odisks, nrdisks, blocks,
+ backup_file, fdlist, offsets);
if (d < odisks) {
goto release;
}
@@ -3272,8 +3467,7 @@ started:
if (!reshape_open_backup_file(backup_file, fd, devname,
(signed)blocks,
fdlist+d, offsets+d,
- sra->sys_name,
- restart)) {
+ sra->sys_name, restart)) {
goto release;
}
d++;
@@ -3281,8 +3475,8 @@ started:
}
update_cache_size(container, sra, info,
- min(reshape.before.data_disks, reshape.after.data_disks),
- blocks);
+ min(reshape.before.data_disks,
+ reshape.after.data_disks), blocks);
/* Right, everything seems fine. Let's kick things off.
* If only changing raid_disks, use ioctl, else use
@@ -3298,8 +3492,7 @@ started:
reshape.after.data_disks);
if (err) {
pr_err("Cannot %s reshape for %s\n",
- restart ? "continue" : "start",
- devname);
+ restart ? "continue" : "start", devname);
goto release;
}
if (restart)
@@ -3354,8 +3547,7 @@ started:
mds = mdstat_read(1, 0);
for (m = mds; m; m = m->next)
if (strcmp(m->devnm, sra->sys_name) == 0) {
- if (m->resync &&
- m->percent == RESYNC_DELAYED)
+ if (m->resync && m->percent == RESYNC_DELAYED)
delayed = 1;
if (m->resync == 0)
/* Haven't started the reshape thread
@@ -3387,15 +3579,12 @@ started:
/* metadata handler takes it from here */
done = st->ss->manage_reshape(
fd, sra, &reshape, st, blocks,
- fdlist, offsets,
- d - odisks, fdlist+odisks,
- offsets+odisks);
+ fdlist, offsets, d - odisks, fdlist + odisks,
+ offsets + odisks);
} else
done = child_monitor(
- fd, sra, &reshape, st, blocks,
- fdlist, offsets,
- d - odisks, fdlist+odisks,
- offsets+odisks);
+ fd, sra, &reshape, st, blocks, fdlist, offsets,
+ d - odisks, fdlist + odisks, offsets + odisks);
free(fdlist);
free(offsets);
@@ -3421,9 +3610,8 @@ started:
}
if (!st->ss->external &&
- !(reshape.before.data_disks != reshape.after.data_disks
- && info->custom_array_size) &&
- info->new_level == reshape.level &&
+ !(reshape.before.data_disks != reshape.after.data_disks &&
+ info->custom_array_size) && info->new_level == reshape.level &&
!forked) {
/* no need to wait for the reshape to finish as
* there is nothing more to do.
@@ -3447,8 +3635,7 @@ started:
/* set new array size if required customer_array_size is used
* by this metadata.
*/
- if (reshape.before.data_disks !=
- reshape.after.data_disks &&
+ if (reshape.before.data_disks != reshape.after.data_disks &&
info->custom_array_size)
set_array_size(st, info, info->text_version);
@@ -3587,7 +3774,8 @@ int reshape_container(char *container, char *devname,
fd = open_dev(mdstat->devnm);
if (fd < 0) {
- pr_err("Device %s cannot be opened for reshape.\n", adev);
+ pr_err("Device %s cannot be opened for reshape.\n",
+ adev);
break;
}
@@ -3608,7 +3796,12 @@ int reshape_container(char *container, char *devname,
}
strcpy(last_devnm, mdstat->devnm);
- sysfs_init(content, fd, mdstat->devnm);
+ if (sysfs_init(content, fd, mdstat->devnm)) {
+ pr_err("Unable to initialize sysfs for %s\n",
+ mdstat->devnm);
+ rv = 1;
+ break;
+ }
if (mdmon_running(container))
flush_mdmon(container);
@@ -3769,8 +3962,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
max_progress = backup_point;
else
max_progress =
- read_offset *
- reshape->after.data_disks;
+ read_offset * reshape->after.data_disks;
} else {
if (read_offset > write_offset - write_range)
/* Can only progress as far as has been backed up,
@@ -3785,8 +3977,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
else {
/* Can progress until metadata update is required */
max_progress =
- read_offset *
- reshape->after.data_disks;
+ read_offset * reshape->after.data_disks;
/* but data must be suspended */
if (max_progress < *suspend_point)
max_progress = *suspend_point;
@@ -3815,8 +4006,8 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
* a backup.
*/
if (advancing) {
- if ((need_backup > info->reshape_progress
- || info->array.major_version < 0) &&
+ if ((need_backup > info->reshape_progress ||
+ info->array.major_version < 0) &&
*suspend_point < info->reshape_progress + target) {
if (need_backup < *suspend_point + 2 * target)
*suspend_point = need_backup;
@@ -3835,7 +4026,8 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
*suspend_point > 0) {
*suspend_point = 0;
sysfs_set_num(info, NULL, "suspend_lo", 0);
- sysfs_set_num(info, NULL, "suspend_hi", need_backup);
+ sysfs_set_num(info, NULL, "suspend_hi",
+ need_backup);
}
} else {
/* Need to suspend continually */
@@ -3866,7 +4058,10 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
*/
/* scale down max_progress to per_disk */
max_progress /= reshape->after.data_disks;
- /* Round to chunk size as some kernels give an erroneously high number */
+ /*
+ * Round to chunk size as some kernels give an erroneously
+ * high number
+ */
max_progress /= info->new_chunk/512;
max_progress *= info->new_chunk/512;
/* And round to old chunk size as the kernel wants that */
@@ -3903,22 +4098,21 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
* waiting forever on a dead array
*/
char action[20];
- if (sysfs_get_str(info, NULL, "sync_action",
- action, 20) <= 0 ||
+ if (sysfs_get_str(info, NULL, "sync_action", action, 20) <= 0 ||
strncmp(action, "reshape", 7) != 0)
break;
/* Some kernels reset 'sync_completed' to zero
* before setting 'sync_action' to 'idle'.
* So we need these extra tests.
*/
- if (completed == 0 && advancing
- && strncmp(action, "idle", 4) == 0
- && info->reshape_progress > 0)
+ if (completed == 0 && advancing &&
+ strncmp(action, "idle", 4) == 0 &&
+ info->reshape_progress > 0)
break;
- if (completed == 0 && !advancing
- && strncmp(action, "idle", 4) == 0
- && info->reshape_progress < (info->component_size
- * reshape->after.data_disks))
+ if (completed == 0 && !advancing &&
+ strncmp(action, "idle", 4) == 0 &&
+ info->reshape_progress <
+ (info->component_size * reshape->after.data_disks))
break;
sysfs_wait(fd, NULL);
if (sysfs_fd_get_ll(fd, &completed) < 0)
@@ -3931,8 +4125,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
if (completed == 0) {
unsigned long long reshapep;
char action[20];
- if (sysfs_get_str(info, NULL, "sync_action",
- action, 20) > 0 &&
+ if (sysfs_get_str(info, NULL, "sync_action", action, 20) > 0 &&
strncmp(action, "idle", 4) == 0 &&
sysfs_get_ll(info, NULL,
"reshape_position", &reshapep) == 0)
@@ -3969,8 +4162,8 @@ check_progress:
* it was just a device failure that leaves us degraded but
* functioning.
*/
- if (sysfs_get_str(info, NULL, "reshape_position", buf, sizeof(buf)) < 0
- || strncmp(buf, "none", 4) != 0) {
+ if (sysfs_get_str(info, NULL, "reshape_position", buf,
+ sizeof(buf)) < 0 || strncmp(buf, "none", 4) != 0) {
/* The abort might only be temporary. Wait up to 10
* seconds for fd to contain a valid number again.
*/
@@ -3987,7 +4180,8 @@ check_progress:
/* If "sync_max" is no longer max_progress
* we need to freeze things
*/
- sysfs_get_ll(info, NULL, "sync_max", &new_sync_max);
+ sysfs_get_ll(info, NULL, "sync_max",
+ &new_sync_max);
*frozen = (new_sync_max != max_progress);
break;
case -2: /* read error - abort */
@@ -4002,9 +4196,10 @@ check_progress:
/* Maybe racing with array shutdown - check state */
if (fd >= 0)
close(fd);
- if (sysfs_get_str(info, NULL, "array_state", buf, sizeof(buf)) < 0
- || strncmp(buf, "inactive", 8) == 0
- || strncmp(buf, "clear",5) == 0)
+ if (sysfs_get_str(info, NULL, "array_state", buf,
+ sizeof(buf)) < 0 ||
+ strncmp(buf, "inactive", 8) == 0 ||
+ strncmp(buf, "clear",5) == 0)
return -2; /* abort */
return -1; /* complete */
}
@@ -4077,15 +4272,14 @@ static int grow_backup(struct mdinfo *sra,
bsb.magic[15] = '2';
for (i = 0; i < dests; i++)
if (part)
- lseek64(destfd[i], destoffsets[i] + __le64_to_cpu(bsb.devstart2)*512, 0);
+ lseek64(destfd[i], destoffsets[i] +
+ __le64_to_cpu(bsb.devstart2)*512, 0);
else
lseek64(destfd[i], destoffsets[i], 0);
- rv = save_stripes(sources, offsets,
- disks, chunk, level, layout,
- dests, destfd,
- offset*512*odata, stripes * chunk * odata,
- buf);
+ rv = save_stripes(sources, offsets, disks, chunk, level, layout,
+ dests, destfd, offset * 512 * odata,
+ stripes * chunk * odata, buf);
if (rv)
return rv;
@@ -4093,14 +4287,16 @@ static int grow_backup(struct mdinfo *sra,
for (i = 0; i < dests; i++) {
bsb.devstart = __cpu_to_le64(destoffsets[i]/512);
- bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
+ bsb.sb_csum = bsb_csum((char*)&bsb,
+ ((char*)&bsb.sb_csum)-((char*)&bsb));
if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0)
bsb.sb_csum2 = bsb_csum((char*)&bsb,
((char*)&bsb.sb_csum2)-((char*)&bsb));
rv = -1;
- if ((unsigned long long)lseek64(destfd[i], destoffsets[i] - 4096, 0)
- != destoffsets[i] - 4096)
+ if ((unsigned long long)lseek64(destfd[i],
+ destoffsets[i] - 4096, 0) !=
+ destoffsets[i] - 4096)
break;
if (write(destfd[i], &bsb, 512) != 512)
break;
@@ -4150,15 +4346,15 @@ static int forget_backup(int dests, int *destfd,
rv = 0;
for (i = 0; i < dests; i++) {
bsb.devstart = __cpu_to_le64(destoffsets[i]/512);
- bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
+ bsb.sb_csum = bsb_csum((char*)&bsb,
+ ((char*)&bsb.sb_csum)-((char*)&bsb));
if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0)
bsb.sb_csum2 = bsb_csum((char*)&bsb,
((char*)&bsb.sb_csum2)-((char*)&bsb));
if ((unsigned long long)lseek64(destfd[i], destoffsets[i]-4096, 0) !=
destoffsets[i]-4096)
rv = -1;
- if (rv == 0 &&
- write(destfd[i], &bsb, 512) != 512)
+ if (rv == 0 && write(destfd[i], &bsb, 512) != 512)
rv = -1;
fsync(destfd[i]);
}
@@ -4274,9 +4470,10 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
unsigned long long backup_point, wait_point;
unsigned long long reshape_completed;
int done = 0;
- int increasing = reshape->after.data_disks >= reshape->before.data_disks;
- int part = 0; /* The next part of the backup area to fill. It may already
- * be full, so we need to check */
+ int increasing = reshape->after.data_disks >=
+ reshape->before.data_disks;
+ int part = 0; /* The next part of the backup area to fill. It
+ * may already be full, so we need to check */
int level = reshape->level;
int layout = reshape->before.layout;
int data = reshape->before.data_disks;
@@ -4440,11 +4637,9 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
}
if (actual_stripes == 0)
break;
- grow_backup(sra, offset, actual_stripes,
- fds, offsets,
- disks, chunk, level, layout,
- dests, destfd, destoffsets,
- part, &degraded, buf);
+ grow_backup(sra, offset, actual_stripes, fds, offsets,
+ disks, chunk, level, layout, dests, destfd,
+ destoffsets, part, &degraded, buf);
validate(afd, destfd[0], destoffsets[0]);
/* record where 'part' is up to */
part = !part;
@@ -4473,8 +4668,8 @@ int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
* write that data into the array and update the super blocks with
* the new reshape_progress
*/
-int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt,
- char *backup_file, int verbose)
+int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist,
+ int cnt, char *backup_file, int verbose)
{
int i, j;
int old_disks;
@@ -4483,9 +4678,11 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
int ndata, odata;
odata = info->array.raid_disks - info->delta_disks - 1;
- if (info->array.level == 6) odata--; /* number of data disks */
+ if (info->array.level == 6)
+ odata--; /* number of data disks */
ndata = info->array.raid_disks - 1;
- if (info->new_level == 6) ndata--;
+ if (info->new_level == 6)
+ ndata--;
old_disks = info->array.raid_disks - info->delta_disks;
@@ -4548,24 +4745,30 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
}
if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb))) {
if (verbose)
- pr_err("Bad backup-metadata checksum on %s\n", devname);
+ pr_err("Bad backup-metadata checksum on %s\n",
+ devname);
continue; /* bad checksum */
}
if (memcmp(bsb.magic, "md_backup_data-2", 16) == 0 &&
bsb.sb_csum2 != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum2)-((char*)&bsb))) {
if (verbose)
- pr_err("Bad backup-metadata checksum2 on %s\n", devname);
+ pr_err("Bad backup-metadata checksum2 on %s\n",
+ devname);
continue; /* Bad second checksum */
}
if (memcmp(bsb.set_uuid,info->uuid, 16) != 0) {
if (verbose)
- pr_err("Wrong uuid on backup-metadata on %s\n", devname);
+ pr_err("Wrong uuid on backup-metadata on %s\n",
+ devname);
continue; /* Wrong uuid */
}
- /* array utime and backup-mtime should be updated at much the same time, but it seems that
- * sometimes they aren't... So allow considerable flexability in matching, and allow
- * this test to be overridden by an environment variable.
+ /*
+ * array utime and backup-mtime should be updated at
+ * much the same time, but it seems that sometimes
+ * they aren't... So allow considerable flexibility in
+ * matching, and allow this test to be overridden by
+ * an environment variable.
*/
if(time_after(info->array.utime, (unsigned int)__le64_to_cpu(bsb.mtime) + 2*60*60) ||
time_before(info->array.utime, (unsigned int)__le64_to_cpu(bsb.mtime) - 10*60)) {
@@ -4606,8 +4809,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
/* reshape_progress is increasing */
if ((__le64_to_cpu(bsb.arraystart)
+ __le64_to_cpu(bsb.length)
- < info->reshape_progress)
- &&
+ < info->reshape_progress) &&
(__le64_to_cpu(bsb.arraystart2)
+ __le64_to_cpu(bsb.length2)
< info->reshape_progress))
@@ -4653,12 +4855,10 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
}
printf("%s: restoring critical section\n", Name);
- if (restore_stripes(fdlist, offsets,
- info->array.raid_disks,
- info->new_chunk,
- info->new_level,
- info->new_layout,
- fd, __le64_to_cpu(bsb.devstart)*512,
+ if (restore_stripes(fdlist, offsets, info->array.raid_disks,
+ info->new_chunk, info->new_level,
+ info->new_layout, fd,
+ __le64_to_cpu(bsb.devstart)*512,
__le64_to_cpu(bsb.arraystart)*512,
__le64_to_cpu(bsb.length)*512, NULL)) {
/* didn't succeed, so giveup */
@@ -4670,12 +4870,10 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
}
if (bsb.magic[15] == '2' &&
- restore_stripes(fdlist, offsets,
- info->array.raid_disks,
- info->new_chunk,
- info->new_level,
- info->new_layout,
- fd, __le64_to_cpu(bsb.devstart)*512 +
+ restore_stripes(fdlist, offsets, info->array.raid_disks,
+ info->new_chunk, info->new_level,
+ info->new_layout, fd,
+ __le64_to_cpu(bsb.devstart)*512 +
__le64_to_cpu(bsb.devstart2)*512,
__le64_to_cpu(bsb.arraystart2)*512,
__le64_to_cpu(bsb.length2)*512, NULL)) {
@@ -4708,15 +4906,16 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
else
lo = lo1;
}
- if (lo < hi &&
- (info->reshape_progress < lo ||
- info->reshape_progress > hi))
+ if (lo < hi && (info->reshape_progress < lo ||
+ info->reshape_progress > hi))
/* backup does not affect reshape_progress*/ ;
else if (info->delta_disks >= 0) {
info->reshape_progress = __le64_to_cpu(bsb.arraystart) +
__le64_to_cpu(bsb.length);
if (bsb.magic[15] == '2') {
- unsigned long long p2 = __le64_to_cpu(bsb.arraystart2) +
+ unsigned long long p2;
+
+ p2 = __le64_to_cpu(bsb.arraystart2) +
__le64_to_cpu(bsb.length2);
if (p2 > info->reshape_progress)
info->reshape_progress = p2;
@@ -4724,7 +4923,9 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
} else {
info->reshape_progress = __le64_to_cpu(bsb.arraystart);
if (bsb.magic[15] == '2') {
- unsigned long long p2 = __le64_to_cpu(bsb.arraystart2);
+ unsigned long long p2;
+
+ p2 = __le64_to_cpu(bsb.arraystart2);
if (p2 < info->reshape_progress)
info->reshape_progress = p2;
}
@@ -4736,8 +4937,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt
continue;
st->ss->getinfo_super(st, &dinfo, NULL);
dinfo.reshape_progress = info->reshape_progress;
- st->ss->update_super(st, &dinfo,
- "_reshape_progress",
+ st->ss->update_super(st, &dinfo, "_reshape_progress",
NULL,0, 0, NULL);
st->ss->store_super(st, fdlist[j]);
st->ss->free_super(st);
@@ -4798,13 +4998,11 @@ int Grow_continue_command(char *devname, int fd,
int cfd = -1;
int fd2;
- dprintf("Grow continue from command line called for %s\n",
- devname);
+ dprintf("Grow continue from command line called for %s\n", devname);
st = super_by_fd(fd, &subarray);
if (!st || !st->ss) {
- pr_err("Unable to determine metadata format for %s\n",
- devname);
+ pr_err("Unable to determine metadata format for %s\n", devname);
return 1;
}
dprintf("Grow continue is run for ");
@@ -4812,12 +5010,14 @@ int Grow_continue_command(char *devname, int fd,
int d;
int cnt = 5;
dprintf_cont("native array (%s)\n", devname);
- if (ioctl(fd, GET_ARRAY_INFO, &array.array) < 0) {
- pr_err("%s is not an active md array - aborting\n", devname);
+ if (md_get_array_info(fd, &array.array) < 0) {
+ pr_err("%s is not an active md array - aborting\n",
+ devname);
ret_val = 1;
goto Grow_continue_command_exit;
}
content = &array;
+ sysfs_init(content, fd, NULL);
/* Need to load a superblock.
* FIXME we should really get what we need from
* sysfs
@@ -4828,7 +5028,7 @@ int Grow_continue_command(char *devname, int fd,
char *dv;
int err;
disk.number = d;
- if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ if (md_get_disk_info(fd, &disk) < 0)
continue;
if (disk.major == 0 && disk.minor == 0)
continue;
@@ -4882,15 +5082,14 @@ int Grow_continue_command(char *devname, int fd,
*/
ret_val = st->ss->load_container(st, cfd, NULL);
if (ret_val) {
- pr_err("Cannot read superblock for %s\n",
- devname);
+ pr_err("Cannot read superblock for %s\n", devname);
ret_val = 1;
goto Grow_continue_command_exit;
}
cc = st->ss->container_content(st, subarray);
for (content = cc; content ; content = content->next) {
- char *array;
+ char *array_name;
int allow_reshape = 1;
if (content->reshape_active == 0)
@@ -4915,8 +5114,8 @@ int Grow_continue_command(char *devname, int fd,
goto Grow_continue_command_exit;
}
- array = strchr(content->text_version+1, '/')+1;
- mdstat = mdstat_by_subdev(array, container);
+ array_name = strchr(content->text_version+1, '/')+1;
+ mdstat = mdstat_by_subdev(array_name, container);
if (!mdstat)
continue;
if (mdstat->active == 0) {
@@ -4940,7 +5139,13 @@ int Grow_continue_command(char *devname, int fd,
goto Grow_continue_command_exit;
}
- sysfs_init(content, fd2, mdstat->devnm);
+ if (sysfs_init(content, fd2, mdstat->devnm)) {
+ pr_err("Unable to initialize sysfs for %s, Grow cannot continue.\n",
+ mdstat->devnm);
+ ret_val = 1;
+ close(fd2);
+ goto Grow_continue_command_exit;
+ }
close(fd2);
@@ -4999,15 +5204,13 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
st->ss->load_container(st, cfd, st->container_devnm);
close(cfd);
ret_val = reshape_container(st->container_devnm, NULL, mdfd,
- st, info, 0, backup_file,
- 0, forked,
- 1 | info->reshape_active,
+ st, info, 0, backup_file, 0,
+ forked, 1 | info->reshape_active,
freeze_reshape);
} else
ret_val = reshape_array(NULL, mdfd, "array", st, info, 1,
- NULL, INVALID_SECTORS,
- backup_file, 0, forked,
- 1 | info->reshape_active,
+ NULL, INVALID_SECTORS, backup_file,
+ 0, forked, 1 | info->reshape_active,
freeze_reshape);
return ret_val;
@@ -5030,8 +5233,7 @@ char *locate_backup(char *name)
char *fl = make_backup(name);
struct stat stb;
- if (stat(fl, &stb) == 0 &&
- S_ISREG(stb.st_mode))
+ if (stat(fl, &stb) == 0 && S_ISREG(stb.st_mode))
return fl;
free(fl);
diff --git a/Incremental.c b/Incremental.c
index 0f507bb..0beab16 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -86,7 +86,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
* - if number of OK devices match expected, or -R and there are enough,
* start the array (auto-readonly).
*/
- struct stat stb;
+ dev_t rdev, rdev2;
struct mdinfo info, dinfo;
struct mdinfo *sra = NULL, *d;
struct mddev_ident *match;
@@ -99,7 +99,6 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
int active_disks;
int trustworthy;
char *name_to_use;
- mdu_array_info_t ainf;
struct dev_policy *policy = NULL;
struct map_ent target_array;
int have_target;
@@ -108,18 +107,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
struct createinfo *ci = conf_get_create_info();
- if (stat(devname, &stb) < 0) {
- if (c->verbose >= 0)
- pr_err("stat failed for %s: %s.\n",
- devname, strerror(errno));
- return rv;
- }
- if ((stb.st_mode & S_IFMT) != S_IFBLK) {
- if (c->verbose >= 0)
- pr_err("%s is not a block device.\n",
- devname);
+ if (!stat_is_blkdev(devname, &rdev))
return rv;
- }
dfd = dev_open(devname, O_RDONLY);
if (dfd < 0) {
if (c->verbose >= 0)
@@ -158,10 +147,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
if (!devlist) {
devlist = conf_get_devs();
for (;devlist; devlist = devlist->next) {
- struct stat st2;
- if (stat(devlist->devname, &st2) == 0 &&
- (st2.st_mode & S_IFMT) == S_IFBLK &&
- st2.st_rdev == stb.st_rdev)
+ if (stat_is_blkdev(devlist->devname, &rdev2) &&
+ rdev2 == rdev)
break;
}
}
@@ -175,21 +162,11 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
/* 2/ Find metadata, reject if none appropriate (check
* version/name from args) */
- if (fstat(dfd, &stb) < 0) {
- if (c->verbose >= 0)
- pr_err("fstat failed for %s: %s.\n",
- devname, strerror(errno));
+ if (!fstat_is_blkdev(dfd, devname, &rdev))
goto out;
- }
- if ((stb.st_mode & S_IFMT) != S_IFBLK) {
- if (c->verbose >= 0)
- pr_err("%s is not a block device.\n",
- devname);
- goto out;
- }
- dinfo.disk.major = major(stb.st_rdev);
- dinfo.disk.minor = minor(stb.st_rdev);
+ dinfo.disk.major = major(rdev);
+ dinfo.disk.minor = minor(rdev);
policy = disk_policy(&dinfo);
have_target = policy_check_path(&dinfo, &target_array);
@@ -225,8 +202,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
if (!match && rv == 2)
goto out;
- if (match && match->devname
- && strcasecmp(match->devname, "<ignore>") == 0) {
+ if (match && match->devname &&
+ strcasecmp(match->devname, "<ignore>") == 0) {
if (c->verbose >= 0)
pr_err("array containing %s is explicitly ignored by mdadm.conf\n",
devname);
@@ -267,8 +244,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
c->autof = ci->autof;
name_to_use = info.name;
- if (name_to_use[0] == 0 &&
- info.array.level == LEVEL_CONTAINER) {
+ if (name_to_use[0] == 0 && info.array.level == LEVEL_CONTAINER) {
name_to_use = info.text_version;
trustworthy = METADATA;
}
@@ -321,12 +297,17 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
/* Couldn't find an existing array, maybe make a new one */
mdfd = create_mddev(match ? match->devname : NULL,
- name_to_use, c->autof, trustworthy, chosen_name);
+ name_to_use, c->autof, trustworthy, chosen_name, 0);
if (mdfd < 0)
goto out_unlock;
- sysfs_init(&info, mdfd, NULL);
+ if (sysfs_init(&info, mdfd, NULL)) {
+ pr_err("unable to initialize sysfs for %s\n",
+ chosen_name);
+ rv = 2;
+ goto out_unlock;
+ }
if (set_array_info(mdfd, st, &info) != 0) {
pr_err("failed to set array info for %s: %s\n",
@@ -336,8 +317,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
}
dinfo = info;
- dinfo.disk.major = major(stb.st_rdev);
- dinfo.disk.minor = minor(stb.st_rdev);
+ dinfo.disk.major = major(rdev);
+ dinfo.disk.minor = minor(rdev);
if (add_disk(mdfd, st, &info, &dinfo) != 0) {
pr_err("failed to add %s to new array %s: %s.\n",
devname, chosen_name, strerror(errno));
@@ -393,12 +374,11 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
* flag has a different meaning. The test has to happen
* at the device level there
*/
- if (!st->ss->external
- && (info.disk.state & (1<<MD_DISK_SYNC)) != 0
- && ! policy_action_allows(policy, st->ss->name,
- act_re_add)
- && c->runstop < 1) {
- if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
+ if (!st->ss->external &&
+ (info.disk.state & (1 << MD_DISK_SYNC)) != 0 &&
+ !policy_action_allows(policy, st->ss->name, act_re_add) &&
+ c->runstop < 1) {
+ if (md_array_active(mdfd)) {
pr_err("not adding %s to active array (without --run) %s\n",
devname, chosen_name);
rv = 2;
@@ -439,8 +419,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
goto out_unlock;
}
}
- info.disk.major = major(stb.st_rdev);
- info.disk.minor = minor(stb.st_rdev);
+ info.disk.major = major(rdev);
+ info.disk.minor = minor(rdev);
/* add disk needs to know about containers */
if (st->ss->external)
sra->array.level = LEVEL_CONTAINER;
@@ -528,9 +508,11 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
journal_device_missing = (info.journal_device_required) && (info.journal_clean == 0);
+ if (info.consistency_policy == CONSISTENCY_POLICY_PPL)
+ info.array.state |= 1;
+
if (enough(info.array.level, info.array.raid_disks,
- info.array.layout, info.array.state & 1,
- avail) == 0) {
+ info.array.layout, info.array.state & 1, avail) == 0) {
if (c->export) {
printf("MD_STARTED=no\n");
} else if (c->verbose >= 0)
@@ -546,7 +528,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
/* + add any bitmap file */
/* + start the array (auto-readonly). */
- if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
+ if (md_array_active(mdfd)) {
if (c->export) {
printf("MD_STARTED=already\n");
} else if (c->verbose >= 0)
@@ -591,8 +573,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
if (d->disk.state & (1<<MD_DISK_REMOVED))
remove_disk(mdfd, st, sra, d);
- if ((sra == NULL || active_disks >= info.array.working_disks)
- && trustworthy != FOREIGN)
+ if ((sra == NULL || active_disks >= info.array.working_disks) &&
+ trustworthy != FOREIGN)
rv = ioctl(mdfd, RUN_ARRAY, NULL);
else
rv = sysfs_set_str(sra, NULL,
@@ -616,7 +598,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
* those devices we should re-add them now.
*/
for (dsk = sra->devs; dsk ; dsk = dsk->next) {
- if (disk_action_allows(dsk, st->ss->name, act_re_add) &&
+ if (disk_action_allows(dsk, st->ss->name,
+ act_re_add) &&
add_disk(mdfd, st, sra, dsk) == 0)
pr_err("%s re-added to %s\n",
dsk->sys_name, chosen_name);
@@ -659,9 +642,8 @@ static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
* and events less than the passed events, and remove the device.
*/
struct mdinfo *d;
- mdu_array_info_t ra;
- if (ioctl(mdfd, GET_ARRAY_INFO, &ra) == 0)
+ if (md_array_active(mdfd))
return; /* not safe to remove from active arrays
* without thinking more */
@@ -681,8 +663,7 @@ static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
st->ss->free_super(st);
close(dfd);
- if (info.disk.number != number ||
- info.events >= events)
+ if (info.disk.number != number || info.events >= events)
continue;
if (d->disk.raid_disk > -1)
@@ -703,6 +684,7 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
int cnt = 0;
int replcnt = 0;
__u64 max_events = 0;
+ __u64 max_journal_events = 0;
char *avail = NULL;
int *best = NULL;
char *devmap = NULL;
@@ -733,8 +715,9 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
info.array.raid_disks = raid_disks;
st->ss->getinfo_super(st, &info, devmap + raid_disks * devnum);
- if (info.disk.raid_disk == MD_DISK_ROLE_JOURNAL)
- bestinfo->journal_clean = 1;
+ if (info.disk.raid_disk == MD_DISK_ROLE_JOURNAL &&
+ info.events > max_journal_events)
+ max_journal_events = info.events;
if (!avail) {
raid_disks = info.array.raid_disks;
avail = xcalloc(raid_disks, 1);
@@ -784,6 +767,8 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
replcnt++;
st->ss->free_super(st);
}
+ if (max_journal_events >= max_events - 1)
+ bestinfo->journal_clean = 1;
if (!avail)
return 0;
@@ -821,28 +806,28 @@ static int count_active(struct supertype *st, struct mdinfo *sra,
}
/* test if container has degraded member(s) */
-static int container_members_max_degradation(struct map_ent *map, struct map_ent *me)
+static int
+container_members_max_degradation(struct map_ent *map, struct map_ent *me)
{
- mdu_array_info_t array;
- int afd;
- int max_degraded = 0;
+ struct mdinfo *sra;
+ int degraded, max_degraded = 0;
for(; map; map = map->next) {
if (!metadata_container_matches(map->metadata, me->devnm))
continue;
- afd = open_dev(map->devnm);
- if (afd < 0)
- continue;
/* most accurate information regarding array degradation */
- if (ioctl(afd, GET_ARRAY_INFO, &array) >= 0) {
- int degraded = array.raid_disks - array.active_disks -
- array.spare_disks;
- if (degraded > max_degraded)
- max_degraded = degraded;
- }
- close(afd);
+ sra = sysfs_read(-1, map->devnm,
+ GET_DISKS | GET_DEVS | GET_STATE);
+ if (!sra)
+ continue;
+ degraded = sra->array.raid_disks - sra->array.active_disks -
+ sra->array.spare_disks;
+ if (degraded > max_degraded)
+ max_degraded = degraded;
+ sysfs_free(sra);
}
- return (max_degraded);
+
+ return max_degraded;
}
static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
@@ -860,12 +845,12 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
* Return 0 on success, or some exit code on failure, probably 1.
*/
int rv = 1;
- struct stat stb;
+ dev_t rdev;
struct map_ent *mp, *map = NULL;
struct mdinfo *chosen = NULL;
int dfd = *dfdp;
- if (fstat(dfd, &stb) != 0)
+ if (!fstat_is_blkdev(dfd, devname, &rdev))
return 1;
/*
@@ -885,8 +870,8 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
struct supertype *st2;
struct domainlist *dl = NULL;
struct mdinfo *sra;
- unsigned long long devsize;
- unsigned long long component_size = 0;
+ unsigned long long devsize, freesize = 0;
+ struct spare_criteria sc = {0, 0};
if (is_subarray(mp->metadata))
continue;
@@ -905,16 +890,10 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
}
sra = sysfs_read(-1, mp->devnm,
GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
- GET_DEGRADED|GET_COMPONENT|GET_VERSION);
- if (!sra) {
- /* Probably a container - no degraded info */
- sra = sysfs_read(-1, mp->devnm,
- GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
- GET_COMPONENT|GET_VERSION);
- if (sra)
- sra->array.failed_disks = -1;
- }
- if (!sra)
+ GET_COMPONENT|GET_VERSION);
+ if (sra)
+ sra->array.failed_disks = -1;
+ else
continue;
if (st == NULL) {
int i;
@@ -955,20 +934,22 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
}
if (st3->ss->load_container &&
!st3->ss->load_container(st3, mdfd, mp->path)) {
- component_size = st3->ss->min_acceptable_spare_size(st3);
+ if (st3->ss->get_spare_criteria)
+ st3->ss->get_spare_criteria(st3, &sc);
st3->ss->free_super(st3);
}
free(st3);
close(mdfd);
}
if ((sra->component_size > 0 &&
- st2->ss->avail_size(st2, devsize,
- sra->devs
- ? sra->devs->data_offset
- : INVALID_SECTORS)
- < sra->component_size)
- ||
- (sra->component_size == 0 && devsize < component_size)) {
+ st2->ss->validate_geometry(st2, sra->array.level, sra->array.layout,
+ sra->array.raid_disks, &sra->array.chunk_size,
+ sra->component_size,
+ sra->devs ? sra->devs->data_offset : INVALID_SECTORS,
+ devname, &freesize, sra->consistency_policy,
+ 0) &&
+ freesize < sra->component_size) ||
+ (sra->component_size == 0 && devsize < sc.min_size)) {
if (verbose > 1)
pr_err("not adding %s to %s as it is too small\n",
devname, mp->path);
@@ -1037,8 +1018,8 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
devlist.writemostly = FlagDefault;
devlist.failfast = FlagDefault;
devlist.devname = chosen_devname;
- sprintf(chosen_devname, "%d:%d", major(stb.st_rdev),
- minor(stb.st_rdev));
+ sprintf(chosen_devname, "%d:%d", major(rdev),
+ minor(rdev));
devlist.disposition = 'a';
close(dfd);
*dfdp = -1;
@@ -1100,8 +1081,7 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
char *devname = NULL;
unsigned long long devsectors;
- if (de->d_ino == 0 ||
- de->d_name[0] == '.' ||
+ if (de->d_ino == 0 || de->d_name[0] == '.' ||
(de->d_type != DT_LNK && de->d_type != DT_UNKNOWN))
goto next;
@@ -1139,8 +1119,7 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
st2 = dup_super(st);
else
st2 = guess_super_type(fd, guess_partitions);
- if (st2 == NULL ||
- st2->ss->load_super(st2, fd, NULL) < 0)
+ if (st2 == NULL || st2->ss->load_super(st2, fd, NULL) < 0)
goto next;
st2->ignore_hw_compat = 0;
@@ -1168,8 +1147,7 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
* metadata which makes better use of the device can
* be found.
*/
- if (chosen == NULL ||
- chosen_size < info.component_size) {
+ if (chosen == NULL || chosen_size < info.component_size) {
chosen_size = info.component_size;
free(chosen);
chosen = devname;
@@ -1290,7 +1268,7 @@ static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
* what arrays might be candidates.
*/
if (st) {
- /* just try try 'array' or 'partition' based on this metadata */
+ /* just try to add 'array' or 'partition' based on this metadata */
if (st->ss->add_to_super)
return array_try_spare(devname, dfdp, pol, target, bare,
st, verbose);
@@ -1341,8 +1319,6 @@ int IncrementalScan(struct context *c, char *devnm)
restart:
for (me = mapl ; me ; me = me->next) {
- mdu_array_info_t array;
- mdu_bitmap_file_t bmf;
struct mdinfo *sra;
int mdfd;
@@ -1387,15 +1363,14 @@ restart:
rv = 1;
continue;
}
- if (ioctl(mdfd, GET_ARRAY_INFO, &array) == 0 ||
- errno != ENODEV) {
+ if (md_array_active(mdfd)) {
close(mdfd);
continue;
}
/* Ok, we can try this one. Maybe it needs a bitmap */
for (mddev = devs ; mddev ; mddev = mddev->next)
- if (mddev->devname && me->path
- && devname_matches(mddev->devname, me->path))
+ if (mddev->devname && me->path &&
+ devname_matches(mddev->devname, me->path))
break;
if (mddev && mddev->bitmap_file) {
/*
@@ -1403,13 +1378,12 @@ restart:
* is a hint only
*/
int added = -1;
- if (ioctl(mdfd, GET_ARRAY_INFO, &bmf) < 0) {
- int bmfd = open(mddev->bitmap_file, O_RDWR);
- if (bmfd >= 0) {
- added = ioctl(mdfd, SET_BITMAP_FILE,
- bmfd);
- close(bmfd);
- }
+ int bmfd;
+
+ bmfd = open(mddev->bitmap_file, O_RDWR);
+ if (bmfd >= 0) {
+ added = ioctl(mdfd, SET_BITMAP_FILE, bmfd);
+ close(bmfd);
}
if (c->verbose >= 0) {
if (added == 0)
@@ -1600,7 +1574,7 @@ static int Incremental_container(struct supertype *st, char *devname,
ra->name,
c->autof,
trustworthy,
- chosen_name);
+ chosen_name, 0);
}
if (only && (!mp || strcmp(mp->devnm, only) != 0))
continue;
@@ -1652,12 +1626,15 @@ static int Incremental_container(struct supertype *st, char *devname,
struct supertype *sst =
super_imsm.match_metadata_desc("imsm");
struct mdinfo *sinfo;
- unsigned long long min_size = 0;
- if (st->ss->min_acceptable_spare_size)
- min_size = st->ss->min_acceptable_spare_size(st);
+
if (!sst->ss->load_container(sst, sfd, NULL)) {
+ struct spare_criteria sc = {0, 0};
+
+ if (st->ss->get_spare_criteria)
+ st->ss->get_spare_criteria(st, &sc);
+
close(sfd);
- sinfo = container_choose_spares(sst, min_size,
+ sinfo = container_choose_spares(sst, &sc,
domains, NULL,
st->ss->name, 0);
sst->ss->free_super(sst);
@@ -1734,7 +1711,10 @@ int IncrementalRemove(char *devname, char *id_path, int verbose)
pr_err("%s does not appear to be a component of any array\n", devname);
return 1;
}
- sysfs_init(&mdi, -1, ent->devnm);
+ if (sysfs_init(&mdi, -1, ent->devnm)) {
+ pr_err("unable to initialize sysfs for: %s\n", devname);
+ return 1;
+ }
mdfd = open_dev_excl(ent->devnm);
if (mdfd > 0) {
close(mdfd);
diff --git a/Kill.c b/Kill.c
index f2fdb85..ff52561 100644
--- a/Kill.c
+++ b/Kill.c
@@ -63,7 +63,7 @@ int Kill(char *dev, struct supertype *st, int force, int verbose, int noexcl)
rv = st->ss->load_super(st, fd, dev);
if (rv == 0 || (force && rv >= 2)) {
st->ss->free_super(st);
- st->ss->init_super(st, NULL, 0, "", NULL, NULL,
+ st->ss->init_super(st, NULL, NULL, "", NULL, NULL,
INVALID_SECTORS);
if (st->ss->store_super(st, fd)) {
if (verbose >= 0)
diff --git a/Makefile b/Makefile
index 8c8b658..188a218 100644
--- a/Makefile
+++ b/Makefile
@@ -30,6 +30,7 @@
# define "CXFLAGS" to give extra flags to CC.
# e.g. make CXFLAGS=-O to optimise
+CXFLAGS ?=-O2
TCC = tcc
UCLIBC_GCC = $(shell for nm in i386-uclibc-linux-gcc i386-uclibc-gcc; do which $$nm > /dev/null && { echo $$nm ; exit; } ; done; echo false No uclibc found )
#DIET_GCC = diet gcc
@@ -43,11 +44,16 @@ KLIBC_GCC = gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIB
CC ?= $(CROSS_COMPILE)gcc
CXFLAGS ?= -ggdb
-CWFLAGS = -Wall -Wstrict-prototypes -Wextra -Wno-unused-parameter
+CWFLAGS = -Wall -Werror -Wstrict-prototypes -Wextra -Wno-unused-parameter
ifdef WARN_UNUSED
CWFLAGS += -Wp,-D_FORTIFY_SOURCE=2 -O3
endif
+FALLTHROUGH := $(shell gcc -v --help 2>&1 | grep "implicit-fallthrough" | wc -l)
+ifneq "$(FALLTHROUGH)" "0"
+CWFLAGS += -Wimplicit-fallthrough=0
+endif
+
ifdef DEBIAN
CPPFLAGS += -DDEBIAN
endif
@@ -62,8 +68,8 @@ CPPFLAGS += -DBINDIR=\"$(BINDIR)\"
PKG_CONFIG ?= pkg-config
SYSCONFDIR = /etc
-CONFFILE = $(SYSCONFDIR)/mdadm/mdadm.conf
-CONFFILE2 = $(SYSCONFDIR)/mdadm.conf
+CONFFILE = $(SYSCONFDIR)/mdadm.conf
+CONFFILE2 = $(SYSCONFDIR)/mdadm/mdadm.conf
MAILCMD =/usr/sbin/sendmail -t
CONFFILEFLAGS = -DCONFFILE=\"$(CONFFILE)\" -DCONFFILE2=\"$(CONFFILE2)\"
# Both MAP_DIR and MDMON_DIR should be somewhere that persists across the
@@ -79,6 +85,7 @@ MDMON_DIR = $(RUN_DIR)
# place for autoreplace cookies
FAILED_SLOTS_DIR = $(RUN_DIR)/failed-slots
SYSTEMD_DIR=/lib/systemd/system
+LIB_DIR=/usr/libexec/mdadm
COROSYNC:=$(shell [ -d /usr/include/corosync ] || echo -DNO_COROSYNC)
DLM:=$(shell [ -f /usr/include/libdlm.h ] || echo -DNO_DLM)
@@ -89,7 +96,7 @@ DIRFLAGS += -DFAILED_SLOTS_DIR=\"$(FAILED_SLOTS_DIR)\"
CFLAGS = $(CWFLAGS) $(CXFLAGS) -DSendmail=\""$(MAILCMD)"\" $(CONFFILEFLAGS) $(DIRFLAGS) $(COROSYNC) $(DLM)
VERSION = $(shell [ -d .git ] && git describe HEAD | sed 's/mdadm-//')
-VERS_DATE = $(shell [ -d .git ] && date --date="`git log -n1 --format=format:%cd --date=short`" '+%0dth %B %Y' | sed -e 's/1th/1st/' -e 's/2th/2nd/' -e 's/11st/11th/' -e 's/12nd/12th/')
+VERS_DATE = $(shell [ -d .git ] && date --iso-8601 --date="`git log -n1 --format=format:%cd --date=iso --date=short`")
DVERS = $(if $(VERSION),-DVERSION=\"$(VERSION)\",)
DDATE = $(if $(VERS_DATE),-DVERS_DATE="\"$(VERS_DATE)\"",)
CFLAGS += $(DVERS) $(DDATE)
@@ -146,24 +153,13 @@ MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \
Kill.o sg_io.o dlink.o ReadMe.o super-intel.o \
super-mbr.o super-gpt.o \
super-ddf.o sha1.o crc32.o msg.o bitmap.o xmalloc.o \
- platform-intel.o probe_roms.o
+ platform-intel.o probe_roms.o crc32c.o
MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS))
STATICSRC = pwgr.c
STATICOBJS = pwgr.o
-ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c policy.c dlink.c util.c \
- maps.c lib.c xmalloc.c \
- super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \
- platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c mapfile.c
-ASSEMBLE_AUTO_SRCS := mdopen.c
-ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE
-ifdef MDASSEMBLE_AUTO
-ASSEMBLE_SRCS += $(ASSEMBLE_AUTO_SRCS)
-ASSEMBLE_FLAGS += -DMDASSEMBLE_AUTO
-endif
-
all : mdadm mdmon
man : mdadm.man md.man mdadm.conf.man mdmon.man raid6check.man
@@ -175,12 +171,10 @@ check_rundir:
fi
everything: all mdadm.static swap_super test_stripe raid6check \
- mdassemble mdassemble.auto mdassemble.static mdassemble.man \
mdadm.Os mdadm.O2 man
everything-test: all mdadm.static swap_super test_stripe \
- mdassemble.auto mdassemble.static mdassemble.man \
mdadm.Os mdadm.O2 man
-# mdadm.uclibc and mdassemble.uclibc don't work on x86-64
+# mdadm.uclibc doesn't work on x86-64
# mdadm.tcc doesn't work..
mdadm : $(OBJS) | check_rundir
@@ -216,31 +210,6 @@ test_stripe : restripe.c xmalloc.o mdadm.h
raid6check : raid6check.o mdadm.h $(CHECK_OBJS)
$(CC) $(CXFLAGS) $(LDFLAGS) -o raid6check raid6check.o $(CHECK_OBJS)
-mdassemble : $(ASSEMBLE_SRCS) $(INCL)
- $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS) $(STATICSRC)
-
-mdassemble.diet : $(ASSEMBLE_SRCS) $(INCL)
- rm -f $(OBJS)
- $(DIET_GCC) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS) $(STATICSRC)
-
-mdassemble.static : $(ASSEMBLE_SRCS) $(INCL)
- rm -f $(OBJS)
- $(CC) $(LDFLAGS) $(CPPFLAGS) $(ASSEMBLE_FLAGS) -static -DHAVE_STDINT_H -o mdassemble.static $(ASSEMBLE_SRCS) $(STATICSRC)
-
-mdassemble.auto : $(ASSEMBLE_SRCS) $(INCL) $(ASSEMBLE_AUTO_SRCS)
- rm -f mdassemble.static
- $(MAKE) MDASSEMBLE_AUTO=1 mdassemble.static
- mv mdassemble.static mdassemble.auto
-
-mdassemble.uclibc : $(ASSEMBLE_SRCS) $(INCL)
- rm -f $(OJS)
- $(UCLIBC_GCC) $(ASSEMBLE_FLAGS) -DUCLIBC -DHAVE_STDINT_H -static -o mdassemble.uclibc $(ASSEMBLE_SRCS) $(STATICSRC)
-
-# This doesn't work
-mdassemble.klibc : $(ASSEMBLE_SRCS) $(INCL)
- rm -f $(OBJS)
- $(KLIBC_GCC) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS)
-
mdadm.8 : mdadm.8.in
sed -e 's/{DEFAULT_METADATA}/$(DEFAULT_METADATA)/g' \
-e 's,{MAP_PATH},$(MAP_PATH),g' mdadm.8.in > mdadm.8
@@ -257,9 +226,6 @@ md.man : md.4
mdadm.conf.man : mdadm.conf.5
man -l mdadm.conf.5 > mdadm.conf.man
-mdassemble.man : mdassemble.8
- man -l mdassemble.8 > mdassemble.man
-
raid6check.man : raid6check.8
man -l raid6check.8 > raid6check.man
@@ -291,8 +257,10 @@ install-man: mdadm.8 md.4 mdadm.conf.5 mdmon.8
$(INSTALL) -D -m 644 md.4 $(DESTDIR)$(MAN4DIR)/md.4
$(INSTALL) -D -m 644 mdadm.conf.5 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5
-install-udev: udev-md-raid-arrays.rules udev-md-raid-assembly.rules
- @for file in 63-md-raid-arrays.rules 64-md-raid-assembly.rules ; \
+install-udev: udev-md-raid-arrays.rules udev-md-raid-assembly.rules udev-md-raid-creating.rules \
+ udev-md-clustered-confirm-device.rules
+ @for file in 01-md-raid-creating.rules 63-md-raid-arrays.rules 64-md-raid-assembly.rules \
+ 69-md-clustered-confirm-device.rules ; \
do sed -e 's,BINDIR,$(BINDIR),g' udev-$${file#??-} > .install.tmp.1 && \
$(ECHO) $(INSTALL) -D -m 644 udev-$${file#??-} $(DESTDIR)$(UDEVDIR)/rules.d/$$file ; \
$(INSTALL) -D -m 644 .install.tmp.1 $(DESTDIR)$(UDEVDIR)/rules.d/$$file ; \
@@ -313,7 +281,7 @@ install-systemd: systemd/mdmon@.service
$(INSTALL) -D -m 755 .install.tmp.3 $(DESTDIR)$(SYSTEMD_DIR)-shutdown/$$file ; \
rm -f .install.tmp.3; \
done
- if [ -f /etc/SuSE-release -o -n "$(SUSE)" ] ;then $(INSTALL) -D -m 755 systemd/SUSE-mdadm_env.sh $(DESTDIR)$(SYSTEMD_DIR)/../scripts/mdadm_env.sh ;fi
+ if [ -f /etc/SuSE-release -o -n "$(SUSE)" ] ;then $(INSTALL) -D -m 755 systemd/SUSE-mdadm_env.sh $(DESTDIR)$(LIB_DIR)/mdadm_env.sh ;fi
uninstall:
rm -f $(DESTDIR)$(MAN8DIR)/mdadm.8 $(DESTDIR)$(MAN8DIR)/mdmon.8 $(DESTDIR)$(MAN4DIR)/md.4 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5 $(DESTDIR)$(BINDIR)/mdadm
@@ -324,9 +292,7 @@ test: mdadm mdmon test_stripe swap_super raid6check
clean :
rm -f mdadm mdmon $(OBJS) $(MON_OBJS) $(STATICOBJS) core *.man \
mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt .merge_file_* \
- mdadm.Os mdadm.O2 mdmon.O2 \
- mdassemble mdassemble.static mdassemble.auto mdassemble.uclibc \
- mdassemble.klibc swap_super \
+ mdadm.Os mdadm.O2 mdmon.O2 swap_super \
init.cpio.gz mdadm.uclibc.static test_stripe raid6check raid6check.o mdmon \
mdadm.8
diff --git a/Manage.c b/Manage.c
index 5c3d2b9..21536f5 100644
--- a/Manage.c
+++ b/Manage.c
@@ -27,10 +27,6 @@
#include "md_p.h"
#include <ctype.h>
-#define REGISTER_DEV _IO (MD_MAJOR, 1)
-#define START_MD _IO (MD_MAJOR, 2)
-#define STOP_MD _IO (MD_MAJOR, 3)
-
int Manage_ro(char *devname, int fd, int readonly)
{
/* switch to readonly or rw
@@ -40,17 +36,9 @@ int Manage_ro(char *devname, int fd, int readonly)
* use RESTART_ARRAY_RW or STOP_ARRAY_RO
*
*/
- mdu_array_info_t array;
-#ifndef MDASSEMBLE
struct mdinfo *mdi;
-#endif
int rv = 0;
- if (md_get_version(fd) < 9000) {
- pr_err("need md driver version 0.90.0 or later\n");
- return 1;
- }
-#ifndef MDASSEMBLE
/* If this is an externally-managed array, we need to modify the
* metadata_version so that mdmon doesn't undo our change.
*/
@@ -94,10 +82,9 @@ int Manage_ro(char *devname, int fd, int readonly)
}
goto out;
}
-#endif
- if (ioctl(fd, GET_ARRAY_INFO, &array)) {
- pr_err("%s does not appear to be active.\n",
- devname);
+
+ if (!md_array_active(fd)) {
+ pr_err("%s does not appear to be active.\n", devname);
rv = 1;
goto out;
}
@@ -118,14 +105,10 @@ int Manage_ro(char *devname, int fd, int readonly)
}
}
out:
-#ifndef MDASSEMBLE
sysfs_free(mdi);
-#endif
return rv;
}
-#ifndef MDASSEMBLE
-
static void remove_devices(char *devnm, char *path)
{
/*
@@ -176,10 +159,6 @@ int Manage_run(char *devname, int fd, struct context *c)
*/
char nm[32], *nmp;
- if (md_get_version(fd) < 9000) {
- pr_err("need md driver version 0.90.0 or later\n");
- return 1;
- }
nmp = fd2devnm(fd);
if (!nmp) {
pr_err("Cannot find %s in sysfs!!\n", devname);
@@ -207,14 +186,6 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
if (will_retry && verbose == 0)
verbose = -1;
- if (md_get_version(fd) < 9000) {
- if (ioctl(fd, STOP_MD, 0) == 0)
- return 0;
- pr_err("stopping device %s failed: %s\n",
- devname, strerror(errno));
- return 1;
- }
-
strcpy(devnm, fd2devnm(fd));
/* Get EXCL access first. If this fails, then attempting
* to stop is probably a bad idea.
@@ -233,11 +204,9 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
count = 5;
while (((fd = ((devname[0] == '/')
?open(devname, O_RDONLY|O_EXCL)
- :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0
- || strcmp(fd2devnm(fd), devnm) != 0)
- && container[0]
- && mdmon_running(container)
- && count) {
+ :open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0 ||
+ strcmp(fd2devnm(fd), devnm) != 0) && container[0] &&
+ mdmon_running(container) && count) {
/* Can't open, so something might be wrong. However it
* is a container, so we might be racing with mdmon, so
* retry for a bit.
@@ -273,8 +242,8 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
while (count &&
(err = sysfs_set_str(mdi, NULL,
"array_state",
- "inactive")) < 0
- && errno == EBUSY) {
+ "inactive")) < 0 &&
+ errno == EBUSY) {
usleep(200000);
count--;
}
@@ -476,9 +445,8 @@ done:
* so it is reasonable to retry for a while - 5 seconds.
*/
count = 25; err = 0;
- while (count && fd >= 0
- && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
- && errno == EBUSY) {
+ while (count && fd >= 0 &&
+ (err = ioctl(fd, STOP_ARRAY, NULL)) < 0 && errno == EBUSY) {
usleep(200000);
count --;
}
@@ -539,14 +507,14 @@ static void add_faulty(struct mddev_dev *dv, int fd, char disp)
int remaining_disks;
int i;
- if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+ if (md_get_array_info(fd, &array) != 0)
return;
remaining_disks = array.nr_disks;
for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
char buf[40];
disk.number = i;
- if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+ if (md_get_disk_info(fd, &disk) != 0)
continue;
if (disk.major == 0 && disk.minor == 0)
continue;
@@ -565,7 +533,7 @@ static void add_detached(struct mddev_dev *dv, int fd, char disp)
int remaining_disks;
int i;
- if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+ if (md_get_array_info(fd, &array) != 0)
return;
remaining_disks = array.nr_disks;
@@ -573,7 +541,7 @@ static void add_detached(struct mddev_dev *dv, int fd, char disp)
char buf[40];
int sfd;
disk.number = i;
- if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+ if (md_get_disk_info(fd, &disk) != 0)
continue;
if (disk.major == 0 && disk.minor == 0)
continue;
@@ -602,7 +570,7 @@ static void add_set(struct mddev_dev *dv, int fd, char set_char)
int copies, set;
int i;
- if (ioctl(fd, GET_ARRAY_INFO, &array) != 0)
+ if (md_get_array_info(fd, &array) != 0)
return;
if (array.level != 10)
return;
@@ -615,7 +583,7 @@ static void add_set(struct mddev_dev *dv, int fd, char set_char)
for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
char buf[40];
disk.number = i;
- if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+ if (md_get_disk_info(fd, &disk) != 0)
continue;
if (disk.major == 0 && disk.minor == 0)
continue;
@@ -661,9 +629,8 @@ int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
get_linux_version() <= 2006018)
goto skip_re_add;
disc.number = mdi.disk.number;
- if (ioctl(fd, GET_DISK_INFO, &disc) != 0
- || disc.major != 0 || disc.minor != 0
- )
+ if (md_get_disk_info(fd, &disc) != 0 ||
+ disc.major != 0 || disc.minor != 0)
goto skip_re_add;
disc.major = major(rdev);
disc.minor = minor(rdev);
@@ -688,8 +655,8 @@ int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
if (dv->failfast == FlagClear)
disc.state &= ~(1 << MD_DISK_FAILFAST);
remove_partitions(tfd);
- if (update || dv->writemostly != FlagDefault
- || dv->failfast != FlagDefault) {
+ if (update || dv->writemostly != FlagDefault ||
+ dv->failfast != FlagDefault) {
int rv = -1;
tfd = dev_open(dv->devname, O_RDWR);
if (tfd < 0) {
@@ -774,9 +741,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
" Adding anyway as --force was given.\n",
dv->devname, devname);
}
- if (!tst->ss->external &&
- array->major_version == 0 &&
- md_get_version(fd)%100 < 2) {
+ if (!tst->ss->external && array->major_version == 0) {
if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
if (verbose >= 0)
pr_err("hot added %s\n",
@@ -805,7 +770,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
char *dev;
int dfd;
disc.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc))
+ if (md_get_disk_info(fd, &disc))
continue;
if (disc.major==0 && disc.minor==0)
continue;
@@ -827,8 +792,8 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
break;
}
/* FIXME this is a bad test to be using */
- if (!tst->sb && (dv->disposition != 'a'
- && dv->disposition != 'S')) {
+ if (!tst->sb && (dv->disposition != 'a' &&
+ dv->disposition != 'S')) {
/* we are re-adding a device to a
* completely dead array - have to depend
* on kernel to check
@@ -888,7 +853,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
for (d = 0; d < MAX_DISKS && found < array->nr_disks; d++) {
disc.number = d;
- if (ioctl(fd, GET_DISK_INFO, &disc))
+ if (md_get_disk_info(fd, &disc))
continue;
if (disc.major == 0 && disc.minor == 0)
continue;
@@ -929,7 +894,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
*/
for (j = array->raid_disks; j < tst->max_devs; j++) {
disc.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc))
+ if (md_get_disk_info(fd, &disc))
break;
if (disc.major==0 && disc.minor==0)
break;
@@ -946,7 +911,6 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
/* only add journal to array that supports journaling */
if (dv->disposition == 'j') {
- struct mdinfo mdi;
struct mdinfo *mdp;
mdp = sysfs_read(fd, NULL, GET_ARRAY_STATE);
@@ -955,7 +919,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
return -1;
}
- if (strncmp(mdp->sysfs_array_state, "readonly", 8) != 0) {
+ if (mdp->array_state != ARRAY_READONLY) {
sysfs_free(mdp);
pr_err("%s is not readonly, cannot add journal.\n", devname);
return -1;
@@ -963,11 +927,6 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
sysfs_free(mdp);
- tst->ss->getinfo_super(tst, &mdi, NULL);
- if (mdi.journal_device_required == 0) {
- pr_err("%s does not support journal device.\n", devname);
- return -1;
- }
disc.raid_disk = 0;
}
@@ -994,7 +953,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
for (j = 0; j < tst->max_devs; j++) {
mdu_disk_info_t disc2;
disc2.number = j;
- if (ioctl(fd, GET_DISK_INFO, &disc2))
+ if (md_get_disk_info(fd, &disc2))
continue;
if (disc2.major==0 && disc2.minor==0)
continue;
@@ -1110,7 +1069,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
}
int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
- int sysfd, unsigned long rdev, int verbose, char *devname)
+ int sysfd, unsigned long rdev, int force, int verbose, char *devname)
{
int lfd = -1;
int err;
@@ -1177,13 +1136,9 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
/* device has been removed and we don't know
* the major:minor number
*/
- int n = write(sysfd, "remove", 6);
- if (n != 6)
- err = -1;
- else
- err = 0;
+ err = sys_hot_remove_disk(sysfd, force);
} else {
- err = ioctl(fd, HOT_REMOVE_DISK, rdev);
+ err = hot_remove_disk(fd, rdev, force);
if (err && errno == ENODEV) {
/* Old kernels rejected this if no personality
* is registered */
@@ -1387,13 +1342,15 @@ int Manage_subdevs(char *devname, int fd,
int busy = 0;
int raid_slot = -1;
- if (ioctl(fd, GET_ARRAY_INFO, &array)) {
- pr_err("Cannot get array info for %s\n",
- devname);
+ if (sysfs_init(&info, fd, NULL)) {
+ pr_err("sysfs not availabile for %s\n", devname);
goto abort;
}
- sysfs_init(&info, fd, NULL);
+ if (md_get_array_info(fd, &array)) {
+ pr_err("Cannot get array info for %s\n", devname);
+ goto abort;
+ }
/* array.size is only 32 bits and may be truncated.
* So read from sysfs if possible, and record number of sectors
*/
@@ -1410,7 +1367,7 @@ int Manage_subdevs(char *devname, int fd,
}
for (dv = devlist; dv; dv = dv->next) {
- unsigned long rdev = 0; /* device to add/remove etc */
+ dev_t rdev = 0; /* device to add/remove etc */
int rv;
int mj,mn;
@@ -1427,8 +1384,7 @@ int Manage_subdevs(char *devname, int fd,
if (strcmp(dv->devname, "failed") == 0 ||
strcmp(dv->devname, "faulty") == 0) {
- if (dv->disposition != 'A'
- && dv->disposition != 'r') {
+ if (dv->disposition != 'A' && dv->disposition != 'r') {
pr_err("%s only meaningful with -r or --re-add, not -%c\n",
dv->devname, dv->disposition);
goto abort;
@@ -1461,7 +1417,7 @@ int Manage_subdevs(char *devname, int fd,
}
add_devlist = conf_get_devs();
if (add_devlist == NULL) {
- pr_err("no devices to scan for missing members.");
+ pr_err("no devices to scan for missing members.\n");
continue;
}
for (dp = &add_devlist; *dp; dp = & (*dp)->next)
@@ -1533,31 +1489,24 @@ int Manage_subdevs(char *devname, int fd,
goto abort;
}
}
- } else if ((dv->disposition == 'r' || dv->disposition == 'f')
- && get_maj_min(dv->devname, &mj, &mn)) {
+ } else if ((dv->disposition == 'r' ||
+ dv->disposition == 'f') &&
+ get_maj_min(dv->devname, &mj, &mn)) {
/* for 'fail' and 'remove', the device might
* not exist.
*/
rdev = makedev(mj, mn);
} else {
- struct stat stb;
tfd = dev_open(dv->devname, O_RDONLY);
if (tfd >= 0) {
- fstat(tfd, &stb);
+ fstat_is_blkdev(tfd, dv->devname, &rdev);
close(tfd);
} else {
int open_err = errno;
- if (stat(dv->devname, &stb) != 0) {
- pr_err("Cannot find %s: %s\n",
- dv->devname, strerror(errno));
- goto abort;
- }
- if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+ if (!stat_is_blkdev(dv->devname, &rdev)) {
if (dv->disposition == 'M')
/* non-fatal. Also improbable */
continue;
- pr_err("%s is not a block device.\n",
- dv->devname);
goto abort;
}
if (dv->disposition == 'r')
@@ -1574,7 +1523,6 @@ int Manage_subdevs(char *devname, int fd,
goto abort;
}
}
- rdev = stb.st_rdev;
}
switch(dv->disposition){
default:
@@ -1607,7 +1555,7 @@ int Manage_subdevs(char *devname, int fd,
if (dv->disposition == 'F')
/* Need to remove first */
- ioctl(fd, HOT_REMOVE_DISK, rdev);
+ hot_remove_disk(fd, rdev, force);
/* Make sure it isn't in use (in 2.6 or later) */
tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
if (tfd >= 0) {
@@ -1649,7 +1597,7 @@ int Manage_subdevs(char *devname, int fd,
rv = -1;
} else
rv = Manage_remove(tst, fd, dv, sysfd,
- rdev, verbose,
+ rdev, verbose, force,
devname);
if (sysfd >= 0)
close(sysfd);
@@ -1823,4 +1771,3 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
close(fd2);
return 0;
}
-#endif
diff --git a/Monitor.c b/Monitor.c
index 802a9d9..c7c05d2 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -125,8 +125,9 @@ int Monitor(struct mddev_dev *devlist,
struct state *st2;
int finished = 0;
struct mdstat_ent *mdstat = NULL;
- char *mailfrom = NULL;
+ char *mailfrom;
struct alert_info info;
+ struct mddev_ident *mdlist;
if (!mailaddr) {
mailaddr = conf_get_mailaddr();
@@ -138,7 +139,7 @@ int Monitor(struct mddev_dev *devlist,
if (!alert_cmd) {
alert_cmd = conf_get_program();
- if (alert_cmd && ! c->scan)
+ if (alert_cmd && !c->scan)
pr_err("Monitor using program \"%s\" from config file\n",
alert_cmd);
}
@@ -162,9 +163,10 @@ int Monitor(struct mddev_dev *devlist,
return 1;
if (devlist == NULL) {
- struct mddev_ident *mdlist = conf_get_ident(NULL);
- for (; mdlist; mdlist=mdlist->next) {
+ mdlist = conf_get_ident(NULL);
+ for (; mdlist; mdlist = mdlist->next) {
struct state *st;
+
if (mdlist->devname == NULL)
continue;
if (strcasecmp(mdlist->devname, "<ignore>") == 0)
@@ -188,9 +190,10 @@ int Monitor(struct mddev_dev *devlist,
}
} else {
struct mddev_dev *dv;
- for (dv=devlist ; dv; dv=dv->next) {
- struct mddev_ident *mdlist = conf_get_ident(dv->devname);
+
+ for (dv = devlist; dv; dv = dv->next) {
struct state *st = xcalloc(1, sizeof *st);
+ mdlist = conf_get_ident(dv->devname);
st->devname = xstrdup(dv->devname);
st->next = statelist;
st->devnm[0] = 0;
@@ -205,18 +208,18 @@ int Monitor(struct mddev_dev *devlist,
}
}
- while (! finished) {
+ while (!finished) {
int new_found = 0;
struct state *st, **stp;
int anydegraded = 0;
if (mdstat)
free_mdstat(mdstat);
- mdstat = mdstat_read(oneshot?0:1, 0);
+ mdstat = mdstat_read(oneshot ? 0 : 1, 0);
if (!mdstat)
mdstat_close();
- for (st=statelist; st; st=st->next)
+ for (st = statelist; st; st = st->next)
if (check_array(st, mdstat, c->test, &info,
increments, c->prefer))
anydegraded = 1;
@@ -290,8 +293,8 @@ static int make_daemon(char *pidfile)
}
close(0);
open("/dev/null", O_RDWR);
- dup2(0,1);
- dup2(0,2);
+ dup2(0, 1);
+ dup2(0, 2);
setsid();
return -1;
}
@@ -322,8 +325,7 @@ static int check_one_sharer(int scan)
fclose(fp);
}
if (scan) {
- if (mkdir(MDMON_DIR, S_IRWXU) < 0 &&
- errno != EEXIST) {
+ if (mkdir(MDMON_DIR, S_IRWXU) < 0 && errno != EEXIST) {
pr_err("Can't create autorebuild.pid file\n");
} else {
fp = fopen(path, "w");
@@ -346,7 +348,8 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info)
if (!info->alert_cmd && !info->mailaddr && !info->dosyslog) {
time_t now = time(0);
- printf("%1.15s: %s on %s %s\n", ctime(&now)+4, event, dev, disc?disc:"unknown device");
+ printf("%1.15s: %s on %s %s\n", ctime(&now) + 4,
+ event, dev, disc?disc:"unknown device");
}
if (info->alert_cmd) {
int pid = fork();
@@ -362,11 +365,10 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info)
exit(2);
}
}
- if (info->mailaddr &&
- (strncmp(event, "Fail", 4)==0 ||
- strncmp(event, "Test", 4)==0 ||
- strncmp(event, "Spares", 6)==0 ||
- strncmp(event, "Degrade", 7)==0)) {
+ if (info->mailaddr && (strncmp(event, "Fail", 4) == 0 ||
+ strncmp(event, "Test", 4) == 0 ||
+ strncmp(event, "Spares", 6) == 0 ||
+ strncmp(event, "Degrade", 7) == 0)) {
FILE *mp = popen(Sendmail, "w");
if (mp) {
FILE *mdstat;
@@ -376,7 +378,8 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info)
if (info->mailfrom)
fprintf(mp, "From: %s\n", info->mailfrom);
else
- fprintf(mp, "From: %s monitoring <root>\n", Name);
+ fprintf(mp, "From: %s monitoring <root>\n",
+ Name);
fprintf(mp, "To: %s\n", info->mailaddr);
fprintf(mp, "Subject: %s event on %s:%s\n\n",
event, dev, hname);
@@ -402,8 +405,9 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info)
int n;
fprintf(mp,
"\nP.S. The /proc/mdstat file currently contains the following:\n\n");
- while ( (n=fread(buf, 1, sizeof(buf), mdstat)) > 0)
- n=fwrite(buf, 1, n, mp);
+ while ((n = fread(buf, 1, sizeof(buf),
+ mdstat)) > 0)
+ n = fwrite(buf, 1, n, mp);
fclose(mdstat);
}
pclose(mp);
@@ -415,13 +419,13 @@ static void alert(char *event, char *dev, char *disc, struct alert_info *info)
/* Log at a different severity depending on the event.
*
* These are the critical events: */
- if (strncmp(event, "Fail", 4)==0 ||
- strncmp(event, "Degrade", 7)==0 ||
- strncmp(event, "DeviceDisappeared", 17)==0)
+ if (strncmp(event, "Fail", 4) == 0 ||
+ strncmp(event, "Degrade", 7) == 0 ||
+ strncmp(event, "DeviceDisappeared", 17) == 0)
priority = LOG_CRIT;
/* Good to know about, but are not failures: */
- else if (strncmp(event, "Rebuild", 7)==0 ||
- strncmp(event, "MoveSpare", 9)==0 ||
+ else if (strncmp(event, "Rebuild", 7) == 0 ||
+ strncmp(event, "MoveSpare", 9) == 0 ||
strncmp(event, "Spares", 6) != 0)
priority = LOG_WARNING;
/* Everything else: */
@@ -451,73 +455,32 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
* '1' if the array is degraded, or '0' if it is optimal (or dead).
*/
struct { int state, major, minor; } info[MAX_DISKS];
+ struct mdinfo *sra = NULL;
mdu_array_info_t array;
struct mdstat_ent *mse = NULL, *mse2;
char *dev = st->devname;
- int fd = -1;
+ int fd;
int i;
int remaining_disks;
int last_disk;
int new_array = 0;
+ int retval;
+ int is_container = 0;
+ unsigned long redundancy_only_flags = 0;
if (test)
alert("TestMessage", dev, NULL, ainfo);
- if (st->devnm[0])
- fd = open("/sys/block", O_RDONLY|O_DIRECTORY);
- if (fd >= 0) {
- /* Don't open the device unless it is present and
- * active in sysfs.
- */
- char buf[10];
- close(fd);
- fd = sysfs_open(st->devnm, NULL, "array_state");
- if (fd < 0 ||
- read(fd, buf, 10) < 5 ||
- strncmp(buf,"clear",5) == 0 ||
- strncmp(buf,"inact",5) == 0) {
- if (fd >= 0)
- close(fd);
- fd = sysfs_open(st->devnm, NULL, "level");
- if (fd < 0 || read(fd, buf, 10) != 0) {
- if (fd >= 0)
- close(fd);
- if (!st->err)
- alert("DeviceDisappeared", dev, NULL, ainfo);
- st->err++;
- return 0;
- }
- }
- close(fd);
- }
+
+ retval = 0;
+
fd = open(dev, O_RDONLY);
- if (fd < 0) {
- if (!st->err)
- alert("DeviceDisappeared", dev, NULL, ainfo);
- st->err++;
- return 0;
- }
- fcntl(fd, F_SETFD, FD_CLOEXEC);
- if (ioctl(fd, GET_ARRAY_INFO, &array)<0) {
- if (!st->err)
- alert("DeviceDisappeared", dev, NULL, ainfo);
- st->err++;
- close(fd);
- return 0;
- }
- /* It's much easier to list what array levels can't
- * have a device disappear than all of them that can
- */
- if (array.level == 0 || array.level == -1) {
- if (!st->err && !st->from_config)
- alert("DeviceDisappeared", dev, " Wrong-Level", ainfo);
- st->err++;
- close(fd);
- return 0;
- }
+ if (fd < 0)
+ goto disappeared;
+
if (st->devnm[0] == 0)
strcpy(st->devnm, fd2devnm(fd));
- for (mse2 = mdstat ; mse2 ; mse2=mse2->next)
+ for (mse2 = mdstat; mse2; mse2 = mse2->next)
if (strcmp(mse2->devnm, st->devnm) == 0) {
mse2->devnm[0] = 0; /* flag it as "used" */
mse = mse2;
@@ -525,11 +488,41 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
if (!mse) {
/* duplicated array in statelist
- * or re-created after reading mdstat*/
+ * or re-created after reading mdstat
+ */
st->err++;
- close(fd);
- return 0;
+ goto out;
+ }
+
+ if (mse->level == NULL)
+ is_container = 1;
+
+ if (!is_container && !md_array_active(fd))
+ goto disappeared;
+
+ fcntl(fd, F_SETFD, FD_CLOEXEC);
+ if (md_get_array_info(fd, &array) < 0)
+ goto disappeared;
+
+ if (!is_container && map_name(pers, mse->level) > 0)
+ redundancy_only_flags |= GET_MISMATCH;
+
+ sra = sysfs_read(-1, st->devnm, GET_LEVEL | GET_DISKS | GET_DEVS |
+ GET_STATE | redundancy_only_flags);
+
+ if (!sra)
+ goto disappeared;
+
+ /* It's much easier to list what array levels can't
+ * have a device disappear than all of them that can
+ */
+ if (sra->array.level == 0 || sra->array.level == -1) {
+ if (!st->err && !st->from_config)
+ alert("DeviceDisappeared", dev, " Wrong-Level", ainfo);
+ st->err++;
+ goto out;
}
+
/* this array is in /proc/mdstat */
if (array.utime == 0)
/* external arrays don't update utime, so
@@ -544,71 +537,62 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
alert("NewArray", st->devname, NULL, ainfo);
}
- if (st->utime == array.utime &&
- st->failed == array.failed_disks &&
- st->working == array.working_disks &&
- st->spare == array.spare_disks &&
- (mse == NULL || (
- mse->percent == st->percent
- ))) {
- close(fd);
+ if (st->utime == array.utime && st->failed == sra->array.failed_disks &&
+ st->working == sra->array.working_disks &&
+ st->spare == sra->array.spare_disks &&
+ (mse == NULL || (mse->percent == st->percent))) {
if ((st->active < st->raid) && st->spare == 0)
- return 1;
- else
- return 0;
+ retval = 1;
+ goto out;
}
if (st->utime == 0 && /* new array */
- mse->pattern && strchr(mse->pattern, '_') /* degraded */
- )
+ mse->pattern && strchr(mse->pattern, '_') /* degraded */)
alert("DegradedArray", dev, NULL, ainfo);
- if (st->utime == 0 && /* new array */
- st->expected_spares > 0 &&
- array.spare_disks < st->expected_spares)
+ if (st->utime == 0 && /* new array */ st->expected_spares > 0 &&
+ sra->array.spare_disks < st->expected_spares)
alert("SparesMissing", dev, NULL, ainfo);
if (st->percent < 0 && st->percent != RESYNC_UNKNOWN &&
mse->percent >= 0)
alert("RebuildStarted", dev, NULL, ainfo);
- if (st->percent >= 0 &&
- mse->percent >= 0 &&
+ if (st->percent >= 0 && mse->percent >= 0 &&
(mse->percent / increments) > (st->percent / increments)) {
- char percentalert[15]; // "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
+ char percentalert[15];
+ /*
+ * "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
+ */
if((mse->percent / increments) == 0)
- snprintf(percentalert, sizeof(percentalert), "RebuildStarted");
+ snprintf(percentalert, sizeof(percentalert),
+ "RebuildStarted");
else
- snprintf(percentalert, sizeof(percentalert), "Rebuild%02d", mse->percent);
+ snprintf(percentalert, sizeof(percentalert),
+ "Rebuild%02d", mse->percent);
alert(percentalert, dev, NULL, ainfo);
}
- if (mse->percent == RESYNC_NONE &&
- st->percent >= 0) {
+ if (mse->percent == RESYNC_NONE && st->percent >= 0) {
/* Rebuild/sync/whatever just finished.
* If there is a number in /mismatch_cnt,
* we should report that.
*/
- struct mdinfo *sra =
- sysfs_read(-1, st->devnm, GET_MISMATCH);
if (sra && sra->mismatch_cnt > 0) {
char cnt[80];
snprintf(cnt, sizeof(cnt),
" mismatches found: %d (on raid level %d)",
- sra->mismatch_cnt, array.level);
+ sra->mismatch_cnt, sra->array.level);
alert("RebuildFinished", dev, cnt, ainfo);
} else
alert("RebuildFinished", dev, NULL, ainfo);
- if (sra)
- sysfs_free(sra);
}
st->percent = mse->percent;
- remaining_disks = array.nr_disks;
- for (i=0; i<MAX_DISKS && remaining_disks > 0;
- i++) {
+ remaining_disks = sra->array.nr_disks;
+ for (i = 0; i < MAX_DISKS && remaining_disks > 0; i++) {
mdu_disk_info_t disc;
disc.number = i;
- if (ioctl(fd, GET_DISK_INFO, &disc) >= 0) {
+ if (md_get_disk_info(fd, &disc) >= 0) {
info[i].state = disc.state;
info[i].major = disc.major;
info[i].minor = disc.minor;
@@ -623,31 +607,25 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
strncmp(mse->metadata_version, "external:", 9) == 0 &&
is_subarray(mse->metadata_version+9)) {
char *sl;
- strcpy(st->parent_devnm,
- mse->metadata_version+10);
+ strcpy(st->parent_devnm, mse->metadata_version + 10);
sl = strchr(st->parent_devnm, '/');
if (sl)
*sl = 0;
} else
st->parent_devnm[0] = 0;
- if (st->metadata == NULL &&
- st->parent_devnm[0] == 0)
+ if (st->metadata == NULL && st->parent_devnm[0] == 0)
st->metadata = super_by_fd(fd, NULL);
- close(fd);
-
- for (i=0; i<MAX_DISKS; i++) {
- mdu_disk_info_t disc = {0,0,0,0,0};
- int newstate=0;
+ for (i = 0; i < MAX_DISKS; i++) {
+ mdu_disk_info_t disc = {0, 0, 0, 0, 0};
+ int newstate = 0;
int change;
char *dv = NULL;
disc.number = i;
- if (i < last_disk &&
- (info[i].major || info[i].minor)) {
+ if (i < last_disk && (info[i].major || info[i].minor)) {
newstate = info[i].state;
- dv = map_dev_preferred(
- info[i].major, info[i].minor, 1,
- prefer);
+ dv = map_dev_preferred(info[i].major, info[i].minor, 1,
+ prefer);
disc.state = newstate;
disc.major = info[i].major;
disc.minor = info[i].minor;
@@ -655,33 +633,45 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
newstate = (1 << MD_DISK_REMOVED);
if (dv == NULL && st->devid[i])
- dv = map_dev_preferred(
- major(st->devid[i]),
- minor(st->devid[i]), 1, prefer);
+ dv = map_dev_preferred(major(st->devid[i]),
+ minor(st->devid[i]), 1, prefer);
change = newstate ^ st->devstate[i];
if (st->utime && change && !st->err && !new_array) {
- if ((st->devstate[i]&change)&(1<<MD_DISK_SYNC))
+ if ((st->devstate[i]&change) & (1 << MD_DISK_SYNC))
alert("Fail", dev, dv, ainfo);
- else if ((newstate & (1<<MD_DISK_FAULTY)) &&
+ else if ((newstate & (1 << MD_DISK_FAULTY)) &&
(disc.major || disc.minor) &&
- st->devid[i] == makedev(disc.major, disc.minor))
+ st->devid[i] == makedev(disc.major,
+ disc.minor))
alert("FailSpare", dev, dv, ainfo);
- else if ((newstate&change)&(1<<MD_DISK_SYNC))
+ else if ((newstate&change) & (1 << MD_DISK_SYNC))
alert("SpareActive", dev, dv, ainfo);
}
st->devstate[i] = newstate;
st->devid[i] = makedev(disc.major, disc.minor);
}
- st->active = array.active_disks;
- st->working = array.working_disks;
- st->spare = array.spare_disks;
- st->failed = array.failed_disks;
+ st->active = sra->array.active_disks;
+ st->working = sra->array.working_disks;
+ st->spare = sra->array.spare_disks;
+ st->failed = sra->array.failed_disks;
st->utime = array.utime;
- st->raid = array.raid_disks;
+ st->raid = sra->array.raid_disks;
st->err = 0;
if ((st->active < st->raid) && st->spare == 0)
- return 1;
- return 0;
+ retval = 1;
+
+ out:
+ if (sra)
+ sysfs_free(sra);
+ if (fd >= 0)
+ close(fd);
+ return retval;
+
+ disappeared:
+ if (!st->err)
+ alert("DeviceDisappeared", dev, NULL, ainfo);
+ st->err++;
+ goto out;
}
static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
@@ -691,12 +681,10 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
int new_found = 0;
char *name;
- for (mse=mdstat; mse; mse=mse->next)
- if (mse->devnm[0] &&
- (!mse->level || /* retrieve containers */
- (strcmp(mse->level, "raid0") != 0 &&
- strcmp(mse->level, "linear") != 0))
- ) {
+ for (mse = mdstat; mse; mse = mse->next)
+ if (mse->devnm[0] && (!mse->level || /* retrieve containers */
+ (strcmp(mse->level, "raid0") != 0 &&
+ strcmp(mse->level, "linear") != 0))) {
struct state *st = xcalloc(1, sizeof *st);
mdu_array_info_t array;
int fd;
@@ -709,9 +697,10 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
st->devname = xstrdup(name);
if ((fd = open(st->devname, O_RDONLY)) < 0 ||
- ioctl(fd, GET_ARRAY_INFO, &array)< 0) {
+ md_get_array_info(fd, &array) < 0) {
/* no such array */
- if (fd >=0) close(fd);
+ if (fd >= 0)
+ close(fd);
put_md_name(st->devname);
free(st->devname);
if (st->metadata) {
@@ -729,7 +718,8 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
st->percent = RESYNC_UNKNOWN;
st->expected_spares = -1;
if (mse->metadata_version &&
- strncmp(mse->metadata_version, "external:", 9) == 0 &&
+ strncmp(mse->metadata_version,
+ "external:", 9) == 0 &&
is_subarray(mse->metadata_version+9)) {
char *sl;
strcpy(st->parent_devnm,
@@ -746,13 +736,14 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
return new_found;
}
-static int get_min_spare_size_required(struct state *st, unsigned long long *sizep)
+static int get_required_spare_criteria(struct state *st,
+ struct spare_criteria *sc)
{
int fd;
- if (!st->metadata ||
- !st->metadata->ss->min_acceptable_spare_size) {
- *sizep = 0;
+ if (!st->metadata || !st->metadata->ss->get_spare_criteria) {
+ sc->min_size = 0;
+ sc->sector_size = 0;
return 0;
}
@@ -766,7 +757,8 @@ static int get_min_spare_size_required(struct state *st, unsigned long long *siz
close(fd);
if (!st->metadata->sb)
return 1;
- *sizep = st->metadata->ss->min_acceptable_spare_size(st->metadata);
+
+ st->metadata->ss->get_spare_criteria(st->metadata, sc);
st->metadata->ss->free_super(st->metadata);
return 0;
@@ -798,31 +790,38 @@ static int check_donor(struct state *from, struct state *to)
}
static dev_t choose_spare(struct state *from, struct state *to,
- struct domainlist *domlist, unsigned long long min_size)
+ struct domainlist *domlist, struct spare_criteria *sc)
{
int d;
dev_t dev = 0;
for (d = from->raid; !dev && d < MAX_DISKS; d++) {
- if (from->devid[d] > 0 &&
- from->devstate[d] == 0) {
+ if (from->devid[d] > 0 && from->devstate[d] == 0) {
struct dev_policy *pol;
unsigned long long dev_size;
+ unsigned int dev_sector_size;
if (to->metadata->ss->external &&
test_partition_from_id(from->devid[d]))
continue;
- if (min_size &&
+ if (sc->min_size &&
dev_size_from_id(from->devid[d], &dev_size) &&
- dev_size < min_size)
+ dev_size < sc->min_size)
+ continue;
+
+ if (sc->sector_size &&
+ dev_sector_size_from_id(from->devid[d],
+ &dev_sector_size) &&
+ sc->sector_size != dev_sector_size)
continue;
pol = devid_policy(from->devid[d]);
if (from->spare_group)
pol_add(&pol, pol_domain,
from->spare_group, NULL);
- if (domain_test(domlist, pol, to->metadata->ss->name) == 1)
+ if (domain_test(domlist, pol,
+ to->metadata->ss->name) == 1)
dev = from->devid[d];
dev_policy_free(pol);
}
@@ -832,7 +831,7 @@ static dev_t choose_spare(struct state *from, struct state *to,
static dev_t container_choose_spare(struct state *from, struct state *to,
struct domainlist *domlist,
- unsigned long long min_size, int active)
+ struct spare_criteria *sc, int active)
{
/* This is similar to choose_spare, but we cannot trust devstate,
* so we need to read the metadata instead
@@ -869,8 +868,8 @@ static dev_t container_choose_spare(struct state *from, struct state *to,
}
dp = list->devs;
while (dp) {
- if (dp->disk.state & (1<<MD_DISK_SYNC) &&
- !(dp->disk.state & (1<<MD_DISK_FAULTY)))
+ if (dp->disk.state & (1 << MD_DISK_SYNC) &&
+ !(dp->disk.state & (1 << MD_DISK_FAULTY)))
active_cnt++;
dp = dp->next;
}
@@ -883,7 +882,7 @@ static dev_t container_choose_spare(struct state *from, struct state *to,
}
/* We only need one spare so full list not needed */
- list = container_choose_spares(st, min_size, domlist, from->spare_group,
+ list = container_choose_spares(st, sc, domlist, from->spare_group,
to->metadata->ss->name, 1);
if (list) {
struct mdinfo *disks = list->devs;
@@ -899,15 +898,14 @@ static void try_spare_migration(struct state *statelist, struct alert_info *info
{
struct state *from;
struct state *st;
+ struct spare_criteria sc;
link_containers_with_subarrays(statelist);
for (st = statelist; st; st = st->next)
- if (st->active < st->raid &&
- st->spare == 0 && !st->err) {
+ if (st->active < st->raid && st->spare == 0 && !st->err) {
struct domainlist *domlist = NULL;
int d;
struct state *to = st;
- unsigned long long min_size;
if (to->parent_devnm[0] && !to->parent)
/* subarray monitored without parent container
@@ -918,14 +916,14 @@ static void try_spare_migration(struct state *statelist, struct alert_info *info
/* member of a container */
to = to->parent;
- if (get_min_spare_size_required(to, &min_size))
+ if (get_required_spare_criteria(to, &sc))
continue;
if (to->metadata->ss->external) {
/* We must make sure there is
* no suitable spare in container already.
* If there is we don't add more */
dev_t devid = container_choose_spare(
- to, to, NULL, min_size, st->active);
+ to, to, NULL, &sc, st->active);
if (devid > 0)
continue;
}
@@ -948,13 +946,15 @@ static void try_spare_migration(struct state *statelist, struct alert_info *info
continue;
if (from->metadata->ss->external)
devid = container_choose_spare(
- from, to, domlist, min_size, 0);
+ from, to, domlist, &sc, 0);
else
devid = choose_spare(from, to, domlist,
- min_size);
- if (devid > 0
- && move_spare(from->devname, to->devname, devid)) {
- alert("MoveSpare", to->devname, from->devname, info);
+ &sc);
+ if (devid > 0 &&
+ move_spare(from->devname, to->devname,
+ devid)) {
+ alert("MoveSpare", to->devname,
+ from->devname, info);
break;
}
}
@@ -979,8 +979,7 @@ static void link_containers_with_subarrays(struct state *list)
for (st = list; st; st = st->next)
if (st->parent_devnm[0])
for (cont = list; cont; cont = cont->next)
- if (!cont->err &&
- cont->parent_devnm[0] == 0 &&
+ if (!cont->err && cont->parent_devnm[0] == 0 &&
strcmp(cont->devnm, st->parent_devnm) == 0) {
st->parent = cont;
st->subarray = cont->subarray;
@@ -992,23 +991,28 @@ static void link_containers_with_subarrays(struct state *list)
/* Not really Monitor but ... */
int Wait(char *dev)
{
- struct stat stb;
char devnm[32];
+ dev_t rdev;
+ char *tmp;
int rv = 1;
int frozen_remaining = 3;
- if (stat(dev, &stb) != 0) {
- pr_err("Cannot find %s: %s\n", dev,
- strerror(errno));
+ if (!stat_is_blkdev(dev, &rdev))
+ return 2;
+
+ tmp = devid2devnm(rdev);
+ if (!tmp) {
+ pr_err("Cannot get md device name.\n");
return 2;
}
- strcpy(devnm, stat2devnm(&stb));
+
+ strcpy(devnm, tmp);
while(1) {
struct mdstat_ent *ms = mdstat_read(1, 0);
struct mdstat_ent *e;
- for (e=ms ; e; e=e->next)
+ for (e = ms; e; e = e->next)
if (strcmp(e->devnm, devnm) == 0)
break;
@@ -1019,7 +1023,9 @@ int Wait(char *dev)
*/
struct mdinfo mdi;
char buf[21];
- sysfs_init(&mdi, -1, devnm);
+
+ if (sysfs_init(&mdi, -1, devnm))
+ return 2;
if (sysfs_get_str(&mdi, NULL, "sync_action",
buf, 20) > 0 &&
strcmp(buf,"idle\n") != 0) {
@@ -1049,18 +1055,18 @@ int Wait(char *dev)
}
}
-#ifndef MDASSEMBLE
-
static char *clean_states[] = {
"clear", "inactive", "readonly", "read-auto", "clean", NULL };
-int WaitClean(char *dev, int sock, int verbose)
+int WaitClean(char *dev, int verbose)
{
int fd;
struct mdinfo *mdi;
int rv = 1;
char devnm[32];
+ if (!stat_is_blkdev(dev, NULL))
+ return 2;
fd = open(dev, O_RDONLY);
if (fd < 0) {
if (verbose)
@@ -1119,18 +1125,18 @@ int WaitClean(char *dev, int sock, int verbose)
}
if (rv < 0)
rv = 1;
- else if (fping_monitor(sock) == 0 ||
- ping_monitor(mdi->text_version) == 0) {
+ else if (ping_monitor(mdi->text_version) == 0) {
/* we need to ping to close the window between array
* state transitioning to clean and the metadata being
* marked clean
*/
rv = 0;
- } else
+ } else {
rv = 1;
+ pr_err("Error connecting monitor with %s\n", dev);
+ }
if (rv && verbose)
- pr_err("Error waiting for %s to be clean\n",
- dev);
+ pr_err("Error waiting for %s to be clean\n", dev);
/* restore the original safe_mode_delay */
sysfs_set_safemode(mdi, mdi->safe_mode_delay);
@@ -1142,4 +1148,3 @@ int WaitClean(char *dev, int sock, int verbose)
return rv;
}
-#endif /* MDASSEMBLE */
diff --git a/Query.c b/Query.c
index fbc1d10..2bd0e2a 100644
--- a/Query.c
+++ b/Query.c
@@ -32,54 +32,70 @@ int Query(char *dev)
* whether it is an md device and whether it has
* a superblock
*/
- int fd = open(dev, O_RDONLY);
- int vers;
- int ioctlerr;
+ int fd;
+ int ioctlerr, staterr;
int superror;
+ int level, raid_disks, spare_disks;
struct mdinfo info;
- mdu_array_info_t array;
+ struct mdinfo *sra;
struct supertype *st = NULL;
-
unsigned long long larray_size;
struct stat stb;
char *mddev;
mdu_disk_info_t disc;
char *activity;
+ fd = open(dev, O_RDONLY);
if (fd < 0){
- pr_err("cannot open %s: %s\n",
- dev, strerror(errno));
+ pr_err("cannot open %s: %s\n", dev, strerror(errno));
return 1;
}
- vers = md_get_version(fd);
- if (ioctl(fd, GET_ARRAY_INFO, &array)<0)
- ioctlerr = errno;
- else ioctlerr = 0;
+ if (fstat(fd, &stb) < 0)
+ staterr = errno;
+ else
+ staterr = 0;
+
+ ioctlerr = 0;
- fstat(fd, &stb);
+ sra = sysfs_read(fd, dev, GET_DISKS | GET_LEVEL | GET_DEVS | GET_STATE);
+ if (sra) {
+ level = sra->array.level;
+ raid_disks = sra->array.raid_disks;
+ spare_disks = sra->array.spare_disks;
+ } else {
+ mdu_array_info_t array;
- if (vers>=9000 && !ioctlerr) {
+ if (md_get_array_info(fd, &array) < 0) {
+ ioctlerr = errno;
+ level = -1;
+ raid_disks = -1;
+ spare_disks = -1;
+ } else {
+ level = array.level;
+ raid_disks = array.raid_disks;
+ spare_disks = array.spare_disks;
+ }
+ }
+
+ if (!ioctlerr && !staterr) {
if (!get_dev_size(fd, NULL, &larray_size))
larray_size = 0;
}
- if (vers < 0)
- printf("%s: is not an md array\n", dev);
- else if (vers < 9000)
- printf("%s: is an md device, but kernel cannot provide details\n", dev);
- else if (ioctlerr == ENODEV)
+ if (ioctlerr == ENODEV)
printf("%s: is an md device which is not active\n", dev);
else if (ioctlerr)
printf("%s: is an md device, but gives \"%s\" when queried\n",
dev, strerror(ioctlerr));
+ else if (staterr)
+ printf("%s: is not a valid md device, returning %s\n",
+ dev, strerror(ioctlerr));
else {
printf("%s: %s %s %d devices, %d spare%s. Use mdadm --detail for more detail.\n",
- dev,
- human_size_brief(larray_size,IEC),
- map_num(pers, array.level),
- array.raid_disks,
- array.spare_disks, array.spare_disks==1?"":"s");
+ dev, human_size_brief(larray_size,IEC),
+ map_num(pers, level), raid_disks,
+ spare_disks, spare_disks == 1 ? "" : "s");
}
st = guess_super(fd);
if (st && st->ss->compare_super != NULL)
@@ -99,9 +115,8 @@ int Query(char *dev)
disc.number = info.disk.number;
activity = "undetected";
if (mddev && (fd = open(mddev, O_RDONLY))>=0) {
- if (md_get_version(fd) >= 9000 &&
- ioctl(fd, GET_ARRAY_INFO, &array)>= 0) {
- if (ioctl(fd, GET_DISK_INFO, &disc) >= 0 &&
+ if (md_array_active(fd)) {
+ if (md_get_disk_info(fd, &disc) >= 0 &&
makedev((unsigned)disc.major,(unsigned)disc.minor) == stb.st_rdev)
activity = "active";
else
diff --git a/README.initramfs b/README.initramfs
index 8f9b8dd..c5fa668 100644
--- a/README.initramfs
+++ b/README.initramfs
@@ -16,11 +16,10 @@ devices.
These mechanisms, while useful, do not provide complete functionality
and are unlikely to be extended. The preferred way to assemble md
-arrays at boot time is using 'mdadm' or 'mdassemble' (which is a
-trimmed-down mdadm). To assemble an array which contains the root
-filesystem, mdadm needs to be run before that filesystem is mounted,
-and so needs to be run from an initial-ram-fs. It is how this can
-work that is the primary focus of this document.
+arrays at boot time is using 'mdadm'. To assemble an array which
+contains the root filesystem, mdadm needs to be run before that
+filesystem is mounted, and so needs to be run from an initial-ram-fs.
+It is how this can work that is the primary focus of this document.
It should be noted up front that only the array containing the root
filesystem should be assembled from the initramfs. Any other arrays
diff --git a/ReadMe.c b/ReadMe.c
index f5c156f..27121da 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -28,10 +28,10 @@
#include "mdadm.h"
#ifndef VERSION
-#define VERSION "4.0"
+#define VERSION "4.1-rc1"
#endif
#ifndef VERS_DATE
-#define VERS_DATE "2017-01-09"
+#define VERS_DATE "2018-03-22"
#endif
char Version[] = "mdadm - v" VERSION " - " VERS_DATE "\n";
@@ -78,11 +78,11 @@ char Version[] = "mdadm - v" VERSION " - " VERS_DATE "\n";
* found, it is started.
*/
-char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:";
+char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
char short_bitmap_options[]=
- "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:";
+ "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
char short_bitmap_auto_options[]=
- "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sa:rfRSow1tye:";
+ "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sa:rfRSow1tye:k:";
struct option long_options[] = {
{"manage", 0, 0, ManageOpt},
@@ -148,6 +148,7 @@ struct option long_options[] = {
{"nodes",1, 0, Nodes}, /* also for --assemble */
{"home-cluster",1, 0, ClusterName},
{"write-journal",1, 0, WriteJournal},
+ {"consistency-policy", 1, 0, 'k'},
/* For assemble */
{"uuid", 1, 0, 'u'},
@@ -341,7 +342,7 @@ char OptionHelp[] =
*/
char Help_create[] =
-"Usage: mdadm --create device -chunk=X --level=Y --raid-devices=Z devices\n"
+"Usage: mdadm --create device --chunk=X --level=Y --raid-devices=Z devices\n"
"\n"
" This usage will initialise a new md array, associate some\n"
" devices with it, and activate the array. In order to create an\n"
@@ -362,27 +363,29 @@ char Help_create[] =
" other levels.\n"
"\n"
" Options that are valid with --create (-C) are:\n"
-" --bitmap= : Create a bitmap for the array with the given filename\n"
-" : or an internal bitmap is 'internal' is given\n"
-" --chunk= -c : chunk size in kibibytes\n"
-" --rounding= : rounding factor for linear array (==chunk size)\n"
-" --level= -l : raid level: 0,1,4,5,6,10,linear,multipath and synonyms\n"
-" --parity= -p : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n"
-" --layout= : same as --parity, for RAID10: [fno]NN \n"
-" --raid-devices= -n : number of active devices in array\n"
-" --spare-devices= -x: number of spare (eXtra) devices in initial array\n"
-" --size= -z : Size (in K) of each drive in RAID1/4/5/6/10 - optional\n"
-" --data-offset= : Space to leave between start of device and start\n"
-" : of array data.\n"
-" --force -f : Honour devices as listed on command line. Don't\n"
-" : insert a missing drive for RAID5.\n"
-" --run -R : insist of running the array even if not all\n"
-" : devices are present or some look odd.\n"
-" --readonly -o : start the array readonly - not supported yet.\n"
-" --name= -N : Textual name for array - max 32 characters\n"
-" --bitmap-chunk= : bitmap chunksize in Kilobytes.\n"
-" --delay= -d : bitmap update delay in seconds.\n"
-" --write-journal= : Specify journal device for RAID-4/5/6 array\n"
+" --bitmap= -b : Create a bitmap for the array with the given filename\n"
+" : or an internal bitmap if 'internal' is given\n"
+" --chunk= -c : chunk size in kibibytes\n"
+" --rounding= : rounding factor for linear array (==chunk size)\n"
+" --level= -l : raid level: 0,1,4,5,6,10,linear,multipath and synonyms\n"
+" --parity= -p : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n"
+" --layout= : same as --parity, for RAID10: [fno]NN \n"
+" --raid-devices= -n : number of active devices in array\n"
+" --spare-devices= -x : number of spare (eXtra) devices in initial array\n"
+" --size= -z : Size (in K) of each drive in RAID1/4/5/6/10 - optional\n"
+" --data-offset= : Space to leave between start of device and start\n"
+" : of array data.\n"
+" --force -f : Honour devices as listed on command line. Don't\n"
+" : insert a missing drive for RAID5.\n"
+" --run -R : insist of running the array even if not all\n"
+" : devices are present or some look odd.\n"
+" --readonly -o : start the array readonly - not supported yet.\n"
+" --name= -N : Textual name for array - max 32 characters\n"
+" --bitmap-chunk= : bitmap chunksize in Kilobytes.\n"
+" --delay= -d : bitmap update delay in seconds.\n"
+" --write-journal= : Specify journal device for RAID-4/5/6 array\n"
+" --consistency-policy= : Specify the policy that determines how the array\n"
+" -k : maintains consistency in case of unexpected shutdown.\n"
"\n"
;
@@ -556,28 +559,30 @@ char Help_grow[] =
"reconfiguration.\n"
"\n"
"Options that are valid with the grow (-G --grow) mode are:\n"
-" --level= -l : Tell mdadm what level to convert the array to.\n"
-" --layout= -p : For a FAULTY array, set/change the error mode.\n"
-" : for other arrays, update the layout\n"
-" --size= -z : Change the active size of devices in an array.\n"
-" : This is useful if all devices have been replaced\n"
-" : with larger devices. Value is in Kilobytes, or\n"
-" : the special word 'max' meaning 'as large as possible'.\n"
-" --assume-clean : When increasing the --size, this flag will avoid\n"
-" : a resync of the new space\n"
-" --chunk= -c : Change the chunksize of the array\n"
-" --raid-devices= -n : Change the number of active devices in an array.\n"
-" --add= -a : Add listed devices as part of reshape. This is\n"
-" : needed for resizing a RAID0 which cannot have\n"
-" : spares already present.\n"
-" --bitmap= -b : Add or remove a write-intent bitmap.\n"
-" --backup-file= file : A file on a different device to store data for a\n"
-" : short time while increasing raid-devices on a\n"
-" : RAID4/5/6 array. Also needed throughout a reshape\n"
-" : when changing parameters other than raid-devices\n"
-" --array-size= -Z : Change visible size of array. This does not change\n"
-" : any data on the device, and is not stable across restarts.\n"
-" --data-offset= : Location on device to move start of data to.\n"
+" --level= -l : Tell mdadm what level to convert the array to.\n"
+" --layout= -p : For a FAULTY array, set/change the error mode.\n"
+" : for other arrays, update the layout\n"
+" --size= -z : Change the active size of devices in an array.\n"
+" : This is useful if all devices have been replaced\n"
+" : with larger devices. Value is in Kilobytes, or\n"
+" : the special word 'max' meaning 'as large as possible'.\n"
+" --assume-clean : When increasing the --size, this flag will avoid\n"
+" : a resync of the new space\n"
+" --chunk= -c : Change the chunksize of the array\n"
+" --raid-devices= -n : Change the number of active devices in an array.\n"
+" --add= -a : Add listed devices as part of reshape. This is\n"
+" : needed for resizing a RAID0 which cannot have\n"
+" : spares already present.\n"
+" --bitmap= -b : Add or remove a write-intent bitmap.\n"
+" --backup-file= file : A file on a different device to store data for a\n"
+" : short time while increasing raid-devices on a\n"
+" : RAID4/5/6 array. Also needed throughout a reshape\n"
+" : when changing parameters other than raid-devices\n"
+" --array-size= -Z : Change visible size of array. This does not change any\n"
+" : data on the device, and is not stable across restarts.\n"
+" --data-offset= : Location on device to move start of data to.\n"
+" --consistency-policy= : Change the consistency policy of an active array.\n"
+" -k : Currently works only for PPL with RAID5.\n"
;
char Help_incr[] =
@@ -604,7 +609,7 @@ char Help_incr[] =
;
char Help_config[] =
-"The /etc/mdadm/mdadm.conf config file:\n\n"
+"The /etc/mdadm.conf config file:\n\n"
" The config file contains, apart from blank lines and comment lines that\n"
" start with a hash(#), array lines, device lines, and various\n"
" configuration lines.\n"
diff --git a/bitmap.c b/bitmap.c
index ccedfd3..e38cb96 100644
--- a/bitmap.c
+++ b/bitmap.c
@@ -194,12 +194,10 @@ bitmap_file_open(char *filename, struct supertype **stp, int node_num)
}
if (fstat(fd, &stb) < 0) {
- pr_err("failed to determine bitmap file/device type: %s\n",
- strerror(errno));
+ pr_err("fstat failed for %s: %s\n", filename, strerror(errno));
close(fd);
return -1;
}
-
if ((stb.st_mode & S_IFMT) == S_IFBLK) {
/* block device, so we are probably after an internal bitmap */
if (!st)
@@ -219,7 +217,6 @@ bitmap_file_open(char *filename, struct supertype **stp, int node_num)
fd = -1;
}
}
-
*stp = st;
}
@@ -260,7 +257,7 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
if (!info)
return rv;
sb = &info->sb;
- if (sb->magic != BITMAP_MAGIC && md_get_version(fd) > 0) {
+ if (sb->magic != BITMAP_MAGIC) {
pr_err("This is an md array. To view a bitmap you need to examine\n");
pr_err("a member device, not the array.\n");
pr_err("Reporting bitmap that would be used if this array were used\n");
diff --git a/bitmap.h b/bitmap.h
index b8fb071..7b1f80f 100644
--- a/bitmap.h
+++ b/bitmap.h
@@ -46,7 +46,7 @@
*
* The counter counts pending write requests, plus the on-disk bit.
* When the counter is '1' and the resync bits are clear, the on-disk
- * bit can be cleared aswell, thus setting the counter to 0.
+ * bit can be cleared as well, thus setting the counter to 0.
* When we set a bit, or in the counter (to start a write), if the fields is
* 0, we first set the disk bit and set the counter to 1.
*
@@ -185,7 +185,7 @@ struct bitmap_page {
*/
char *map;
/*
- * in emergencies (when map cannot be alloced), hijack the map
+ * in emergencies (when map cannot be allocated), hijack the map
* pointer and use it as two counters itself
*/
unsigned int hijacked;
diff --git a/clustermd_tests/00r10_Create b/clustermd_tests/00r10_Create
new file mode 100644
index 0000000..8aa5a70
--- /dev/null
+++ b/clustermd_tests/00r10_Create
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l10 -b clustered --layout n2 -n2 $dev0 $dev1
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check $NODE1 resync
+check $NODE2 PENDING
+check all wait
+check all raid10
+check all bitmap
+check all nosync
+check all state UU
+check all dmesg
+stop_md all $md0
+
+mdadm -CR $md0 -l10 -b clustered -n3 --layout n3 $dev0 $dev1 $dev2 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1 $dev2
+check all nosync
+check all raid10
+check all bitmap
+check all state UUU
+check all dmesg
+stop_md all $md0
+
+mdadm -CR $md0 -l10 -b clustered -n2 -x1 --layout n2 $dev0 $dev1 $dev2 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1 $dev2
+check all nosync
+check all raid10
+check all bitmap
+check all spares 1
+check all state UU
+check all dmesg
+stop_md all $md0
+
+name=tstmd
+mdadm -CR $md0 -l10 -b clustered -n2 $dev0 $dev1 --layout n2 --name=$name --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid10
+check all bitmap
+check all state UU
+for ip in $NODE1 $NODE2
+do
+ ssh $ip "mdadm -D $md0 | grep 'Name' | grep -q $name"
+ [ $? -ne '0' ] &&
+ die "$ip: check --name=$name failed."
+done
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/00r1_Create b/clustermd_tests/00r1_Create
new file mode 100644
index 0000000..709bb7b
--- /dev/null
+++ b/clustermd_tests/00r1_Create
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l1 -b clustered -n2 $dev0 $dev1
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check $NODE1 resync
+check $NODE2 PENDING
+check all wait
+check all raid1
+check all bitmap
+check all nosync
+check all state UU
+check all dmesg
+stop_md all $md0
+
+mdadm -CR $md0 -l1 -b clustered -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid1
+check all bitmap
+check all state UU
+check all dmesg
+stop_md all $md0
+
+mdadm -CR $md0 -l1 -b clustered -n2 -x1 $dev0 $dev1 $dev2 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1 $dev2
+check all nosync
+check all raid1
+check all bitmap
+check all spares 1
+check all state UU
+check all dmesg
+stop_md all $md0
+
+name=tstmd
+mdadm -CR $md0 -l1 -b clustered -n2 $dev0 $dev1 --name=$name --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid1
+check all bitmap
+check all state UU
+for ip in $NODE1 $NODE2
+do
+ ssh $ip "mdadm -D $md0 | grep 'Name' | grep -q $name"
+ [ $? -ne '0' ] &&
+ die "$ip: check --name=$name failed."
+done
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/01r10_Grow_bitmap-switch b/clustermd_tests/01r10_Grow_bitmap-switch
new file mode 100644
index 0000000..1794719
--- /dev/null
+++ b/clustermd_tests/01r10_Grow_bitmap-switch
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l10 -b clustered --layout n2 -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid10
+check all bitmap
+check all state UU
+
+# switch 'clustered' bitmap to 'none', and then 'none' to 'internal'
+stop_md $NODE2 $md0
+mdadm --grow $md0 --bitmap=none
+[ $? -eq '0' ] ||
+ die "$NODE1: change bitmap 'clustered' to 'none' failed."
+mdadm -X $dev0 $dev1 &> /dev/null
+[ $? -eq '0' ] &&
+ die "$NODE1: bitmap still exists in member_disks."
+check all nobitmap
+mdadm --grow $md0 --bitmap=internal
+[ $? -eq '0' ] ||
+ die "$NODE1: change bitmap 'none' to 'internal' failed."
+sleep 1
+mdadm -X $dev0 $dev1 &> /dev/null
+[ $? -eq '0' ] ||
+ die "$NODE1: create 'internal' bitmap failed."
+check $NODE1 bitmap
+
+# switch 'internal' bitmap to 'none', and then 'none' to 'clustered'
+mdadm --grow $md0 --bitmap=none
+[ $? -eq '0' ] ||
+ die "$NODE1: change bitmap 'internal' to 'none' failed."
+mdadm -X $dev0 $dev1 &> /dev/null
+[ $? -eq '0' ] &&
+ die "$NODE1: bitmap still exists in member_disks."
+check $NODE1 nobitmap
+mdadm --grow $md0 --bitmap=clustered
+[ $? -eq '0' ] ||
+ die "$NODE1: change bitmap 'none' to 'clustered' failed."
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+sleep 1
+for ip in $NODES
+do
+ ssh $ip "mdadm -X $dev0 $dev1 | grep -q 'Cluster name'" ||
+ die "$ip: create 'clustered' bitmap failed."
+done
+check all bitmap
+check all state UU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/01r10_Grow_resize b/clustermd_tests/01r10_Grow_resize
new file mode 100644
index 0000000..c69b785
--- /dev/null
+++ b/clustermd_tests/01r10_Grow_resize
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+size=20000
+
+mdadm -CR $md0 -l10 -b clustered --layout n2 --size $size --chunk=64 -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid10
+check all bitmap
+check all state UU
+
+mdadm --grow $md0 --size max
+check $NODE1 resync
+check $NODE1 wait
+check all state UU
+
+mdadm --grow $md0 --size $size
+check all nosync
+check all state UU
+check all dmesg
+stop_md all $md0
+
+mdadm -CR $md0 -l10 -b clustered --layout n2 --chunk=64 -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid10
+check all bitmap
+check all state UU
+
+mdadm --grow $md0 --chunk=128
+check $NODE1 reshape
+check $NODE1 wait
+check all chunk 128
+check all state UU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/01r1_Grow_add b/clustermd_tests/01r1_Grow_add
new file mode 100644
index 0000000..5706114
--- /dev/null
+++ b/clustermd_tests/01r1_Grow_add
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l1 -b clustered -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid1
+check all bitmap
+check all state UU
+check all dmesg
+mdadm --grow $md0 --raid-devices=3 --add $dev2
+sleep 0.3
+grep recovery /proc/mdstat
+if [ $? -eq '0' ]
+then
+ check $NODE1 wait
+else
+ check $NODE2 recovery
+ check $NODE2 wait
+fi
+check all state UUU
+check all dmesg
+stop_md all $md0
+
+mdadm -CR $md0 -l1 -b clustered -n2 -x1 $dev0 $dev1 $dev2 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1 $dev2
+check all nosync
+check all raid1
+check all bitmap
+check all spares 1
+check all state UU
+check all dmesg
+mdadm --grow $md0 --raid-devices=3 --add $dev3
+sleep 0.3
+grep recovery /proc/mdstat
+if [ $? -eq '0' ]
+then
+ check $NODE1 wait
+else
+ check $NODE2 recovery
+ check $NODE2 wait
+fi
+check all state UUU
+check all dmesg
+stop_md all $md0
+
+mdadm -CR $md0 -l1 -b clustered -n2 -x1 $dev0 $dev1 $dev2 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1 $dev2
+check all nosync
+check all raid1
+check all bitmap
+check all spares 1
+check all state UU
+check all dmesg
+mdadm --grow $md0 --raid-devices=3
+sleep 0.3
+grep recovery /proc/mdstat
+if [ $? -eq '0' ]
+then
+ check $NODE1 wait
+else
+ check $NODE2 recovery
+ check $NODE2 wait
+fi
+check all state UUU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/01r1_Grow_bitmap-switch b/clustermd_tests/01r1_Grow_bitmap-switch
new file mode 100644
index 0000000..3b363d9
--- /dev/null
+++ b/clustermd_tests/01r1_Grow_bitmap-switch
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l1 -b clustered -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid1
+check all bitmap
+check all state UU
+
+# switch 'clustered' bitmap to 'none', and then 'none' to 'internal'
+stop_md $NODE2 $md0
+mdadm --grow $md0 --bitmap=none
+[ $? -eq '0' ] ||
+ die "$NODE1: change bitmap 'clustered' to 'none' failed."
+mdadm -X $dev0 $dev1 &> /dev/null
+[ $? -eq '0' ] &&
+ die "$NODE1: bitmap still exists in member_disks."
+check all nobitmap
+mdadm --grow $md0 --bitmap=internal
+[ $? -eq '0' ] ||
+ die "$NODE1: change bitmap 'none' to 'internal' failed."
+sleep 2
+mdadm -X $dev0 $dev1 &> /dev/null
+[ $? -eq '0' ] ||
+ die "$NODE1: create 'internal' bitmap failed."
+check $NODE1 bitmap
+
+# switch 'internal' bitmap to 'none', and then 'none' to 'clustered'
+mdadm --grow $md0 --bitmap=none
+[ $? -eq '0' ] ||
+ die "$NODE1: change bitmap 'internal' to 'none' failed."
+mdadm -X $dev0 $dev1 &> /dev/null
+[ $? -eq '0' ] &&
+ die "$NODE1: bitmap still exists in member_disks."
+check $NODE1 nobitmap
+mdadm --grow $md0 --bitmap=clustered
+[ $? -eq '0' ] ||
+ die "$NODE1: change bitmap 'none' to 'clustered' failed."
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+sleep 2
+for ip in $NODES
+do
+ ssh $ip "mdadm -X $dev0 $dev1 | grep -q 'Cluster name'" ||
+ die "$ip: create 'clustered' bitmap failed."
+done
+check all bitmap
+check all state UU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/01r1_Grow_resize b/clustermd_tests/01r1_Grow_resize
new file mode 100644
index 0000000..6d6e22a
--- /dev/null
+++ b/clustermd_tests/01r1_Grow_resize
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+size=10000
+
+mdadm -CR $md0 -l1 -b clustered --size $size -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid1
+check all bitmap
+check all state UU
+
+mdadm --grow $md0 --size max
+check $NODE1 resync
+check $NODE1 wait
+check all state UU
+
+mdadm --grow $md0 --size $size
+check all nosync
+check all state UU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/02r10_Manage_add b/clustermd_tests/02r10_Manage_add
new file mode 100644
index 0000000..8e878ab
--- /dev/null
+++ b/clustermd_tests/02r10_Manage_add
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l10 -b clustered --layout n2 -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid10
+check all bitmap
+check all state UU
+check all dmesg
+mdadm --manage $md0 --fail $dev0 --remove $dev0
+mdadm --zero $dev2
+mdadm --manage $md0 --add $dev2
+sleep 0.3
+check $NODE1 recovery
+check $NODE1 wait
+check all state UU
+check all dmesg
+stop_md all $md0
+
+mdadm -CR $md0 -l10 -b clustered --layout n2 -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid10
+check all bitmap
+check all state UU
+check all dmesg
+mdadm --manage $md0 --add $dev2
+check all spares 1
+check all state UU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/02r10_Manage_add-spare b/clustermd_tests/02r10_Manage_add-spare
new file mode 100644
index 0000000..9924aa8
--- /dev/null
+++ b/clustermd_tests/02r10_Manage_add-spare
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l10 -b clustered --layout n2 -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid10
+check all bitmap
+check all state UU
+check all dmesg
+mdadm --manage $md0 --add-spare $dev2
+check all spares 1
+check all state UU
+check all dmesg
+stop_md all $md0
+
+mdadm -CR $md0 -l10 -b clustered --layout n2 -n2 -x1 $dev0 $dev1 $dev2 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1 $dev2
+check all nosync
+check all raid10
+check all bitmap
+check all spares 1
+check all state UU
+check all dmesg
+mdadm --manage $md0 --add-spare $dev3
+check all spares 2
+check all state UU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/02r10_Manage_re-add b/clustermd_tests/02r10_Manage_re-add
new file mode 100644
index 0000000..2288a00
--- /dev/null
+++ b/clustermd_tests/02r10_Manage_re-add
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l10 -b clustered --layout n2 -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid10
+check all bitmap
+check all state UU
+check all dmesg
+mdadm --manage $md0 --fail $dev0 --remove $dev0
+mdadm --manage $md0 --re-add $dev0
+check $NODE1 recovery
+check all wait
+check all state UU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/02r1_Manage_add b/clustermd_tests/02r1_Manage_add
new file mode 100644
index 0000000..ab2751c
--- /dev/null
+++ b/clustermd_tests/02r1_Manage_add
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l1 -b clustered -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid1
+check all bitmap
+check all state UU
+check all dmesg
+mdadm --manage $md0 --fail $dev0 --remove $dev0
+mdadm --zero $dev2
+mdadm --manage $md0 --add $dev2
+sleep 0.3
+check $NODE1 recovery
+check $NODE1 wait
+check all state UU
+check all dmesg
+stop_md all $md0
+
+mdadm -CR $md0 -l1 -b clustered -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid1
+check all bitmap
+check all state UU
+check all dmesg
+mdadm --manage $md0 --add $dev2
+check all spares 1
+check all state UU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/02r1_Manage_add-spare b/clustermd_tests/02r1_Manage_add-spare
new file mode 100644
index 0000000..eab8111
--- /dev/null
+++ b/clustermd_tests/02r1_Manage_add-spare
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l1 -b clustered -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid1
+check all bitmap
+check all state UU
+check all dmesg
+mdadm --manage $md0 --add-spare $dev2
+check all spares 1
+check all state UU
+check all dmesg
+stop_md all $md0
+
+mdadm -CR $md0 -l1 -b clustered -n2 -x1 $dev0 $dev1 $dev2 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1 $dev2
+check all nosync
+check all raid1
+check all bitmap
+check all spares 1
+check all state UU
+check all dmesg
+mdadm --manage $md0 --add-spare $dev3
+check all spares 2
+check all state UU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/02r1_Manage_re-add b/clustermd_tests/02r1_Manage_re-add
new file mode 100644
index 0000000..dd9c416
--- /dev/null
+++ b/clustermd_tests/02r1_Manage_re-add
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l1 -b clustered -n2 $dev0 $dev1 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check all nosync
+check all raid1
+check all bitmap
+check all state UU
+check all dmesg
+mdadm --manage $md0 --fail $dev0 --remove $dev0
+mdadm --manage $md0 --re-add $dev0
+check $NODE1 recovery
+check all wait
+check all state UU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/03r10_switch-recovery b/clustermd_tests/03r10_switch-recovery
new file mode 100644
index 0000000..867388d
--- /dev/null
+++ b/clustermd_tests/03r10_switch-recovery
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l10 -b clustered --layout n2 -n2 -x1 $dev0 $dev1 $dev2 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1 $dev2
+check all nosync
+check all raid10
+check all bitmap
+check all spares 1
+check all state UU
+check all dmesg
+mdadm --manage $md0 --fail $dev0
+sleep 0.2
+check $NODE1 recovery
+stop_md $NODE1 $md0
+check $NODE2 recovery
+check $NODE2 wait
+check $NODE2 state UU
+check all dmesg
+stop_md $NODE2 $md0
+
+exit 0
diff --git a/clustermd_tests/03r10_switch-resync b/clustermd_tests/03r10_switch-resync
new file mode 100644
index 0000000..127c569
--- /dev/null
+++ b/clustermd_tests/03r10_switch-resync
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l10 -b clustered --layout n2 -n2 $dev0 $dev1
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check $NODE1 resync
+check $NODE2 PENDING
+stop_md $NODE1 $md0
+check $NODE2 resync
+check $NODE2 wait
+mdadm -A $md0 $dev0 $dev1
+check all raid10
+check all bitmap
+check all nosync
+check all state UU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/03r1_switch-recovery b/clustermd_tests/03r1_switch-recovery
new file mode 100644
index 0000000..a1a7cbe
--- /dev/null
+++ b/clustermd_tests/03r1_switch-recovery
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l1 -b clustered -n2 -x1 $dev0 $dev1 $dev2 --assume-clean
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1 $dev2
+check all nosync
+check all raid1
+check all bitmap
+check all spares 1
+check all state UU
+check all dmesg
+mdadm --manage $md0 --fail $dev0
+sleep 0.3
+check $NODE1 recovery
+stop_md $NODE1 $md0
+check $NODE2 recovery
+check $NODE2 wait
+check $NODE2 state UU
+check all dmesg
+stop_md $NODE2 $md0
+
+exit 0
diff --git a/clustermd_tests/03r1_switch-resync b/clustermd_tests/03r1_switch-resync
new file mode 100644
index 0000000..d99e1c5
--- /dev/null
+++ b/clustermd_tests/03r1_switch-resync
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+mdadm -CR $md0 -l1 -b clustered -n2 $dev0 $dev1
+ssh $NODE2 mdadm -A $md0 $dev0 $dev1
+check $NODE1 resync
+check $NODE2 PENDING
+stop_md $NODE1 $md0
+check $NODE2 resync
+check $NODE2 wait
+mdadm -A $md0 $dev0 $dev1
+check all raid1
+check all bitmap
+check all nosync
+check all state UU
+check all dmesg
+stop_md all $md0
+
+exit 0
diff --git a/clustermd_tests/cluster_conf b/clustermd_tests/cluster_conf
new file mode 100644
index 0000000..4f0c9fb
--- /dev/null
+++ b/clustermd_tests/cluster_conf
@@ -0,0 +1,43 @@
+# Prerequisite:
+# 1. The clustermd_tests/ cases only support testing a 2-node cluster. The
+# cluster requires the packages 'pacemaker+corosync+sbd+crmsh' (all available
+# at "https://github.com/ClusterLabs/") and a dlm resource running on each
+# node of the cluster.
+# For quick start HA-cluster with SUSE distributions, refer to the chapter 6-8:
+# https://www.suse.com/documentation/sle-ha-12/install-quick/data/install-quick.html
+# For Redhat distributions, please refer to:
+# https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html-single/high_availability_add-on_administration/index
+# 2. Set up ssh access in no-authorized (password-less) mode, i.e. both
+# 'ssh $node1 -l root ls' and 'ssh $node2 -l root ls' succeed on any node.
+# 3. Fill-up node-ip part and disks part as following.
+
+# Set node1 as the master node, the cluster-md cases should run on this node,
+# and node2 is the slave node.
+# For example:
+# NODE1=192.168.1.100 (testing run here)
+# NODE2=192.168.1.101
+NODE1=
+NODE2=
+
+# Provide the devices for clustermd-testing in one of two alternative ways:
+# if you use option 1, do not set option 2, and vice versa.
+# 1. Use an ISCSI service to provide shared storage, then log in to the ISCSI
+# target using ISCSI_TARGET_ID and ISCSI_TARGET_IP on the iscsi clients, e.g.:
+# Execute on iscsi clients:
+# 1) discover the iscsi server.
+# # iscsiadm -m discovery -t st -p $ISCSI_TARGET_IP
+# 2) login and establish connection.
+# # iscsiadm -m node -T $ISCSI_TARGET_ID -p $ISCSI_TARGET_IP -l
+# Note:
+# On ISCSI server, must create all iscsi-luns in one target_id, recommend more
+# than 6 luns/disks for testing, and each disk should be: 100M < disk < 800M.
+# 2. If all cluster-nodes mounted the same disks directly, and the devname are
+# the same on all nodes, then put them to 'devlist'.
+
+# For example: (Only set $ISCSI_TARGET_ID is enough if iscsi has already connected)
+# ISCSI_TARGET_ID=iqn.2018-01.example.com:clustermd-testing
+# ISCSI_TARGET_IP=192.168.1.102
+ISCSI_TARGET_ID=
+
+#devlist=/dev/sda /dev/sdb /dev/sdc /dev/sdd
+devlist=
diff --git a/clustermd_tests/func.sh b/clustermd_tests/func.sh
new file mode 100644
index 0000000..c2be0e5
--- /dev/null
+++ b/clustermd_tests/func.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+
+# check_ssh: load NODE1/NODE2 from $CLUSTER_CONF, then exit 1 unless root can ssh to both without a password prompt.
+check_ssh()
+{
+	NODE1="$(grep '^NODE1' $CLUSTER_CONF | cut -d'=' -f2)"
+	NODE2="$(grep '^NODE2' $CLUSTER_CONF | cut -d'=' -f2)"
+	if [ -z "$NODE1" -o -z "$NODE2" ]; then
+		echo "Please provide node-ip in $CLUSTER_CONF."
+		exit 1
+	fi
+	for ip in $NODE1 $NODE2; do
+		ssh -o NumberOfPasswordPrompts=0 $ip -l root "pwd" > /dev/null
+		[ $? -ne 0 ] && {
+			echo "Please setup ssh-access with no-authorized mode."
+			exit 1
+		}
+	done
+}
+
+fetch_devlist()
+{
+ ISCSI_ID="$(grep '^ISCSI_TARGET_ID' $CLUSTER_CONF | cut -d'=' -f2)"
+ devlist="$(grep '^devlist' $CLUSTER_CONF | cut -d'=' -f2)"
+ if [ ! -z "$ISCSI_ID" -a ! -z "$devlist" ]
+ then
+ echo "Config ISCSI_TARGET_ID or devlist in $CLUSTER_CONF."
+ exit 1
+ elif [ ! -z "$ISCSI_ID" -a -z "$devlist" ]
+ then
+ for ip in $NODE1 $NODE2
+ do
+ ssh $ip "ls /dev/disk/by-path/*$ISCSI_ID*" > /dev/null
+ [ $? -ne 0 ] && {
+ echo "$ip: No disks found in '$ISCSI_ID' connection."
+ exit 1
+ }
+ done
+ devlist=($(ls /dev/disk/by-path/*$ISCSI_ID*))
+ fi
+ # sbd disk cannot use in testing
+ for i in ${devlist[@]}
+ do
+ sbd -d $i dump &> /dev/null
+ [ $? -eq '0' ] && devlist=(${devlist[@]#$i})
+ done
+ for i in $(seq 0 ${#devlist[@]})
+ do
+ eval "dev$i=${devlist[$i]}"
+ done
+ [ "${#devlist[@]}" -lt 6 ] && {
+ echo "Cluster-md testing requires 6 disks at least."
+ exit 1
+ }
+}
+
+# check_dlm: configure a cloned dlm resource via crm when none is configured,
+# then exit 1 unless dlm_controld runs on both nodes and crm_mon looks clean.
+check_dlm()
+{
+	if ! crm configure show | grep -q dlm; then
+		crm configure primitive dlm ocf:pacemaker:controld \
+			op monitor interval=60 timeout=60 \
+			meta target-role=Started &> /dev/null
+		crm configure group base-group dlm
+		crm configure clone base-clone base-group \
+			meta interleave=true
+	fi
+	sleep 1
+	for ip in $NODE1 $NODE2; do
+		ssh $ip "pgrep dlm_controld > /dev/null" || {
+			echo "$ip: dlm_controld daemon doesn't exist."
+			exit 1
+		}
+	done
+	crm_mon -r -n1 | grep -iq "fail\|not" && {
+		echo "Please clear cluster-resource errors."
+		exit 1
+	}
+}
+
+check_env()
+{
+ user=$(id -un)
+ [ "X$user" = "Xroot" ] || {
+ echo "testing can only be done as 'root'."
+ exit 1
+ }
+ check_ssh
+ commands=(mdadm iscsiadm bc modinfo dlm_controld
+ udevadm crm crm_mon lsblk pgrep sbd)
+ mdadm_src_ver="$($mdadm -V 2>&1)"
+ for ip in $NODE1 $NODE2
+ do
+ for cmd in ${commands[@]}
+ do
+ ssh $ip "which $cmd &> /dev/null" || {
+ echo "$ip: $cmd, command not found!"
+ exit 1
+ }
+ done
+ mdadm_sbin_ver="$(ssh $ip "mdadm -V 2>&1")"
+ if [ "$mdadm_src_ver" != "$mdadm_sbin_ver" ]
+ then
+ echo "$ip: please run 'make install' before testing."
+ exit 1
+ fi
+ mods=(raid1 raid10 md_mod dlm md-cluster)
+ for mod in ${mods[@]}
+ do
+ ssh $ip "modinfo $mod > /dev/null" || {
+ echo "$ip: $mod, module doesn't exist."
+ exit 1
+ }
+ done
+ ssh $ip "lsblk -a | grep -iq raid"
+ [ $? -eq 0 ] && {
+ echo "$ip: Please run testing without running RAIDs environment."
+ exit 1
+ }
+ ssh $ip "modprobe md_mod"
+ done
+ fetch_devlist
+ check_dlm
+ [ -d $logdir ] || mkdir -p $logdir
+}
+
+# $1/node, $2/optional
+stop_md()
+{
+ if [ "$1" == "all" ]
+ then
+ NODES=($NODE1 $NODE2)
+ elif [ "$1" == "$NODE1" -o "$1" == "$NODE2" ]
+ then
+ NODES=$1
+ else
+ die "$1: unknown parameter."
+ fi
+ if [ -z "$2" ]
+ then
+ for ip in ${NODES[@]}
+ do
+ ssh $ip mdadm -Ssq
+ done
+ else
+ for ip in ${NODES[@]}
+ do
+ ssh $ip mdadm -S $2
+ done
+ fi
+}
+
+# $1/optional, it shows why to save log
+save_log()
+{
+ status=$1
+ logfile="$status""$_basename".log
+
+ cat $targetdir/stderr >> $targetdir/log
+ cp $targetdir/log $logdir/$_basename.log
+
+ for ip in $NODE1 $NODE2
+ do
+ echo "##$ip: saving dmesg." >> $logdir/$logfile
+ ssh $ip "dmesg -c" >> $logdir/$logfile
+ echo "##$ip: saving proc mdstat." >> $logdir/$logfile
+ ssh $ip "cat /proc/mdstat" >> $logdir/$logfile
+ array=($(ssh $ip "mdadm -Ds | cut -d' ' -f2"))
+
+ if [ ! -z "$array" -a ${#array[@]} -ge 1 ]
+ then
+ echo "##$ip: mdadm -D ${array[@]}" >> $logdir/$logfile
+ ssh $ip "mdadm -D ${array[@]}" >> $logdir/$logfile
+ md_disks=($(ssh $ip "mdadm -DY ${array[@]} | grep "/dev/" | cut -d'=' -f2"))
+ cat /proc/mdstat | grep -q "bitmap"
+ if [ $? -eq 0 ]
+ then
+ echo "##$ip: mdadm -X ${md_disks[@]}" >> $logdir/$logfile
+ ssh $ip "mdadm -X ${md_disks[@]}" >> $logdir/$logfile
+ echo "##$ip: mdadm -E ${md_disks[@]}" >> $logdir/$logfile
+ ssh $ip "mdadm -E ${md_disks[@]}" >> $logdir/$logfile
+ fi
+ else
+ echo "##$ip: no array assembled!" >> $logdir/$logfile
+ fi
+ done
+ [ "$1" == "fail" ] &&
+ echo "See $logdir/$_basename.log and $logdir/$logfile for details"
+ stop_md all
+}
+
+do_setup()	# prepare the environment for a test run
+{
+ check_env	# exits with status 1 when a prerequisite is missing
+ ulimit -c unlimited	# allow core dumps of any size
+}
+
+do_clean()
+{
+ for ip in $NODE1 $NODE2
+ do
+ ssh $ip "mdadm -Ssq; dmesg -c > /dev/null"
+ done
+ mdadm --zero ${devlist[@]} &> /dev/null
+}
+
+cleanup()	# reset the cluster nodes and the test disks
+{
+ check_ssh	# reloads NODE1/NODE2 from $CLUSTER_CONF and verifies ssh access
+ do_clean	# stops arrays, clears dmesg, runs mdadm --zero on the disks
+}
+
+# check: $1/cluster_node $2/feature $3/optional
+check()
+{
+ NODES=()
+ if [ "$1" == "all" ]
+ then
+ NODES=($NODE1 $NODE2)
+ elif [ "$1" == "$NODE1" -o "$1" == "$NODE2" ]
+ then
+ NODES=$1
+ else
+ die "$1: unknown parameter."
+ fi
+ case $2 in
+ spares )
+ for ip in ${NODES[@]}
+ do
+ spares=$(ssh $ip "tr '] ' '\012\012' < /proc/mdstat | grep -c '(S)'")
+ [ "$spares" -ne "$3" ] &&
+ die "$ip: expected $3 spares, but found $spares"
+ done
+ ;;
+ raid* )
+ for ip in ${NODES[@]}
+ do
+ ssh $ip "grep -sq "$2" /proc/mdstat" ||
+ die "$ip: check '$2' failed."
+ done
+ ;;
+ PENDING | recovery | resync | reshape )
+ cnt=5
+ for ip in ${NODES[@]}
+ do
+ while ! ssh $ip "grep -sq '$2' /proc/mdstat"
+ do
+ if [ "$cnt" -gt '0' ]
+ then
+ sleep 0.2
+ cnt=$[cnt-1]
+ else
+ die "$ip: no '$2' happening!"
+ fi
+ done
+ done
+ ;;
+ wait )
+ local cnt=60
+ for ip in ${NODES[@]}
+ do
+ p=$(ssh $ip "cat /proc/sys/dev/raid/speed_limit_max")
+ ssh $ip "echo 200000 > /proc/sys/dev/raid/speed_limit_max"
+ while ssh $ip "grep -Esq '(resync|recovery|reshape|check|repair)' /proc/mdstat"
+ do
+ if [ "$cnt" -gt '0' ]
+ then
+ sleep 5
+ cnt=$[cnt-1]
+ else
+ die "$ip: Check '$2' timeout over 300 seconds."
+ fi
+ done
+ ssh $ip "echo $p > /proc/sys/dev/raid/speed_limit_max"
+ done
+ ;;
+ bitmap )
+ for ip in ${NODES[@]}
+ do
+ ssh $ip "grep -sq '$2' /proc/mdstat" ||
+ die "$ip: no '$2' found in /proc/mdstat."
+ done
+ ;;
+ nobitmap )
+ for ip in ${NODES[@]}
+ do
+ ssh $ip "grep -sq 'bitmap' /proc/mdstat" &&
+ die "$ip: 'bitmap' found in /proc/mdstat."
+ done
+ ;;
+ chunk )
+ for ip in ${NODES[@]}
+ do
+ chunk_size=`awk -F',' '/chunk/{print $2}' /proc/mdstat | awk -F'[a-z]' '{print $1}'`
+ [ "$chunk_size" -ne "$3" ] &&
+ die "$ip: chunksize should be $3, but it's $chunk_size"
+ done
+ ;;
+ state )
+ for ip in ${NODES[@]}
+ do
+ ssh $ip "grep -Esq 'blocks.*\[$3\]\$' /proc/mdstat" ||
+ die "$ip: no '$3' found in /proc/mdstat."
+ done
+ ;;
+ nosync )
+ for ip in ${NODES[@]}
+ do
+ ssh $ip "grep -Eq '(resync|recovery)' /proc/mdstat" &&
+ die "$ip: resync or recovery is happening!"
+ done
+ ;;
+ readonly )
+ for ip in ${NODES[@]}
+ do
+ ssh $ip "grep -sq "read-only" /proc/mdstat" ||
+ die "$ip: check '$2' failed!"
+ done
+ ;;
+ dmesg )
+ for ip in ${NODES[@]}
+ do
+ ssh $ip "dmesg | grep -iq 'error\|call trace\|segfault'" &&
+ die "$ip: check '$2' prints errors!"
+ done
+ ;;
+ * )
+ die "unknown parameter $2"
+ ;;
+ esac
+}
diff --git a/config.c b/config.c
index 9b008e3..48e0278 100644
--- a/config.c
+++ b/config.c
@@ -1118,8 +1118,8 @@ struct mddev_ident *conf_match(struct supertype *st,
match = NULL;
for (; array_list; array_list = array_list->next) {
if (array_list->uuid_set &&
- same_uuid(array_list->uuid, info->uuid, st->ss->swapuuid)
- == 0) {
+ same_uuid(array_list->uuid, info->uuid,
+ st->ss->swapuuid) == 0) {
if (verbose >= 2 && array_list->devname)
pr_err("UUID differs from %s.\n",
array_list->devname);
diff --git a/debian/changelog b/debian/changelog
index 0f6b9d2..bf6b2a6 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+mdadm (4.1~rc1-1) unstable; urgency=medium
+
+ * New upstream release.
+
+ -- Dimitri John Ledkov <xnox@ubuntu.com> Tue, 15 May 2018 12:10:33 +0100
+
mdadm (4.0-2) unstable; urgency=medium
* Ship mdadm-shutdown.service and suggest dracut-core. Users of systemd
diff --git a/inventory b/inventory
index f0277d2..2e3c1be 100755
--- a/inventory
+++ b/inventory
@@ -24,6 +24,7 @@ ANNOUNCE-3.3.3
ANNOUNCE-3.3.4
ANNOUNCE-3.4
ANNOUNCE-4.0
+ANNOUNCE-4.1-rc1
Assemble.c
Build.c
COPYING
@@ -45,6 +46,26 @@ ReadMe.c
TODO
bitmap.c
bitmap.h
+clustermd_tests/
+clustermd_tests/00r10_Create
+clustermd_tests/00r1_Create
+clustermd_tests/01r10_Grow_bitmap-switch
+clustermd_tests/01r10_Grow_resize
+clustermd_tests/01r1_Grow_add
+clustermd_tests/01r1_Grow_bitmap-switch
+clustermd_tests/01r1_Grow_resize
+clustermd_tests/02r10_Manage_add
+clustermd_tests/02r10_Manage_add-spare
+clustermd_tests/02r10_Manage_re-add
+clustermd_tests/02r1_Manage_add
+clustermd_tests/02r1_Manage_add-spare
+clustermd_tests/02r1_Manage_re-add
+clustermd_tests/03r10_switch-recovery
+clustermd_tests/03r10_switch-resync
+clustermd_tests/03r1_switch-recovery
+clustermd_tests/03r1_switch-resync
+clustermd_tests/cluster_conf
+clustermd_tests/func.sh
config.c
crc32.c
crc32.h
@@ -53,11 +74,6 @@ dlink.c
dlink.h
external-reshape-design.txt
inventory
-kernel-patch-2.6.18
-kernel-patch-2.6.18.6
-kernel-patch-2.6.19
-kernel-patch-2.6.25
-kernel-patch-2.6.27
lib.c
makedist
managemon.c
@@ -73,8 +89,6 @@ mdadm.conf-example
mdadm.conf.5
mdadm.h
mdadm.spec
-mdassemble.8
-mdassemble.c
mdmon-design.txt
mdmon.8
mdmon.c
@@ -129,6 +143,7 @@ tests/00raid10
tests/00raid4
tests/00raid5
tests/00raid6
+tests/00readonly
tests/01r1fail
tests/01r5fail
tests/01r5integ
@@ -245,13 +260,15 @@ tests/19repair-does-not-destroy
tests/20raid5journal
tests/21raid5cache
tests/ToTest
-tests/check
tests/env-ddf-template
tests/env-imsm-template
+tests/func.sh
tests/imsm-grow-template
-tests/testdev
tests/utils
+udev-md-clustered-confirm-device.rules
udev-md-raid-arrays.rules
udev-md-raid-assembly.rules
+udev-md-raid-creating.rules
+udev-md-raid-safe-timeouts.rules
util.c
xmalloc.c
diff --git a/kernel-patch-2.6.18 b/kernel-patch-2.6.18
deleted file mode 100644
index 87496ea..0000000
--- a/kernel-patch-2.6.18
+++ /dev/null
@@ -1,35 +0,0 @@
-
-### Diffstat output
- ./drivers/md/md.c | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff .prev/drivers/md/md.c ./drivers/md/md.c
---- .prev/drivers/md/md.c 2006-10-23 10:26:37.000000000 +1000
-+++ ./drivers/md/md.c 2006-12-21 16:28:29.000000000 +1100
-@@ -1783,7 +1783,8 @@ state_store(mdk_rdev_t *rdev, const char
- else {
- mddev_t *mddev = rdev->mddev;
- kick_rdev_from_array(rdev);
-- md_update_sb(mddev);
-+ if (mddev->pers)
-+ md_update_sb(mddev);
- md_new_event(mddev);
- err = 0;
- }
-@@ -1994,6 +1995,8 @@ static mdk_rdev_t *md_import_device(dev_
- kobject_init(&rdev->kobj);
-
- rdev->desc_nr = -1;
-+ rdev->saved_raid_disk = -1;
-+ rdev->raid_disk = -1;
- rdev->flags = 0;
- rdev->data_offset = 0;
- rdev->sb_events = 0;
-@@ -3991,6 +3994,7 @@ static int set_array_info(mddev_t * mdde
- mddev->major_version = info->major_version;
- mddev->minor_version = info->minor_version;
- mddev->patch_version = info->patch_version;
-+ mddev->persistent = ! info->not_persistent;
- return 0;
- }
- mddev->major_version = MD_MAJOR_VERSION;
diff --git a/kernel-patch-2.6.18.6 b/kernel-patch-2.6.18.6
deleted file mode 100644
index e702e14..0000000
--- a/kernel-patch-2.6.18.6
+++ /dev/null
@@ -1,35 +0,0 @@
-Signed-off-by: Neil Brown <neilb@suse.de>
-
-### Diffstat output
- ./drivers/md/md.c | 5 ++++-
- 1 file changed, 4 insertions(+), 1 deletion(-)
-
-diff .prev/drivers/md/md.c ./drivers/md/md.c
---- .prev/drivers/md/md.c 2006-12-21 17:08:23.000000000 +1100
-+++ ./drivers/md/md.c 2006-12-21 17:08:26.000000000 +1100
-@@ -1783,7 +1783,8 @@ state_store(mdk_rdev_t *rdev, const char
- else {
- mddev_t *mddev = rdev->mddev;
- kick_rdev_from_array(rdev);
-- md_update_sb(mddev);
-+ if (mddev->pers)
-+ md_update_sb(mddev);
- md_new_event(mddev);
- err = 0;
- }
-@@ -1995,6 +1996,7 @@ static mdk_rdev_t *md_import_device(dev_
-
- rdev->desc_nr = -1;
- rdev->saved_raid_disk = -1;
-+ rdev->raid_disk = -1;
- rdev->flags = 0;
- rdev->data_offset = 0;
- rdev->sb_events = 0;
-@@ -3993,6 +3995,7 @@ static int set_array_info(mddev_t * mdde
- mddev->major_version = info->major_version;
- mddev->minor_version = info->minor_version;
- mddev->patch_version = info->patch_version;
-+ mddev->persistent = ! info->not_persistent;
- return 0;
- }
- mddev->major_version = MD_MAJOR_VERSION;
diff --git a/kernel-patch-2.6.19 b/kernel-patch-2.6.19
deleted file mode 100644
index 22a67a3..0000000
--- a/kernel-patch-2.6.19
+++ /dev/null
@@ -1,34 +0,0 @@
-
-### Diffstat output
- ./drivers/md/md.c | 5 ++++-
- 1 file changed, 4 insertions(+), 1 deletion(-)
-
-diff .prev/drivers/md/md.c ./drivers/md/md.c
---- .prev/drivers/md/md.c 2006-12-21 15:55:01.000000000 +1100
-+++ ./drivers/md/md.c 2006-12-21 16:28:09.000000000 +1100
-@@ -1792,7 +1792,8 @@ state_store(mdk_rdev_t *rdev, const char
- else {
- mddev_t *mddev = rdev->mddev;
- kick_rdev_from_array(rdev);
-- md_update_sb(mddev, 1);
-+ if (mddev->pers)
-+ md_update_sb(mddev, 1);
- md_new_event(mddev);
- err = 0;
- }
-@@ -2004,6 +2005,7 @@ static mdk_rdev_t *md_import_device(dev_
-
- rdev->desc_nr = -1;
- rdev->saved_raid_disk = -1;
-+ rdev->raid_disk = -1;
- rdev->flags = 0;
- rdev->data_offset = 0;
- rdev->sb_events = 0;
-@@ -3977,6 +3979,7 @@ static int set_array_info(mddev_t * mdde
- mddev->major_version = info->major_version;
- mddev->minor_version = info->minor_version;
- mddev->patch_version = info->patch_version;
-+ mddev->persistent = ! info->not_persistent;
- return 0;
- }
- mddev->major_version = MD_MAJOR_VERSION;
diff --git a/kernel-patch-2.6.25 b/kernel-patch-2.6.25
deleted file mode 100644
index 2329007..0000000
--- a/kernel-patch-2.6.25
+++ /dev/null
@@ -1,199 +0,0 @@
-Status: ok
-
-Support adding a spare to a live md array with external metadata.
-
-i.e. extend the 'md/dev-XXX/slot' attribute so that you can
-tell a device to fill an vacant slot in an and md array.
-
-
-Signed-off-by: Neil Brown <neilb@suse.de>
-
-### Diffstat output
- ./drivers/md/md.c | 44 ++++++++++++++++++++++++++++++++++++++++----
- ./drivers/md/multipath.c | 7 ++++++-
- ./drivers/md/raid1.c | 7 ++++++-
- ./drivers/md/raid10.c | 10 ++++++++--
- ./drivers/md/raid5.c | 10 ++++++++--
- 5 files changed, 68 insertions(+), 10 deletions(-)
-
-diff .prev/drivers/md/md.c ./drivers/md/md.c
---- .prev/drivers/md/md.c 2008-06-05 09:19:56.000000000 +1000
-+++ ./drivers/md/md.c 2008-06-10 10:41:21.000000000 +1000
-@@ -1932,7 +1932,7 @@ slot_store(mdk_rdev_t *rdev, const char
- slot = -1;
- else if (e==buf || (*e && *e!= '\n'))
- return -EINVAL;
-- if (rdev->mddev->pers) {
-+ if (rdev->mddev->pers && slot == -1) {
- /* Setting 'slot' on an active array requires also
- * updating the 'rd%d' link, and communicating
- * with the personality with ->hot_*_disk.
-@@ -1940,8 +1940,6 @@ slot_store(mdk_rdev_t *rdev, const char
- * failed/spare devices. This normally happens automatically,
- * but not when the metadata is externally managed.
- */
-- if (slot != -1)
-- return -EBUSY;
- if (rdev->raid_disk == -1)
- return -EEXIST;
- /* personality does all needed checks */
-@@ -1955,6 +1953,44 @@ slot_store(mdk_rdev_t *rdev, const char
- sysfs_remove_link(&rdev->mddev->kobj, nm);
- set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
- md_wakeup_thread(rdev->mddev->thread);
-+ } else if (rdev->mddev->pers) {
-+ mdk_rdev_t *rdev2;
-+ struct list_head *tmp;
-+ /* Activating a spare .. or possibly reactivating
-+ * if we every get bitmaps working here.
-+ */
-+
-+ if (rdev->raid_disk != -1)
-+ return -EBUSY;
-+
-+ if (rdev->mddev->pers->hot_add_disk == NULL)
-+ return -EINVAL;
-+
-+ rdev_for_each(rdev2, tmp, rdev->mddev)
-+ if (rdev2->raid_disk == slot)
-+ return -EEXIST;
-+
-+ rdev->raid_disk = slot;
-+ if (test_bit(In_sync, &rdev->flags))
-+ rdev->saved_raid_disk = slot;
-+ else
-+ rdev->saved_raid_disk = -1;
-+ err = rdev->mddev->pers->
-+ hot_add_disk(rdev->mddev, rdev);
-+ if (err != 1) {
-+ rdev->raid_disk = -1;
-+ if (err == 0)
-+ return -EEXIST;
-+ return err;
-+ }
-+ sprintf(nm, "rd%d", rdev->raid_disk);
-+ if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
-+ printk(KERN_WARNING
-+ "md: cannot register "
-+ "%s for %s\n",
-+ nm, mdname(rdev->mddev));
-+
-+ /* don't wakeup anyone, leave that to userspace. */
- } else {
- if (slot >= rdev->mddev->raid_disks)
- return -ENOSPC;
-@@ -4205,7 +4241,7 @@ static int add_new_disk(mddev_t * mddev,
- super_types[mddev->major_version].
- validate_super(mddev, rdev);
- err = mddev->pers->hot_add_disk(mddev, rdev);
-- if (err)
-+ if (err < 0)
- unbind_rdev_from_array(rdev);
- }
- if (err)
-
-diff .prev/drivers/md/multipath.c ./drivers/md/multipath.c
---- .prev/drivers/md/multipath.c 2008-05-30 14:49:31.000000000 +1000
-+++ ./drivers/md/multipath.c 2008-06-10 10:35:03.000000000 +1000
-@@ -284,10 +284,15 @@ static int multipath_add_disk(mddev_t *m
- int found = 0;
- int path;
- struct multipath_info *p;
-+ int first = 0;
-+ int last = mddev->raid_disks - 1;
-+
-+ if (rdev->raid_disk >= 0)
-+ first = last = rdev->raid_disk;
-
- print_multipath_conf(conf);
-
-- for (path=0; path<mddev->raid_disks; path++)
-+ for (path = first; path <= last; path++)
- if ((p=conf->multipaths+path)->rdev == NULL) {
- q = rdev->bdev->bd_disk->queue;
- blk_queue_stack_limits(mddev->queue, q);
-
-diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
---- .prev/drivers/md/raid10.c 2008-05-30 14:49:31.000000000 +1000
-+++ ./drivers/md/raid10.c 2008-06-10 10:28:53.000000000 +1000
-@@ -1116,6 +1116,8 @@ static int raid10_add_disk(mddev_t *mdde
- int found = 0;
- int mirror;
- mirror_info_t *p;
-+ int first = 0;
-+ int last = mddev->raid_disks - 1;
-
- if (mddev->recovery_cp < MaxSector)
- /* only hot-add to in-sync arrays, as recovery is
-@@ -1125,12 +1127,16 @@ static int raid10_add_disk(mddev_t *mdde
- if (!enough(conf))
- return 0;
-
-+ if (rdev->raid_disk)
-+ first = last = rdev->raid_disk;
-+
- if (rdev->saved_raid_disk >= 0 &&
-+ rdev->saved_raid_disk >= first &&
- conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
- mirror = rdev->saved_raid_disk;
- else
-- mirror = 0;
-- for ( ; mirror < mddev->raid_disks; mirror++)
-+ mirror = first;
-+ for ( ; mirror <= last ; mirror++)
- if ( !(p=conf->mirrors+mirror)->rdev) {
-
- blk_queue_stack_limits(mddev->queue,
-
-diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
---- .prev/drivers/md/raid1.c 2008-05-30 14:49:31.000000000 +1000
-+++ ./drivers/md/raid1.c 2008-06-10 10:41:00.000000000 +1000
-@@ -1103,8 +1103,13 @@ static int raid1_add_disk(mddev_t *mddev
- int found = 0;
- int mirror = 0;
- mirror_info_t *p;
-+ int first = 0;
-+ int last = mddev->raid_disks - 1;
-
-- for (mirror=0; mirror < mddev->raid_disks; mirror++)
-+ if (rdev->raid_disk >= 0)
-+ first = last = rdev->raid_disk;
-+
-+ for (mirror = first; mirror <= last; mirror++)
- if ( !(p=conf->mirrors+mirror)->rdev) {
-
- blk_queue_stack_limits(mddev->queue,
-
-diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
---- .prev/drivers/md/raid5.c 2008-05-30 14:49:35.000000000 +1000
-+++ ./drivers/md/raid5.c 2008-06-10 10:27:51.000000000 +1000
-@@ -4399,21 +4399,27 @@ static int raid5_add_disk(mddev_t *mddev
- int found = 0;
- int disk;
- struct disk_info *p;
-+ int first = 0;
-+ int last = conf->raid_disks - 1;
-
- if (mddev->degraded > conf->max_degraded)
- /* no point adding a device */
- return 0;
-
-+ if (rdev->raid_disk >= 0)
-+ first = last = rdev->raid_disk;
-+
- /*
- * find the disk ... but prefer rdev->saved_raid_disk
- * if possible.
- */
- if (rdev->saved_raid_disk >= 0 &&
-+ rdev->saved_raid_disk >= first &&
- conf->disks[rdev->saved_raid_disk].rdev == NULL)
- disk = rdev->saved_raid_disk;
- else
-- disk = 0;
-- for ( ; disk < conf->raid_disks; disk++)
-+ disk = first;
-+ for ( ; disk <= last ; disk++)
- if ((p=conf->disks + disk)->rdev == NULL) {
- clear_bit(In_sync, &rdev->flags);
- rdev->raid_disk = disk;
diff --git a/kernel-patch-2.6.27 b/kernel-patch-2.6.27
deleted file mode 100644
index 8d0785d..0000000
--- a/kernel-patch-2.6.27
+++ /dev/null
@@ -1,36 +0,0 @@
-touch_mnt_namespace when the mount flags change
-
-From: Dan Williams <dan.j.williams@intel.com>
-
-Daemons that need to be launched while the rootfs is read-only can now
-poll /proc/mounts to be notified when their O_RDWR requests may no
-longer end in EROFS.
-
-Cc: Kay Sievers <kay.sievers@vrfy.org>
-Cc: Neil Brown <neilb@suse.de>
-Signed-off-by: Dan Williams <dan.j.williams@intel.com>
----
-
- fs/namespace.c | 7 ++++++-
- 1 files changed, 6 insertions(+), 1 deletions(-)
-
-
-diff --git a/fs/namespace.c b/fs/namespace.c
-index 6e283c9..1bd5ba2 100644
---- a/fs/namespace.c
-+++ b/fs/namespace.c
-@@ -1553,8 +1553,13 @@ static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags,
- if (!err)
- nd->path.mnt->mnt_flags = mnt_flags;
- up_write(&sb->s_umount);
-- if (!err)
-+ if (!err) {
- security_sb_post_remount(nd->path.mnt, flags, data);
-+
-+ spin_lock(&vfsmount_lock);
-+ touch_mnt_namespace(nd->path.mnt->mnt_ns);
-+ spin_unlock(&vfsmount_lock);
-+ }
- return err;
- }
-
diff --git a/lib.c b/lib.c
index b640634..60890b9 100644
--- a/lib.c
+++ b/lib.c
@@ -61,7 +61,7 @@ int get_mdp_major(void)
return mdp_major;
}
-char *devid2kname(int devid)
+char *devid2kname(dev_t devid)
{
char path[30];
char link[PATH_MAX];
@@ -73,8 +73,7 @@ char *devid2kname(int devid)
* /sys/dev/block/%d:%d link which must look like
* and take the last component.
*/
- sprintf(path, "/sys/dev/block/%d:%d", major(devid),
- minor(devid));
+ sprintf(path, "/sys/dev/block/%d:%d", major(devid), minor(devid));
n = readlink(path, link, sizeof(link) - 1);
if (n > 0) {
link[n] = 0;
@@ -163,6 +162,35 @@ char *fd2devnm(int fd)
return NULL;
}
+/* When we create a new array, we don't want the content to
+ * be immediately examined by udev - it is probably meaningless.
+ * So create /run/mdadm/creating-mdXXX and expect that a udev
+ * rule will notice this and act accordingly.
+ */
+static char block_path[] = "/run/mdadm/creating-%s";	/* printf format of the marker file name */
+static char *unblock_path = NULL;	/* marker created by udev_block(), NULL when there is none */
+void udev_block(char *devnm)	/* create the marker file for the array named devnm */
+{
+ int fd;
+ char *path = NULL;
+
+ xasprintf(&path, block_path, devnm);
+ fd = open(path, O_CREAT|O_RDWR, 0600);
+ if (fd >= 0) {
+ close(fd);	/* nothing is written: only the file's existence matters */
+ unblock_path = path;	/* remembered so that udev_unblock() can remove it */
+ } else
+ free(path);	/* marker could not be created: nothing to undo later */
+}
+
+void udev_unblock(void)	/* remove the marker file left by udev_block(), if any */
+{
+ if (unblock_path)
+ unlink(unblock_path);
+ free(unblock_path);	/* free(NULL) is a no-op, so no guard is needed */
+ unblock_path = NULL;	/* a repeated call then does nothing */
+}
+
/*
* convert a major/minor pair for a block device into a name in /dev, if possible.
* On the first call, walk /dev collecting name.
diff --git a/makedist b/makedist
index e4f20ac..0c4b39e 100755
--- a/makedist
+++ b/makedist
@@ -44,7 +44,7 @@ else { cat inventory ; echo ANNOUNCE-$version ; } | sort -o inventory
fi
echo version = $version
-base=mdadm-$version.tar.gz
+base=mdadm-$rpmv.tar.gz
if [ " $arg" != " diff" ]
then
if [ -f $target/$base ]
@@ -53,7 +53,7 @@ then
exit 1
fi
trap "rm $target/$base; exit" 1 2 3
- git archive --prefix=mdadm-$version/ HEAD | gzip --best > $target/$base
+ git archive --prefix=mdadm-$rpmv/ HEAD | gzip --best > $target/$base
chmod a+r $target/$base
ls -l $target/$base
if tar tzf $target/$base | sed 's,[^/]*/,,' | sort | diff -u inventory -
@@ -63,7 +63,7 @@ then
exit 1
fi
rpmbuild -ta $target/$base || exit 1
- find /home/neilb/src/RPM -name "*mdadm-$version-*" \
+ find ~/rpmbuild/RPMS -name "*mdadm-$version-*" \
-exec cp {} $target/RPM \;
cp ANNOUNCE-$version $target/ANNOUNCE
cp ChangeLog $target/ChangeLog
diff --git a/managemon.c b/managemon.c
index 3c1d4cb..101231c 100644
--- a/managemon.c
+++ b/managemon.c
@@ -129,6 +129,8 @@ static void close_aa(struct active_array *aa)
close(aa->metadata_fd);
if (aa->sync_completed_fd >= 0)
close(aa->sync_completed_fd);
+ if (aa->safe_mode_delay_fd >= 0)
+ close(aa->safe_mode_delay_fd);
}
static void free_aa(struct active_array *aa)
@@ -264,9 +266,7 @@ static void add_disk_to_container(struct supertype *st, struct mdinfo *sd)
{
int dfd;
char nm[20];
- struct supertype *st2;
struct metadata_update *update = NULL;
- struct mdinfo info;
mdu_disk_info_t dk = {
.number = -1,
.major = sd->disk.major,
@@ -285,25 +285,6 @@ static void add_disk_to_container(struct supertype *st, struct mdinfo *sd)
if (dfd < 0)
return;
- /* Check the metadata and see if it is already part of this
- * array
- */
- st2 = dup_super(st);
- if (st2->ss->load_super(st2, dfd, NULL) == 0) {
- st2->ss->getinfo_super(st2, &info, NULL);
- if (st->ss->compare_super(st, st2) == 0 &&
- info.disk.raid_disk >= 0) {
- /* Looks like a good member of array.
- * Just accept it.
- * mdadm will incorporate any parts into
- * active arrays.
- */
- st2->ss->free_super(st2);
- return;
- }
- }
- st2->ss->free_super(st2);
-
st->update_tail = &update;
st->ss->add_to_super(st, &dk, dfd, NULL, INVALID_SECTORS);
st->ss->write_init_super(st);
@@ -477,7 +458,7 @@ static void manage_member(struct mdstat_ent *mdstat,
char buf[64];
int frozen;
struct supertype *container = a->container;
- unsigned long long int component_size = 0;
+ struct mdinfo *mdi;
if (container == NULL)
/* Raced with something */
@@ -489,8 +470,13 @@ static void manage_member(struct mdstat_ent *mdstat,
// MORE
}
- if (sysfs_get_ll(&a->info, NULL, "component_size", &component_size) >= 0)
- a->info.component_size = component_size << 1;
+ mdi = sysfs_read(-1, mdstat->devnm,
+ GET_COMPONENT|GET_CONSISTENCY_POLICY);
+ if (mdi) {
+ a->info.component_size = mdi->component_size;
+ a->info.consistency_policy = mdi->consistency_policy;
+ sysfs_free(mdi);
+ }
/* honor 'frozen' */
if (sysfs_get_str(&a->info, NULL, "metadata_version", buf, sizeof(buf)) > 0)
@@ -499,9 +485,9 @@ static void manage_member(struct mdstat_ent *mdstat,
frozen = 1; /* can't read metadata_version assume the worst */
/* If sync_action is not 'idle' then don't try recovery now */
- if (!frozen
- && sysfs_get_str(&a->info, NULL, "sync_action", buf, sizeof(buf)) > 0
- && strncmp(buf, "idle", 4) != 0)
+ if (!frozen &&
+ sysfs_get_str(&a->info, NULL, "sync_action",
+ buf, sizeof(buf)) > 0 && strncmp(buf, "idle", 4) != 0)
frozen = 1;
if (mdstat->level) {
@@ -527,9 +513,15 @@ static void manage_member(struct mdstat_ent *mdstat,
if (a->container == NULL)
return;
- if (sigterm && a->info.safe_mode_delay != 1) {
- sysfs_set_safemode(&a->info, 1);
- a->info.safe_mode_delay = 1;
+ if (sigterm && a->info.safe_mode_delay != 1 &&
+ a->safe_mode_delay_fd >= 0) {
+ long int new_delay = 1;
+ char delay[10];
+ ssize_t len;
+
+ len = snprintf(delay, sizeof(delay), "0.%03ld\n", new_delay);
+ if (write(a->safe_mode_delay_fd, delay, len) == len)
+ a->info.safe_mode_delay = new_delay;
}
/* We don't check the array while any update is pending, as it
@@ -580,8 +572,8 @@ static void manage_member(struct mdstat_ent *mdstat,
usleep(15*1000);
}
replace_array(container, a, newa);
- if (sysfs_set_str(&a->info, NULL, "sync_action", "recover")
- == 0)
+ if (sysfs_set_str(&a->info, NULL,
+ "sync_action", "recover") == 0)
newa->prev_action = recover;
dprintf("recovery started on %s\n", a->info.sys_name);
out:
@@ -626,8 +618,8 @@ static void manage_member(struct mdstat_ent *mdstat,
newd = xmalloc(sizeof(*newd));
disk_init_and_add(newd, d, newa);
}
- if (sysfs_get_ll(info, NULL, "array_size", &array_size) == 0
- && a->info.custom_array_size > array_size*2) {
+ if (sysfs_get_ll(info, NULL, "array_size", &array_size) == 0 &&
+ a->info.custom_array_size > array_size*2) {
sysfs_set_num(info, NULL, "array_size",
a->info.custom_array_size/2);
}
@@ -685,8 +677,8 @@ static void manage_new(struct mdstat_ent *mdstat,
mdi = sysfs_read(-1, mdstat->devnm,
GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
- GET_DEGRADED|GET_SAFEMODE|
- GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|GET_LAYOUT);
+ GET_SAFEMODE|GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
+ GET_LAYOUT);
if (!mdi)
return;
@@ -729,6 +721,8 @@ static void manage_new(struct mdstat_ent *mdstat,
new->resync_start_fd = sysfs_open2(new->info.sys_name, NULL, "resync_start");
new->metadata_fd = sysfs_open2(new->info.sys_name, NULL, "metadata_version");
new->sync_completed_fd = sysfs_open2(new->info.sys_name, NULL, "sync_completed");
+ new->safe_mode_delay_fd = sysfs_open2(new->info.sys_name, NULL,
+ "safe_mode_delay");
dprintf("inst: %s action: %d state: %d\n", inst,
new->action_fd, new->info.state_fd);
diff --git a/mapfile.c b/mapfile.c
index c89d403..f3c8191 100644
--- a/mapfile.c
+++ b/mapfile.c
@@ -439,8 +439,8 @@ void RebuildMap(void)
if ((homehost == NULL ||
st->ss->match_home(st, homehost) != 1) &&
st->ss->match_home(st, "any") != 1 &&
- (require_homehost
- || ! conf_name_is_free(info->name)))
+ (require_homehost ||
+ !conf_name_is_free(info->name)))
/* require a numeric suffix */
unum = 0;
else
diff --git a/maps.c b/maps.c
index 64f1df2..02a0474 100644
--- a/maps.c
+++ b/maps.c
@@ -44,7 +44,7 @@ mapping_t r5layout[] = {
{ "ddf-N-restart", ALGORITHM_LEFT_ASYMMETRIC},
{ "ddf-N-continue", ALGORITHM_LEFT_SYMMETRIC},
- { NULL, 0}
+ { NULL, UnSet }
};
mapping_t r6layout[] = {
{ "left-asymmetric", ALGORITHM_LEFT_ASYMMETRIC},
@@ -70,7 +70,7 @@ mapping_t r6layout[] = {
{ "right-symmetric-6", ALGORITHM_RIGHT_SYMMETRIC_6},
{ "parity-first-6", ALGORITHM_PARITY_0_6},
- { NULL, 0}
+ { NULL, UnSet }
};
mapping_t pers[] = {
@@ -93,7 +93,7 @@ mapping_t pers[] = {
{ "10", 10},
{ "faulty", LEVEL_FAULTY},
{ "container", LEVEL_CONTAINER},
- { NULL, 0}
+ { NULL, UnSet }
};
mapping_t modes[] = {
@@ -106,6 +106,7 @@ mapping_t modes[] = {
{ "grow", GROW},
{ "incremental", INCREMENTAL},
{ "auto-detect", AUTODETECT},
+ { NULL, UnSet }
};
mapping_t faultylayout[] = {
@@ -126,7 +127,30 @@ mapping_t faultylayout[] = {
{ "flush", ClearFaults},
{ "none", ClearErrors},
{ "default", ClearErrors},
- { NULL, 0}
+ { NULL, UnSet }
+};
+
+mapping_t consistency_policies[] = {
+ { "unknown", CONSISTENCY_POLICY_UNKNOWN},
+ { "none", CONSISTENCY_POLICY_NONE},
+ { "resync", CONSISTENCY_POLICY_RESYNC},
+ { "bitmap", CONSISTENCY_POLICY_BITMAP},
+ { "journal", CONSISTENCY_POLICY_JOURNAL},
+ { "ppl", CONSISTENCY_POLICY_PPL},
+ { NULL, CONSISTENCY_POLICY_UNKNOWN }
+};
+
+mapping_t sysfs_array_states[] = {
+ { "active-idle", ARRAY_ACTIVE_IDLE },
+ { "active", ARRAY_ACTIVE },
+ { "clear", ARRAY_CLEAR },
+ { "inactive", ARRAY_INACTIVE },
+ { "suspended", ARRAY_SUSPENDED },
+ { "readonly", ARRAY_READONLY },
+ { "read-auto", ARRAY_READ_AUTO },
+ { "clean", ARRAY_CLEAN },
+ { "write-pending", ARRAY_WRITE_PENDING },
+ { NULL, ARRAY_UNKNOWN_STATE }
};
char *map_num(mapping_t *map, int num)
@@ -141,10 +165,8 @@ char *map_num(mapping_t *map, int num)
int map_name(mapping_t *map, char *name)
{
- while (map->name) {
- if (strcmp(map->name, name)==0)
- return map->num;
+ while (map->name && strcmp(map->name, name) != 0)
map++;
- }
- return UnSet;
+
+ return map->num;
}
diff --git a/md.4 b/md.4
index 5bdf7a7..3a1d677 100644
--- a/md.4
+++ b/md.4
@@ -4,6 +4,7 @@
.\" the Free Software Foundation; either version 2 of the License, or
.\" (at your option) any later version.
.\" See file COPYING in distribution for details.
+.if n .pl 1000v
.TH MD 4
.SH NAME
md \- Multiple Device driver aka Linux Software RAID
diff --git a/md_p.h b/md_p.h
index dc9fec1..358a28c 100644
--- a/md_p.h
+++ b/md_p.h
@@ -267,4 +267,29 @@ struct r5l_meta_block {
#define R5LOG_VERSION 0x1
#define R5LOG_MAGIC 0x6433c509
+struct ppl_header_entry {
+ __u64 data_sector; /* raid sector of the new data */
+ __u32 pp_size; /* length of partial parity */
+ __u32 data_size; /* length of data */
+ __u32 parity_disk; /* member disk containing parity */
+ __u32 checksum; /* checksum of this entry's partial parity */
+} __attribute__ ((__packed__));
+
+#define PPL_HEADER_SIZE 4096
+#define PPL_HDR_RESERVED 512
+#define PPL_HDR_ENTRY_SPACE \
+ (PPL_HEADER_SIZE - PPL_HDR_RESERVED - 4 * sizeof(__u32) - sizeof(__u64))
+#define PPL_HDR_MAX_ENTRIES \
+ (PPL_HDR_ENTRY_SPACE / sizeof(struct ppl_header_entry))
+
+struct ppl_header {
+ __u8 reserved[PPL_HDR_RESERVED];/* reserved space, fill with 0xff */
+ __u32 signature; /* signature (family number of volume) */
+ __u32 padding; /* zero pad */
+ __u64 generation; /* generation number of the header */
+ __u32 entries_count; /* number of entries in entry array */
+ __u32 checksum; /* checksum of the header */
+ struct ppl_header_entry entries[PPL_HDR_MAX_ENTRIES];
+} __attribute__ ((__packed__));
+
#endif
diff --git a/md_u.h b/md_u.h
index f570a34..2d66d52 100644
--- a/md_u.h
+++ b/md_u.h
@@ -21,26 +21,19 @@
#define RAID_VERSION _IOR (MD_MAJOR, 0x10, mdu_version_t)
#define GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, mdu_array_info_t)
#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t)
-#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13)
#define RAID_AUTORUN _IO (MD_MAJOR, 0x14)
#define GET_BITMAP_FILE _IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t)
/* configuration */
-#define CLEAR_ARRAY _IO (MD_MAJOR, 0x20)
#define ADD_NEW_DISK _IOW (MD_MAJOR, 0x21, mdu_disk_info_t)
#define HOT_REMOVE_DISK _IO (MD_MAJOR, 0x22)
#define SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, mdu_array_info_t)
-#define SET_DISK_INFO _IO (MD_MAJOR, 0x24)
-#define WRITE_RAID_INFO _IO (MD_MAJOR, 0x25)
-#define UNPROTECT_ARRAY _IO (MD_MAJOR, 0x26)
-#define PROTECT_ARRAY _IO (MD_MAJOR, 0x27)
#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28)
#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29)
#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int)
/* usage */
#define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t)
-#define START_ARRAY _IO (MD_MAJOR, 0x31)
#define STOP_ARRAY _IO (MD_MAJOR, 0x32)
#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33)
#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34)
diff --git a/mdadm.8.in b/mdadm.8.in
index f789d43..ea45bbc 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -5,7 +5,7 @@
.\" the Free Software Foundation; either version 2 of the License, or
.\" (at your option) any later version.
.\" See file COPYING in distribution for details.
-.TH MDADM 8 "" v4.0
+.TH MDADM 8 "" v4.1-rc1
.SH NAME
mdadm \- manage MD devices
.I aka
@@ -126,7 +126,7 @@ of component devices and changing the number of active devices in
Linear and RAID levels 0/1/4/5/6,
changing the RAID level between 0, 1, 5, and 6, and between 0 and 10,
changing the chunk size and layout for RAID 0,4,5,6,10 as well as adding or
-removing a write-intent bitmap.
+removing a write-intent bitmap and changing the array's consistency policy.
.TP
.B "Incremental Assembly"
@@ -267,13 +267,13 @@ the exact meaning of this option in different contexts.
.TP
.BR \-c ", " \-\-config=
Specify the config file or directory. Default is to use
-.B /etc/mdadm/mdadm.conf
+.B /etc/mdadm.conf
and
-.BR /etc/mdadm/mdadm.conf.d ,
+.BR /etc/mdadm.conf.d ,
or if those are missing then
-.B /etc/mdadm.conf
+.B /etc/mdadm/mdadm.conf
and
-.BR /etc/mdadm.conf.d .
+.BR /etc/mdadm/mdadm.conf.d .
If the config file given is
.B "partitions"
then nothing will be read, but
@@ -541,6 +541,8 @@ A value of
restores the apparent size of the array to be whatever the real
amount of available space is.
+Clustered arrays do not support this parameter yet.
+
.TP
.BR \-c ", " \-\-chunk=
Specify chunk size of kilobytes. The default when creating an
@@ -724,7 +726,9 @@ When creating an array on devices which are 100G or larger,
.I mdadm
automatically adds an internal bitmap as it will usually be
beneficial. This can be suppressed with
-.B "\-\-bitmap=none".
+.B "\-\-bitmap=none"
+or by selecting a different consistency policy with
+.BR \-\-consistency\-policy .
.TP
.BR \-\-bitmap\-chunk=
@@ -923,7 +927,8 @@ will not try to be so clever.
Start the array
.B read only
rather than read-write as normal. No writes will be allowed to the
-array, and no resync, recovery, or reshape will be started.
+array, and no resync, recovery, or reshape will be started. This option
+works in Create, Assemble, Manage and Misc modes.
.TP
.BR \-a ", " "\-\-auto{=yes,md,mdp,part,p}{NN}"
@@ -1015,6 +1020,45 @@ simultaneously. If not specified, this defaults to 4.
Specify journal device for the RAID-4/5/6 array. The journal device
should be a SSD with reasonable lifetime.
+.TP
+.BR \-\-symlinks
+Automatically create symlinks in /dev that point to /dev/md. The argument to
+\-\-symlinks must be 'no' or 'yes'; it works with \-\-create and \-\-build.
+
+.TP
+.BR \-k ", " \-\-consistency\-policy=
+Specify how the array maintains consistency in case of unexpected shutdown.
+Only relevant for RAID levels with redundancy.
+Currently supported options are:
+.RS
+
+.TP
+.B resync
+Full resync is performed and all redundancy is regenerated when the array is
+started after unclean shutdown.
+
+.TP
+.B bitmap
+Resync assisted by a write-intent bitmap. Implicitly selected when using
+.BR \-\-bitmap .
+
+.TP
+.B journal
+For RAID levels 4/5/6, journal device is used to log transactions and replay
+after unclean shutdown. Implicitly selected when using
+.BR \-\-write\-journal .
+
+.TP
+.B ppl
+For RAID5 only, Partial Parity Log is used to close the write hole and
+eliminate resync. PPL is stored in the metadata region of RAID member drives,
+no additional journal drive is needed.
+
+.PP
+Can be used with \-\-grow to change the consistency policy of an active array
+in some cases. See CONSISTENCY POLICY CHANGES below.
+.RE
+
.SH For assemble:
@@ -1139,6 +1183,8 @@ argument given to this flag can be one of
.BR no\-bitmap ,
.BR bbl ,
.BR no\-bbl ,
+.BR ppl ,
+.BR no\-ppl ,
.BR metadata ,
or
.BR super\-minor .
@@ -1218,7 +1264,8 @@ is correct.
The
.B byteorder
option allows arrays to be moved between machines with different
-byte-order.
+byte-order, such as from a big-endian machine like a Sparc or some
+MIPS machines, to a little-endian x86_64 machine.
When assembling such an array for the first time after a move, giving
.B "\-\-update=byteorder"
will cause
@@ -1279,6 +1326,16 @@ option will cause any reservation of space for a bad block list to be
removed. If the bad block list contains entries, this will fail, as
removing the list could cause data corruption.
+The
+.B ppl
+option will enable PPL for a RAID5 array and reserve space for PPL on each
+device. There must be enough free space between the data and superblock, and a
+write-intent bitmap or journal must not be used.
+
+The
+.B no\-ppl
+option will disable PPL in the superblock.
+
.TP
.BR \-\-freeze\-reshape
Option is intended to be used in start-up scripts during initrd boot phase.
@@ -1291,6 +1348,10 @@ Reshape can be continued later using the
.B \-\-continue
option for the grow command.
+.TP
+.BR \-\-symlinks
+See this option under Create and Build options.
+
.SH For Manage mode:
.TP
@@ -1465,9 +1526,8 @@ the device is found or <slot>:missing in case the device is not found.
.TP
.BR \-\-add-journal
-Recreate journal for RAID-4/5/6 array that lost a journal device. In the
-current implementation, this command cannot add a journal to an array
-that had a failed journal. To avoid interrupting on-going write opertions,
+Add a journal to an existing array, or recreate the journal for a RAID-4/5/6
+array that lost a journal device. To avoid interrupting on-going write operations,
.B \-\-add-journal
only works for array in Read-Only state.
@@ -1631,6 +1691,10 @@ overwritten with zeros. With
the block where the superblock would be is overwritten even if it
doesn't appear to be valid.
+.B Note:
+Be careful when calling \-\-zero\-superblock on a clustered raid: make sure the
+array isn't used or assembled on another cluster node before executing it.
+
.TP
.B \-\-kill\-subarray=
If the device is a container and the argument to \-\-kill\-subarray
@@ -1888,9 +1952,9 @@ The config file is only used if explicitly named with
or requested with (a possibly implicit)
.BR \-\-scan .
In the later case,
-.B /etc/mdadm/mdadm.conf
-or
.B /etc/mdadm.conf
+or
+.B /etc/mdadm/mdadm.conf
is used.
If
@@ -2144,12 +2208,14 @@ in the array exceed 100G is size, an internal write-intent bitmap
will automatically be added unless some other option is explicitly
requested with the
.B \-\-bitmap
-option. In any case space for a bitmap will be reserved so that one
-can be added layer with
+option or a different consistency policy is selected with the
+.B \-\-consistency\-policy
+option. In any case space for a bitmap will be reserved so that one
+can be added later with
.BR "\-\-grow \-\-bitmap=internal" .
-If the metadata type supports it (currently only 1.x metadata), space
-will be allocated to store a bad block list. This allows a modest
+If the metadata type supports it (currently only 1.x and IMSM metadata),
+space will be allocated to store a bad block list. This allows a modest
number of bad blocks to be recorded, allowing the drive to remain in
service while only partially functional.
@@ -2173,7 +2239,7 @@ be in use.
.TP
.B \-\-readonly
-start the array readonly \(em not supported yet.
+start the array in readonly mode.
.SH MANAGE MODE
.HP 12
@@ -2284,9 +2350,11 @@ superblock field in the subarray. Similar to updating an array in
.B \-U
or
.B \-\-update=
-option. Currently only
-.B name
-is supported.
+option. The supported options are
+.BR name ,
+.B ppl
+and
+.BR no\-ppl .
The
.B name
@@ -2297,6 +2365,13 @@ re\-assembled. If updating
would change the UUID of an active subarray this operation is blocked,
and the command will end in an error.
+The
+.B ppl
+and
+.B no\-ppl
+options enable and disable PPL in the metadata. Currently supported only for
+IMSM subarrays.
+
.TP
.B \-\-examine
The device should be a component of an md array.
@@ -2630,6 +2705,8 @@ RAID0, RAID4, and RAID5, and between RAID0 and RAID10 (in the near-2 mode).
.IP \(bu 4
add a write-intent bitmap to any array which supports these bitmaps, or
remove a write-intent bitmap from such an array.
+.IP \(bu 4
+change the array's consistency policy.
.PP
Using GROW on containers is currently supported only for Intel's IMSM
@@ -2685,6 +2762,11 @@ Also the size of an array cannot be changed while it has an active
bitmap. If an array has a bitmap, it must be removed before the size
can be changed. Once the change is complete a new bitmap can be created.
+.PP
+Note:
+.B "--grow --size"
+is not yet supported for arrays that use an external file bitmap.
+
.SS RAID\-DEVICES CHANGES
A RAID1 array can work with any number of devices from 1 upwards
@@ -2786,6 +2868,16 @@ can be added. Note that if you add a bitmap stored in a file which is
in a filesystem that is on the RAID array being affected, the system
will deadlock. The bitmap must be on a separate filesystem.
+.SS CONSISTENCY POLICY CHANGES
+
+The consistency policy of an active array can be changed by using the
+.B \-\-consistency\-policy
+option in Grow mode. Currently this works only for the
+.B ppl
+and
+.B resync
+policies and allows enabling or disabling the RAID5 Partial Parity Log (PPL).
+
.SH INCREMENTAL MODE
.HP 12
@@ -2864,7 +2956,7 @@ This is the only context where the aliases are used. They are
usually provided by a
.I udev
rules mentioning
-.BR ${DEVLINKS} .
+.BR $env{DEVLINKS} .
.IP +
Does the device have a valid md superblock? If a specific metadata
@@ -3184,7 +3276,7 @@ uses this to find arrays when
is given in Misc mode, and to monitor array reconstruction
on Monitor mode.
-.SS /etc/mdadm/mdadm.conf (or /etc/mdadm.conf)
+.SS /etc/mdadm.conf
The config file lists which devices may be scanned to see if
they contain MD super block, and gives identifying information
@@ -3192,7 +3284,7 @@ they contain MD super block, and gives identifying information
.BR mdadm.conf (5)
for more details.
-.SS /etc/mdadm/mdadm.conf.d (or /etc/mdadm.conf.d)
+.SS /etc/mdadm.conf.d
A directory containing configuration files which are read in lexical
order.
diff --git a/mdadm.c b/mdadm.c
index c3a265b..5afe415 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -57,6 +57,7 @@ int main(int argc, char *argv[])
struct mddev_dev *devlist = NULL;
struct mddev_dev **devlistend = & devlist;
struct mddev_dev *dv;
+ mdu_array_info_t array;
int devs_found = 0;
char *symlinks = NULL;
int grow_continue = 0;
@@ -78,6 +79,7 @@ int main(int argc, char *argv[])
.level = UnSet,
.layout = UnSet,
.bitmap_chunk = UnSet,
+ .consistency_policy = CONSISTENCY_POLICY_UNKNOWN,
};
char sys_hostname[256];
@@ -102,14 +104,15 @@ int main(int argc, char *argv[])
FILE *outf;
int mdfd = -1;
+ int locked = 0;
srandom(time(0) ^ getpid());
- ident.uuid_set=0;
+ ident.uuid_set = 0;
ident.level = UnSet;
ident.raid_disks = UnSet;
- ident.super_minor= UnSet;
- ident.devices=0;
+ ident.super_minor = UnSet;
+ ident.devices = 0;
ident.spare_group = NULL;
ident.autof = 0;
ident.st = NULL;
@@ -119,10 +122,14 @@ int main(int argc, char *argv[])
ident.container = NULL;
ident.member = NULL;
- while ((option_index = -1) ,
- (opt=getopt_long(argc, argv,
- shortopt, long_options,
- &option_index)) != -1) {
+ if (get_linux_version() < 2006015) {
+ pr_err("This version of mdadm does not support kernels older than 2.6.15\n");
+ exit(1);
+ }
+
+ while ((option_index = -1),
+ (opt = getopt_long(argc, argv, shortopt, long_options,
+ &option_index)) != -1) {
int newmode = mode;
/* firstly, some mode-independent options */
switch(opt) {
@@ -396,7 +403,7 @@ int main(int argc, char *argv[])
pr_err("metadata information already given\n");
exit(2);
}
- for(i=0; !ss && superlist[i]; i++)
+ for(i = 0; !ss && superlist[i]; i++)
ss = superlist[i]->match_metadata_desc(optarg);
if (!ss) {
@@ -541,7 +548,7 @@ int main(int argc, char *argv[])
case 5:
s.layout = map_name(r5layout, optarg);
- if (s.layout==UnSet) {
+ if (s.layout == UnSet) {
pr_err("layout %s not understood for raid5.\n",
optarg);
exit(2);
@@ -549,7 +556,7 @@ int main(int argc, char *argv[])
break;
case 6:
s.layout = map_name(r6layout, optarg);
- if (s.layout==UnSet) {
+ if (s.layout == UnSet) {
pr_err("layout %s not understood for raid6.\n",
optarg);
exit(2);
@@ -603,8 +610,8 @@ int main(int argc, char *argv[])
case O(GROW, Nodes):
case O(CREATE, Nodes):
c.nodes = parse_num(optarg);
- if (c.nodes <= 0) {
- pr_err("invalid number for the number of cluster nodes: %s\n",
+ if (c.nodes < 2) {
+ pr_err("clustered array needs two nodes at least: %s\n",
optarg);
exit(2);
}
@@ -614,7 +621,7 @@ int main(int argc, char *argv[])
c.homecluster = optarg;
if (strlen(c.homecluster) > 64) {
pr_err("Cluster name too big.\n");
- exit(ERANGE);
+ exit(2);
}
continue;
case O(CREATE,'x'): /* number of spare (eXtra) disks */
@@ -664,7 +671,7 @@ int main(int argc, char *argv[])
case O(MISC,'f'): /* force zero */
case O(MISC,Force): /* force zero */
case O(MANAGE,Force): /* add device which is too large */
- c.force=1;
+ c.force = 1;
continue;
/* now for the Assemble options */
case O(ASSEMBLE, FreezeReshape): /* Freeze reshape during
@@ -768,16 +775,20 @@ int main(int argc, char *argv[])
continue;
if (strcmp(c.update, "force-no-bbl") == 0)
continue;
+ if (strcmp(c.update, "ppl") == 0)
+ continue;
+ if (strcmp(c.update, "no-ppl") == 0)
+ continue;
if (strcmp(c.update, "metadata") == 0)
continue;
if (strcmp(c.update, "revert-reshape") == 0)
continue;
- if (strcmp(c.update, "byteorder")==0) {
+ if (strcmp(c.update, "byteorder") == 0) {
if (ss) {
pr_err("must not set metadata type with --update=byteorder.\n");
exit(2);
}
- for(i=0; !ss && superlist[i]; i++)
+ for(i = 0; !ss && superlist[i]; i++)
ss = superlist[i]->match_metadata_desc(
"0.swap");
if (!ss) {
@@ -801,7 +812,7 @@ int main(int argc, char *argv[])
" 'sparc2.2', 'super-minor', 'uuid', 'name', 'nodes', 'resync',\n"
" 'summaries', 'homehost', 'home-cluster', 'byteorder', 'devicesize',\n"
" 'no-bitmap', 'metadata', 'revert-reshape'\n"
- " 'bbl', 'no-bbl', 'force-no-bbl'\n"
+ " 'bbl', 'no-bbl', 'force-no-bbl', 'ppl', 'no-ppl'\n"
);
exit(outf == stdout ? 0 : 2);
@@ -1095,8 +1106,10 @@ int main(int argc, char *argv[])
pr_err("bitmap file needed with -b in --assemble mode\n");
exit(2);
}
- if (strcmp(optarg, "internal") == 0) {
- pr_err("there is no need to specify --bitmap when assembling arrays with internal bitmaps\n");
+ if (strcmp(optarg, "internal") == 0 ||
+ strcmp(optarg, "clustered") == 0) {
+ pr_err("no need to specify --bitmap when assembling"
+ " arrays with internal or clustered bitmap\n");
continue;
}
bitmap_fd = open(optarg, O_RDWR);
@@ -1137,6 +1150,10 @@ int main(int argc, char *argv[])
case O(CREATE,Bitmap): /* here we create the bitmap */
case O(GROW,'b'):
case O(GROW,Bitmap):
+ if (s.bitmap_file) {
+ pr_err("bitmap cannot be set twice. Second value: %s.\n", optarg);
+ exit(2);
+ }
if (strcmp(optarg, "internal") == 0 ||
strcmp(optarg, "none") == 0 ||
strchr(optarg, '/') != NULL) {
@@ -1209,6 +1226,16 @@ int main(int argc, char *argv[])
s.journaldisks = 1;
continue;
+ case O(CREATE, 'k'):
+ case O(GROW, 'k'):
+ s.consistency_policy = map_name(consistency_policies,
+ optarg);
+ if (s.consistency_policy < CONSISTENCY_POLICY_RESYNC) {
+ pr_err("Invalid consistency policy: %s\n",
+ optarg);
+ exit(2);
+ }
+ continue;
}
/* We have now processed all the valid options. Anything else is
* an error
@@ -1236,9 +1263,48 @@ int main(int argc, char *argv[])
exit(0);
}
- if (s.journaldisks && (s.level < 4 || s.level > 6)) {
- pr_err("--write-journal is only supported for RAID level 4/5/6.\n");
- exit(2);
+ if (s.journaldisks) {
+ if (s.level < 4 || s.level > 6) {
+ pr_err("--write-journal is only supported for RAID level 4/5/6.\n");
+ exit(2);
+ }
+ if (s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN &&
+ s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
+ pr_err("--write-journal is not supported with consistency policy: %s\n",
+ map_num(consistency_policies, s.consistency_policy));
+ exit(2);
+ }
+ }
+
+ if (mode == CREATE &&
+ s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN) {
+ if (s.level <= 0) {
+ pr_err("--consistency-policy not meaningful with level %s.\n",
+ map_num(pers, s.level));
+ exit(2);
+ } else if (s.consistency_policy == CONSISTENCY_POLICY_JOURNAL &&
+ !s.journaldisks) {
+ pr_err("--write-journal is required for consistency policy: %s\n",
+ map_num(consistency_policies, s.consistency_policy));
+ exit(2);
+ } else if (s.consistency_policy == CONSISTENCY_POLICY_PPL &&
+ s.level != 5) {
+ pr_err("PPL consistency policy is only supported for RAID level 5.\n");
+ exit(2);
+ } else if (s.consistency_policy == CONSISTENCY_POLICY_BITMAP &&
+ (!s.bitmap_file ||
+ strcmp(s.bitmap_file, "none") == 0)) {
+ pr_err("--bitmap is required for consistency policy: %s\n",
+ map_num(consistency_policies, s.consistency_policy));
+ exit(2);
+ } else if (s.bitmap_file &&
+ strcmp(s.bitmap_file, "none") != 0 &&
+ s.consistency_policy != CONSISTENCY_POLICY_BITMAP &&
+ s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
+ pr_err("--bitmap is not compatible with consistency policy: %s\n",
+ map_num(consistency_policies, s.consistency_policy));
+ exit(2);
+ }
}
if (!mode && devs_found) {
@@ -1268,7 +1334,7 @@ int main(int argc, char *argv[])
* hopefully it's mostly right but there might be some stuff
* missing
*
- * That is mosty checked in the per-mode stuff but...
+ * That is mostly checked in the per-mode stuff but...
*
* For @,B,C and A without -s, the first device listed must be
* an md device. We check that here and open it.
@@ -1370,6 +1436,22 @@ int main(int argc, char *argv[])
/* --scan implied --brief unless -vv */
c.brief = 1;
+ if (mode == CREATE) {
+ if (s.bitmap_file && strcmp(s.bitmap_file, "clustered") == 0) {
+ locked = cluster_get_dlmlock();
+ if (locked != 1)
+ exit(1);
+ }
+ } else if (mode == MANAGE || mode == GROW || mode == INCREMENTAL) {
+ if (!md_get_array_info(mdfd, &array) && (devmode != 'c')) {
+ if (array.state & (1 << MD_SB_CLUSTERED)) {
+ locked = cluster_get_dlmlock();
+ if (locked != 1)
+ exit(1);
+ }
+ }
+ }
+
switch(mode) {
case MANAGE:
/* readonly, add/remove, readwrite, runstop */
@@ -1416,7 +1498,7 @@ int main(int argc, char *argv[])
pr_err("can only assemble a single array when providing a backup file.\n");
exit(1);
}
- for (dv = devlist ; dv ; dv=dv->next) {
+ for (dv = devlist; dv; dv = dv->next) {
struct mddev_ident *array_ident = conf_get_ident(dv->devname);
if (array_ident == NULL) {
pr_err("%s not identified in config file.\n",
@@ -1478,8 +1560,13 @@ int main(int argc, char *argv[])
break;
}
- if (s.level != 1) {
- pr_err("--bitmap=clustered is currently supported with RAID mirror only\n");
+ if (s.level != 1 && s.level != 10) {
+ pr_err("--bitmap=clustered is currently supported with raid1/10 only\n");
+ rv = 1;
+ break;
+ }
+ if (s.level == 10 && !(is_near_layout_10(s.layout) || s.layout == UnSet)) {
+ pr_err("only near layout is supported with clustered raid10\n");
rv = 1;
break;
}
@@ -1551,10 +1638,10 @@ int main(int argc, char *argv[])
else
c.delay = 60;
}
- rv= Monitor(devlist, mailaddr, program,
- &c, daemonise, oneshot,
- dosyslog, pidfile, increments,
- spare_sharing);
+ rv = Monitor(devlist, mailaddr, program,
+ &c, daemonise, oneshot,
+ dosyslog, pidfile, increments,
+ spare_sharing);
break;
case GROW:
@@ -1572,7 +1659,10 @@ int main(int argc, char *argv[])
rv = 1;
break;
}
- sysfs_init(&sra, mdfd, NULL);
+ if (sysfs_init(&sra, mdfd, NULL)) {
+ rv = 1;
+ break;
+ }
if (array_size == MAX_SIZE)
err = sysfs_set_str(&sra, NULL, "array_size", "default");
else
@@ -1594,7 +1684,7 @@ int main(int argc, char *argv[])
rv = 1;
break;
}
- for (dv=devlist->next; dv ; dv=dv->next) {
+ for (dv = devlist->next; dv; dv = dv->next) {
rv = Grow_Add_device(devlist->devname, mdfd,
dv->devname);
if (rv)
@@ -1620,6 +1710,8 @@ int main(int argc, char *argv[])
rv = Grow_reshape(devlist->devname, mdfd,
devlist->next,
data_offset, &c, &s);
+ } else if (s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN) {
+ rv = Grow_consistency_policy(devlist->devname, mdfd, &c, &s);
} else if (array_size == 0)
pr_err("no changes to --grow\n");
break;
@@ -1665,6 +1757,10 @@ int main(int argc, char *argv[])
autodetect();
break;
}
+ if (locked)
+ cluster_release_dlmlock();
+ if (mdfd > 0)
+ close(mdfd);
exit(rv);
}
@@ -1687,7 +1783,7 @@ static int scan_assemble(struct supertype *ss,
pr_err("No devices listed in conf file were found.\n");
return 1;
}
- for (a = array_list; a ; a = a->next) {
+ for (a = array_list; a; a = a->next) {
a->assembled = 0;
if (a->autof == 0)
a->autof = c->autof;
@@ -1698,7 +1794,7 @@ static int scan_assemble(struct supertype *ss,
failures = 0;
successes = 0;
rv = 0;
- for (a = array_list; a ; a = a->next) {
+ for (a = array_list; a; a = a->next) {
int r;
if (a->assembled)
continue;
@@ -1764,7 +1860,7 @@ static int misc_scan(char devmode, struct context *c)
int rv = 0;
for (members = 0; members <= 1; members++) {
- for (e=ms ; e ; e=e->next) {
+ for (e = ms; e; e = e->next) {
char *name = NULL;
struct map_ent *me;
struct stat stb;
@@ -1774,8 +1870,7 @@ static int misc_scan(char devmode, struct context *c)
if (members != member)
continue;
me = map_by_devnm(&map, e->devnm);
- if (me && me->path
- && strcmp(me->path, "/unknown") != 0)
+ if (me && me->path && strcmp(me->path, "/unknown") != 0)
name = me->path;
if (name == NULL || stat(name, &stb) != 0)
name = get_md_name(e->devnm);
@@ -1788,7 +1883,7 @@ static int misc_scan(char devmode, struct context *c)
if (devmode == 'D')
rv |= Detail(name, c);
else
- rv |= WaitClean(name, -1, c->verbose);
+ rv |= WaitClean(name, c->verbose);
put_md_name(name);
}
}
@@ -1802,7 +1897,7 @@ static int stop_scan(int verbose)
/* Due to possible stacking of devices, repeat until
* nothing more can be stopped
*/
- int progress=1, err;
+ int progress = 1, err;
int last = 0;
int rv = 0;
do {
@@ -1811,7 +1906,7 @@ static int stop_scan(int verbose)
if (!progress) last = 1;
progress = 0; err = 0;
- for (e=ms ; e ; e=e->next) {
+ for (e = ms; e; e = e->next) {
char *name = get_md_name(e->devnm);
int mdfd;
@@ -1846,8 +1941,8 @@ static int misc_list(struct mddev_dev *devlist,
struct mddev_dev *dv;
int rv = 0;
- for (dv=devlist ; dv; dv=(rv & 16) ? NULL : dv->next) {
- int mdfd;
+ for (dv = devlist; dv; dv = (rv & 16) ? NULL : dv->next) {
+ int mdfd = -1;
switch(dv->disposition) {
case 'D':
@@ -1866,16 +1961,21 @@ static int misc_list(struct mddev_dev *devlist,
}
continue;
case 'Q':
- rv |= Query(dv->devname); continue;
+ rv |= Query(dv->devname);
+ continue;
case 'X':
- rv |= ExamineBitmap(dv->devname, c->brief, ss); continue;
+ rv |= ExamineBitmap(dv->devname, c->brief, ss);
+ continue;
case ExamineBB:
- rv |= ExamineBadblocks(dv->devname, c->brief, ss); continue;
+ rv |= ExamineBadblocks(dv->devname, c->brief, ss);
+ continue;
case 'W':
case WaitOpt:
- rv |= Wait(dv->devname); continue;
+ rv |= Wait(dv->devname);
+ continue;
case Waitclean:
- rv |= WaitClean(dv->devname, -1, c->verbose); continue;
+ rv |= WaitClean(dv->devname, c->verbose);
+ continue;
case KillSubarray:
rv |= Kill_subarray(dv->devname, c->subarray, c->verbose);
continue;
@@ -1899,24 +1999,27 @@ static int misc_list(struct mddev_dev *devlist,
rv |= SetAction(dv->devname, c->action);
continue;
}
- if (dv->devname[0] == '/')
- mdfd = open_mddev(dv->devname, 1);
- else {
+
+ if (dv->devname[0] != '/')
mdfd = open_dev(dv->devname);
- if (mdfd < 0)
- pr_err("Cannot open %s\n", dv->devname);
- }
- if (mdfd>=0) {
+ if (dv->devname[0] == '/' || mdfd < 0)
+ mdfd = open_mddev(dv->devname, 1);
+
+ if (mdfd >= 0) {
switch(dv->disposition) {
case 'R':
c->runstop = 1;
- rv |= Manage_run(dv->devname, mdfd, c); break;
+ rv |= Manage_run(dv->devname, mdfd, c);
+ break;
case 'S':
- rv |= Manage_stop(dv->devname, mdfd, c->verbose, 0); break;
+ rv |= Manage_stop(dv->devname, mdfd, c->verbose, 0);
+ break;
case 'o':
- rv |= Manage_ro(dv->devname, mdfd, 1); break;
+ rv |= Manage_ro(dv->devname, mdfd, 1);
+ break;
case 'w':
- rv |= Manage_ro(dv->devname, mdfd, -1); break;
+ rv |= Manage_ro(dv->devname, mdfd, -1);
+ break;
}
close(mdfd);
} else
@@ -1929,13 +2032,15 @@ int SetAction(char *dev, char *action)
{
int fd = open(dev, O_RDONLY);
struct mdinfo mdi;
+ int retval;
+
if (fd < 0) {
pr_err("Couldn't open %s: %s\n", dev, strerror(errno));
return 1;
}
- sysfs_init(&mdi, fd, NULL);
+ retval = sysfs_init(&mdi, fd, NULL);
close(fd);
- if (!mdi.sys_name[0]) {
+ if (retval) {
pr_err("%s is no an md array\n", dev);
return 1;
}
diff --git a/mdadm.conf.5 b/mdadm.conf.5
index 542e263..18512cb 100644
--- a/mdadm.conf.5
+++ b/mdadm.conf.5
@@ -8,7 +8,7 @@
.SH NAME
mdadm.conf \- configuration for management of Software RAID with mdadm
.SH SYNOPSIS
-/etc/mdadm/mdadm.conf
+/etc/mdadm.conf
.SH DESCRIPTION
.PP
.I mdadm
diff --git a/mdadm.h b/mdadm.h
index 71b8afb..61bc7af 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -279,6 +279,15 @@ struct mdinfo {
int journal_device_required;
int journal_clean;
+ enum {
+ CONSISTENCY_POLICY_UNKNOWN,
+ CONSISTENCY_POLICY_NONE,
+ CONSISTENCY_POLICY_RESYNC,
+ CONSISTENCY_POLICY_BITMAP,
+ CONSISTENCY_POLICY_JOURNAL,
+ CONSISTENCY_POLICY_PPL,
+ } consistency_policy;
+
/* During reshape we can sometimes change the data_offset to avoid
* over-writing still-valid data. We need to know if there is space.
* So getinfo_super will fill in space_before and space_after in sectors.
@@ -291,6 +300,9 @@ struct mdinfo {
#define MaxSector (~0ULL) /* resync/recovery complete position */
};
long bitmap_offset; /* 0 == none, 1 == a file */
+ unsigned int ppl_size;
+ int ppl_offset;
+ unsigned long long ppl_sector;
unsigned long safe_mode_delay; /* ms delay to mark clean */
int new_level, delta_disks, new_layout, new_chunk;
int errors;
@@ -323,8 +335,18 @@ struct mdinfo {
int prev_state, curr_state, next_state;
/* info read from sysfs */
- char sysfs_array_state[20];
-
+ enum {
+ ARRAY_CLEAR,
+ ARRAY_INACTIVE,
+ ARRAY_SUSPENDED,
+ ARRAY_READONLY,
+ ARRAY_READ_AUTO,
+ ARRAY_CLEAN,
+ ARRAY_ACTIVE,
+ ARRAY_WRITE_PENDING,
+ ARRAY_ACTIVE_IDLE,
+ ARRAY_UNKNOWN_STATE,
+ } array_state;
struct md_bb bb;
};
@@ -339,6 +361,11 @@ struct createinfo {
struct supertype *supertype;
};
+struct spare_criteria {
+ unsigned long long min_size;
+ unsigned int sector_size;
+};
+
enum mode {
ASSEMBLE=1,
BUILD,
@@ -426,6 +453,7 @@ enum special_options {
ClusterName,
ClusterConfirm,
WriteJournal,
+ ConsistencyPolicy,
};
enum prefix_standard {
@@ -527,6 +555,7 @@ struct shape {
int assume_clean;
int write_behind;
unsigned long long size;
+ int consistency_policy;
};
/* List of device names - wildcards expanded */
@@ -552,7 +581,7 @@ struct mdstat_ent {
char devnm[32];
int active;
char *level;
- char *pattern; /* U or up, _ for down */
+ char *pattern; /* U for up, _ for down */
int percent; /* -1 if no resync */
int resync; /* 3 if check, 2 if reshape, 1 if resync, 0 if recovery */
int devcnt;
@@ -608,7 +637,6 @@ enum sysfs_read_flags {
GET_MISMATCH = (1 << 5),
GET_VERSION = (1 << 6),
GET_DISKS = (1 << 7),
- GET_DEGRADED = (1 << 8),
GET_SAFEMODE = (1 << 9),
GET_BITMAP_LOCATION = (1 << 10),
@@ -618,14 +646,15 @@ enum sysfs_read_flags {
GET_STATE = (1 << 23),
GET_ERROR = (1 << 24),
GET_ARRAY_STATE = (1 << 25),
+ GET_CONSISTENCY_POLICY = (1 << 26),
};
/* If fd >= 0, get the array it is open on,
* else use devnm.
*/
extern int sysfs_open(char *devnm, char *devname, char *attr);
-extern void sysfs_init(struct mdinfo *mdi, int fd, char *devnm);
-extern void sysfs_init_dev(struct mdinfo *mdi, unsigned long devid);
+extern int sysfs_init(struct mdinfo *mdi, int fd, char *devnm);
+extern void sysfs_init_dev(struct mdinfo *mdi, dev_t devid);
extern void sysfs_free(struct mdinfo *sra);
extern struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options);
extern int sysfs_attr_match(const char *attr, const char *str);
@@ -658,6 +687,7 @@ extern int sysfs_unique_holder(char *devnm, long rdev);
extern int sysfs_freeze_array(struct mdinfo *sra);
extern int sysfs_wait(int fd, int *msec);
extern int load_sys(char *path, char *buf, int len);
+extern int zero_disk_range(int fd, unsigned long long sector, size_t count);
extern int reshape_prepare_fdlist(char *devname,
struct mdinfo *sra,
int raid_disks,
@@ -702,6 +732,7 @@ extern int restore_stripes(int *dest, unsigned long long *offsets,
extern char *map_num(mapping_t *map, int num);
extern int map_name(mapping_t *map, char *name);
extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[];
+extern mapping_t consistency_policies[], sysfs_array_states[];
extern char *map_dev_preferred(int major, int minor, int create,
char *prefer);
@@ -742,7 +773,7 @@ struct reshape {
unsigned long long new_size; /* New size of array in sectors */
};
-/* A superswitch provides entry point the a metadata handler.
+/* A superswitch provides entry point to a metadata handler.
*
* The superswitch primarily operates on some "metadata" that
* is accessed via the 'supertype'.
@@ -863,7 +894,7 @@ extern struct superswitch {
* metadata.
*/
int (*init_super)(struct supertype *st, mdu_array_info_t *info,
- unsigned long long size, char *name,
+ struct shape *s, char *name,
char *homehost, int *uuid,
unsigned long long data_offset);
@@ -914,11 +945,14 @@ extern struct superswitch {
*/
__u64 (*avail_size)(struct supertype *st, __u64 size,
unsigned long long data_offset);
- /* This is similar to 'avail_size' in purpose, but is used for
- * containers for which there is no 'component size' to compare.
- * This reports that whole-device size which is a minimum
+ /*
+ * Return spare criteria for array:
+ * - minimum disk size can be used in array;
+ * - sector size can be used in array.
+ * Return values: 0 - for success and -EINVAL on error.
*/
- unsigned long long (*min_acceptable_spare_size)(struct supertype *st);
+ int (*get_spare_criteria)(struct supertype *st,
+ struct spare_criteria *sc);
/* Find somewhere to put a bitmap - possibly auto-size it - and
* update the metadata to record this. The array may be newly
* created, in which case data_size may be updated, or it might
@@ -961,7 +995,7 @@ extern struct superswitch {
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *subdev, unsigned long long *freesize,
- int verbose);
+ int consistency_policy, int verbose);
/* Return a linked list of 'mdinfo' structures for all arrays
* in the container. For non-containers, it is like
@@ -1059,6 +1093,13 @@ extern struct superswitch {
/* validate container after assemble */
int (*validate_container)(struct mdinfo *info);
+ /* write initial empty PPL on device */
+ int (*write_init_ppl)(struct supertype *st, struct mdinfo *info, int fd);
+
+ /* validate ppl before assemble */
+ int (*validate_ppl)(struct supertype *st, struct mdinfo *info,
+ struct mdinfo *disk);
+
/* records new bad block in metadata */
int (*record_bad_block)(struct active_array *a, int n,
unsigned long long sector, int length);
@@ -1150,6 +1191,7 @@ extern int get_dev_size(int fd, char *dname, unsigned long long *sizep);
extern int get_dev_sector_size(int fd, char *dname, unsigned int *sectsizep);
extern int must_be_container(int fd);
extern int dev_size_from_id(dev_t id, unsigned long long *size);
+extern int dev_sector_size_from_id(dev_t id, unsigned int *size);
void wait_for(char *dev, int fd);
/*
@@ -1309,6 +1351,8 @@ extern int Grow_restart(struct supertype *st, struct mdinfo *info,
extern int Grow_continue(int mdfd, struct supertype *st,
struct mdinfo *info, char *backup_file,
int forked, int freeze_reshape);
+extern int Grow_consistency_policy(char *devname, int fd,
+ struct context *c, struct shape *s);
extern int restore_backup(struct supertype *st,
struct mdinfo *content,
@@ -1351,7 +1395,7 @@ extern int Kill(char *dev, struct supertype *st, int force, int verbose, int noe
extern int Kill_subarray(char *dev, char *subarray, int verbose);
extern int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int quiet);
extern int Wait(char *dev);
-extern int WaitClean(char *dev, int sock, int verbose);
+extern int WaitClean(char *dev, int verbose);
extern int SetAction(char *dev, char *action);
extern int Incremental(struct mddev_dev *devlist, struct context *c,
@@ -1380,11 +1424,17 @@ extern int Dump_metadata(char *dev, char *dir, struct context *c,
extern int Restore_metadata(char *dev, char *dir, struct context *c,
struct supertype *st, int only);
-extern int md_get_version(int fd);
+int md_array_valid(int fd);
+int md_array_active(int fd);
+int md_array_is_active(struct mdinfo *info);
+int md_get_array_info(int fd, struct mdu_array_info_s *array);
+int md_set_array_info(int fd, struct mdu_array_info_s *array);
+int md_get_disk_info(int fd, struct mdu_disk_info_s *disk);
extern int get_linux_version(void);
extern int mdadm_version(char *version);
extern unsigned long long parse_size(char *size);
extern int parse_uuid(char *str, int uuid[4]);
+extern int is_near_layout_10(int layout);
extern int parse_layout_10(char *layout);
extern int parse_layout_faulty(char *layout);
extern long parse_num(char *num);
@@ -1395,6 +1445,8 @@ extern int check_raid(int fd, char *name);
extern int check_partitions(int fd, char *dname,
unsigned long long freesize,
unsigned long long size);
+extern int fstat_is_blkdev(int fd, char *devname, dev_t *rdev);
+extern int stat_is_blkdev(char *devname, dev_t *rdev);
extern int get_mdp_major(void);
extern int get_maj_min(char *dev, int *major, int *minor);
@@ -1447,7 +1499,6 @@ extern char *fname_from_uuid(struct supertype *st,
extern unsigned long calc_csum(void *super, int bytes);
extern int enough(int level, int raid_disks, int layout, int clean,
char *avail);
-extern int enough_fd(int fd);
extern int ask(char *mesg);
extern unsigned long long get_component_size(int fd);
extern void remove_partitions(int fd);
@@ -1467,7 +1518,7 @@ extern int assemble_container_content(struct supertype *st, int mdfd,
#define INCR_ALREADY 4
#define INCR_YES 8
extern struct mdinfo *container_choose_spares(struct supertype *st,
- unsigned long long min_size,
+ struct spare_criteria *criteria,
struct domainlist *domlist,
char *spare_group,
const char *metadata, int get_one);
@@ -1476,6 +1527,8 @@ extern int add_disk(int mdfd, struct supertype *st,
struct mdinfo *sra, struct mdinfo *info);
extern int remove_disk(int mdfd, struct supertype *st,
struct mdinfo *sra, struct mdinfo *info);
+extern int hot_remove_disk(int mdfd, unsigned long dev, int force);
+extern int sys_hot_remove_disk(int statefd, int force);
extern int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info);
unsigned long long min_recovery_start(struct mdinfo *array);
@@ -1486,7 +1539,7 @@ extern void print_r10_layout(int layout);
extern char *find_free_devnm(int use_partitions);
extern void put_md_name(char *name);
-extern char *devid2kname(int devid);
+extern char *devid2kname(dev_t devid);
extern char *devid2devnm(dev_t devid);
extern dev_t devnm2devid(char *devnm);
extern char *get_md_name(char *devnm);
@@ -1494,7 +1547,7 @@ extern char *get_md_name(char *devnm);
extern char DefaultConfFile[];
extern int create_mddev(char *dev, char *name, int autof, int trustworthy,
- char *chosen);
+ char *chosen, int block_udev);
/* values for 'trustworthy' */
#define LOCAL 1
#define LOCAL_ANY 10
@@ -1528,6 +1581,8 @@ extern char *stat2kname(struct stat *st);
extern char *fd2kname(int fd);
extern char *stat2devnm(struct stat *st);
extern char *fd2devnm(int fd);
+extern void udev_block(char *devnm);
+extern void udev_unblock(void);
extern int in_initrd(void);
@@ -1549,6 +1604,7 @@ struct dlm_hooks {
dlm_lshandle_t (*create_lockspace)(const char *name,
unsigned int mode);
+ dlm_lshandle_t (*open_lockspace)(const char *name);
int (*release_lockspace)(const char *name, dlm_lshandle_t ls,
int force);
int (*ls_lock)(dlm_lshandle_t lockspace, uint32_t mode,
@@ -1557,17 +1613,16 @@ struct dlm_hooks {
uint32_t parent, void (*astaddr) (void *astarg),
void *astarg, void (*bastaddr) (void *astarg),
void *range);
- int (*ls_unlock)(dlm_lshandle_t lockspace, uint32_t lkid,
- uint32_t flags, struct dlm_lksb *lksb,
- void *astarg);
+ int (*ls_unlock_wait)(dlm_lshandle_t lockspace, uint32_t lkid,
+ uint32_t flags, struct dlm_lksb *lksb);
int (*ls_get_fd)(dlm_lshandle_t ls);
int (*dispatch)(int fd);
};
extern int get_cluster_name(char **name);
extern int dlm_funs_ready(void);
-extern int cluster_get_dlmlock(int *lockid);
-extern int cluster_release_dlmlock(int lockid);
+extern int cluster_get_dlmlock(void);
+extern int cluster_release_dlmlock(void);
extern void set_dlm_hooks(void);
#define _ROUND_UP(val, base) (((val) + (base) - 1) & ~(base - 1))
diff --git a/mdadm.spec b/mdadm.spec
index 4e97efb..f6b0e76 100644
--- a/mdadm.spec
+++ b/mdadm.spec
@@ -1,6 +1,6 @@
Summary: mdadm is used for controlling Linux md devices (aka RAID arrays)
Name: mdadm
-Version: 4.0
+Version: 4.1_rc1
Release: 1
Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.gz
URL: http://neil.brown.name/blog/mdadm
@@ -37,8 +37,10 @@ rm -rf $RPM_BUILD_ROOT
%doc TODO ChangeLog mdadm.conf-example COPYING
%{_sbindir}/mdadm
%{_sbindir}/mdmon
+/usr/lib/udev/rules.d/01-md-raid-creating.rules
/usr/lib/udev/rules.d/63-md-raid-arrays.rules
/usr/lib/udev/rules.d/64-md-raid-assembly.rules
+/usr/lib/udev/rules.d/69-md-clustered-confirm-device.rules
%config(noreplace,missingok)/%{_sysconfdir}/mdadm.conf
%{_mandir}/man*/md*
diff --git a/mdassemble.8 b/mdassemble.8
deleted file mode 100644
index 9ac8cd2..0000000
--- a/mdassemble.8
+++ /dev/null
@@ -1,65 +0,0 @@
-.\" -*- nroff -*-
-.TH MDASSEMBLE 8 "" v4.0
-.SH NAME
-mdassemble \- assemble MD devices
-.I aka
-Linux Software RAID
-
-.SH SYNOPSIS
-
-.BI mdassemble
-
-.SH DESCRIPTION
-.B mdassemble
-is a tiny program that can be used to assemble MD devices inside an
-initial ramdisk (initrd) or initramfs; it is meant to replace the in-kernel
-automatic RAID detection and activation.
-It can be built statically and linked against lightweight libc alternatives, like
-.B dietlibc,
-.B klibc
-or
-.B uClibc.
-
-.SH USAGE
-Invoking
-.B mdassemble
-has the same effect as invoking
-.B mdadm \-\-assemble \-\-scan.
-.PP
-Invoking
-.B mdassemble
-a second time will make all defined arrays readwrite, this is useful if
-using the
-.B start_ro
-module parameter.
-
-.SH OPTIONS
-
-There are no options to
-.B mdassemble.
-
-.SH FILES
-
-.SS /etc/mdadm/mdadm.conf
-
-The config file lists which devices may be scanned to see if
-they contain MD super block, and gives identifying information
-(e.g. UUID) about known MD arrays. See
-.BR mdadm.conf (5)
-for more details.
-
-.B mdassemble
-supports all configuration parameters defined in
-.B mdadm.conf
-with the exception of
-.B auto=
-which is supported only if mdadm was built with the
-.B \-DMDASSEMBLE_AUTO
-define.
-
-.SH SEE ALSO
-.PP
-.BR mdadm (8),
-.BR mdadm.conf (5),
-.BR md (4),
-.BR diet (1).
diff --git a/mdassemble.c b/mdassemble.c
deleted file mode 100644
index 471ffeb..0000000
--- a/mdassemble.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * mdassemble - assemble Linux "md" devices aka RAID arrays.
- *
- * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
- * Copyright (C) 2003 Luca Berra <bluca@vodka.it>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Author: Neil Brown
- * Email: <neilb@suse.de>
- */
-
-#include "mdadm.h"
-#include "md_p.h"
-
-char const Name[] = "mdassemble";
-
-#ifndef MDASSEMBLE_AUTO
-/* from mdopen.c */
-int open_mddev(char *dev, int report_errors/*unused*/)
-{
- int mdfd = open(dev, O_RDONLY);
- if (mdfd < 0)
- pr_err("error opening %s: %s\n",
- dev, strerror(errno));
- else if (md_get_version(mdfd) <= 0) {
- pr_err("%s does not appear to be an md device\n",
- dev);
- close(mdfd);
- mdfd = -1;
- }
- return mdfd;
-}
-int create_mddev(char *dev, char *name, int autof/*unused*/, int trustworthy,
- char *chosen)
-{
- return open_mddev(dev, 0);
-}
-#endif
-
-int rv;
-int mdfd = -1;
-
-int main(int argc, char *argv[])
-{
- struct mddev_ident *array_list = conf_get_ident(NULL);
- struct context c = { .freeze_reshape = 1 };
- if (!array_list) {
- pr_err("No arrays found in config file\n");
- rv = 1;
- } else
- for (; array_list; array_list = array_list->next) {
- mdu_array_info_t array;
- if (strcasecmp(array_list->devname, "<ignore>") == 0)
- continue;
- mdfd = open_mddev(array_list->devname, 0);
- if (mdfd >= 0 && ioctl(mdfd, GET_ARRAY_INFO, &array) == 0) {
- rv |= Manage_ro(array_list->devname, mdfd, -1); /* make it readwrite */
- continue;
- }
- if (mdfd >= 0)
- close(mdfd);
- rv |= Assemble(array_list->st, array_list->devname,
- array_list, NULL, &c);
- }
- return rv;
-}
diff --git a/mdmon.8 b/mdmon.8
index ac7352f..531c64e 100644
--- a/mdmon.8
+++ b/mdmon.8
@@ -1,5 +1,5 @@
.\" See file COPYING in distribution for details.
-.TH MDMON 8 "" v4.0
+.TH MDMON 8 "" v4.1-rc1
.SH NAME
mdmon \- monitor MD external metadata arrays
diff --git a/mdmon.c b/mdmon.c
index e4b73d9..0955fcc 100644
--- a/mdmon.c
+++ b/mdmon.c
@@ -408,10 +408,6 @@ static int mdmon(char *devnm, int must_fork, int takeover)
pr_err("%s: %s\n", devnm, strerror(errno));
return 1;
}
- if (md_get_version(mdfd) < 0) {
- pr_err("%s: Not an md device\n", devnm);
- return 1;
- }
/* Fork, and have the child tell us when they are ready */
if (must_fork) {
@@ -580,11 +576,6 @@ int restore_stripes(int *dest, unsigned long long *offsets,
return 1;
}
-void abort_reshape(struct mdinfo *sra)
-{
- return;
-}
-
int save_stripes(int *source, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int nwrites, int *dest,
diff --git a/mdmon.h b/mdmon.h
index 0b08c3d..818367c 100644
--- a/mdmon.h
+++ b/mdmon.h
@@ -35,6 +35,7 @@ struct active_array {
int resync_start_fd;
int metadata_fd; /* for monitoring rw/ro status */
int sync_completed_fd; /* for checkpoint notification events */
+ int safe_mode_delay_fd;
unsigned long long last_checkpoint; /* sync_completed fires for many
* reasons this field makes sure the
* kernel has made progress before
diff --git a/mdopen.c b/mdopen.c
index 685ca32..4ec13f5 100644
--- a/mdopen.c
+++ b/mdopen.c
@@ -100,6 +100,31 @@ void make_parts(char *dev, int cnt)
free(name);
}
+int create_named_array(char *devnm)
+{
+ int fd;
+ int n = -1;
+ static const char new_array_file[] = {
+ "/sys/module/md_mod/parameters/new_array"
+ };
+
+ fd = open(new_array_file, O_WRONLY);
+ if (fd < 0 && errno == ENOENT) {
+ if (system("modprobe md_mod") == 0)
+ fd = open(new_array_file, O_WRONLY);
+ }
+ if (fd >= 0) {
+ n = write(fd, devnm, strlen(devnm));
+ close(fd);
+ }
+ if (fd < 0 || n != (int)strlen(devnm)) {
+ pr_err("Fail create %s when using %s\n", devnm, new_array_file);
+ return 0;
+ }
+
+ return 1;
+}
+
/*
* We need a new md device to assemble/build/create an array.
* 'dev' is a name given us by the user (command line or mdadm.conf)
@@ -135,7 +160,7 @@ void make_parts(char *dev, int cnt)
*/
int create_mddev(char *dev, char *name, int autof, int trustworthy,
- char *chosen)
+ char *chosen, int block_udev)
{
int mdfd;
struct stat stb;
@@ -147,6 +172,10 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
char devname[37];
char devnm[32];
char cbuf[400];
+
+ if (!use_udev())
+ block_udev = 0;
+
if (chosen == NULL)
chosen = cbuf;
@@ -194,7 +223,7 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
return -1;
}
if (cname[0] == 0) {
- pr_err("%s is an invalid name for an md device (empty!).", dev);
+ pr_err("%s is an invalid name for an md device (empty!).\n", dev);
return -1;
}
if (num < 0) {
@@ -302,34 +331,42 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
devnm[0] = 0;
if (num < 0 && cname && ci->names) {
- int fd;
- int n = -1;
sprintf(devnm, "md_%s", cname);
- fd = open("/sys/module/md_mod/parameters/new_array", O_WRONLY);
- if (fd >= 0) {
- n = write(fd, devnm, strlen(devnm));
- close(fd);
- }
- if (n < 0)
+ if (block_udev)
+ udev_block(devnm);
+ if (!create_named_array(devnm)) {
devnm[0] = 0;
+ udev_unblock();
+ }
}
- if (devnm[0])
- ;
- else if (num < 0) {
- /* need to choose a free number. */
- char *_devnm = find_free_devnm(use_mdp);
- if (_devnm == NULL) {
- pr_err("No avail md devices - aborting\n");
- return -1;
+ if (num >= 0) {
+ sprintf(devnm, "md%d", num);
+ if (block_udev)
+ udev_block(devnm);
+ if (!create_named_array(devnm)) {
+ devnm[0] = 0;
+ udev_unblock();
}
- strcpy(devnm, _devnm);
- } else {
- sprintf(devnm, "%s%d", use_mdp?"md_d":"md", num);
- if (mddev_busy(devnm)) {
- pr_err("%s is already in use.\n",
- dev);
- return -1;
+ }
+ if (devnm[0] == 0) {
+ if (num < 0) {
+ /* need to choose a free number. */
+ char *_devnm = find_free_devnm(use_mdp);
+ if (_devnm == NULL) {
+ pr_err("No avail md devices - aborting\n");
+ return -1;
+ }
+ strcpy(devnm, _devnm);
+ } else {
+ sprintf(devnm, "%s%d", use_mdp?"md_d":"md", num);
+ if (mddev_busy(devnm)) {
+ pr_err("%s is already in use.\n",
+ dev);
+ return -1;
+ }
}
+ if (block_udev)
+ udev_block(devnm);
}
sprintf(devname, "/dev/%s", devnm);
@@ -417,18 +454,21 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
int open_mddev(char *dev, int report_errors)
{
int mdfd = open(dev, O_RDONLY);
+
if (mdfd < 0) {
if (report_errors)
pr_err("error opening %s: %s\n",
dev, strerror(errno));
return -1;
}
- if (md_get_version(mdfd) <= 0) {
+
+ if (md_array_valid(mdfd) == 0) {
close(mdfd);
if (report_errors)
pr_err("%s does not appear to be an md device\n", dev);
return -2;
}
+
return mdfd;
}
diff --git a/mdstat.c b/mdstat.c
index 3962896..7e600d0 100644
--- a/mdstat.c
+++ b/mdstat.c
@@ -158,16 +158,16 @@ struct mdstat_ent *mdstat_read(int hold, int start)
char devnm[32];
int in_devs = 0;
- if (strcmp(line, "Personalities")==0)
+ if (strcmp(line, "Personalities") == 0)
continue;
- if (strcmp(line, "read_ahead")==0)
+ if (strcmp(line, "read_ahead") == 0)
continue;
- if (strcmp(line, "unused")==0)
+ if (strcmp(line, "unused") == 0)
continue;
insert_here = NULL;
/* Better be an md line.. */
- if (strncmp(line, "md", 2)!= 0 || strlen(line) >= 32
- || (line[2] != '_' && !isdigit(line[2])))
+ if (strncmp(line, "md", 2)!= 0 || strlen(line) >= 32 ||
+ (line[2] != '_' && !isdigit(line[2])))
continue;
strcpy(devnm, line);
@@ -187,9 +187,9 @@ struct mdstat_ent *mdstat_read(int hold, int start)
for (w=dl_next(line); w!= line ; w=dl_next(w)) {
int l = strlen(w);
char *eq;
- if (strcmp(w, "active")==0)
+ if (strcmp(w, "active") == 0)
ent->active = 1;
- else if (strcmp(w, "inactive")==0) {
+ else if (strcmp(w, "inactive") == 0) {
ent->active = 0;
in_devs = 1;
} else if (ent->active > 0 &&
@@ -197,13 +197,13 @@ struct mdstat_ent *mdstat_read(int hold, int start)
w[0] != '(' /*readonly*/) {
ent->level = xstrdup(w);
in_devs = 1;
- } else if (in_devs && strcmp(w, "blocks")==0)
+ } else if (in_devs && strcmp(w, "blocks") == 0)
in_devs = 0;
else if (in_devs) {
char *ep = strchr(w, '[');
ent->devcnt +=
add_member_devname(&ent->members, w);
- if (ep && strncmp(w, "md", 2)==0) {
+ if (ep && strncmp(w, "md", 2) == 0) {
/* This has an md device as a component.
* If that device is already in the
* list, make sure we insert before
@@ -212,8 +212,10 @@ struct mdstat_ent *mdstat_read(int hold, int start)
struct mdstat_ent **ih;
ih = &all;
while (ih != insert_here && *ih &&
- ((int)strlen((*ih)->devnm) != ep-w
- || strncmp((*ih)->devnm, w, ep-w) != 0))
+ ((int)strlen((*ih)->devnm) !=
+ ep-w ||
+ strncmp((*ih)->devnm, w,
+ ep-w) != 0))
ih = & (*ih)->next;
insert_here = ih;
}
@@ -224,31 +226,31 @@ struct mdstat_ent *mdstat_read(int hold, int start)
} else if (w[0] == '[' && isdigit(w[1])) {
ent->raid_disks = atoi(w+1);
} else if (!ent->pattern &&
- w[0] == '[' &&
- (w[1] == 'U' || w[1] == '_')) {
+ w[0] == '[' &&
+ (w[1] == 'U' || w[1] == '_')) {
ent->pattern = xstrdup(w+1);
- if (ent->pattern[l-2]==']')
+ if (ent->pattern[l-2] == ']')
ent->pattern[l-2] = '\0';
} else if (ent->percent == RESYNC_NONE &&
- strncmp(w, "re", 2)== 0 &&
+ strncmp(w, "re", 2) == 0 &&
w[l-1] == '%' &&
- (eq=strchr(w, '=')) != NULL ) {
+ (eq = strchr(w, '=')) != NULL ) {
ent->percent = atoi(eq+1);
- if (strncmp(w,"resync", 6)==0)
+ if (strncmp(w,"resync", 6) == 0)
ent->resync = 1;
- else if (strncmp(w, "reshape", 7)==0)
+ else if (strncmp(w, "reshape", 7) == 0)
ent->resync = 2;
else
ent->resync = 0;
} else if (ent->percent == RESYNC_NONE &&
(w[0] == 'r' || w[0] == 'c')) {
- if (strncmp(w, "resync", 4)==0)
+ if (strncmp(w, "resync", 6) == 0)
ent->resync = 1;
- if (strncmp(w, "reshape", 7)==0)
+ if (strncmp(w, "reshape", 7) == 0)
ent->resync = 2;
- if (strncmp(w, "recovery", 8)==0)
+ if (strncmp(w, "recovery", 8) == 0)
ent->resync = 0;
- if (strncmp(w, "check", 5)==0)
+ if (strncmp(w, "check", 5) == 0)
ent->resync = 3;
if (l > 8 && strcmp(w+l-8, "=DELAYED") == 0)
@@ -287,7 +289,8 @@ struct mdstat_ent *mdstat_read(int hold, int start)
e->next = rv;
rv = e;
}
- } else rv = all;
+ } else
+ rv = all;
return rv;
}
diff --git a/misc/mdcheck b/misc/mdcheck
index 2c8f54d..42d4094 100644
--- a/misc/mdcheck
+++ b/misc/mdcheck
@@ -1,6 +1,6 @@
#!/bin/bash
-# Copyright (C) 2014 Neil Brown <neilb@suse.de>
+# Copyright (C) 2014-2017 Neil Brown <neilb@suse.de>
#
#
# This program is free software; you can redistribute it and/or modify
@@ -14,7 +14,7 @@
# GNU General Public License for more details.
#
# Author: Neil Brown
-# Email: <neilb@suse.de>
+# Email: <neilb@suse.com>
# This script should be run periodically to automatically
# perform a 'check' on any md arrays.
@@ -66,7 +66,7 @@ shift
# We need a temp file occasionally...
tmp=/var/lib/mdcheck/.md-check-$$
-trap 'rm -f "$tmp"' 0
+trap 'rm -f "$tmp"' 0 2 3 15
# firstly, clean out really old state files
@@ -94,17 +94,20 @@ do
if [ -z "$cont" ]
then
start=0
+ logger -p daemon.info mdcheck start checking $dev
elif [ -z "$MD_UUID" -o ! -f "$fl" ]
then
# Nothing to continue here
continue
else
start=`cat "$fl"`
+ logger -p daemon.info mdcheck continue checking $dev from $start
fi
cnt=$[cnt+1]
eval MD_${cnt}_fl=\$fl
eval MD_${cnt}_sys=\$sys
+ eval MD_${cnt}_dev=\$dev
echo $start > $fl
echo $start > $sys/md/sync_min
echo check > $sys/md/sync_action
@@ -145,6 +148,7 @@ for i in `eval echo {1..$cnt}`
do
eval fl=\$MD_${i}_fl
eval sys=\$MD_${i}_sys
+ eval dev=\$MD_${i}_dev
if [ -z "$fl" ]; then continue; fi
@@ -156,4 +160,5 @@ do
fi
echo idle > $sys/md/sync_action
cat $sys/md/sync_min > $fl
+ logger -p daemon.info pause checking $dev at `cat $fl`
done
diff --git a/monitor.c b/monitor.c
index 00b7c68..81537ed 100644
--- a/monitor.c
+++ b/monitor.c
@@ -576,8 +576,8 @@ static int read_and_act(struct active_array *a, fd_set *fds)
a->last_checkpoint = sync_completed;
a->container->ss->set_array_state(a, a->curr_state <= clean);
} else if ((a->curr_action == idle && a->prev_action == reshape) ||
- (a->curr_action == reshape
- && sync_completed > a->last_checkpoint) ) {
+ (a->curr_action == reshape &&
+ sync_completed > a->last_checkpoint)) {
/* Reshape has progressed or completed so we need to
* update the array state - and possibly the array size
*/
diff --git a/msg.c b/msg.c
index c66b0a1..45cd450 100644
--- a/msg.c
+++ b/msg.c
@@ -171,8 +171,6 @@ int connect_monitor(char *devname)
addr.sun_family = PF_LOCAL;
strcpy(addr.sun_path, path);
if (connect(sfd, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
- pr_err("Error connecting monitor with %s: %s\n",
- addr.sun_path, strerror(errno));
close(sfd);
return -1;
}
diff --git a/platform-intel.c b/platform-intel.c
index 9867697..a11101d 100644
--- a/platform-intel.c
+++ b/platform-intel.c
@@ -548,8 +548,8 @@ static int read_efi_variable(void *buffer, ssize_t buf_size, char *variable_name
errno = 0;
var_data_len = strtoul(buf, NULL, 16);
- if ((errno == ERANGE && (var_data_len == LONG_MAX))
- || (errno != 0 && var_data_len == 0))
+ if ((errno == ERANGE && (var_data_len == LONG_MAX)) ||
+ (errno != 0 && var_data_len == 0))
return 1;
/* get data */
diff --git a/policy.c b/policy.c
index 064d349..c0d18a7 100644
--- a/policy.c
+++ b/policy.c
@@ -661,6 +661,7 @@ int domain_test(struct domainlist *dom, struct dev_policy *pol,
* 1: has domains, all match
*/
int found_any = -1;
+ int has_one_domain = 1;
struct dev_policy *p;
pol = pol_find(pol, pol_domain);
@@ -670,6 +671,9 @@ int domain_test(struct domainlist *dom, struct dev_policy *pol,
dom = dom->next;
if (!dom || strcmp(dom->dom, p->value) != 0)
return 0;
+ if (has_one_domain && metadata && strcmp(metadata, "imsm") == 0)
+ found_any = -1;
+ has_one_domain = 0;
}
return found_any;
}
@@ -887,9 +891,8 @@ int Write_rules(char *rule_name)
fd = 1;
/* write static invocation */
- if (write(fd, udev_template_start,
- sizeof(udev_template_start) - 1)
- != (int)sizeof(udev_template_start)-1)
+ if (write(fd, udev_template_start, sizeof(udev_template_start) - 1) !=
+ (int)sizeof(udev_template_start) - 1)
goto abort;
/* iterate, if none created or error occurred, remove file */
diff --git a/raid6check.c b/raid6check.c
index 551f835..a8e6005 100644
--- a/raid6check.c
+++ b/raid6check.c
@@ -562,7 +562,7 @@ int main(int argc, char *argv[])
GET_LEVEL|
GET_LAYOUT|
GET_DISKS|
- GET_DEGRADED |
+ GET_STATE |
GET_COMPONENT|
GET_CHUNK|
GET_DEVS|
diff --git a/restripe.c b/restripe.c
index de85ee4..31b07e8 100644
--- a/restripe.c
+++ b/restripe.c
@@ -581,14 +581,16 @@ int save_stripes(int *source, unsigned long long *offsets,
raid_disks, level, layout);
if (dnum < 0) abort();
if (source[dnum] < 0 ||
- lseek64(source[dnum], offsets[dnum]+offset, 0) < 0 ||
- read(source[dnum], buf+disk * chunk_size, chunk_size)
- != chunk_size)
+ lseek64(source[dnum],
+ offsets[dnum] + offset, 0) < 0 ||
+ read(source[dnum], buf+disk * chunk_size,
+ chunk_size) != chunk_size) {
if (failed <= 2) {
fdisk[failed] = dnum;
fblock[failed] = disk;
failed++;
}
+ }
}
if (failed == 0 || fblock[0] >= data_disks)
/* all data disks are good */
@@ -731,8 +733,8 @@ int restore_stripes(int *dest, unsigned long long *offsets,
zero_size = chunk_size;
}
- if (stripe_buf == NULL || stripes == NULL || blocks == NULL
- || zero == NULL) {
+ if (stripe_buf == NULL || stripes == NULL || blocks == NULL ||
+ zero == NULL) {
rv = -2;
goto abort;
}
diff --git a/sg_io.c b/sg_io.c
index 42c91e1..7889a95 100644
--- a/sg_io.c
+++ b/sg_io.c
@@ -46,6 +46,9 @@ int scsi_get_serial(int fd, void *buf, size_t buf_len)
if (rv)
return rv;
+ if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
+ return -1;
+
rsp_len = rsp_buf[3];
if (!rsp_len || buf_len < rsp_len)
diff --git a/sha1.h b/sha1.h
index 0f98658..999fc6a 100644
--- a/sha1.h
+++ b/sha1.h
@@ -22,7 +22,7 @@
#include <stdio.h>
-#if 1 /* defined HAVE_LIMITS_H || _LIBC */
+#if defined HAVE_LIMITS_H || _LIBC
# include <limits.h>
#endif
@@ -33,9 +33,9 @@
the resulting executable. Locally running cross-compiled executables
is usually not possible. */
-#if 1 /* def _LIBC */
-# include <stdint.h>
-typedef uint32_t sha1_uint32;
+#ifdef _LIBC
+# include <sys/types.h>
+typedef u_int32_t sha1_uint32;
typedef uintptr_t sha1_uintptr;
#else
# define INT_MAX_32_BITS 2147483647
diff --git a/super-ddf.c b/super-ddf.c
index 1707ad1..d02a19a 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -500,7 +500,6 @@ struct ddf_super {
} *dlist, *add_list;
};
-#ifndef MDASSEMBLE
static int load_super_ddf_all(struct supertype *st, int fd,
void **sbp, char *devname);
static int get_svd_state(const struct ddf_super *, const struct vcl *);
@@ -518,7 +517,6 @@ static int validate_geometry_ddf_bvd(struct supertype *st,
unsigned long long data_offset,
char *dev, unsigned long long *freesize,
int verbose);
-#endif
static void free_super_ddf(struct supertype *st);
static int all_ff(const char *guid);
@@ -672,8 +670,8 @@ static int layout_md2ddf(const mdu_array_info_t *array,
sec_elmnt_count = array->raid_disks / 2;
srl = DDF_2SPANNED;
prl = DDF_RAID1;
- } else if (array->raid_disks % 3 == 0
- && array->layout == 0x103) {
+ } else if (array->raid_disks % 3 == 0 &&
+ array->layout == 0x103) {
rlq = DDF_RAID1_MULTI;
prim_elmnt_count = cpu_to_be16(3);
sec_elmnt_count = array->raid_disks / 3;
@@ -855,8 +853,8 @@ static void *load_section(int fd, struct ddf_super *super, void *buf,
int dofree = (buf == NULL);
if (check)
- if (len != 2 && len != 8 && len != 32
- && len != 128 && len != 512)
+ if (len != 2 && len != 8 && len != 32 &&
+ len != 128 && len != 512)
return NULL;
if (len > 1024)
@@ -934,14 +932,13 @@ static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
if (load_ddf_header(fd, be64_to_cpu(super->anchor.secondary_lba),
dsize >> 9, 2,
&super->secondary, &super->anchor)) {
- if (super->active == NULL
- || (be32_to_cpu(super->primary.seq)
- < be32_to_cpu(super->secondary.seq) &&
- !super->secondary.openflag)
- || (be32_to_cpu(super->primary.seq)
- == be32_to_cpu(super->secondary.seq) &&
- super->primary.openflag && !super->secondary.openflag)
- )
+ if (super->active == NULL ||
+ (be32_to_cpu(super->primary.seq)
+ < be32_to_cpu(super->secondary.seq) &&
+ !super->secondary.openflag) ||
+ (be32_to_cpu(super->primary.seq) ==
+ be32_to_cpu(super->secondary.seq) &&
+ super->primary.openflag && !super->secondary.openflag))
super->active = &super->secondary;
} else if (devname &&
be64_to_cpu(super->anchor.secondary_lba) != ~(__u64)0)
@@ -1305,8 +1302,6 @@ static struct supertype *match_metadata_desc_ddf(char *arg)
return st;
}
-#ifndef MDASSEMBLE
-
static mapping_t ddf_state[] = {
{ "Optimal", 0},
{ "Degraded", 1},
@@ -1355,7 +1350,6 @@ static mapping_t ddf_sec_level[] = {
{ "Spanned", DDF_2SPANNED},
{ NULL, 0}
};
-#endif
static int all_ff(const char *guid)
{
@@ -1382,7 +1376,6 @@ static const char *guid_str(const char *guid)
return (const char *) buf;
}
-#ifndef MDASSEMBLE
static void print_guid(char *guid, int tstamp)
{
/* A GUIDs are part (or all) ASCII and part binary.
@@ -1742,13 +1735,12 @@ static void detail_super_ddf(struct supertype *st, char *homehost)
struct ddf_super *sb = st->sb;
int cnt = be16_to_cpu(sb->virt->populated_vdes);
- printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
+ printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
printf("\n");
- printf(" Seq : %08x\n", be32_to_cpu(sb->active->seq));
- printf(" Virtual Disks : %d\n", cnt);
+ printf(" Seq : %08x\n", be32_to_cpu(sb->active->seq));
+ printf(" Virtual Disks : %d\n", cnt);
printf("\n");
}
-#endif
static const char *vendors_with_variable_volume_UUID[] = {
"LSI ",
@@ -1795,7 +1787,6 @@ static void uuid_of_ddf_subarray(const struct ddf_super *ddf,
memcpy(uuid, sha, 4*4);
}
-#ifndef MDASSEMBLE
static void brief_detail_super_ddf(struct supertype *st)
{
struct mdinfo info;
@@ -1811,7 +1802,6 @@ static void brief_detail_super_ddf(struct supertype *st)
fname_from_uuid(st, &info, nbuf,':');
printf(" UUID=%s", nbuf + 5);
}
-#endif
static int match_home_ddf(struct supertype *st, char *homehost)
{
@@ -1833,7 +1823,6 @@ static int match_home_ddf(struct supertype *st, char *homehost)
ddf->controller.vendor_data[len] == 0);
}
-#ifndef MDASSEMBLE
static int find_index_in_bvd(const struct ddf_super *ddf,
const struct vd_config *conf, unsigned int n,
unsigned int *n_bvd)
@@ -1894,8 +1883,8 @@ static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
nsec = n / be16_to_cpu(conf->prim_elmnt_count);
if (conf->sec_elmnt_seq != nsec) {
for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
- if (v->other_bvds[ibvd-1]->sec_elmnt_seq
- == nsec)
+ if (v->other_bvds[ibvd-1]->sec_elmnt_seq ==
+ nsec)
break;
}
if (ibvd == conf->sec_elmnt_count)
@@ -1914,7 +1903,6 @@ bad:
pr_err("Could't find disk %d in array %u\n", n, inst);
return NULL;
}
-#endif
static int find_phys(const struct ddf_super *ddf, be32 phys_refnum)
{
@@ -2039,8 +2027,8 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *m
be32_to_cpu(ddf->phys->entries[e].refnum) == 0xffffffff)
e++;
if (i < info->array.raid_disks && e < max &&
- !(be16_to_cpu(ddf->phys->entries[e].state)
- & DDF_Failed))
+ !(be16_to_cpu(ddf->phys->entries[e].state) &
+ DDF_Failed))
map[i] = 1;
else
map[i] = 0;
@@ -2125,11 +2113,10 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, cha
info->resync_start = 0;
info->reshape_active = 0;
info->recovery_blocked = 0;
- if (!(ddf->virt->entries[info->container_member].state
- & DDF_state_inconsistent) &&
- (ddf->virt->entries[info->container_member].init_state
- & DDF_initstate_mask)
- == DDF_init_full)
+ if (!(ddf->virt->entries[info->container_member].state &
+ DDF_state_inconsistent) &&
+ (ddf->virt->entries[info->container_member].init_state &
+ DDF_initstate_mask) == DDF_init_full)
info->resync_start = MaxSector;
uuid_from_super_ddf(st, info->uuid);
@@ -2146,7 +2133,7 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, cha
if (map)
for (j = 0; j < map_disks; j++) {
map[j] = 0;
- if (j < info->array.raid_disks) {
+ if (j < info->array.raid_disks) {
int i = find_phys(ddf, vc->conf.phys_refnum[j]);
if (i >= 0 &&
(be16_to_cpu(ddf->phys->entries[i].state)
@@ -2274,7 +2261,6 @@ static unsigned int find_vde_by_name(const struct ddf_super *ddf,
return DDF_NOTFOUND;
}
-#ifndef MDASSEMBLE
static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
const char *guid)
{
@@ -2286,11 +2272,10 @@ static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
return i;
return DDF_NOTFOUND;
}
-#endif
static int init_super_ddf(struct supertype *st,
mdu_array_info_t *info,
- unsigned long long size, char *name, char *homehost,
+ struct shape *s, char *name, char *homehost,
int *uuid, unsigned long long data_offset)
{
/* This is primarily called by Create when creating a new array.
@@ -2328,7 +2313,7 @@ static int init_super_ddf(struct supertype *st,
struct virtual_disk *vd;
if (st->sb)
- return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
+ return init_super_ddf_bvd(st, info, s->size, name, homehost, uuid,
data_offset);
if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
@@ -2507,7 +2492,6 @@ static int chunk_to_shift(int chunksize)
return ffs(chunksize/512)-1;
}
-#ifndef MDASSEMBLE
struct extent {
unsigned long long start, size;
};
@@ -2608,7 +2592,6 @@ static unsigned long long find_space(
free(e);
return INVALID_SECTORS;
}
-#endif
static int init_super_ddf_bvd(struct supertype *st,
mdu_array_info_t *info,
@@ -2727,7 +2710,6 @@ static int init_super_ddf_bvd(struct supertype *st,
return 1;
}
-#ifndef MDASSEMBLE
static void add_to_super_ddf_bvd(struct supertype *st,
mdu_disk_info_t *dk, int fd, char *devname,
unsigned long long data_offset)
@@ -3015,7 +2997,6 @@ static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
}
return 0;
}
-#endif
/*
* This is the write_init_super method for a ddf container. It is
@@ -3176,7 +3157,6 @@ static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d)
return 1;
}
-#ifndef MDASSEMBLE
static int __write_init_super_ddf(struct supertype *st)
{
struct ddf_super *ddf = st->sb;
@@ -3259,8 +3239,6 @@ static int write_init_super_ddf(struct supertype *st)
}
}
-#endif
-
static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
unsigned long long data_offset)
{
@@ -3270,8 +3248,6 @@ static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
return devsize - 32*1024*2;
}
-#ifndef MDASSEMBLE
-
static int reserve_space(struct supertype *st, int raiddisks,
unsigned long long size, int chunk,
unsigned long long data_offset,
@@ -3347,7 +3323,7 @@ static int validate_geometry_ddf(struct supertype *st,
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *dev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
int fd;
struct mdinfo *sra;
@@ -3512,7 +3488,7 @@ static int validate_geometry_ddf_bvd(struct supertype *st,
char *dev, unsigned long long *freesize,
int verbose)
{
- struct stat stb;
+ dev_t rdev;
struct ddf_super *ddf = st->sb;
struct dl *dl;
unsigned long long maxsize;
@@ -3535,8 +3511,8 @@ static int validate_geometry_ddf_bvd(struct supertype *st,
if (minsize == 0)
minsize = 8;
for (dl = ddf->dlist; dl ; dl = dl->next) {
- if (find_space(ddf, dl, data_offset, &minsize)
- != INVALID_SECTORS)
+ if (find_space(ddf, dl, data_offset, &minsize) !=
+ INVALID_SECTORS)
dcnt++;
}
if (dcnt < raiddisks) {
@@ -3548,13 +3524,11 @@ static int validate_geometry_ddf_bvd(struct supertype *st,
return 1;
}
/* This device must be a member of the set */
- if (stat(dev, &stb) < 0)
- return 0;
- if ((S_IFMT & stb.st_mode) != S_IFBLK)
+ if (!stat_is_blkdev(dev, &rdev))
return 0;
for (dl = ddf->dlist ; dl ; dl = dl->next) {
- if (dl->major == (int)major(stb.st_rdev) &&
- dl->minor == (int)minor(stb.st_rdev))
+ if (dl->major == (int)major(rdev) &&
+ dl->minor == (int)minor(rdev))
break;
}
if (!dl) {
@@ -3581,7 +3555,7 @@ static int load_super_ddf_all(struct supertype *st, int fd,
char nm[20];
int dfd;
- sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
+ sra = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
if (!sra)
return 1;
if (sra->array.major_version != -1 ||
@@ -3653,8 +3627,6 @@ static int load_container_ddf(struct supertype *st, int fd,
return load_super_ddf_all(st, fd, &st->sb, devname);
}
-#endif /* MDASSEMBLE */
-
static int check_secondary(const struct vcl *vc)
{
const struct vd_config *conf = &vc->conf;
@@ -3842,13 +3814,13 @@ static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray
unsigned int iphys;
int stt;
- if (be32_to_cpu(ddf->phys->entries[pd].refnum)
- == 0xFFFFFFFF)
+ if (be32_to_cpu(ddf->phys->entries[pd].refnum) ==
+ 0xffffffff)
continue;
stt = be16_to_cpu(ddf->phys->entries[pd].state);
- if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
- != DDF_Online)
+ if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding)) !=
+ DDF_Online)
continue;
i = get_pd_index_from_refnum(
@@ -4032,8 +4004,8 @@ static int compare_super_ddf(struct supertype *st, struct supertype *tst)
continue;
if (posix_memalign((void **)&dl1, 512,
- sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
- != 0) {
+ sizeof(*dl1) + (first->max_part) *
+ sizeof(dl1->vlist[0])) != 0) {
pr_err("could not allocate disk info buffer\n");
return 3;
}
@@ -4075,7 +4047,6 @@ static int compare_super_ddf(struct supertype *st, struct supertype *tst)
return 0;
}
-#ifndef MDASSEMBLE
/*
* A new array 'a' has been started which claims to be instance 'inst'
* within container 'c'.
@@ -4234,8 +4205,8 @@ static int get_bvd_state(const struct ddf_super *ddf,
if (pd < 0)
continue;
st = be16_to_cpu(ddf->phys->entries[pd].state);
- if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
- == DDF_Online) {
+ if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding)) ==
+ DDF_Online) {
working++;
avail[i] = 1;
}
@@ -4645,9 +4616,9 @@ static void ddf_remove_failed(struct ddf_super *ddf)
0xFFFFFFFF)
continue;
if (be16_and(ddf->phys->entries[pdnum].state,
- cpu_to_be16(DDF_Failed))
- && be16_and(ddf->phys->entries[pdnum].state,
- cpu_to_be16(DDF_Transition))) {
+ cpu_to_be16(DDF_Failed)) &&
+ be16_and(ddf->phys->entries[pdnum].state,
+ cpu_to_be16(DDF_Transition))) {
/* skip this one unless in dlist*/
for (dl = ddf->dlist; dl; dl = dl->next)
if (dl->pdnum == (int)pdnum)
@@ -5178,8 +5149,8 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
vc = (struct vd_config *)(mu->buf
+ i_sec * ddf->conf_rec_len * 512);
for (dl = ddf->dlist; dl; dl = dl->next)
- if (dl->major == di->disk.major
- && dl->minor == di->disk.minor)
+ if (dl->major == di->disk.major &&
+ dl->minor == di->disk.minor)
break;
if (!dl || dl->pdnum < 0) {
pr_err("BUG: can't find disk %d (%d/%d)\n",
@@ -5196,7 +5167,6 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
*updates = mu;
return rv;
}
-#endif /* MDASSEMBLE */
static int ddf_level_to_layout(int level)
{
@@ -5225,7 +5195,6 @@ static void default_geometry_ddf(struct supertype *st, int *level, int *layout,
}
struct superswitch super_ddf = {
-#ifndef MDASSEMBLE
.examine_super = examine_super_ddf,
.brief_examine_super = brief_examine_super_ddf,
.brief_examine_subarrays = brief_examine_subarrays_ddf,
@@ -5239,7 +5208,6 @@ struct superswitch super_ddf = {
.load_container = load_container_ddf,
.copy_metadata = copy_metadata_ddf,
.kill_subarray = kill_subarray_ddf,
-#endif
.match_home = match_home_ddf,
.uuid_from_super= uuid_from_super_ddf,
.getinfo_super = getinfo_super_ddf,
@@ -5259,7 +5227,6 @@ struct superswitch super_ddf = {
.external = 1,
-#ifndef MDASSEMBLE
/* for mdmon */
.open_new = ddf_open_new,
.set_array_state= ddf_set_array_state,
@@ -5268,6 +5235,5 @@ struct superswitch super_ddf = {
.process_update = ddf_process_update,
.prepare_update = ddf_prepare_update,
.activate_spare = ddf_activate_spare,
-#endif
.name = "ddf",
};
diff --git a/super-gpt.c b/super-gpt.c
index 8b080a0..a1e9aa9 100644
--- a/super-gpt.c
+++ b/super-gpt.c
@@ -47,7 +47,6 @@ static void free_gpt(struct supertype *st)
st->sb = NULL;
}
-#ifndef MDASSEMBLE
static void examine_gpt(struct supertype *st, char *homehost)
{
struct GPT *gpt = st->sb + 512;
@@ -66,7 +65,6 @@ static void examine_gpt(struct supertype *st, char *homehost)
);
}
}
-#endif /* MDASSEMBLE */
static int load_gpt(struct supertype *st, int fd, char *devname)
{
@@ -199,24 +197,20 @@ static struct supertype *match_metadata_desc(char *arg)
return st;
}
-#ifndef MDASSEMBLE
static int validate_geometry(struct supertype *st, int level,
int layout, int raiddisks,
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *subdev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
pr_err("gpt metadata cannot be used this way\n");
return 0;
}
-#endif
struct superswitch gpt = {
-#ifndef MDASSEMBLE
.examine_super = examine_gpt,
.validate_geometry = validate_geometry,
-#endif
.match_metadata_desc = match_metadata_desc,
.load_super = load_gpt,
.store_super = store_gpt,
diff --git a/super-intel.c b/super-intel.c
index 433bb6d..fb1b693 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -92,6 +92,9 @@
#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056
#define SECT_PER_MB_SHIFT 11
#define MAX_SECTOR_SIZE 4096
+#define MULTIPLE_PPL_AREA_SIZE_IMSM (1024 * 1024) /* Size of the whole
+ * multiple PPL area
+ */
/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
@@ -102,6 +105,7 @@ struct imsm_disk {
#define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
#define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
#define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
+#define JOURNAL_DISK __cpu_to_le32(0x2000000) /* Device marked as Journaling Drive */
__u32 status; /* 0xF0 - 0xF3 */
__u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
__u32 total_blocks_hi; /* 0xF4 - 0xF5 total blocks hi */
@@ -155,6 +159,9 @@ struct imsm_vol {
#define MIGR_STATE_CHANGE 4
#define MIGR_REPAIR 5
__u8 migr_type; /* Initializing, Rebuilding, ... */
+#define RAIDVOL_CLEAN 0
+#define RAIDVOL_DIRTY 1
+#define RAIDVOL_DSRECORD_VALID 2
__u8 dirty;
__u8 fs_state; /* fast-sync state for CnG (0xff == disabled) */
__u16 verify_errors; /* number of mismatches */
@@ -190,7 +197,27 @@ struct imsm_dev {
__u16 cache_policy;
__u8 cng_state;
__u8 cng_sub_state;
-#define IMSM_DEV_FILLERS 10
+ __u16 my_vol_raid_dev_num; /* Used in Unique volume Id for this RaidDev */
+
+ /* NVM_EN */
+ __u8 nv_cache_mode;
+ __u8 nv_cache_flags;
+
+ /* Unique Volume Id of the NvCache Volume associated with this volume */
+ __u32 nvc_vol_orig_family_num;
+ __u16 nvc_vol_raid_dev_num;
+
+#define RWH_OFF 0
+#define RWH_DISTRIBUTED 1
+#define RWH_JOURNALING_DRIVE 2
+#define RWH_MULTIPLE_DISTRIBUTED 3
+#define RWH_MULTIPLE_PPLS_JOURNALING_DRIVE 4
+#define RWH_MULTIPLE_OFF 5
+ __u8 rwh_policy; /* Raid Write Hole Policy */
+ __u8 jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */
+ __u8 filler1;
+
+#define IMSM_DEV_FILLERS 3
__u32 filler[IMSM_DEV_FILLERS];
struct imsm_vol vol;
} __attribute__ ((packed));
@@ -211,8 +238,13 @@ struct imsm_super {
__u32 orig_family_num; /* 0x40 - 0x43 original family num */
__u32 pwr_cycle_count; /* 0x44 - 0x47 simulated power cycle count for array */
__u32 bbm_log_size; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
-#define IMSM_FILLERS 35
- __u32 filler[IMSM_FILLERS]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
+ __u16 num_raid_devs_created; /* 0x4C - 0x4D Used for generating unique
+ * volume IDs for raid_dev created in this array
+ * (starts at 1)
+ */
+ __u16 filler1; /* 0x4E - 0x4F */
+#define IMSM_FILLERS 34
+ __u32 filler[IMSM_FILLERS]; /* 0x50 - 0xD7 RAID_MPB_FILLERS */
struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
/* here comes imsm_dev[num_raid_devs] */
/* here comes BBM logs */
@@ -239,9 +271,9 @@ struct bbm_log {
struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));
-#ifndef MDASSEMBLE
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
-#endif
+
+#define BLOCKS_PER_KB (1024/512)
#define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
@@ -257,6 +289,9 @@ static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed"
#define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has
* already been migrated and must
* be recovered from checkpoint area */
+
+#define PPL_ENTRY_SPACE (128 * 1024) /* Size of single PPL, without the header */
+
struct migr_record {
__u32 rec_status; /* Status used to determine how to restart
* migration in case it aborts
@@ -427,6 +462,7 @@ enum imsm_update_type {
update_general_migration_checkpoint,
update_size_change,
update_prealloc_badblocks_mem,
+ update_rwh_policy,
};
struct imsm_update_activate_spare {
@@ -519,6 +555,12 @@ struct imsm_update_prealloc_bb_mem {
enum imsm_update_type type;
};
+struct imsm_update_rwh_policy { /* NOTE(review): looks like the payload for an update_rwh_policy update; confirm against the update handler */
+ enum imsm_update_type type;
+ int new_policy; /* NOTE(review): presumably one of the RWH_* values; confirm */
+ int dev_idx; /* NOTE(review): presumably the index of the affected volume; confirm */
+};
+
static const char *_sys_dev_type[] = {
[SYS_DEV_UNKNOWN] = "Unknown",
[SYS_DEV_SAS] = "SAS",
@@ -639,12 +681,10 @@ static struct supertype *match_metadata_desc_imsm(char *arg)
return st;
}
-#ifndef MDASSEMBLE
static __u8 *get_imsm_version(struct imsm_super *mpb)
{
return &mpb->sig[MPB_SIG_LEN];
}
-#endif
/* retrieve a disk directly from the anchor when the anchor is known to be
* up-to-date, currently only at load time
@@ -751,7 +791,6 @@ static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
return size;
}
-#ifndef MDASSEMBLE
/* retrieve disk serial number list from a metadata update */
static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
{
@@ -763,7 +802,6 @@ static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
return inf;
}
-#endif
static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
{
@@ -814,7 +852,6 @@ static inline struct bbm_log_block_addr __cpu_to_le48(unsigned long long sec)
return addr;
}
-#ifndef MDASSEMBLE
/* get size of the bbm log */
static __u32 get_imsm_bbm_log_size(struct bbm_log *log)
{
@@ -945,7 +982,6 @@ static int clear_badblock(struct bbm_log *log, const __u8 idx, const unsigned
return 1;
}
-#endif /* MDASSEMBLE */
/* allocate and load BBM log from metadata */
static int load_bbm_log(struct intel_super *super)
@@ -1288,6 +1324,38 @@ static int is_failed(struct imsm_disk *disk)
return (disk->status & FAILED_DISK) == FAILED_DISK;
}
+/* Return non-zero when the JOURNAL_DISK flag is set in disk->status. */
+static int is_journal(struct imsm_disk *disk)
+{
+	const __u32 journal_bits = disk->status & JOURNAL_DISK;
+
+	return journal_bits == JOURNAL_DISK;
+}
+
+/*
+ * Round an array size down so that each of the disk_count members gets
+ * a share that is a multiple of (1 << SECT_PER_MB_SHIFT) sectors, and
+ * return the sum of those shares.
+ */
+static unsigned long long round_size_to_mb(unsigned long long size, unsigned int
+					   disk_count)
+{
+	unsigned long long per_member = size / disk_count;
+
+	/* drop the low SECT_PER_MB_SHIFT bits of the per-member share */
+	per_member &= ~((1ULL << SECT_PER_MB_SHIFT) - 1);
+
+	return per_member * disk_count;
+}
+
+/*
+ * Return non-zero when missing_disks does not exceed the limit for
+ * raid_level: one missing disk for level 10, none for any other level.
+ */
+static int able_to_resync(int raid_level, int missing_disks)
+{
+	const int tolerated = (raid_level == 10) ? 1 : 0;
+
+	return missing_disks <= tolerated;
+}
+
/* try to determine how much space is reserved for metadata from
* the last get_extents() entry on the smallest active disk,
* otherwise fallback to the default
@@ -1335,44 +1403,52 @@ static __u32 imsm_min_reserved_sectors(struct intel_super *super)
return (remainder < rv) ? remainder : rv;
}
-/* Return minimum size of a spare that can be used in this array*/
-static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
+/*
+ * Return minimum size of a spare and sector size
+ * that can be used in this array
+ */
+int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c)
{
struct intel_super *super = st->sb;
struct dl *dl;
struct extent *e;
int i;
- unsigned long long rv = 0;
+ unsigned long long size = 0;
+
+ c->min_size = 0;
+ c->sector_size = 0;
if (!super)
- return rv;
+ return -EINVAL;
/* find first active disk in array */
dl = super->disks;
while (dl && (is_failed(&dl->disk) || dl->index == -1))
dl = dl->next;
if (!dl)
- return rv;
+ return -EINVAL;
/* find last lba used by subarrays */
e = get_extents(super, dl);
if (!e)
- return rv;
+ return -EINVAL;
for (i = 0; e[i].size; i++)
continue;
if (i > 0)
- rv = e[i-1].start + e[i-1].size;
+ size = e[i-1].start + e[i-1].size;
free(e);
/* add the amount of space needed for metadata */
- rv = rv + imsm_min_reserved_sectors(super);
+ size += imsm_min_reserved_sectors(super);
+
+ c->min_size = size * 512;
+ c->sector_size = super->sector_size;
- return rv * 512;
+ return 0;
}
static int is_gen_migration(struct imsm_dev *dev);
#define IMSM_4K_DIV 8
-#ifndef MDASSEMBLE
static __u64 blocks_per_migr_unit(struct intel_super *super,
struct imsm_dev *dev);
@@ -1426,13 +1502,16 @@ static void print_imsm_dev(struct intel_super *super,
ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
} else
printf(" This Slot : ?\n");
+ printf(" Sector Size : %u\n", super->sector_size);
sz = __le32_to_cpu(dev->size_high);
sz <<= 32;
sz += __le32_to_cpu(dev->size_low);
- printf(" Array Size : %llu%s\n", (unsigned long long)sz,
+ printf(" Array Size : %llu%s\n",
+ (unsigned long long)sz * 512 / super->sector_size,
human_size(sz * 512));
sz = blocks_per_member(map);
- printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
+ printf(" Per Dev Size : %llu%s\n",
+ (unsigned long long)sz * 512 / super->sector_size,
human_size(sz * 512));
printf(" Sector Offset : %llu\n",
pba_of_lba0(map));
@@ -1477,7 +1556,21 @@ static void print_imsm_dev(struct intel_super *super,
blocks_per_migr_unit(super, dev));
}
printf("\n");
- printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
+ printf(" Dirty State : %s\n", (dev->vol.dirty & RAIDVOL_DIRTY) ?
+ "dirty" : "clean");
+ printf(" RWH Policy : ");
+ if (dev->rwh_policy == RWH_OFF || dev->rwh_policy == RWH_MULTIPLE_OFF)
+ printf("off\n");
+ else if (dev->rwh_policy == RWH_DISTRIBUTED)
+ printf("PPL distributed\n");
+ else if (dev->rwh_policy == RWH_JOURNALING_DRIVE)
+ printf("PPL journaling drive\n");
+ else if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
+ printf("Multiple distributed PPLs\n");
+ else if (dev->rwh_policy == RWH_MULTIPLE_PPLS_JOURNALING_DRIVE)
+ printf("Multiple PPLs on journaling drive\n");
+ else
+ printf("<unknown:%d>\n", dev->rwh_policy);
}
static void print_imsm_disk(struct imsm_disk *disk,
@@ -1496,9 +1589,10 @@ static void print_imsm_disk(struct imsm_disk *disk,
printf(" Disk%02d Serial : %s\n", index, str);
else
printf(" Disk Serial : %s\n", str);
- printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
- is_configured(disk) ? " active" : "",
- is_failed(disk) ? " failed" : "");
+ printf(" State :%s%s%s%s\n", is_spare(disk) ? " spare" : "",
+ is_configured(disk) ? " active" : "",
+ is_failed(disk) ? " failed" : "",
+ is_journal(disk) ? " journal" : "");
printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
sz = total_blocks(disk) - reserved;
printf(" Usable Size : %llu%s\n",
@@ -1636,7 +1730,6 @@ void examine_migr_rec_imsm(struct intel_super *super)
break;
}
}
-#endif /* MDASSEMBLE */
void convert_from_4k_imsm_migr_rec(struct intel_super *super)
{
@@ -1796,7 +1889,6 @@ static int imsm_check_attributes(__u32 attributes)
return ret_val;
}
-#ifndef MDASSEMBLE
static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
static void examine_super_imsm(struct supertype *st, char *homehost)
@@ -1811,7 +1903,8 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
__u32 reserved = imsm_reserved_sectors(super, super->disks);
struct dl *dl;
- snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
+ strncpy(str, (char *)mpb->sig, MPB_SIG_LEN);
+ str[MPB_SIG_LEN-1] = '\0';
printf(" Magic : %s\n", str);
snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
printf(" Version : %s\n", get_imsm_version(mpb));
@@ -1986,7 +2079,7 @@ static void detail_super_imsm(struct supertype *st, char *homehost)
getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
- printf("\n UUID : %s\n", nbuf + 5);
+ printf("\n UUID : %s\n", nbuf + 5);
}
static void brief_detail_super_imsm(struct supertype *st)
@@ -2276,6 +2369,8 @@ static void print_imsm_capability(const struct imsm_orom *orom)
printf(" Platform : Intel(R) ");
if (orom->capabilities == 0 && orom->driver_features == 0)
printf("Matrix Storage Manager\n");
+ else if (imsm_orom_is_enterprise(orom) && orom->major_ver >= 6)
+ printf("Virtual RAID on CPU\n");
else
printf("Rapid Storage Technology%s\n",
imsm_orom_is_enterprise(orom) ? " enterprise" : "");
@@ -2509,8 +2604,6 @@ static int export_detail_platform_imsm(int verbose, char *controller_path)
return result;
}
-#endif
-
static int match_home_imsm(struct supertype *st, char *homehost)
{
/* the imsm metadata format does not specify any host
@@ -2921,7 +3014,6 @@ out:
return retval;
}
-#ifndef MDASSEMBLE
/*******************************************************************************
* function: imsm_create_metadata_checkpoint_update
* Description: It creates update for checkpoint change.
@@ -3062,7 +3154,6 @@ static int write_imsm_migr_rec(struct supertype *st)
close(fd);
return retval;
}
-#endif /* MDASSEMBLE */
/* spare/missing disks activations are not allowe when
* array/container performs reshape operation, because
@@ -3113,6 +3204,15 @@ static unsigned long long imsm_component_size_aligment_check(int level,
return component_size;
}
+/*
+ * Return the PPL sector for volume dev_idx: pba_of_lba0() of the
+ * volume's MAP_0 plus num_data_stripes() times blocks_per_strip.
+ */
+static unsigned long long get_ppl_sector(struct intel_super *super, int dev_idx)
+{
+	struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
+	struct imsm_map *map = get_imsm_map(dev, MAP_0);
+
+	/* blocks_per_strip is kept little-endian in the metadata, so
+	 * convert it before doing arithmetic with it
+	 */
+	return pba_of_lba0(map) +
+	       (num_data_stripes(map) * __le16_to_cpu(map->blocks_per_strip));
+}
+
static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
{
struct intel_super *super = st->sb;
@@ -3139,7 +3239,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
info->array.utime = 0;
info->array.chunk_size =
__le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
- info->array.state = !dev->vol.dirty;
+ info->array.state = !(dev->vol.dirty & RAIDVOL_DIRTY);
info->custom_array_size = __le32_to_cpu(dev->size_high);
info->custom_array_size <<= 32;
info->custom_array_size |= __le32_to_cpu(dev->size_low);
@@ -3220,10 +3320,26 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
memset(info->uuid, 0, sizeof(info->uuid));
info->recovery_start = MaxSector;
+ if (info->array.level == 5 &&
+ (dev->rwh_policy == RWH_DISTRIBUTED ||
+ dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)) {
+ info->consistency_policy = CONSISTENCY_POLICY_PPL;
+ info->ppl_sector = get_ppl_sector(super, super->current_vol);
+ if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
+ info->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9;
+ else
+ info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE)
+ >> 9;
+ } else if (info->array.level <= 0) {
+ info->consistency_policy = CONSISTENCY_POLICY_NONE;
+ } else {
+ info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+ }
+
info->reshape_progress = 0;
info->resync_start = MaxSector;
if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
- dev->vol.dirty) &&
+ !(info->array.state & 1)) &&
imsm_reshape_blocks_arrays_changes(super) == 0) {
info->resync_start = 0;
}
@@ -3263,11 +3379,10 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
if (used_disks > 0) {
array_blocks = blocks_per_member(map) *
used_disks;
- /* round array size down to closest MB
- */
- info->custom_array_size = (array_blocks
- >> SECT_PER_MB_SHIFT)
- << SECT_PER_MB_SHIFT;
+ info->custom_array_size =
+ round_size_to_mb(array_blocks,
+ used_disks);
+
}
}
case MIGR_VERIFY:
@@ -3315,7 +3430,6 @@ static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
int look_in_map);
-#ifndef MDASSEMBLE
static void manage_second_map(struct intel_super *super, struct imsm_dev *dev)
{
if (is_gen_migration(dev)) {
@@ -3331,7 +3445,6 @@ static void manage_second_map(struct intel_super *super, struct imsm_dev *dev)
}
}
}
-#endif
static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
{
@@ -3405,6 +3518,9 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *
__u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0);
__u32 idx = ord_to_idx(ord);
+ if (super->disks && super->disks->index == (int)idx)
+ info->disk.raid_disk = j;
+
if (!(ord & IMSM_ORD_REBUILD) &&
get_imsm_missing(super, idx)) {
missing = 1;
@@ -3450,7 +3566,8 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *
* found the 'most fresh' version of the metadata
*/
info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
- info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
+ info->disk.state |= (is_spare(disk) || is_journal(disk)) ?
+ 0 : (1 << MD_DISK_SYNC);
}
/* only call uuid_from_super_imsm when this disk is part of a populated container,
@@ -3905,14 +4022,13 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
*/
if (is_failed(&dl->disk))
dl->index = -2;
- else if (is_spare(&dl->disk))
+ else if (is_spare(&dl->disk) || is_journal(&dl->disk))
dl->index = -1;
}
return 0;
}
-#ifndef MDASSEMBLE
/* When migrating map0 contains the 'destination' state while map1
* contains the current state. When not migrating map0 contains the
* current state. This routine assumes that map[0].map_state is set to
@@ -3942,7 +4058,7 @@ static void migrate(struct imsm_dev *dev, struct intel_super *super,
/* duplicate and then set the target end state in map[0] */
memcpy(dest, src, sizeof_imsm_map(src));
- if (migr_type == MIGR_REBUILD || migr_type == MIGR_GEN_MIGR) {
+ if (migr_type == MIGR_GEN_MIGR) {
__u32 ord;
int i;
@@ -4000,7 +4116,6 @@ static void end_migration(struct imsm_dev *dev, struct intel_super *super,
dev->vol.curr_migr_unit = 0;
map->map_state = map_state;
}
-#endif
static int parse_raid_devices(struct intel_super *super)
{
@@ -4085,8 +4200,8 @@ int check_mpb_migr_compatibility(struct intel_super *super)
if (pba_of_lba0(map0) != pba_of_lba0(map1))
/* migration optimization area was used */
return -1;
- if (migr_rec->ascending_migr == 0
- && migr_rec->dest_depth_per_unit > 0)
+ if (migr_rec->ascending_migr == 0 &&
+ migr_rec->dest_depth_per_unit > 0)
/* descending reshape not supported yet */
return -1;
}
@@ -4161,8 +4276,8 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
sectors = mpb_sectors(anchor, sector_size) - 1;
free(anchor);
- if (posix_memalign(&super->migr_rec_buf, sector_size,
- MIGR_REC_BUF_SECTORS*sector_size) != 0) {
+ if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE,
+ MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE) != 0) {
pr_err("could not allocate migr_rec buffer\n");
free(super->buf);
return 2;
@@ -4401,8 +4516,7 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de
hba = hba->next;
}
fprintf(stderr, ").\n"
- " Mixing devices attached to different %s is not allowed.\n",
- hba_name->type == SYS_DEV_VMD ? "VMD domains" : "controllers");
+ " Mixing devices attached to different controllers is not allowed.\n");
}
return 2;
}
@@ -4446,7 +4560,6 @@ static int find_missing(struct intel_super *super)
return 0;
}
-#ifndef MDASSEMBLE
static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
{
struct intel_disk *idisk = disk_list;
@@ -4984,7 +5097,6 @@ static int load_container_imsm(struct supertype *st, int fd, char *devname)
{
return load_super_imsm_all(st, fd, &st->sb, devname, NULL, 1);
}
-#endif
static int load_super_imsm(struct supertype *st, int fd, char *devname)
{
@@ -5154,7 +5266,7 @@ static int check_name(struct intel_super *super, char *name, int quiet)
}
static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
- unsigned long long size, char *name,
+ struct shape *s, char *name,
char *homehost, int *uuid,
long long data_offset)
{
@@ -5173,6 +5285,8 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
unsigned long long array_blocks;
size_t size_old, size_new;
unsigned long long num_data_stripes;
+ unsigned int data_disks;
+ unsigned long long size_per_member;
if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
pr_err("This imsm-container already has the maximum of %d volumes\n", super->orom->vpa);
@@ -5190,8 +5304,9 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
pr_err("could not allocate new mpb\n");
return 0;
}
- if (posix_memalign(&super->migr_rec_buf, sector_size,
- MIGR_REC_BUF_SECTORS*sector_size) != 0) {
+ if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE,
+ MIGR_REC_BUF_SECTORS*
+ MAX_SECTOR_SIZE) != 0) {
pr_err("could not allocate migr_rec buffer\n");
free(super->buf);
free(super);
@@ -5227,7 +5342,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
disk->status = CONFIGURED_DISK | FAILED_DISK;
disk->scsi_id = __cpu_to_le32(~(__u32)0);
snprintf((char *) disk->serial, MAX_RAID_SERIAL_LEN,
- "missing:%d", i);
+ "missing:%d", (__u8)i);
}
find_missing(super);
} else {
@@ -5249,9 +5364,11 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
array_blocks = calc_array_size(info->level, info->raid_disks,
info->layout, info->chunk_size,
- size * 2);
- /* round array size down to closest MB */
- array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
+ s->size * BLOCKS_PER_KB);
+ data_disks = get_data_disks(info->level, info->layout,
+ info->raid_disks);
+ array_blocks = round_size_to_mb(array_blocks, data_disks);
+ size_per_member = array_blocks / data_disks;
dev->size_low = __cpu_to_le32((__u32) array_blocks);
dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
@@ -5263,7 +5380,9 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
vol->curr_migr_unit = 0;
map = get_imsm_map(dev, MAP_0);
set_pba_of_lba0(map, super->create_offset);
- set_blocks_per_member(map, info_to_blocks_per_member(info, size));
+ set_blocks_per_member(map, info_to_blocks_per_member(info,
+ size_per_member /
+ BLOCKS_PER_KB));
map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
map->failed_disk_num = ~0;
if (info->level > 0)
@@ -5291,7 +5410,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
map->num_domains = 1;
/* info->size is only int so use the 'size' parameter instead */
- num_data_stripes = (size * 2) / info_to_blocks_per_strip(info);
+ num_data_stripes = size_per_member / info_to_blocks_per_strip(info);
num_data_stripes /= map->num_domains;
set_num_data_stripes(map, num_data_stripes);
@@ -5301,6 +5420,20 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
}
mpb->num_raid_devs++;
+ mpb->num_raid_devs_created++;
+ dev->my_vol_raid_dev_num = mpb->num_raid_devs_created;
+
+ if (s->consistency_policy <= CONSISTENCY_POLICY_RESYNC) {
+ dev->rwh_policy = RWH_MULTIPLE_OFF;
+ } else if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
+ dev->rwh_policy = RWH_MULTIPLE_DISTRIBUTED;
+ } else {
+ free(dev);
+ free(dv);
+ pr_err("imsm does not support consistency policy %s\n",
+ map_num(consistency_policies, s->consistency_policy));
+ return 0;
+ }
dv->dev = dev;
dv->index = super->current_vol;
@@ -5313,7 +5446,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
}
static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
- unsigned long long size, char *name,
+ struct shape *s, char *name,
char *homehost, int *uuid,
unsigned long long data_offset)
{
@@ -5336,7 +5469,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
}
if (st->sb)
- return init_super_imsm_volume(st, info, size, name, homehost, uuid,
+ return init_super_imsm_volume(st, info, s, name, homehost, uuid,
data_offset);
if (info)
@@ -5381,7 +5514,22 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
return 1;
}
-#ifndef MDASSEMBLE
+/*
+ * Check that a drive uses the same sector size as the container.
+ *
+ * Returns 1 when dl has a valid descriptor and get_dev_sector_size()
+ * reports a size equal to super->sector_size.  Returns 0 otherwise; an
+ * invalid descriptor is additionally reported through pr_err().
+ */
+static int drive_validate_sector_size(struct intel_super *super, struct dl *dl)
+{
+	unsigned int drive_sector_size;
+
+	if (dl->fd < 0) {
+		pr_err("Invalid file descriptor for %s\n", dl->devname);
+		return 0;
+	}
+
+	if (!get_dev_sector_size(dl->fd, dl->devname, &drive_sector_size))
+		return 0;
+
+	return drive_sector_size == super->sector_size;
+}
+
static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
int fd, char *devname)
{
@@ -5421,6 +5569,11 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
return 1;
}
+ if (!drive_validate_sector_size(super, dl)) {
+ pr_err("Combining drives of different sector size in one volume is not allowed\n");
+ return 1;
+ }
+
/* add a pristine spare to the metadata */
if (dl->index < 0) {
dl->index = super->anchor->num_disks;
@@ -5618,7 +5771,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
} else if (super->hba->type == SYS_DEV_VMD && super->orom &&
!imsm_orom_has_tpv_support(super->orom)) {
pr_err("\tPlatform configuration does not support non-Intel NVMe drives.\n"
- "\tPlease refer to Intel(R) RSTe user guide.\n");
+ "\tPlease refer to Intel(R) RSTe/VROC user guide.\n");
free(dd->devname);
free(dd);
return 1;
@@ -5631,21 +5784,15 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
if (super->sector_size == 0) {
/* this a first device, so sector_size is not set yet */
super->sector_size = member_sector_size;
- } else if (member_sector_size != super->sector_size) {
- pr_err("Mixing between different sector size is forbidden, aborting...\n");
- if (dd->devname)
- free(dd->devname);
- free(dd);
- return 1;
}
/* clear migr_rec when adding disk to container */
- memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*super->sector_size);
- if (lseek64(fd, size - MIGR_REC_SECTOR_POSITION*super->sector_size,
+ memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE);
+ if (lseek64(fd, size - MIGR_REC_SECTOR_POSITION*member_sector_size,
SEEK_SET) >= 0) {
if ((unsigned int)write(fd, super->migr_rec_buf,
- MIGR_REC_BUF_SECTORS*super->sector_size) !=
- MIGR_REC_BUF_SECTORS*super->sector_size)
+ MIGR_REC_BUF_SECTORS*member_sector_size) !=
+ MIGR_REC_BUF_SECTORS*member_sector_size)
perror("Write migr_rec failed");
}
@@ -5837,7 +5984,7 @@ static int write_super_imsm(struct supertype *st, int doclose)
}
if (clear_migration_record)
memset(super->migr_rec_buf, 0,
- MIGR_REC_BUF_SECTORS*sector_size);
+ MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE);
if (sector_size == 4096)
convert_to_4k(super);
@@ -5927,10 +6074,256 @@ static int mgmt_disk(struct supertype *st)
return 0;
}
+__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len);
+
+/*
+ * Checksum a PPL header and write it out.
+ *
+ * ppl_sector: position of the header on fd, counted in 512-byte units
+ * fd:         descriptor to write to
+ * buf:        PPL_HEADER_SIZE bytes laid out as a struct ppl_header; the
+ *             checksum field is overwritten
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+static int write_ppl_header(unsigned long long ppl_sector, int fd, void *buf)
+{
+	struct ppl_header *ppl_hdr = buf;
+	ssize_t written;
+	int ret;
+
+	/*
+	 * The CRC is defined over the header with the checksum field zeroed
+	 * (validation clears the field before recomputing), so clear it here
+	 * instead of relying on every caller to have done so.
+	 */
+	ppl_hdr->checksum = 0;
+	ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
+
+	if (lseek64(fd, ppl_sector * 512, SEEK_SET) < 0) {
+		ret = -errno;
+		perror("Failed to seek to PPL header location");
+		return ret;
+	}
+
+	written = write(fd, buf, PPL_HEADER_SIZE);
+	if (written < 0) {
+		ret = -errno;
+		perror("Write PPL header failed");
+		return ret;
+	}
+	if (written != PPL_HEADER_SIZE) {
+		/*
+		 * A short write does not set errno; using -errno here could
+		 * report success (0) or an unrelated stale error.
+		 */
+		pr_err("Write PPL header failed: short write\n");
+		return -EIO;
+	}
+
+	/* flushing stays best-effort, as before */
+	fsync(fd);
+
+	return 0;
+}
+
+/*
+ * Lay down an initial PPL on one member device.
+ *
+ * The range described by info->ppl_sector / info->ppl_size is zeroed first,
+ * then a header carrying the container's orig_family_num as signature is
+ * written at info->ppl_sector.
+ *
+ * Returns 0 on success, the zero_disk_range() result if clearing fails, the
+ * negated posix_memalign() error if allocation fails, or the
+ * write_ppl_header() result.
+ */
+static int write_init_ppl_imsm(struct supertype *st, struct mdinfo *info, int fd)
+{
+	struct intel_super *super = st->sb;
+	struct ppl_header *hdr;
+	void *hdr_buf;
+	int rc;
+
+	/* wipe the whole PPL space before writing a new header */
+	rc = zero_disk_range(fd, info->ppl_sector, info->ppl_size);
+	if (rc != 0)
+		return rc;
+
+	rc = posix_memalign(&hdr_buf, MAX_SECTOR_SIZE, PPL_HEADER_SIZE);
+	if (rc != 0) {
+		pr_err("Failed to allocate PPL header buffer\n");
+		return -rc;
+	}
+	hdr = hdr_buf;
+
+	memset(hdr_buf, 0, PPL_HEADER_SIZE);
+	memset(hdr->reserved, 0xff, PPL_HDR_RESERVED);
+	hdr->signature = __cpu_to_le32(super->anchor->orig_family_num);
+
+	if (info->mismatch_cnt != 0) {
+		/*
+		 * An invalid ppl is being replaced.  Record a single entry
+		 * with a bad checksum so the kernel does not skip resync.
+		 */
+		hdr->entries[0].checksum = ~0;
+		hdr->entries_count = __cpu_to_le32(1);
+	}
+
+	rc = write_ppl_header(info->ppl_sector, fd, hdr_buf);
+	free(hdr_buf);
+
+	return rc;
+}
+
+static int is_rebuilding(struct imsm_dev *dev);
+
+/*
+ * Validate the PPL header(s) of one array member and repair or flag them.
+ *
+ * Headers are read from the member's device starting at info->ppl_sector.
+ * The walk stops at the first header with a bad checksum, a generation
+ * number lower than the previous header's, a signature different from
+ * super->anchor->orig_family_num, on an I/O error, or once the offset
+ * reaches MULTIPLE_PPL_AREA_SIZE_IMSM.
+ *
+ * Afterwards:
+ * - if the policy is PPL and info->ppl_size is not yet the multiple-PPL
+ *   area size, the subarray is updated via update_subarray("ppl");
+ * - if no valid header was found (ret == 1), an initial PPL is written
+ *   through st->ss->write_init_ppl() or info->mismatch_cnt is incremented,
+ *   depending on the map state;
+ * - if the last valid header has no entries while the volume is rebuilding
+ *   and info->resync_start is 0, the header is rewritten with one entry.
+ *
+ * Returns 0 when the member is skipped or a valid header was found, 1 when
+ * no valid PPL was found and mismatch_cnt was incremented, -1 on
+ * allocation/seek/read failure, or the result of write_init_ppl() /
+ * write_ppl_header() when one of them is called.
+ */
+static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info,
+ struct mdinfo *disk)
+{
+ struct intel_super *super = st->sb;
+ struct dl *d;
+ void *buf_orig, *buf, *buf_prev = NULL;
+ int ret = 0;
+ struct ppl_header *ppl_hdr = NULL;
+ __u32 crc;
+ struct imsm_dev *dev;
+ __u32 idx;
+ unsigned int i;
+ unsigned long long ppl_offset = 0;
+ unsigned long long prev_gen_num = 0;
+
+ if (disk->disk.raid_disk < 0)
+ return 0;
+
+ dev = get_imsm_dev(super, info->container_member);
+ idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_0);
+ d = get_imsm_dl_disk(super, idx);
+
+ if (!d || d->index < 0 || is_failed(&d->disk))
+ return 0;
+
+ /* room for two headers: the one being read and the last valid one */
+ if (posix_memalign(&buf_orig, MAX_SECTOR_SIZE, PPL_HEADER_SIZE * 2)) {
+ pr_err("Failed to allocate PPL header buffer\n");
+ return -1;
+ }
+ buf = buf_orig;
+
+ /* ret stays 1 unless a header passes all checks below */
+ ret = 1;
+ while (ppl_offset < MULTIPLE_PPL_AREA_SIZE_IMSM) {
+ void *tmp;
+
+ dprintf("Checking potential PPL at offset: %llu\n", ppl_offset);
+
+ if (lseek64(d->fd, info->ppl_sector * 512 + ppl_offset,
+ SEEK_SET) < 0) {
+ perror("Failed to seek to PPL header location");
+ ret = -1;
+ break;
+ }
+
+ if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
+ perror("Read PPL header failed");
+ ret = -1;
+ break;
+ }
+
+ ppl_hdr = buf;
+
+ /* the CRC is computed with the checksum field zeroed */
+ crc = __le32_to_cpu(ppl_hdr->checksum);
+ ppl_hdr->checksum = 0;
+
+ if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) {
+ dprintf("Wrong PPL header checksum on %s\n",
+ d->devname);
+ break;
+ }
+
+ if (prev_gen_num > __le64_to_cpu(ppl_hdr->generation)) {
+ /* previous was newest, it was already checked */
+ break;
+ }
+
+ if ((__le32_to_cpu(ppl_hdr->signature) !=
+ super->anchor->orig_family_num)) {
+ dprintf("Wrong PPL header signature on %s\n",
+ d->devname);
+ ret = 1;
+ break;
+ }
+
+ ret = 0;
+ prev_gen_num = __le64_to_cpu(ppl_hdr->generation);
+
+ /* the next header follows this header plus the pp_size of each entry */
+ ppl_offset += PPL_HEADER_SIZE;
+ for (i = 0; i < __le32_to_cpu(ppl_hdr->entries_count); i++)
+ ppl_offset +=
+ __le32_to_cpu(ppl_hdr->entries[i].pp_size);
+
+ /*
+ * Swap the two halves of buf_orig: buf_prev keeps the header that
+ * was just validated, buf becomes the scratch space for the next read.
+ */
+ if (!buf_prev)
+ buf_prev = buf + PPL_HEADER_SIZE;
+ tmp = buf_prev;
+ buf_prev = buf;
+ buf = tmp;
+ }
+
+ /* from here on work with the last header that validated, if any */
+ if (buf_prev) {
+ buf = buf_prev;
+ ppl_hdr = buf_prev;
+ }
+
+ /*
+ * Update metadata to use multiple PPLs area (1MB).
+ * This is done once for all RAID members
+ */
+ if (info->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ info->ppl_size != (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9)) {
+ char subarray[20];
+ struct mdinfo *member_dev;
+
+ sprintf(subarray, "%d", info->container_member);
+
+ if (mdmon_running(st->container_devnm))
+ st->update_tail = &st->updates;
+
+ if (st->ss->update_subarray(st, subarray, "ppl", NULL)) {
+ pr_err("Failed to update subarray %s\n",
+ subarray);
+ } else {
+ if (st->update_tail)
+ flush_metadata_updates(st);
+ else
+ st->ss->sync_metadata(st);
+ info->ppl_size = (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9);
+ for (member_dev = info->devs; member_dev;
+ member_dev = member_dev->next)
+ member_dev->ppl_size =
+ (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9);
+ }
+ }
+
+ if (ret == 1) {
+ struct imsm_map *map = get_imsm_map(dev, MAP_X);
+
+ if (map->map_state == IMSM_T_STATE_UNINITIALIZED ||
+ (map->map_state == IMSM_T_STATE_NORMAL &&
+ !(dev->vol.dirty & RAIDVOL_DIRTY)) ||
+ (is_rebuilding(dev) &&
+ dev->vol.curr_migr_unit == 0 &&
+ get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_1) != idx))
+ ret = st->ss->write_init_ppl(st, info, d->fd);
+ else
+ info->mismatch_cnt++;
+ } else if (ret == 0 &&
+ ppl_hdr->entries_count == 0 &&
+ is_rebuilding(dev) &&
+ info->resync_start == 0) {
+ /*
+ * The header has no entries - add a single empty entry and
+ * rewrite the header to prevent the kernel from going into
+ * resync after an interrupted rebuild.
+ */
+ ppl_hdr->entries_count = __cpu_to_le32(1);
+ ret = write_ppl_header(info->ppl_sector, d->fd, buf);
+ }
+
+ free(buf_orig);
+
+ return ret;
+}
+
+/*
+ * Write an initial PPL to every usable disk of the container.
+ *
+ * Does nothing (returns 0) unless info describes a level 5 array whose
+ * consistency policy is PPL.  Disks with a negative index or marked failed
+ * are skipped.  Stops at, and returns, the first non-zero result of
+ * st->ss->write_init_ppl(); returns 0 when every call succeeded.
+ */
+static int write_init_ppl_imsm_all(struct supertype *st, struct mdinfo *info)
+{
+	struct intel_super *super = st->sb;
+	struct dl *disk;
+
+	if (info->consistency_policy != CONSISTENCY_POLICY_PPL)
+		return 0;
+	if (info->array.level != 5)
+		return 0;
+
+	for (disk = super->disks; disk; disk = disk->next) {
+		int rc;
+
+		if (disk->index < 0 || is_failed(&disk->disk))
+			continue;
+
+		rc = st->ss->write_init_ppl(st, info, disk->fd);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
static int write_init_super_imsm(struct supertype *st)
{
struct intel_super *super = st->sb;
int current_vol = super->current_vol;
+ int rv = 0;
+ struct mdinfo info;
+
+ getinfo_super_imsm(st, &info, NULL);
/* we are done with current_vol reset it to point st at the container */
super->current_vol = -1;
@@ -5938,26 +6331,30 @@ static int write_init_super_imsm(struct supertype *st)
if (st->update_tail) {
/* queue the recently created array / added disk
* as a metadata update */
- int rv;
/* determine if we are creating a volume or adding a disk */
if (current_vol < 0) {
/* in the mgmt (add/remove) disk case we are running
* in mdmon context, so don't close fd's
*/
- return mgmt_disk(st);
- } else
- rv = create_array(st, current_vol);
-
- return rv;
+ rv = mgmt_disk(st);
+ } else {
+ rv = write_init_ppl_imsm_all(st, &info);
+ if (!rv)
+ rv = create_array(st, current_vol);
+ }
} else {
struct dl *d;
for (d = super->disks; d; d = d->next)
Kill(d->devname, NULL, 0, -1, 1);
- return write_super_imsm(st, 1);
+ if (current_vol >= 0)
+ rv = write_init_ppl_imsm_all(st, &info);
+ if (!rv)
+ rv = write_super_imsm(st, 1);
}
+
+ return rv;
}
-#endif
static int store_super_imsm(struct supertype *st, int fd)
{
@@ -5967,16 +6364,11 @@ static int store_super_imsm(struct supertype *st, int fd)
if (!mpb)
return 1;
-#ifndef MDASSEMBLE
if (super->sector_size == 4096)
convert_to_4k(super);
return store_imsm_mpb(fd, mpb);
-#else
- return 1;
-#endif
}
-#ifndef MDASSEMBLE
static int validate_geometry_imsm_container(struct supertype *st, int level,
int layout, int raiddisks, int chunk,
unsigned long long size,
@@ -6198,7 +6590,7 @@ active_arrays_by_format(char *name, char* hba, struct md_list **devlist,
for (memb = mdstat ; memb ; memb = memb->next) {
if (memb->metadata_version &&
- (strncmp(memb->metadata_version, "external:", 9) == 0) &&
+ (strncmp(memb->metadata_version, "external:", 9) == 0) &&
(strcmp(&memb->metadata_version[9], name) == 0) &&
!is_subarray(memb->metadata_version+9) &&
memb->members) {
@@ -6346,7 +6738,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
char *devname = tmpdev->devname;
- struct stat stb;
+ dev_t rdev;
struct supertype *tst;
int dfd;
if (tmpdev->used > 1)
@@ -6362,14 +6754,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
dprintf("cannot open device %s: %s\n",
devname, strerror(errno));
tmpdev->used = 2;
- } else if (fstat(dfd, &stb)< 0) {
- /* Impossible! */
- dprintf("fstat failed for %s: %s\n",
- devname, strerror(errno));
- tmpdev->used = 2;
- } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
- dprintf("%s is not a block device.\n",
- devname);
+ } else if (!fstat_is_blkdev(dfd, devname, &rdev)) {
tmpdev->used = 2;
} else if (must_be_container(dfd)) {
struct supertype *cst;
@@ -6391,7 +6776,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
if (cst)
cst->ss->free_super(cst);
} else {
- tmpdev->st_rdev = stb.st_rdev;
+ tmpdev->st_rdev = rdev;
if (tst->ss->load_super(tst,dfd, NULL)) {
dprintf("no RAID superblock on %s\n",
devname);
@@ -6646,7 +7031,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
unsigned long long *freesize,
int verbose)
{
- struct stat stb;
+ dev_t rdev;
struct intel_super *super = st->sb;
struct imsm_super *mpb;
struct dl *dl;
@@ -6711,13 +7096,11 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
}
/* This device must be a member of the set */
- if (stat(dev, &stb) < 0)
- return 0;
- if ((S_IFMT & stb.st_mode) != S_IFBLK)
+ if (!stat_is_blkdev(dev, &rdev))
return 0;
for (dl = super->disks ; dl ; dl = dl->next) {
- if (dl->major == (int)major(stb.st_rdev) &&
- dl->minor == (int)minor(stb.st_rdev))
+ if (dl->major == (int)major(rdev) &&
+ dl->minor == (int)minor(rdev))
break;
}
if (!dl) {
@@ -6913,7 +7296,7 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
int raiddisks, int *chunk, unsigned long long size,
unsigned long long data_offset,
char *dev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
int fd, cfd;
struct mdinfo *sra;
@@ -7142,26 +7525,55 @@ static int update_subarray_imsm(struct supertype *st, char *subarray,
u->type = update_rename_array;
u->dev_idx = vol;
- snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
+ strncpy((char *) u->name, name, MAX_RAID_SERIAL_LEN);
+ u->name[MAX_RAID_SERIAL_LEN-1] = '\0';
append_metadata_update(st, u, sizeof(*u));
} else {
struct imsm_dev *dev;
int i;
dev = get_imsm_dev(super, vol);
- snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
+ strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
+ dev->volume[MAX_RAID_SERIAL_LEN-1] = '\0';
for (i = 0; i < mpb->num_raid_devs; i++) {
dev = get_imsm_dev(super, i);
handle_missing(super, dev);
}
super->updates_pending++;
}
+ } else if (strcmp(update, "ppl") == 0 ||
+ strcmp(update, "no-ppl") == 0) {
+ int new_policy;
+ char *ep;
+ int vol = strtoul(subarray, &ep, 10);
+
+ if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
+ return 2;
+
+ if (strcmp(update, "ppl") == 0)
+ new_policy = RWH_MULTIPLE_DISTRIBUTED;
+ else
+ new_policy = RWH_MULTIPLE_OFF;
+
+ if (st->update_tail) {
+ struct imsm_update_rwh_policy *u = xmalloc(sizeof(*u));
+
+ u->type = update_rwh_policy;
+ u->dev_idx = vol;
+ u->new_policy = new_policy;
+ append_metadata_update(st, u, sizeof(*u));
+ } else {
+ struct imsm_dev *dev;
+
+ dev = get_imsm_dev(super, vol);
+ dev->rwh_policy = new_policy;
+ super->updates_pending++;
+ }
} else
return 2;
return 0;
}
-#endif /* MDASSEMBLE */
static int is_gen_migration(struct imsm_dev *dev)
{
@@ -7195,7 +7607,6 @@ static int is_rebuilding(struct imsm_dev *dev)
return 0;
}
-#ifndef MDASSEMBLE
static int is_initializing(struct imsm_dev *dev)
{
struct imsm_map *migr_map;
@@ -7213,7 +7624,6 @@ static int is_initializing(struct imsm_dev *dev)
return 0;
}
-#endif
static void update_recovery_start(struct intel_super *super,
struct imsm_dev *dev,
@@ -7247,9 +7657,7 @@ static void update_recovery_start(struct intel_super *super,
rebuild->recovery_start = units * blocks_per_migr_unit(super, dev);
}
-#ifndef MDASSEMBLE
static int recover_backup_imsm(struct supertype *st, struct mdinfo *info);
-#endif
static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
{
@@ -7288,10 +7696,9 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
struct imsm_map *map2;
struct mdinfo *this;
int slot;
-#ifndef MDASSEMBLE
int chunk;
-#endif
char *ep;
+ int level;
if (subarray &&
(i != strtoul(subarray, &ep, 10) || *ep != '\0'))
@@ -7300,6 +7707,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
dev = get_imsm_dev(super, i);
map = get_imsm_map(dev, MAP_0);
map2 = get_imsm_map(dev, MAP_1);
+ level = get_imsm_raid_level(map);
/* do not publish arrays that are in the middle of an
* unsupported migration
@@ -7319,12 +7727,11 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
super->current_vol = i;
getinfo_super_imsm_volume(st, this, NULL);
this->next = rest;
-#ifndef MDASSEMBLE
chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
/* mdadm does not support all metadata features- set the bit in all arrays state */
if (!validate_geometry_imsm_orom(super,
- get_imsm_raid_level(map), /* RAID level */
- imsm_level_to_layout(get_imsm_raid_level(map)),
+ level, /* RAID level */
+ imsm_level_to_layout(level),
map->num_members, /* raid disks */
&chunk, join_u32(dev->size_low, dev->size_high),
1 /* verbose */)) {
@@ -7334,7 +7741,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
(1<<MD_SB_BLOCK_CONTAINER_RESHAPE) |
(1<<MD_SB_BLOCK_VOLUME);
}
-#endif
/* if array has bad blocks, set suitable bit in all arrays state */
if (sb_errors)
@@ -7349,6 +7755,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
int idx;
int skip;
__u32 ord;
+ int missing = 0;
skip = 0;
idx = get_imsm_disk_idx(dev, slot, MAP_0);
@@ -7362,18 +7769,27 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
skip = 1;
if (d && is_failed(&d->disk))
skip = 1;
- if (ord & IMSM_ORD_REBUILD)
+ if (!skip && (ord & IMSM_ORD_REBUILD))
recovery_start = 0;
/*
* if we skip some disks the array will be assmebled degraded;
* reset resync start to avoid a dirty-degraded
* situation when performing the intial sync
- *
- * FIXME handle dirty degraded
*/
- if ((skip || recovery_start == 0) && !dev->vol.dirty)
- this->resync_start = MaxSector;
+ if (skip)
+ missing++;
+
+ if (!(dev->vol.dirty & RAIDVOL_DIRTY)) {
+ if ((!able_to_resync(level, missing) ||
+ recovery_start == 0))
+ this->resync_start = MaxSector;
+ } else {
+ /*
+ * FIXME handle dirty degraded
+ */
+ }
+
if (skip)
continue;
@@ -7407,6 +7823,11 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
info_d->component_size =
num_data_stripes(map) *
map->blocks_per_strip;
+ info_d->ppl_sector = this->ppl_sector;
+ info_d->ppl_size = this->ppl_size;
+ if (this->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ recovery_start == 0)
+ this->resync_start = 0;
} else {
info_d->component_size = blocks_per_member(map);
}
@@ -7421,11 +7842,9 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
update_recovery_start(super, dev, this);
this->array.spare_disks += spare_disks;
-#ifndef MDASSEMBLE
/* check for reshape */
if (this->reshape_active == 1)
recover_backup_imsm(st, this);
-#endif
rest = this;
}
@@ -7555,7 +7974,6 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
return failed;
}
-#ifndef MDASSEMBLE
static int imsm_open_new(struct supertype *c, struct active_array *a,
char *inst)
{
@@ -7679,14 +8097,35 @@ static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
/* end process for initialization and rebuild only
*/
if (is_gen_migration(dev) == 0) {
- __u8 map_state;
- int failed;
+ int failed = imsm_count_failed(super, dev, MAP_0);
- failed = imsm_count_failed(super, dev, MAP_0);
- map_state = imsm_check_degraded(super, dev, failed, MAP_0);
+ if (failed) {
+ __u8 map_state;
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ struct imsm_map *map1;
+ int i, ord, ord_map1;
+ int rebuilt = 1;
- if (failed)
- end_migration(dev, super, map_state);
+ for (i = 0; i < map->num_members; i++) {
+ ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
+ if (!(ord & IMSM_ORD_REBUILD))
+ continue;
+
+ map1 = get_imsm_map(dev, MAP_1);
+ if (!map1)
+ continue;
+
+ ord_map1 = __le32_to_cpu(map1->disk_ord_tbl[i]);
+ if (ord_map1 & IMSM_ORD_REBUILD)
+ rebuilt = 0;
+ }
+
+ if (rebuilt) {
+ map_state = imsm_check_degraded(super, dev,
+ failed, MAP_0);
+ end_migration(dev, super, map_state);
+ }
+ }
}
for (dl = super->missing; dl; dl = dl->next)
mark_missing(super, dev, &dl->disk, dl->index);
@@ -7725,9 +8164,7 @@ static unsigned long long imsm_set_array_size(struct imsm_dev *dev,
array_blocks = new_size;
}
- /* round array size down to closest MB
- */
- array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
+ array_blocks = round_size_to_mb(array_blocks, used_disks);
dev->size_low = __cpu_to_le32((__u32)array_blocks);
dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
@@ -7840,11 +8277,9 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
array_blocks =
blocks_per_member(map) *
used_disks;
- /* round array size down to closest MB
- */
- array_blocks = (array_blocks
- >> SECT_PER_MB_SHIFT)
- << SECT_PER_MB_SHIFT;
+ array_blocks =
+ round_size_to_mb(array_blocks,
+ used_disks);
a->info.custom_array_size = array_blocks;
/* encourage manager to update array
* size
@@ -7925,12 +8360,17 @@ mark_checkpoint:
skip_mark_checkpoint:
/* mark dirty / clean */
- if (dev->vol.dirty != !consistent) {
+ if (((dev->vol.dirty & RAIDVOL_DIRTY) && consistent) ||
+ (!(dev->vol.dirty & RAIDVOL_DIRTY) && !consistent)) {
dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
- if (consistent)
- dev->vol.dirty = 0;
- else
- dev->vol.dirty = 1;
+ if (consistent) {
+ dev->vol.dirty = RAIDVOL_CLEAN;
+ } else {
+ dev->vol.dirty = RAIDVOL_DIRTY;
+ if (dev->rwh_policy == RWH_DISTRIBUTED ||
+ dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
+ dev->vol.dirty |= RAIDVOL_DSRECORD_VALID;
+ }
super->updates_pending++;
}
@@ -7968,8 +8408,10 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
int failed;
int ord;
__u8 map_state;
+ int rebuild_done = 0;
+ int i;
- ord = imsm_disk_slot_to_ord(a, n);
+ ord = get_imsm_ord_tbl_ent(dev, n, MAP_X);
if (ord < 0)
return;
@@ -7987,6 +8429,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
+ rebuild_done = 1;
super->updates_pending++;
}
@@ -8049,7 +8492,39 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
dprintf_cont(" Map state change");
end_migration(dev, super, map_state);
super->updates_pending++;
+ } else if (!rebuild_done) {
+ break;
+ }
+
+ /* check if recovery is really finished */
+ for (mdi = a->info.devs; mdi ; mdi = mdi->next)
+ if (mdi->recovery_start != MaxSector) {
+ recovery_not_finished = 1;
+ break;
+ }
+ if (recovery_not_finished) {
+ dprintf_cont("\n");
+ dprintf("Rebuild has not finished yet, state not changed");
+ if (a->last_checkpoint < mdi->recovery_start) {
+ a->last_checkpoint =
+ mdi->recovery_start;
+ super->updates_pending++;
+ }
+ break;
}
+
+ dprintf_cont(" Rebuild done, still degraded");
+ dev->vol.migr_state = 0;
+ set_migr_type(dev, 0);
+ dev->vol.curr_migr_unit = 0;
+
+ for (i = 0; i < map->num_members; i++) {
+ int idx = get_imsm_ord_tbl_ent(dev, i, MAP_0);
+
+ if (idx & IMSM_ORD_REBUILD)
+ map->failed_disk_num = i;
+ }
+ super->updates_pending++;
break;
}
if (is_gen_migration(dev)) {
@@ -8216,6 +8691,9 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot,
if (dl->index == -1 && !activate_new)
continue;
+ if (!drive_validate_sector_size(super, dl))
+ continue;
+
/* Does this unused device have the requisite free space?
* It needs to be able to cover all member volumes
*/
@@ -8442,6 +8920,10 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
di->component_size = a->info.component_size;
di->container_member = inst;
di->bb.supported = 1;
+ if (a->info.consistency_policy == CONSISTENCY_POLICY_PPL) {
+ di->ppl_sector = get_ppl_sector(super, inst);
+ di->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9;
+ }
super->random = random32();
di->next = rv;
rv = di;
@@ -9369,6 +9851,21 @@ static void imsm_process_update(struct supertype *st,
}
case update_prealloc_badblocks_mem:
break;
+ case update_rwh_policy: {
+ struct imsm_update_rwh_policy *u = (void *)update->buf;
+ int target = u->dev_idx;
+ struct imsm_dev *dev = get_imsm_dev(super, target);
+ if (!dev) {
+ dprintf("could not find subarray-%d\n", target);
+ break;
+ }
+
+ if (dev->rwh_policy != u->new_policy) {
+ dev->rwh_policy = u->new_policy;
+ super->updates_pending++;
+ }
+ break;
+ }
default:
pr_err("error: unsuported process update type:(type: %d)\n", type);
}
@@ -9614,6 +10111,11 @@ static int imsm_prepare_update(struct supertype *st,
super->extra_space += sizeof(struct bbm_log) -
get_imsm_bbm_log_size(super->bbm_log);
break;
+ case update_rwh_policy: {
+ if (update->len < (int)sizeof(struct imsm_update_rwh_policy))
+ return 0;
+ break;
+ }
default:
return 0;
}
@@ -9651,7 +10153,7 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
struct imsm_dev *dev;
struct imsm_map *map;
unsigned int i, j, num_members;
- __u32 ord;
+ __u32 ord, ord_map0;
struct bbm_log *log = super->bbm_log;
dprintf("deleting device[%d] from imsm_super\n", index);
@@ -9673,12 +10175,13 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
* ord-flags to the first map
*/
ord = get_imsm_ord_tbl_ent(dev, j, MAP_X);
+ ord_map0 = get_imsm_ord_tbl_ent(dev, j, MAP_0);
if (ord_to_idx(ord) <= index)
continue;
map = get_imsm_map(dev, MAP_0);
- set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
+ set_imsm_ord_tbl_ent(map, j, ord_map0 - 1);
map = get_imsm_map(dev, MAP_1);
if (map)
set_imsm_ord_tbl_ent(map, j, ord - 1);
@@ -9702,7 +10205,6 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
__free_imsm_disk(dl);
}
}
-#endif /* MDASSEMBLE */
static void close_targets(int *targets, int new_disks)
{
@@ -9905,7 +10407,7 @@ int validate_container_imsm(struct mdinfo *info)
return 0;
}
-#ifndef MDASSEMBLE
+
/*******************************************************************************
* Function: imsm_record_badblock
* Description: This routine stores new bad block record in BBM log
@@ -10408,6 +10910,10 @@ static const char *imsm_get_disk_controller_domain(const char *path)
drv = "isci";
else if (hba && hba->type == SYS_DEV_SATA)
drv = "ahci";
+ else if (hba && hba->type == SYS_DEV_VMD)
+ drv = "vmd";
+ else if (hba && hba->type == SYS_DEV_NVME)
+ drv = "nvme";
else
drv = "unknown";
dprintf("path: %s hba: %s attached: %s\n",
@@ -10545,8 +11051,10 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st,
*/
static struct mdinfo *get_spares_for_grow(struct supertype *st)
{
- unsigned long long min_size = min_acceptable_spare_size_imsm(st);
- return container_choose_spares(st, min_size, NULL, NULL, NULL, 0);
+ struct spare_criteria sc;
+
+ get_spare_criteria_imsm(st, &sc);
+ return container_choose_spares(st, &sc, NULL, NULL, NULL, 0);
}
/******************************************************************************
@@ -10588,8 +11096,7 @@ static int imsm_create_metadata_update_for_reshape(
*/
spares = get_spares_for_grow(st);
- if (spares == NULL
- || delta_disks > spares->array.spare_disks) {
+ if (spares == NULL || delta_disks > spares->array.spare_disks) {
pr_err("imsm: ERROR: Cannot get spare devices for %s.\n", geo->dev_name);
i = -1;
goto abort;
@@ -10718,7 +11225,7 @@ static int imsm_create_metadata_update_for_migration(
free(u);
sysfs_free(spares);
update_memory_size = 0;
- dprintf("error: cannot get spare device for requested migration");
+ pr_err("cannot get spare device for requested migration\n");
return 0;
}
sysfs_free(spares);
@@ -10950,7 +11457,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
geo->raid_disks + devNumChange,
&chunk,
geo->size, INVALID_SECTORS,
- 0, 0, 1))
+ 0, 0, info.consistency_policy, 1))
change = -1;
if (check_devs) {
@@ -11354,8 +11861,8 @@ static int imsm_manage_reshape(
/* Find volume during the reshape */
for (dv = super->devlist; dv; dv = dv->next) {
- if (dv->dev->vol.migr_type == MIGR_GEN_MIGR
- && dv->dev->vol.migr_state == 1) {
+ if (dv->dev->vol.migr_type == MIGR_GEN_MIGR &&
+ dv->dev->vol.migr_state == 1) {
dev = dv->dev;
migr_vol_qan++;
}
@@ -11523,7 +12030,7 @@ static int imsm_manage_reshape(
/* clear migr_rec on disks after successful migration */
struct dl *d;
- memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*sector_size);
+ memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE);
for (d = super->disks; d; d = d->next) {
if (d->index < 0 || is_failed(&d->disk))
continue;
@@ -11551,10 +12058,7 @@ abort:
return ret_val;
}
-#endif /* MDASSEMBLE */
-
struct superswitch super_imsm = {
-#ifndef MDASSEMBLE
.examine_super = examine_super_imsm,
.brief_examine_super = brief_examine_super_imsm,
.brief_examine_subarrays = brief_examine_subarrays_imsm,
@@ -11577,7 +12081,6 @@ struct superswitch super_imsm = {
.recover_backup = recover_backup_imsm,
.copy_metadata = copy_metadata_imsm,
.examine_badblocks = examine_badblocks_imsm,
-#endif
.match_home = match_home_imsm,
.uuid_from_super= uuid_from_super_imsm,
.getinfo_super = getinfo_super_imsm,
@@ -11585,7 +12088,7 @@ struct superswitch super_imsm = {
.update_super = update_super_imsm,
.avail_size = avail_size_imsm,
- .min_acceptable_spare_size = min_acceptable_spare_size_imsm,
+ .get_spare_criteria = get_spare_criteria_imsm,
.compare_super = compare_super_imsm,
@@ -11597,10 +12100,12 @@ struct superswitch super_imsm = {
.container_content = container_content_imsm,
.validate_container = validate_container_imsm,
+ .write_init_ppl = write_init_ppl_imsm,
+ .validate_ppl = validate_ppl_imsm,
+
.external = 1,
.name = "imsm",
-#ifndef MDASSEMBLE
/* for mdmon */
.open_new = imsm_open_new,
.set_array_state= imsm_set_array_state,
@@ -11612,5 +12117,4 @@ struct superswitch super_imsm = {
.record_bad_block = imsm_record_badblock,
.clear_bad_block = imsm_clear_badblock,
.get_bad_blocks = imsm_get_badblocks,
-#endif /* MDASSEMBLE */
};
diff --git a/super-mbr.c b/super-mbr.c
index f5e4cea..839f000 100644
--- a/super-mbr.c
+++ b/super-mbr.c
@@ -48,8 +48,6 @@ static void free_mbr(struct supertype *st)
st->sb = NULL;
}
-#ifndef MDASSEMBLE
-
static void examine_mbr(struct supertype *st, char *homehost)
{
struct MBR *sb = st->sb;
@@ -71,8 +69,6 @@ static void examine_mbr(struct supertype *st, char *homehost)
}
-#endif /*MDASSEMBLE */
-
static int load_super_mbr(struct supertype *st, int fd, char *devname)
{
/* try to read an mbr
@@ -187,24 +183,20 @@ static struct supertype *match_metadata_desc(char *arg)
return st;
}
-#ifndef MDASSEMBLE
static int validate_geometry(struct supertype *st, int level,
int layout, int raiddisks,
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *subdev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
pr_err("mbr metadata cannot be used this way\n");
return 0;
}
-#endif
struct superswitch mbr = {
-#ifndef MDASSEMBLE
.examine_super = examine_mbr,
.validate_geometry = validate_geometry,
-#endif
.match_metadata_desc = match_metadata_desc,
.load_super = load_super_mbr,
.store_super = store_mbr,
diff --git a/super0.c b/super0.c
index 938cfd9..756cab5 100644
--- a/super0.c
+++ b/super0.c
@@ -76,8 +76,6 @@ static void super0_swap_endian(struct mdp_superblock_s *sb)
}
-#ifndef MDASSEMBLE
-
static void examine_super0(struct supertype *st, char *homehost)
{
mdp_super_t *sb = st->sb;
@@ -231,7 +229,7 @@ static void examine_super0(struct supertype *st, char *homehost)
d++) {
mdp_disk_t *dp;
char *dv;
- char nb[5];
+ char nb[11];
int wonly, failfast;
if (d>=0) dp = &sb->disks[d];
else dp = &sb->this_disk;
@@ -353,7 +351,7 @@ err:
static void detail_super0(struct supertype *st, char *homehost)
{
mdp_super_t *sb = st->sb;
- printf(" UUID : ");
+ printf(" UUID : ");
if (sb->minor_version >= 90)
printf("%08x:%08x:%08x:%08x", sb->set_uuid0, sb->set_uuid1,
sb->set_uuid2, sb->set_uuid3);
@@ -367,7 +365,7 @@ static void detail_super0(struct supertype *st, char *homehost)
if (memcmp(&sb->set_uuid2, hash, 8)==0)
printf(" (local to host %s)", homehost);
}
- printf("\n Events : %d.%d\n\n", sb->events_hi, sb->events_lo);
+ printf("\n Events : %d.%d\n\n", sb->events_hi, sb->events_lo);
}
static void brief_detail_super0(struct supertype *st)
@@ -380,7 +378,6 @@ static void brief_detail_super0(struct supertype *st)
else
printf("%08x", sb->set_uuid0);
}
-#endif
static int match_home0(struct supertype *st, char *homehost)
{
@@ -592,8 +589,8 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
* being marked 'sync'
*/
add = (1<<MD_DISK_SYNC);
- if (((sb->disks[d].state & ~mask) | add)
- != (unsigned)info->disk.state) {
+ if (((sb->disks[d].state & ~mask) | add) !=
+ (unsigned)info->disk.state) {
sb->disks[d].state = info->disk.state | wonly |failfast;
rv = 1;
}
@@ -725,7 +722,7 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
* We use the first 8 bytes (64bits) of the sha1 of the host name
*/
static int init_super0(struct supertype *st, mdu_array_info_t *info,
- unsigned long long size, char *ignored_name,
+ struct shape *s, char *ignored_name,
char *homehost, int *uuid,
unsigned long long data_offset)
{
@@ -764,8 +761,8 @@ static int init_super0(struct supertype *st, mdu_array_info_t *info,
sb->gvalid_words = 0; /* ignored */
sb->ctime = time(0);
sb->level = info->level;
- sb->size = size;
- if (size != (unsigned long long)sb->size)
+ sb->size = s->size;
+ if (s->size != (unsigned long long)sb->size)
return 0;
sb->nr_disks = info->nr_disks;
sb->raid_disks = info->raid_disks;
@@ -814,7 +811,6 @@ struct devinfo {
struct devinfo *next;
};
-#ifndef MDASSEMBLE
/* Add a device to the superblock being created */
static int add_to_super0(struct supertype *st, mdu_disk_info_t *dinfo,
int fd, char *devname, unsigned long long data_offset)
@@ -845,7 +841,6 @@ static int add_to_super0(struct supertype *st, mdu_disk_info_t *dinfo,
return 0;
}
-#endif
static int store_super0(struct supertype *st, int fd)
{
@@ -899,7 +894,6 @@ static int store_super0(struct supertype *st, int fd)
return 0;
}
-#ifndef MDASSEMBLE
static int write_init_super0(struct supertype *st)
{
mdp_super_t *sb = st->sb;
@@ -930,7 +924,6 @@ static int write_init_super0(struct supertype *st)
}
return rv;
}
-#endif
static int compare_super0(struct supertype *st, struct supertype *tst)
{
@@ -1065,8 +1058,8 @@ static int load_super0(struct supertype *st, int fd, char *devname)
* valid. If it doesn't clear the bit. An --assemble --force
* should get that written out.
*/
- if (read(fd, super+1, ROUND_UP(sizeof(struct bitmap_super_s),4096))
- != ROUND_UP(sizeof(struct bitmap_super_s),4096))
+ if (read(fd, super+1, ROUND_UP(sizeof(struct bitmap_super_s),4096)) !=
+ ROUND_UP(sizeof(struct bitmap_super_s), 4096))
goto no_bitmap;
uuid_from_super0(st, uuid);
@@ -1261,13 +1254,12 @@ static void free_super0(struct supertype *st)
st->sb = NULL;
}
-#ifndef MDASSEMBLE
static int validate_geometry0(struct supertype *st, int level,
int layout, int raiddisks,
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *subdev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
unsigned long long ldsize;
int fd;
@@ -1320,10 +1312,8 @@ static int validate_geometry0(struct supertype *st, int level,
*freesize = MD_NEW_SIZE_SECTORS(ldsize >> 9);
return 1;
}
-#endif /* MDASSEMBLE */
struct superswitch super0 = {
-#ifndef MDASSEMBLE
.examine_super = examine_super0,
.brief_examine_super = brief_examine_super0,
.export_examine_super = export_examine_super0,
@@ -1333,7 +1323,6 @@ struct superswitch super0 = {
.validate_geometry = validate_geometry0,
.add_to_super = add_to_super0,
.copy_metadata = copy_metadata0,
-#endif
.match_home = match_home0,
.uuid_from_super = uuid_from_super0,
.getinfo_super = getinfo_super0,
diff --git a/super1.c b/super1.c
index 87a74cb..6774fbd 100644
--- a/super1.c
+++ b/super1.c
@@ -48,10 +48,18 @@ struct mdp_superblock_1 {
__u32 chunksize; /* in 512byte sectors */
__u32 raid_disks;
- __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts
- * NOTE: signed, so bitmap can be before superblock
- * only meaningful of feature_map[0] is set.
- */
+ union {
+ __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts
+ * NOTE: signed, so bitmap can be before superblock
+ * only meaningful if feature_map[0] is set.
+ */
+
+ /* only meaningful when feature_map[MD_FEATURE_PPL] is set */
+ struct {
+ __s16 offset; /* sectors from start of superblock that ppl starts */
+ __u16 size; /* ppl size in sectors */
+ } ppl;
+ };
/* These are only valid with feature bit '4' */
__u32 new_level; /* new level we are reshaping to */
@@ -81,12 +89,12 @@ struct mdp_superblock_1 {
/* bad block log. If there are any bad blocks the feature flag is set.
* if offset and size are non-zero, that space is reserved and available.
*/
- __u8 bblog_shift; /* shift from sectors to block size for badblocklist */
- __u16 bblog_size; /* number of sectors reserved for badblocklist */
+ __u8 bblog_shift; /* shift from sectors to block size for badblock list */
+ __u16 bblog_size; /* number of sectors reserved for badblock list */
__u32 bblog_offset; /* sector offset from superblock to bblog, signed */
/* array state information - 64 bytes */
- __u64 utime; /* 40 bits second, 24 btes microseconds */
+ __u64 utime; /* 40 bits second, 24 bits microseconds */
__u64 events; /* incremented when superblock updated */
__u64 resync_offset; /* data before this offset (from data_offset) known to be in sync */
__u32 sb_csum; /* checksum upto dev_roles[max_dev] */
@@ -113,6 +121,9 @@ struct misc_dev_info {
__u64 device_size;
};
+#define MULTIPLE_PPL_AREA_SIZE_SUPER1 (1024 * 1024) /* Size of the whole
+ * multiple PPL area
+ */
/* feature_map bits */
#define MD_FEATURE_BITMAP_OFFSET 1
#define MD_FEATURE_RECOVERY_OFFSET 2 /* recovery_offset is present and
@@ -131,6 +142,8 @@ struct misc_dev_info {
#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */
#define MD_FEATURE_BITMAP_VERSIONED 256 /* bitmap version number checked properly */
#define MD_FEATURE_JOURNAL 512 /* support write journal */
+#define MD_FEATURE_PPL 1024 /* support PPL */
+#define MD_FEATURE_MUTLIPLE_PPLS 2048 /* support for multiple PPLs */
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|MD_FEATURE_RECOVERY_OFFSET \
|MD_FEATURE_RESHAPE_ACTIVE \
@@ -140,9 +153,10 @@ struct misc_dev_info {
|MD_FEATURE_NEW_OFFSET \
|MD_FEATURE_BITMAP_VERSIONED \
|MD_FEATURE_JOURNAL \
+ |MD_FEATURE_PPL \
+ |MD_FEATURE_MUTLIPLE_PPLS /* sic: spelling must match the #define */ \
)
-#ifndef MDASSEMBLE
static int role_from_sb(struct mdp_superblock_1 *sb)
{
unsigned int d;
@@ -155,7 +169,6 @@ static int role_from_sb(struct mdp_superblock_1 *sb)
role = MD_DISK_ROLE_SPARE;
return role;
}
-#endif
/* return how many bytes are needed for bitmap, for cluster-md each node
* should have it's own bitmap */
@@ -219,6 +232,7 @@ static void init_afd(struct align_fd *afd, int fd)
}
static char abuf[4096+4096];
+
static int aread(struct align_fd *afd, void *buf, int len)
{
/* aligned read.
@@ -289,7 +303,17 @@ static int awrite(struct align_fd *afd, void *buf, int len)
return len;
}
-#ifndef MDASSEMBLE
+static inline unsigned int md_feature_any_ppl_on(__u32 feature_map)
+{ /* feature_map: raw little-endian sb->feature_map; non-zero if any PPL bit is set */
+ return ((__le32_to_cpu(feature_map) &
+ (MD_FEATURE_PPL | MD_FEATURE_MUTLIPLE_PPLS)));
+}
+
+static inline unsigned int choose_ppl_space(int chunk)
+{ /* PPL space to reserve: (PPL_HEADER_SIZE >> 9) plus the larger of chunk and 256 */
+ return (PPL_HEADER_SIZE >> 9) + (chunk > 128*2 ? chunk : 128*2); /* NOTE(review): units look like 512-byte sectors (caller passes sb->chunksize) - confirm */
+}
+
static void examine_super1(struct supertype *st, char *homehost)
{
struct mdp_superblock_1 *sb = st->sb;
@@ -327,7 +351,8 @@ static void examine_super1(struct supertype *st, char *homehost)
strncmp(sb->set_name, homehost, l) == 0)
printf(" (local to host %s)", homehost);
printf("\n");
- if (bms->nodes > 0 && (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET))
+ if (bms->nodes > 0 &&
+ (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET))
printf(" Cluster Name : %-64s\n", bms->cluster_name);
atime = __le64_to_cpu(sb->ctime) & 0xFFFFFFFFFFULL;
printf(" Creation Time : %.24s\n", ctime(&atime));
@@ -373,7 +398,8 @@ static void examine_super1(struct supertype *st, char *homehost)
printf(" Super Offset : %llu sectors\n",
(unsigned long long)__le64_to_cpu(sb->super_offset));
if (__le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET)
- printf("Recovery Offset : %llu sectors\n", (unsigned long long)__le64_to_cpu(sb->recovery_offset));
+ printf("Recovery Offset : %llu sectors\n",
+ (unsigned long long)__le64_to_cpu(sb->recovery_offset));
st->ss->getinfo_super(st, &info, NULL);
if (info.space_after != 1 &&
@@ -381,10 +407,12 @@ static void examine_super1(struct supertype *st, char *homehost)
printf(" Unused Space : before=%llu sectors, after=%llu sectors\n",
info.space_before, info.space_after);
- printf(" State : %s\n", (__le64_to_cpu(sb->resync_offset)+1)? "active":"clean");
+ printf(" State : %s\n",
+ (__le64_to_cpu(sb->resync_offset)+1)? "active":"clean");
printf(" Device UUID : ");
for (i=0; i<16; i++) {
- if ((i&3)==0 && i != 0) printf(":");
+ if ((i&3)==0 && i != 0)
+ printf(":");
printf("%02x", sb->device_uuid[i]);
}
printf("\n");
@@ -392,14 +420,21 @@ static void examine_super1(struct supertype *st, char *homehost)
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
printf("Internal Bitmap : %ld sectors from superblock\n",
(long)(int32_t)__le32_to_cpu(sb->bitmap_offset));
+ } else if (md_feature_any_ppl_on(sb->feature_map)) {
+ printf(" PPL : %u sectors at offset %d sectors from superblock\n",
+ __le16_to_cpu(sb->ppl.size),
+ __le16_to_cpu(sb->ppl.offset));
}
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)) {
- printf(" Reshape pos'n : %llu%s\n", (unsigned long long)__le64_to_cpu(sb->reshape_position)/2,
+ printf(" Reshape pos'n : %llu%s\n", (unsigned long long)
+ __le64_to_cpu(sb->reshape_position)/2,
human_size(__le64_to_cpu(sb->reshape_position)<<9));
if (__le32_to_cpu(sb->delta_disks)) {
- printf(" Delta Devices : %d", __le32_to_cpu(sb->delta_disks));
+ printf(" Delta Devices : %d",
+ __le32_to_cpu(sb->delta_disks));
printf(" (%d->%d)\n",
- __le32_to_cpu(sb->raid_disks)-__le32_to_cpu(sb->delta_disks),
+ __le32_to_cpu(sb->raid_disks) -
+ __le32_to_cpu(sb->delta_disks),
__le32_to_cpu(sb->raid_disks));
if ((int)__le32_to_cpu(sb->delta_disks) < 0)
delta_extra = -__le32_to_cpu(sb->delta_disks);
@@ -408,13 +443,16 @@ static void examine_super1(struct supertype *st, char *homehost)
c = map_num(pers, __le32_to_cpu(sb->new_level));
printf(" New Level : %s\n", c?c:"-unknown-");
}
- if (__le32_to_cpu(sb->new_layout) != __le32_to_cpu(sb->layout)) {
+ if (__le32_to_cpu(sb->new_layout) !=
+ __le32_to_cpu(sb->layout)) {
if (__le32_to_cpu(sb->level) == 5) {
- c = map_num(r5layout, __le32_to_cpu(sb->new_layout));
+ c = map_num(r5layout,
+ __le32_to_cpu(sb->new_layout));
printf(" New Layout : %s\n", c?c:"-unknown-");
}
if (__le32_to_cpu(sb->level) == 6) {
- c = map_num(r6layout, __le32_to_cpu(sb->new_layout));
+ c = map_num(r6layout,
+ __le32_to_cpu(sb->new_layout));
printf(" New Layout : %s\n", c?c:"-unknown-");
}
if (__le32_to_cpu(sb->level) == 10) {
@@ -423,8 +461,10 @@ static void examine_super1(struct supertype *st, char *homehost)
printf("\n");
}
}
- if (__le32_to_cpu(sb->new_chunk) != __le32_to_cpu(sb->chunksize))
- printf(" New Chunksize : %dK\n", __le32_to_cpu(sb->new_chunk)/2);
+ if (__le32_to_cpu(sb->new_chunk) !=
+ __le32_to_cpu(sb->chunksize))
+ printf(" New Chunksize : %dK\n",
+ __le32_to_cpu(sb->new_chunk)/2);
printf("\n");
}
if (sb->devflags) {
@@ -443,18 +483,20 @@ static void examine_super1(struct supertype *st, char *homehost)
printf(" Bad Block Log : %d entries available at offset %ld sectors",
__le16_to_cpu(sb->bblog_size)*512/8,
(long)(int32_t)__le32_to_cpu(sb->bblog_offset));
- if (sb->feature_map &
- __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
+ if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
printf(" - bad blocks present.");
printf("\n");
}
if (calc_sb_1_csum(sb) == sb->sb_csum)
- printf(" Checksum : %x - correct\n", __le32_to_cpu(sb->sb_csum));
+ printf(" Checksum : %x - correct\n",
+ __le32_to_cpu(sb->sb_csum));
else
- printf(" Checksum : %x - expected %x\n", __le32_to_cpu(sb->sb_csum),
+ printf(" Checksum : %x - expected %x\n",
+ __le32_to_cpu(sb->sb_csum),
__le32_to_cpu(calc_sb_1_csum(sb)));
- printf(" Events : %llu\n", (unsigned long long)__le64_to_cpu(sb->events));
+ printf(" Events : %llu\n",
+ (unsigned long long)__le64_to_cpu(sb->events));
printf("\n");
if (__le32_to_cpu(sb->level) == 5) {
c = map_num(r5layout, __le32_to_cpu(sb->layout));
@@ -476,26 +518,34 @@ static void examine_super1(struct supertype *st, char *homehost)
case 5:
case 6:
case 10:
- printf(" Chunk Size : %dK\n", __le32_to_cpu(sb->chunksize)/2);
+ printf(" Chunk Size : %dK\n",
+ __le32_to_cpu(sb->chunksize)/2);
break;
case -1:
- printf(" Rounding : %dK\n", __le32_to_cpu(sb->chunksize)/2);
+ printf(" Rounding : %dK\n",
+ __le32_to_cpu(sb->chunksize)/2);
+ break;
+ default:
break;
- default: break;
}
printf("\n");
#if 0
/* This turns out to just be confusing */
printf(" Array Slot : %d (", __le32_to_cpu(sb->dev_number));
- for (i= __le32_to_cpu(sb->max_dev); i> 0 ; i--)
+ for (i = __le32_to_cpu(sb->max_dev); i> 0 ; i--)
if (__le16_to_cpu(sb->dev_roles[i-1]) != MD_DISK_ROLE_SPARE)
break;
- for (d=0; d < i; d++) {
+ for (d = 0; d < i; d++) {
int role = __le16_to_cpu(sb->dev_roles[d]);
- if (d) printf(", ");
- if (role == MD_DISK_ROLE_SPARE) printf("empty");
- else if(role == MD_DISK_ROLE_FAULTY) printf("failed");
- else printf("%d", role);
+ if (d)
+ printf(", ");
+ if (role == MD_DISK_ROLE_SPARE)
+ printf("empty");
+ else
+ if(role == MD_DISK_ROLE_FAULTY)
+ printf("failed");
+ else
+ printf("%d", role);
}
printf(")\n");
#endif
@@ -511,10 +561,10 @@ static void examine_super1(struct supertype *st, char *homehost)
printf("Active device %d\n", role);
printf(" Array State : ");
- for (d=0; d<__le32_to_cpu(sb->raid_disks) + delta_extra; d++) {
+ for (d = 0; d < __le32_to_cpu(sb->raid_disks) + delta_extra; d++) {
int cnt = 0;
unsigned int i;
- for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
+ for (i = 0; i < __le32_to_cpu(sb->max_dev); i++) {
unsigned int role = __le16_to_cpu(sb->dev_roles[i]);
if (role == d)
cnt++;
@@ -531,12 +581,13 @@ static void examine_super1(struct supertype *st, char *homehost)
#if 0
/* This is confusing too */
faulty = 0;
- for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
+ for (i = 0; i< __le32_to_cpu(sb->max_dev); i++) {
int role = __le16_to_cpu(sb->dev_roles[i]);
if (role == MD_DISK_ROLE_FAULTY)
faulty++;
}
- if (faulty) printf(" %d failed", faulty);
+ if (faulty)
+ printf(" %d failed", faulty);
#endif
printf(" ('A' == active, '.' == missing, 'R' == replacing)");
printf("\n");
@@ -548,7 +599,7 @@ static void brief_examine_super1(struct supertype *st, int verbose)
int i;
unsigned long long sb_offset;
char *nm;
- char *c=map_num(pers, __le32_to_cpu(sb->level));
+ char *c = map_num(pers, __le32_to_cpu(sb->level));
nm = strchr(sb->set_name, ':');
if (nm)
@@ -576,8 +627,9 @@ static void brief_examine_super1(struct supertype *st, int verbose)
if (verbose)
printf("num-devices=%d ", __le32_to_cpu(sb->raid_disks));
printf("UUID=");
- for (i=0; i<16; i++) {
- if ((i&3)==0 && i != 0) printf(":");
+ for (i = 0; i < 16; i++) {
+ if ((i&3)==0 && i != 0)
+ printf(":");
printf("%02x", sb->set_uuid[i]);
}
if (sb->set_name[0]) {
@@ -596,9 +648,8 @@ static void export_examine_super1(struct supertype *st)
printf("MD_LEVEL=%s\n", map_num(pers, __le32_to_cpu(sb->level)));
printf("MD_DEVICES=%d\n", __le32_to_cpu(sb->raid_disks));
- for (i=0; i<32; i++)
- if (sb->set_name[i] == '\n' ||
- sb->set_name[i] == '\0') {
+ for (i = 0; i < 32; i++)
+ if (sb->set_name[i] == '\n' || sb->set_name[i] == '\0') {
len = i;
break;
}
@@ -607,10 +658,16 @@ static void export_examine_super1(struct supertype *st)
if (__le32_to_cpu(sb->level) > 0) {
int ddsks = 0, ddsks_denom = 1;
switch(__le32_to_cpu(sb->level)) {
- case 1: ddsks=1;break;
+ case 1:
+ ddsks = 1;
+ break;
case 4:
- case 5: ddsks = __le32_to_cpu(sb->raid_disks)-1; break;
- case 6: ddsks = __le32_to_cpu(sb->raid_disks)-2; break;
+ case 5:
+ ddsks = __le32_to_cpu(sb->raid_disks)-1;
+ break;
+ case 6:
+ ddsks = __le32_to_cpu(sb->raid_disks)-2;
+ break;
case 10:
layout = __le32_to_cpu(sb->layout);
ddsks = __le32_to_cpu(sb->raid_disks);
@@ -619,20 +676,23 @@ static void export_examine_super1(struct supertype *st)
if (ddsks) {
long long asize = __le64_to_cpu(sb->size);
asize = (asize << 9) * ddsks / ddsks_denom;
- printf("MD_ARRAY_SIZE=%s\n",human_size_brief(asize,JEDEC));
+ printf("MD_ARRAY_SIZE=%s\n",
+ human_size_brief(asize, JEDEC));
}
}
printf("MD_UUID=");
- for (i=0; i<16; i++) {
- if ((i&3)==0 && i != 0) printf(":");
+ for (i = 0; i < 16; i++) {
+ if ((i&3) == 0 && i != 0)
+ printf(":");
printf("%02x", sb->set_uuid[i]);
}
printf("\n");
printf("MD_UPDATE_TIME=%llu\n",
__le64_to_cpu(sb->utime) & 0xFFFFFFFFFFULL);
printf("MD_DEV_UUID=");
- for (i=0; i<16; i++) {
- if ((i&3)==0 && i != 0) printf(":");
+ for (i = 0; i < 16; i++) {
+ if ((i&3) == 0 && i != 0)
+ printf(":");
printf("%02x", sb->device_uuid[i]);
}
printf("\n");
@@ -780,19 +840,21 @@ static void detail_super1(struct supertype *st, char *homehost)
int i;
int l = homehost ? strlen(homehost) : 0;
- printf(" Name : %.32s", sb->set_name);
- if (l > 0 && l < 32 &&
- sb->set_name[l] == ':' &&
+ printf(" Name : %.32s", sb->set_name);
+ if (l > 0 && l < 32 && sb->set_name[l] == ':' &&
strncmp(sb->set_name, homehost, l) == 0)
printf(" (local to host %s)", homehost);
- if (bms->nodes > 0 && (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET))
- printf("\n Cluster Name : %-64s", bms->cluster_name);
- printf("\n UUID : ");
- for (i=0; i<16; i++) {
- if ((i&3)==0 && i != 0) printf(":");
+ if (bms->nodes > 0 &&
+ (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET))
+ printf("\n Cluster Name : %-64s", bms->cluster_name);
+ printf("\n UUID : ");
+ for (i = 0; i < 16; i++) {
+ if ((i&3) == 0 && i != 0)
+ printf(":");
printf("%02x", sb->set_uuid[i]);
}
- printf("\n Events : %llu\n\n", (unsigned long long)__le64_to_cpu(sb->events));
+ printf("\n Events : %llu\n\n",
+ (unsigned long long)__le64_to_cpu(sb->events));
}
static void brief_detail_super1(struct supertype *st)
@@ -805,8 +867,9 @@ static void brief_detail_super1(struct supertype *st)
print_quoted(sb->set_name);
}
printf(" UUID=");
- for (i=0; i<16; i++) {
- if ((i&3)==0 && i != 0) printf(":");
+ for (i = 0; i < 16; i++) {
+ if ((i & 3) == 0 && i != 0)
+ printf(":");
printf("%02x", sb->set_uuid[i]);
}
}
@@ -817,9 +880,8 @@ static void export_detail_super1(struct supertype *st)
int i;
int len = 32;
- for (i=0; i<32; i++)
- if (sb->set_name[i] == '\n' ||
- sb->set_name[i] == '\0') {
+ for (i = 0; i < 32; i++)
+ if (sb->set_name[i] == '\n' || sb->set_name[i] == '\0') {
len = i;
break;
}
@@ -835,13 +897,12 @@ static int examine_badblocks_super1(struct supertype *st, int fd, char *devname)
__u64 *bbl, *bbp;
int i;
- if (!sb->bblog_size || __le16_to_cpu(sb->bblog_size) > 100
- || !sb->bblog_offset){
+ if (!sb->bblog_size || __le16_to_cpu(sb->bblog_size) > 100 ||
+ !sb->bblog_offset){
printf("No bad-blocks list configured on %s\n", devname);
return 0;
}
- if ((sb->feature_map & __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
- == 0) {
+ if ((sb->feature_map & __cpu_to_le32(MD_FEATURE_BAD_BLOCKS)) == 0) {
printf("Bad-blocks list is empty in %s\n", devname);
return 0;
}
@@ -883,15 +944,12 @@ static int examine_badblocks_super1(struct supertype *st, int fd, char *devname)
return 0;
}
-#endif
-
static int match_home1(struct supertype *st, char *homehost)
{
struct mdp_superblock_1 *sb = st->sb;
int l = homehost ? strlen(homehost) : 0;
- return (l > 0 && l < 32 &&
- sb->set_name[l] == ':' &&
+ return (l > 0 && l < 32 && sb->set_name[l] == ':' &&
strncmp(sb->set_name, homehost, l) == 0);
}
@@ -900,7 +958,7 @@ static void uuid_from_super1(struct supertype *st, int uuid[4])
struct mdp_superblock_1 *super = st->sb;
char *cuuid = (char*)uuid;
int i;
- for (i=0; i<16; i++)
+ for (i = 0; i < 16; i++)
cuuid[i] = super->set_uuid[i];
}
@@ -908,7 +966,8 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
{
struct mdp_superblock_1 *sb = st->sb;
struct bitmap_super_s *bsb = (void*)(((char*)sb)+MAX_SB_SIZE);
- struct misc_dev_info *misc = (void*)(((char*)sb)+MAX_SB_SIZE+BM_SUPER_SIZE);
+ struct misc_dev_info *misc =
+ (void*)(((char*)sb)+MAX_SB_SIZE+BM_SUPER_SIZE);
int working = 0;
unsigned int i;
unsigned int role;
@@ -928,15 +987,20 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
info->array.utime = __le64_to_cpu(sb->utime);
info->array.chunk_size = __le32_to_cpu(sb->chunksize)*512;
info->array.state =
- (__le64_to_cpu(sb->resync_offset) == MaxSector)
- ? 1 : 0;
- if (__le32_to_cpu(bsb->nodes) > 1)
- info->array.state |= (1 << MD_SB_CLUSTERED);
+ (__le64_to_cpu(sb->resync_offset) == MaxSector) ? 1 : 0;
+ super_offset = __le64_to_cpu(sb->super_offset);
info->data_offset = __le64_to_cpu(sb->data_offset);
info->component_size = __le64_to_cpu(sb->size);
- if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET))
+ if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET)) {
info->bitmap_offset = (int32_t)__le32_to_cpu(sb->bitmap_offset);
+ if (__le32_to_cpu(bsb->nodes) > 1)
+ info->array.state |= (1 << MD_SB_CLUSTERED);
+ } else if (md_feature_any_ppl_on(sb->feature_map)) {
+ info->ppl_offset = __le16_to_cpu(sb->ppl.offset);
+ info->ppl_size = __le16_to_cpu(sb->ppl.size);
+ info->ppl_sector = super_offset + info->ppl_offset;
+ }
info->disk.major = 0;
info->disk.minor = 0;
@@ -947,7 +1011,6 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
else
role = __le16_to_cpu(sb->dev_roles[__le32_to_cpu(sb->dev_number)]);
- super_offset = __le64_to_cpu(sb->super_offset);
if (info->array.level <= 0)
data_size = __le64_to_cpu(sb->data_size);
else
@@ -964,8 +1027,9 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
end = bboffset;
}
- if (super_offset + info->bitmap_offset < end)
- end = super_offset + info->bitmap_offset;
+ if (super_offset + info->bitmap_offset + info->ppl_offset < end)
+ end = super_offset + info->bitmap_offset +
+ info->ppl_offset;
if (info->data_offset + data_size < end)
info->space_after = end - data_size - info->data_offset;
@@ -981,6 +1045,12 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
bmend += size;
if (bmend > earliest)
earliest = bmend;
+ } else if (info->ppl_offset > 0) {
+ unsigned long long pplend;
+
+ pplend = info->ppl_offset + info->ppl_size;
+ if (pplend > earliest)
+ earliest = pplend;
}
if (sb->bblog_offset && sb->bblog_size) {
unsigned long long bbend = super_offset;
@@ -993,7 +1063,8 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
info->space_before = info->data_offset - earliest;
else
info->space_before = 0;
- info->space_after = misc->device_size - data_size - info->data_offset;
+ info->space_after = misc->device_size - data_size -
+ info->data_offset;
}
if (info->space_before == 0 && info->space_after == 0) {
/* It will look like we don't support data_offset changes,
@@ -1007,15 +1078,17 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
info->disk.raid_disk = -1;
switch(role) {
case MD_DISK_ROLE_SPARE:
- info->disk.state = 0; /* spare: not active, not sync, not faulty */
+ /* spare: not active, not sync, not faulty */
+ info->disk.state = 0;
break;
case MD_DISK_ROLE_FAULTY:
- info->disk.state = 1; /* faulty */
+ info->disk.state = (1 << MD_DISK_FAULTY); /* faulty */
break;
case MD_DISK_ROLE_JOURNAL:
info->disk.state = (1 << MD_DISK_JOURNAL);
info->disk.raid_disk = role;
- info->space_after = (misc->device_size - info->data_offset) % 8; /* journal uses all 4kB blocks*/
+ /* journal uses all 4kB blocks*/
+ info->space_after = (misc->device_size - info->data_offset) % 8;
break;
default:
info->disk.state = 6; /* active and in sync */
@@ -1074,8 +1147,20 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
}
info->array.working_disks = working;
- if (sb->feature_map & __le32_to_cpu(MD_FEATURE_JOURNAL))
+
+ if (sb->feature_map & __le32_to_cpu(MD_FEATURE_JOURNAL)) {
info->journal_device_required = 1;
+ info->consistency_policy = CONSISTENCY_POLICY_JOURNAL;
+ } else if (md_feature_any_ppl_on(sb->feature_map)) {
+ info->consistency_policy = CONSISTENCY_POLICY_PPL;
+ } else if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET)) {
+ info->consistency_policy = CONSISTENCY_POLICY_BITMAP;
+ } else if (info->array.level <= 0) {
+ info->consistency_policy = CONSISTENCY_POLICY_NONE;
+ } else {
+ info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+ }
+
info->journal_clean = 0;
}
@@ -1092,8 +1177,7 @@ static struct mdinfo *container_content1(struct supertype *st, char *subarray)
}
static int update_super1(struct supertype *st, struct mdinfo *info,
- char *update,
- char *devname, int verbose,
+ char *update, char *devname, int verbose,
int uuid_set, char *homehost)
{
/* NOTE: for 'assemble' and 'force' we need to return non-zero
@@ -1101,19 +1185,9 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
* ignored.
*/
int rv = 0;
- int lockid;
struct mdp_superblock_1 *sb = st->sb;
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
- if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready()) {
- rv = cluster_get_dlmlock(&lockid);
- if (rv) {
- pr_err("Cannot get dlmlock in %s return %d\n", __func__, rv);
- cluster_release_dlmlock(lockid);
- return rv;
- }
- }
-
if (strcmp(update, "homehost") == 0 &&
homehost) {
/* Note that 'homehost' is special as it is really
@@ -1141,7 +1215,9 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
* maybe need to mark it 'clean'.
*/
switch(__le32_to_cpu(sb->level)) {
- case 5: case 4: case 6:
+ case 4:
+ case 5:
+ case 6:
/* need to force clean */
if (sb->resync_offset != MaxSector)
rv = 1;
@@ -1161,17 +1237,23 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
rv = 1;
}
if (info->reshape_active &&
- sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE) &&
+ sb->feature_map &
+ __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE) &&
info->delta_disks >= 0 &&
- info->reshape_progress < __le64_to_cpu(sb->reshape_position)) {
- sb->reshape_position = __cpu_to_le64(info->reshape_progress);
+ info->reshape_progress <
+ __le64_to_cpu(sb->reshape_position)) {
+ sb->reshape_position =
+ __cpu_to_le64(info->reshape_progress);
rv = 1;
}
if (info->reshape_active &&
- sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE) &&
+ sb->feature_map &
+ __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE) &&
info->delta_disks < 0 &&
- info->reshape_progress > __le64_to_cpu(sb->reshape_position)) {
- sb->reshape_position = __cpu_to_le64(info->reshape_progress);
+ info->reshape_progress >
+ __le64_to_cpu(sb->reshape_position)) {
+ sb->reshape_position =
+ __cpu_to_le64(info->reshape_progress);
rv = 1;
}
} else if (strcmp(update, "linear-grow-new") == 0) {
@@ -1179,18 +1261,19 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
int fd;
unsigned int max = __le32_to_cpu(sb->max_dev);
- for (i=0 ; i < max ; i++)
- if (__le16_to_cpu(sb->dev_roles[i]) >= MD_DISK_ROLE_FAULTY)
+ for (i = 0; i < max; i++)
+ if (__le16_to_cpu(sb->dev_roles[i]) >=
+ MD_DISK_ROLE_FAULTY)
break;
sb->dev_number = __cpu_to_le32(i);
info->disk.number = i;
- if (max >= __le32_to_cpu(sb->max_dev))
+ if (i >= max) {
sb->max_dev = __cpu_to_le32(max+1);
+ }
random_uuid(sb->device_uuid);
- sb->dev_roles[i] =
- __cpu_to_le16(info->disk.raid_disk);
+ sb->dev_roles[i] = __cpu_to_le16(info->disk.raid_disk);
fd = open(devname, O_RDONLY);
if (fd >= 0) {
@@ -1211,7 +1294,11 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
}
}
} else if (strcmp(update, "linear-grow-update") == 0) {
+ int max = __le32_to_cpu(sb->max_dev);
sb->raid_disks = __cpu_to_le32(info->array.raid_disks);
+ if (info->array.raid_disks > max) {
+ sb->max_dev = __cpu_to_le32(max+1);
+ }
sb->dev_roles[info->disk.number] =
__cpu_to_le16(info->disk.raid_disk);
} else if (strcmp(update, "resync") == 0) {
@@ -1234,21 +1321,25 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
long bm_sectors = 0;
long space;
-#ifndef MDASSEMBLE
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
bitmap_offset = (long)__le32_to_cpu(sb->bitmap_offset);
bm_sectors = calc_bitmap_size(bms, 4096) >> 9;
+ } else if (md_feature_any_ppl_on(sb->feature_map)) {
+ bitmap_offset = (long)__le16_to_cpu(sb->ppl.offset);
+ bm_sectors = (long)__le16_to_cpu(sb->ppl.size);
}
-#endif
+
if (sb_offset < data_offset) {
- /* 1.1 or 1.2. Put bbl after bitmap leaving at least 32K
+ /*
+ * 1.1 or 1.2. Put bbl after bitmap leaving
+ * at least 32K
*/
long bb_offset;
bb_offset = sb_offset + 8;
if (bm_sectors && bitmap_offset > 0)
bb_offset = bitmap_offset + bm_sectors;
- while (bb_offset < (long)sb_offset + 8 + 32*2
- && bb_offset + 8+8 <= (long)data_offset)
+ while (bb_offset < (long)sb_offset + 8 + 32*2 &&
+ bb_offset + 8+8 <= (long)data_offset)
/* too close to bitmap, and room to grow */
bb_offset += 8;
if (bb_offset + 8 <= (long)data_offset) {
@@ -1280,6 +1371,59 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
sb->bblog_size = 0;
sb->bblog_shift = 0;
sb->bblog_offset = 0;
+ } else if (strcmp(update, "ppl") == 0) {
+ unsigned long long sb_offset = __le64_to_cpu(sb->super_offset);
+ unsigned long long data_offset = __le64_to_cpu(sb->data_offset);
+ unsigned long long data_size = __le64_to_cpu(sb->data_size);
+ long bb_offset = __le32_to_cpu(sb->bblog_offset);
+ int space;
+ int offset;
+
+ if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
+ pr_err("Cannot add PPL to array with bitmap\n");
+ return -2;
+ }
+
+ if (sb->feature_map & __cpu_to_le32(MD_FEATURE_JOURNAL)) {
+ pr_err("Cannot add PPL to array with journal\n");
+ return -2;
+ }
+
+ if (sb_offset < data_offset) {
+ if (bb_offset)
+ space = bb_offset - 8;
+ else
+ space = data_offset - sb_offset - 8;
+ offset = 8;
+ } else {
+ offset = -(sb_offset - data_offset - data_size);
+ if (offset < INT16_MIN)
+ offset = INT16_MIN;
+ space = -(offset - bb_offset);
+ }
+
+ if (space < (PPL_HEADER_SIZE >> 9) + 8) {
+ pr_err("Not enough space to add ppl\n");
+ return -2;
+ }
+
+ if (space >= (MULTIPLE_PPL_AREA_SIZE_SUPER1 >> 9)) {
+ space = (MULTIPLE_PPL_AREA_SIZE_SUPER1 >> 9);
+ } else {
+ int optimal_space = choose_ppl_space(
+ __le32_to_cpu(sb->chunksize));
+ if (space > optimal_space)
+ space = optimal_space;
+ if (space > UINT16_MAX)
+ space = UINT16_MAX;
+ }
+
+ sb->ppl.offset = __cpu_to_le16(offset);
+ sb->ppl.size = __cpu_to_le16(space);
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_PPL);
+ } else if (strcmp(update, "no-ppl") == 0) {
+ sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_PPL |
+ MD_FEATURE_MUTLIPLE_PPLS);
} else if (strcmp(update, "name") == 0) {
if (info->name[0] == 0)
sprintf(info->name, "%d", info->array.md_minor);
@@ -1293,8 +1437,8 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
} else
strncpy(sb->set_name, info->name, sizeof(sb->set_name));
} else if (strcmp(update, "devicesize") == 0 &&
- __le64_to_cpu(sb->super_offset) <
- __le64_to_cpu(sb->data_offset)) {
+ __le64_to_cpu(sb->super_offset) <
+ __le64_to_cpu(sb->data_offset)) {
/* set data_size to device size less data_offset */
struct misc_dev_info *misc = (struct misc_dev_info*)
(st->sb + MAX_SB_SIZE + BM_SUPER_SIZE);
@@ -1302,7 +1446,8 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
misc->device_size - __le64_to_cpu(sb->data_offset));
} else if (strncmp(update, "revert-reshape", 14) == 0) {
rv = -2;
- if (!(sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)))
+ if (!(sb->feature_map &
+ __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)))
pr_err("No active reshape to revert on %s\n",
devname);
else {
@@ -1343,9 +1488,11 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
*/
if (__le32_to_cpu(sb->level) >= 4 &&
__le32_to_cpu(sb->level) <= 6) {
- reshape_sectors = __le64_to_cpu(sb->reshape_position);
+ reshape_sectors =
+ __le64_to_cpu(sb->reshape_position);
reshape_chunk = __le32_to_cpu(sb->new_chunk);
- reshape_chunk *= __le32_to_cpu(sb->raid_disks) - __le32_to_cpu(sb->delta_disks) -
+ reshape_chunk *= __le32_to_cpu(sb->raid_disks) -
+ __le32_to_cpu(sb->delta_disks) -
(__le32_to_cpu(sb->level)==6 ? 2 : 1);
if (reshape_sectors % reshape_chunk) {
pr_err("Reshape position is not suitably aligned.\n");
@@ -1353,8 +1500,9 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
return -2;
}
}
- sb->raid_disks = __cpu_to_le32(__le32_to_cpu(sb->raid_disks) -
- __le32_to_cpu(sb->delta_disks));
+ sb->raid_disks =
+ __cpu_to_le32(__le32_to_cpu(sb->raid_disks) -
+ __le32_to_cpu(sb->delta_disks));
if (sb->delta_disks == 0)
sb->feature_map ^= __cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
else
@@ -1368,19 +1516,21 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
sb->new_chunk = sb->chunksize;
sb->chunksize = temp;
- if (sb->feature_map & __cpu_to_le32(MD_FEATURE_NEW_OFFSET)) {
- long offset_delta = (int32_t)__le32_to_cpu(sb->new_offset);
+ if (sb->feature_map &
+ __cpu_to_le32(MD_FEATURE_NEW_OFFSET)) {
+ long offset_delta =
+ (int32_t)__le32_to_cpu(sb->new_offset);
sb->data_offset = __cpu_to_le64(__le64_to_cpu(sb->data_offset) + offset_delta);
sb->new_offset = __cpu_to_le32(-offset_delta);
sb->data_size = __cpu_to_le64(__le64_to_cpu(sb->data_size) - offset_delta);
}
done:;
}
- } else if (strcmp(update, "_reshape_progress")==0)
+ } else if (strcmp(update, "_reshape_progress") == 0)
sb->reshape_position = __cpu_to_le64(info->reshape_progress);
- else if (strcmp(update, "writemostly")==0)
+ else if (strcmp(update, "writemostly") == 0)
sb->devflags |= WriteMostly1;
- else if (strcmp(update, "readwrite")==0)
+ else if (strcmp(update, "readwrite") == 0)
sb->devflags &= ~WriteMostly1;
else if (strcmp(update, "failfast") == 0)
sb->devflags |= FailFast1;
@@ -1390,14 +1540,12 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
rv = -1;
sb->sb_csum = calc_sb_1_csum(sb);
- if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready())
- cluster_release_dlmlock(lockid);
return rv;
}
static int init_super1(struct supertype *st, mdu_array_info_t *info,
- unsigned long long size, char *name, char *homehost,
+ struct shape *s, char *name, char *homehost,
int *uuid, unsigned long long data_offset)
{
struct mdp_superblock_1 *sb;
@@ -1450,7 +1598,7 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
sb->ctime = __cpu_to_le64((unsigned long long)time(0));
sb->level = __cpu_to_le32(info->level);
sb->layout = __cpu_to_le32(info->layout);
- sb->size = __cpu_to_le64(size*2ULL);
+ sb->size = __cpu_to_le64(s->size*2ULL);
sb->chunksize = __cpu_to_le32(info->chunk_size>>9);
sb->raid_disks = __cpu_to_le32(info->raid_disks);
@@ -1465,11 +1613,17 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
sb->resync_offset = MaxSector;
else
sb->resync_offset = 0;
- sbsize = sizeof(struct mdp_superblock_1) + 2 * (info->raid_disks + spares);
+ sbsize = sizeof(struct mdp_superblock_1) +
+ 2 * (info->raid_disks + spares);
sbsize = ROUND_UP(sbsize, 512);
- sb->max_dev = __cpu_to_le32((sbsize - sizeof(struct mdp_superblock_1)) / 2);
+ sb->max_dev =
+ __cpu_to_le32((sbsize - sizeof(struct mdp_superblock_1)) / 2);
+
+ memset(sb->dev_roles, 0xff,
+ MAX_SB_SIZE - sizeof(struct mdp_superblock_1));
- memset(sb->dev_roles, 0xff, MAX_SB_SIZE - sizeof(struct mdp_superblock_1));
+ if (s->consistency_policy == CONSISTENCY_POLICY_PPL)
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_PPL);
return 1;
}
@@ -1481,7 +1635,7 @@ struct devinfo {
mdu_disk_info_t disk;
struct devinfo *next;
};
-#ifndef MDASSEMBLE
+
/* Add a device to the superblock being created */
static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
int fd, char *devname, unsigned long long data_offset)
@@ -1489,23 +1643,16 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
struct mdp_superblock_1 *sb = st->sb;
__u16 *rp = sb->dev_roles + dk->number;
struct devinfo *di, **dip;
- bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
- int rv, lockid;
-
- if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready()) {
- rv = cluster_get_dlmlock(&lockid);
- if (rv) {
- pr_err("Cannot get dlmlock in %s return %d\n", __func__, rv);
- cluster_release_dlmlock(lockid);
- return rv;
- }
- }
+ int dk_state;
- if ((dk->state & 6) == 6) /* active, sync */
+ dk_state = dk->state & ~(1<<MD_DISK_FAILFAST);
+ if ((dk_state & (1<<MD_DISK_ACTIVE)) &&
+ (dk_state & (1<<MD_DISK_SYNC)))/* active, sync */
*rp = __cpu_to_le16(dk->raid_disk);
- else if (dk->state & (1<<MD_DISK_JOURNAL))
+ else if (dk_state & (1<<MD_DISK_JOURNAL))
*rp = MD_DISK_ROLE_JOURNAL;
- else if ((dk->state & ~2) == 0) /* active or idle -> spare */
+ else if ((dk_state & ~(1<<MD_DISK_ACTIVE)) == 0)
+ /* active or idle -> spare */
*rp = MD_DISK_ROLE_SPARE;
else
*rp = MD_DISK_ROLE_FAULTY;
@@ -1529,12 +1676,8 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
di->next = NULL;
*dip = di;
- if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready())
- cluster_release_dlmlock(lockid);
-
return 0;
}
-#endif
static int locate_bitmap1(struct supertype *st, int fd, int node_num);
@@ -1545,17 +1688,6 @@ static int store_super1(struct supertype *st, int fd)
struct align_fd afd;
int sbsize;
unsigned long long dsize;
- bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
- int rv, lockid;
-
- if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready()) {
- rv = cluster_get_dlmlock(&lockid);
- if (rv) {
- pr_err("Cannot get dlmlock in %s return %d\n", __func__, rv);
- cluster_release_dlmlock(lockid);
- return rv;
- }
- }
if (!get_dev_size(fd, NULL, &dsize))
return 1;
@@ -1616,8 +1748,6 @@ static int store_super1(struct supertype *st, int fd)
}
}
fsync(fd);
- if (bms->version == BITMAP_MAJOR_CLUSTERED && dlm_funs_ready())
- cluster_release_dlmlock(lockid);
return 0;
}
@@ -1642,10 +1772,52 @@ static unsigned long choose_bm_space(unsigned long devsize)
static void free_super1(struct supertype *st);
-#define META_BLOCK_SIZE 4096
__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len);
-#ifndef MDASSEMBLE
+static int write_init_ppl1(struct supertype *st, struct mdinfo *info, int fd)
+{
+ struct mdp_superblock_1 *sb = st->sb;
+ void *buf;
+ struct ppl_header *ppl_hdr;
+ int ret;
+
+ /* first clear entire ppl space */
+ ret = zero_disk_range(fd, info->ppl_sector, info->ppl_size);
+ if (ret)
+ return ret;
+
+ ret = posix_memalign(&buf, 4096, PPL_HEADER_SIZE);
+ if (ret) {
+ pr_err("Failed to allocate PPL header buffer\n");
+ return ret;
+ }
+
+ memset(buf, 0, PPL_HEADER_SIZE);
+ ppl_hdr = buf;
+ memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED);
+ ppl_hdr->signature = __cpu_to_le32(~crc32c_le(~0, sb->set_uuid,
+ sizeof(sb->set_uuid)));
+ ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
+
+ if (lseek64(fd, info->ppl_sector * 512, SEEK_SET) < 0) {
+ ret = errno;
+ perror("Failed to seek to PPL header location");
+ }
+
+ if (!ret && write(fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
+ ret = errno;
+ perror("Write PPL header failed");
+ }
+
+ if (!ret)
+ fsync(fd);
+
+ free(buf);
+ return ret;
+}
+
+#define META_BLOCK_SIZE 4096
+
static int write_empty_r5l_meta_block(struct supertype *st, int fd)
{
struct r5l_meta_block *mb;
@@ -1672,7 +1844,7 @@ static int write_empty_r5l_meta_block(struct supertype *st, int fd)
crc = crc32c_le(crc, (void *)mb, META_BLOCK_SIZE);
mb->checksum = crc;
- if (lseek64(fd, (sb->data_offset) * 512, 0) < 0LL) {
+ if (lseek64(fd, __le64_to_cpu(sb->data_offset) * 512, 0) < 0LL) {
pr_err("cannot seek to offset of the meta block\n");
goto fail_to_write;
}
@@ -1705,7 +1877,7 @@ static int write_init_super1(struct supertype *st)
for (di = st->info; di; di = di->next) {
if (di->disk.state & (1 << MD_DISK_JOURNAL))
- sb->feature_map |= MD_FEATURE_JOURNAL;
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_JOURNAL);
}
for (di = st->info; di; di = di->next) {
@@ -1780,6 +1952,14 @@ static int write_init_super1(struct supertype *st)
(((char *)sb) + MAX_SB_SIZE);
bm_space = calc_bitmap_size(bms, 4096) >> 9;
bm_offset = (long)__le32_to_cpu(sb->bitmap_offset);
+ } else if (md_feature_any_ppl_on(sb->feature_map)) {
+ bm_space = MULTIPLE_PPL_AREA_SIZE_SUPER1 >> 9;
+ if (st->minor_version == 0)
+ bm_offset = -bm_space - 8;
+ else
+ bm_offset = 8;
+ sb->ppl.offset = __cpu_to_le16(bm_offset);
+ sb->ppl.size = __cpu_to_le16(bm_space);
} else {
bm_space = choose_bm_space(array_size);
bm_offset = 8;
@@ -1836,7 +2016,10 @@ static int write_init_super1(struct supertype *st)
rv = -EINVAL;
goto out;
}
- /* Disable badblock log on clusters, or when explicitly requested */
+ /*
+ * Disable badblock log on clusters, or when
+ * explicitly requested
+ */
if (st->nodes > 0 || conf_get_create_info()->bblist == 0) {
sb->bblog_size = 0;
sb->bblog_offset = 0;
@@ -1851,8 +2034,18 @@ static int write_init_super1(struct supertype *st)
goto error_out;
}
- if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
+ if (rv == 0 &&
+ (__le32_to_cpu(sb->feature_map) &
+ MD_FEATURE_BITMAP_OFFSET)) {
rv = st->ss->write_bitmap(st, di->fd, NodeNumUpdate);
+ } else if (rv == 0 &&
+ md_feature_any_ppl_on(sb->feature_map)) {
+ struct mdinfo info;
+
+ st->ss->getinfo_super(st, &info, NULL);
+ rv = st->ss->write_init_ppl(st, &info, di->fd);
+ }
+
close(di->fd);
di->fd = -1;
if (rv)
@@ -1860,12 +2053,10 @@ static int write_init_super1(struct supertype *st)
}
error_out:
if (rv)
- pr_err("Failed to write metadata to %s\n",
- di->devname);
+ pr_err("Failed to write metadata to %s\n", di->devname);
out:
return rv;
}
-#endif
static int compare_super1(struct supertype *st, struct supertype *tst)
{
@@ -1927,7 +2118,8 @@ static int load_super1(struct supertype *st, int fd, char *devname)
/* guess... choose latest ctime */
memset(&tst, 0, sizeof(tst));
tst.ss = &super1;
- for (tst.minor_version = 0; tst.minor_version <= 2 ; tst.minor_version++) {
+ for (tst.minor_version = 0; tst.minor_version <= 2;
+ tst.minor_version++) {
switch(load_super1(&tst, fd, devname)) {
case 0: super = tst.sb;
if (bestvers == -1 ||
@@ -2014,7 +2206,8 @@ static int load_super1(struct supertype *st, int fd, char *devname)
if (__le32_to_cpu(super->magic) != MD_SB_MAGIC) {
if (devname)
pr_err("No super block found on %s (Expected magic %08x, got %08x)\n",
- devname, MD_SB_MAGIC, __le32_to_cpu(super->magic));
+ devname, MD_SB_MAGIC,
+ __le32_to_cpu(super->magic));
free(super);
return 2;
}
@@ -2037,7 +2230,8 @@ static int load_super1(struct supertype *st, int fd, char *devname)
bsb = (struct bitmap_super_s *)(((char*)super)+MAX_SB_SIZE);
- misc = (struct misc_dev_info*) (((char*)super)+MAX_SB_SIZE+BM_SUPER_SIZE);
+ misc = (struct misc_dev_info*)
+ (((char*)super)+MAX_SB_SIZE+BM_SUPER_SIZE);
misc->device_size = dsize;
if (st->data_offset == INVALID_SECTORS)
st->data_offset = __le64_to_cpu(super->data_offset);
@@ -2077,13 +2271,11 @@ static struct supertype *match_metadata_desc1(char *arg)
/* leading zeros can be safely ignored. --detail generates them. */
while (*arg == '0')
arg++;
- if (strcmp(arg, "1.0") == 0 ||
- strcmp(arg, "1.00") == 0) {
+ if (strcmp(arg, "1.0") == 0 || strcmp(arg, "1.00") == 0) {
st->minor_version = 0;
return st;
}
- if (strcmp(arg, "1.1") == 0 ||
- strcmp(arg, "1.01") == 0
+ if (strcmp(arg, "1.1") == 0 || strcmp(arg, "1.01") == 0
) {
st->minor_version = 1;
return st;
@@ -2096,8 +2288,7 @@ static struct supertype *match_metadata_desc1(char *arg)
st->minor_version = 2;
return st;
}
- if (strcmp(arg, "1") == 0 ||
- strcmp(arg, "default") == 0) {
+ if (strcmp(arg, "1") == 0 || strcmp(arg, "default") == 0) {
st->minor_version = -1;
return st;
}
@@ -2119,14 +2310,15 @@ static __u64 avail_size1(struct supertype *st, __u64 devsize,
if (devsize < 24)
return 0;
-#ifndef MDASSEMBLE
- if (__le32_to_cpu(super->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
+ if (__le32_to_cpu(super->feature_map) & MD_FEATURE_BITMAP_OFFSET) {
/* hot-add. allow for actual size of bitmap */
struct bitmap_super_s *bsb;
bsb = (struct bitmap_super_s *)(((char*)super)+MAX_SB_SIZE);
bmspace = calc_bitmap_size(bsb, 4096) >> 9;
+ } else if (md_feature_any_ppl_on(super->feature_map)) {
+ bmspace = __le16_to_cpu(super->ppl.size);
}
-#endif
+
/* Allow space for bad block log */
if (super->bblog_size)
bbspace = __le16_to_cpu(super->bblog_size);
@@ -2174,7 +2366,7 @@ add_internal_bitmap1(struct supertype *st,
/*
* If not may_change, then this is a 'Grow' without sysfs support for
* bitmaps, and the bitmap must fit after the superblock at 1K offset.
- * If may_change, then this is create or a Grow with sysfs syupport,
+ * If may_change, then this is create or a Grow with sysfs support,
* and we can put the bitmap wherever we like.
*
* size is in sectors, chunk is in bytes !!!
@@ -2194,20 +2386,26 @@ add_internal_bitmap1(struct supertype *st,
int uuid[4];
if (__le64_to_cpu(sb->data_size) == 0)
- /* Must be creating the array, else data_size would be non-zero */
+ /*
+ * Must be creating the array, else data_size
+ * would be non-zero
+ */
creating = 1;
switch(st->minor_version) {
case 0:
- /* either 3K after the superblock (when hot-add),
+ /*
+ * either 3K after the superblock (when hot-add),
* or some amount of space before.
*/
if (creating) {
- /* We are creating array, so we *know* how much room has
+ /*
+ * We are creating array, so we *know* how much room has
* been left.
*/
offset = 0;
bbl_size = 8;
- room = choose_bm_space(__le64_to_cpu(sb->size)) + bbl_size;
+ room =
+ choose_bm_space(__le64_to_cpu(sb->size)) + bbl_size;
} else {
room = __le64_to_cpu(sb->super_offset)
- __le64_to_cpu(sb->data_offset)
@@ -2219,8 +2417,8 @@ add_internal_bitmap1(struct supertype *st,
if (bbl_size < -bbl_offset)
bbl_size = -bbl_offset;
- if (!may_change || (room < 3*2 &&
- __le32_to_cpu(sb->max_dev) <= 384)) {
+ if (!may_change ||
+ (room < 3*2 && __le32_to_cpu(sb->max_dev) <= 384)) {
room = 3*2;
offset = 1*2;
bbl_size = 0;
@@ -2234,13 +2432,15 @@ add_internal_bitmap1(struct supertype *st,
if (creating) {
offset = 4*2;
bbl_size = 8;
- room = choose_bm_space(__le64_to_cpu(sb->size)) + bbl_size;
+ room =
+ choose_bm_space(__le64_to_cpu(sb->size)) + bbl_size;
} else {
room = __le64_to_cpu(sb->data_offset)
- __le64_to_cpu(sb->super_offset);
bbl_size = __le16_to_cpu(sb->bblog_size);
if (bbl_size)
- room = __le32_to_cpu(sb->bblog_offset) + bbl_size;
+ room =
+ __le32_to_cpu(sb->bblog_offset) + bbl_size;
else
bbl_size = 8;
@@ -2299,8 +2499,8 @@ add_internal_bitmap1(struct supertype *st,
sb->bitmap_offset = (int32_t)__cpu_to_le32(offset);
- sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map)
- | MD_FEATURE_BITMAP_OFFSET);
+ sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map) |
+ MD_FEATURE_BITMAP_OFFSET);
memset(bms, 0, sizeof(*bms));
bms->magic = __cpu_to_le32(BITMAP_MAGIC);
bms->version = __cpu_to_le32(major);
@@ -2312,8 +2512,8 @@ add_internal_bitmap1(struct supertype *st,
bms->write_behind = __cpu_to_le32(write_behind);
bms->nodes = __cpu_to_le32(st->nodes);
if (st->nodes)
- sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map)
- | MD_FEATURE_BITMAP_VERSIONED);
+ sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map) |
+ MD_FEATURE_BITMAP_VERSIONED);
if (st->cluster_name) {
len = sizeof(bms->cluster_name);
strncpy((char *)bms->cluster_name, st->cluster_name, len);
@@ -2374,38 +2574,43 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update
break;
case NodeNumUpdate:
/* cluster md only supports superblock 1.2 now */
- if (st->minor_version != 2 && bms->version == BITMAP_MAJOR_CLUSTERED) {
+ if (st->minor_version != 2 &&
+ bms->version == BITMAP_MAJOR_CLUSTERED) {
pr_err("Warning: cluster md only works with superblock 1.2\n");
return -EINVAL;
}
if (bms->version == BITMAP_MAJOR_CLUSTERED) {
- if (st->nodes == 1) {
- /* the parameter for nodes is not valid */
- pr_err("Warning: cluster-md at least needs two nodes\n");
- return -EINVAL;
- } else if (st->nodes == 0)
- /* --nodes is not specified */
- break;
- else if (__cpu_to_le32(st->nodes) < bms->nodes) {
- /* Since the nodes num is not increased, no need to check the space
- * is enough or not, just update bms->nodes */
+ if (__cpu_to_le32(st->nodes) < bms->nodes) {
+ /*
+ * Since the number of nodes is not increased,
+ * there is no need to check whether the space
+ * is sufficient; just update bms->nodes
+ */
bms->nodes = __cpu_to_le32(st->nodes);
break;
}
} else {
- /* no need to change bms->nodes for other bitmap types */
+ /*
+ * no need to change bms->nodes for other
+ * bitmap types
+ */
if (st->nodes)
pr_err("Warning: --nodes option is only suitable for clustered bitmap\n");
break;
}
- /* Each node has an independent bitmap, it is necessary to calculate the
- * space is enough or not, first get how many bytes for the total bitmap */
+ /*
+ * Each node has an independent bitmap, so it is necessary to
+ * check whether the space is sufficient; first get the number
+ * of bytes for the total bitmap
+ */
bm_space_per_node = calc_bitmap_size(bms, 4096);
- total_bm_space = 512 * (__le64_to_cpu(sb->data_offset) - __le64_to_cpu(sb->super_offset));
- total_bm_space = total_bm_space - 4096; /* leave another 4k for superblock */
+ total_bm_space = 512 * (__le64_to_cpu(sb->data_offset) -
+ __le64_to_cpu(sb->super_offset));
+ /* leave another 4k for superblock */
+ total_bm_space = total_bm_space - 4096;
if (bm_space_per_node * st->nodes > total_bm_space) {
pr_err("Warning: The max num of nodes can't exceed %llu\n",
@@ -2486,13 +2691,12 @@ static void free_super1(struct supertype *st)
st->sb = NULL;
}
-#ifndef MDASSEMBLE
static int validate_geometry1(struct supertype *st, int level,
int layout, int raiddisks,
int *chunk, unsigned long long size,
unsigned long long data_offset,
char *subdev, unsigned long long *freesize,
- int verbose)
+ int consistency_policy, int verbose)
{
unsigned long long ldsize, devsize;
int bmspace;
@@ -2534,8 +2738,11 @@ static int validate_geometry1(struct supertype *st, int level,
return 0;
}
- /* creating: allow suitable space for bitmap */
- bmspace = choose_bm_space(devsize);
+ /* creating: allow suitable space for bitmap or PPL */
+ if (consistency_policy == CONSISTENCY_POLICY_PPL)
+ bmspace = MULTIPLE_PPL_AREA_SIZE_SUPER1 >> 9;
+ else
+ bmspace = choose_bm_space(devsize);
if (data_offset == INVALID_SECTORS)
data_offset = st->data_offset;
@@ -2561,8 +2768,7 @@ static int validate_geometry1(struct supertype *st, int level,
headroom >>= 1;
data_offset = 12*2 + bmspace + headroom;
#define ONE_MEG (2*1024)
- if (data_offset > ONE_MEG)
- data_offset = (data_offset / ONE_MEG) * ONE_MEG;
+ data_offset = ROUND_UP(data_offset, ONE_MEG);
break;
}
if (st->data_offset == INVALID_SECTORS)
@@ -2570,7 +2776,7 @@ static int validate_geometry1(struct supertype *st, int level,
switch(st->minor_version) {
case 0: /* metadata at end. Round down and subtract space to reserve */
devsize = (devsize & ~(4ULL*2-1));
- /* space for metadata, bblog, bitmap */
+ /* space for metadata, bblog, bitmap/ppl */
devsize -= 8*2 + 8 + bmspace;
break;
case 1:
@@ -2581,7 +2787,6 @@ static int validate_geometry1(struct supertype *st, int level,
*freesize = devsize;
return 1;
}
-#endif /* MDASSEMBLE */
void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0)
{
@@ -2634,7 +2839,6 @@ void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0
}
struct superswitch super1 = {
-#ifndef MDASSEMBLE
.examine_super = examine_super1,
.brief_examine_super = brief_examine_super1,
.export_examine_super = export_examine_super1,
@@ -2646,7 +2850,7 @@ struct superswitch super1 = {
.add_to_super = add_to_super1,
.examine_badblocks = examine_badblocks_super1,
.copy_metadata = copy_metadata1,
-#endif
+ .write_init_ppl = write_init_ppl1,
.match_home = match_home1,
.uuid_from_super = uuid_from_super1,
.getinfo_super = getinfo_super1,
diff --git a/sysfs.c b/sysfs.c
index b0657a0..df6fdda 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -78,24 +78,36 @@ int sysfs_open(char *devnm, char *devname, char *attr)
return fd;
}
-void sysfs_init_dev(struct mdinfo *mdi, unsigned long devid)
+void sysfs_init_dev(struct mdinfo *mdi, dev_t devid)
{
snprintf(mdi->sys_name,
sizeof(mdi->sys_name), "dev-%s", devid2kname(devid));
}
-void sysfs_init(struct mdinfo *mdi, int fd, char *devnm)
+int sysfs_init(struct mdinfo *mdi, int fd, char *devnm)
{
+ struct stat stb;
+ char fname[MAX_SYSFS_PATH_LEN];
+ int retval = -ENODEV;
+
mdi->sys_name[0] = 0;
- if (fd >= 0) {
- mdu_version_t vers;
- if (ioctl(fd, RAID_VERSION, &vers) != 0)
- return;
+ if (fd >= 0)
devnm = fd2devnm(fd);
- }
+
if (devnm == NULL)
- return;
+ goto out;
+
+ snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md", devnm);
+
+ if (stat(fname, &stb))
+ goto out;
+ if (!S_ISDIR(stb.st_mode))
+ goto out;
strcpy(mdi->sys_name, devnm);
+
+ retval = 0;
+out:
+ return retval;
}
struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
@@ -110,8 +122,7 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
struct dirent *de;
sra = xcalloc(1, sizeof(*sra));
- sysfs_init(sra, fd, devnm);
- if (sra->sys_name[0] == 0) {
+ if (sysfs_init(sra, fd, devnm)) {
free(sra);
return NULL;
}
@@ -151,18 +162,12 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
goto abort;
sra->array.layout = strtoul(buf, NULL, 0);
}
- if (options & GET_DISKS) {
+ if (options & (GET_DISKS|GET_STATE)) {
strcpy(base, "raid_disks");
if (load_sys(fname, buf, sizeof(buf)))
goto abort;
sra->array.raid_disks = strtoul(buf, NULL, 0);
}
- if (options & GET_DEGRADED) {
- strcpy(base, "degraded");
- if (load_sys(fname, buf, sizeof(buf)))
- goto abort;
- sra->array.failed_disks = strtoul(buf, NULL, 0);
- }
if (options & GET_COMPONENT) {
strcpy(base, "component_size");
if (load_sys(fname, buf, sizeof(buf)))
@@ -236,11 +241,19 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
if (options & GET_ARRAY_STATE) {
strcpy(base, "array_state");
- if (load_sys(fname, sra->sysfs_array_state,
- sizeof(sra->sysfs_array_state)))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
- } else
- sra->sysfs_array_state[0] = 0;
+ sra->array_state = map_name(sysfs_array_states, buf);
+ }
+
+ if (options & GET_CONSISTENCY_POLICY) {
+ strcpy(base, "consistency_policy");
+ if (load_sys(fname, buf, sizeof(buf)))
+ sra->consistency_policy = CONSISTENCY_POLICY_UNKNOWN;
+ else
+ sra->consistency_policy = map_name(consistency_policies,
+ buf);
+ }
if (! (options & GET_DEVS))
return sra;
@@ -251,6 +264,9 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
if (!dir)
goto abort;
sra->array.spare_disks = 0;
+ sra->array.active_disks = 0;
+ sra->array.failed_disks = 0;
+ sra->array.working_disks = 0;
devp = &sra->devs;
sra->devs = NULL;
@@ -291,6 +307,7 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
dev->disk.raid_disk = strtoul(buf, &ep, 10);
if (*ep) dev->disk.raid_disk = -1;
+ sra->array.nr_disks++;
strcpy(dbase, "block/dev");
if (load_sys(fname, buf, sizeof(buf))) {
/* assume this is a stale reference to a hot
@@ -299,7 +316,6 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
free(dev);
continue;
}
- sra->array.nr_disks++;
sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
/* special case check for block devices that can go 'offline' */
@@ -337,12 +353,17 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
strcpy(dbase, "state");
if (load_sys(fname, buf, sizeof(buf)))
goto abort;
- if (strstr(buf, "in_sync"))
- dev->disk.state |= (1<<MD_DISK_SYNC);
if (strstr(buf, "faulty"))
dev->disk.state |= (1<<MD_DISK_FAULTY);
- if (dev->disk.state == 0)
- sra->array.spare_disks++;
+ else {
+ sra->array.working_disks++;
+ if (strstr(buf, "in_sync")) {
+ dev->disk.state |= (1<<MD_DISK_SYNC);
+ sra->array.active_disks++;
+ }
+ if (dev->disk.state == 0)
+ sra->array.spare_disks++;
+ }
}
if (options & GET_ERROR) {
strcpy(buf, "errors");
@@ -351,6 +372,11 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
dev->errors = strtoul(buf, NULL, 0);
}
}
+
+ if ((options & GET_STATE) && sra->array.raid_disks)
+ sra->array.failed_disks = sra->array.raid_disks -
+ sra->array.active_disks - sra->array.spare_disks;
+
closedir(dir);
return sra;
@@ -678,6 +704,16 @@ int sysfs_set_array(struct mdinfo *info, int vers)
* once the reshape completes.
*/
}
+
+ if (info->consistency_policy == CONSISTENCY_POLICY_PPL) {
+ if (sysfs_set_str(info, NULL, "consistency_policy",
+ map_num(consistency_policies,
+ info->consistency_policy))) {
+ pr_err("This kernel does not support PPL. Falling back to consistency-policy=resync.\n");
+ info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+ }
+ }
+
return rv;
}
@@ -709,6 +745,10 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
if (sra->array.level != LEVEL_CONTAINER) {
+ if (sra->consistency_policy == CONSISTENCY_POLICY_PPL) {
+ rv |= sysfs_set_num(sra, sd, "ppl_sector", sd->ppl_sector);
+ rv |= sysfs_set_num(sra, sd, "ppl_size", sd->ppl_size);
+ }
if (sd->recovery_start == MaxSector)
/* This can correctly fail if array isn't started,
* yet, so just ignore status for now.
diff --git a/systemd/mdadm-last-resort@.service b/systemd/mdadm-last-resort@.service
index e93d72b..efeb3f6 100644
--- a/systemd/mdadm-last-resort@.service
+++ b/systemd/mdadm-last-resort@.service
@@ -1,7 +1,7 @@
[Unit]
-Description=Activate md array even though degraded
+Description=Activate md array %I even though degraded
DefaultDependencies=no
-Conflicts=sys-devices-virtual-block-%i.device
+ConditionPathExists=!/sys/devices/virtual/block/%i/md/sync_action
[Service]
Type=oneshot
diff --git a/systemd/mdadm-last-resort@.timer b/systemd/mdadm-last-resort@.timer
index 52b3f22..45ad223 100644
--- a/systemd/mdadm-last-resort@.timer
+++ b/systemd/mdadm-last-resort@.timer
@@ -1,5 +1,5 @@
[Unit]
-Description=Timer to wait for more drives before activating degraded array.
+Description=Timer to wait for more drives before activating degraded array %I.
DefaultDependencies=no
Conflicts=sys-devices-virtual-block-%i.device
diff --git a/systemd/mdmonitor.service b/systemd/mdmonitor.service
index 9aff2f5..46f7b88 100644
--- a/systemd/mdmonitor.service
+++ b/systemd/mdmonitor.service
@@ -10,4 +10,7 @@ Description=MD array monitor
DefaultDependencies=no
[Service]
-ExecStart=BINDIR/mdadm --monitor --scan
+Environment= MDADM_MONITOR_ARGS=--scan
+EnvironmentFile=-/run/sysconfig/mdadm
+ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
+ExecStart=BINDIR/mdadm --monitor $MDADM_MONITOR_ARGS
diff --git a/test b/test
index 13f1bda..111a2e7 100755
--- a/test
+++ b/test
@@ -1,440 +1,283 @@
#!/bin/bash
#
# run test suite for mdadm
-user=`id -un`
-if [ " $user" != " root" ]
-then echo >&2 "test: testing can only be done as 'root'."
- exit 1;
-fi
-
-prefix='[0-9][0-9]'
-
-dir=`pwd`
-mdadm=$dir/mdadm
-if [ \! -x $mdadm ]
-then
- echo >&2 "test: $mdadm isn't usable."
-fi
+mdadm=$PWD/mdadm
+targetdir="/var/tmp"
+logdir="$targetdir"
+config=/tmp/mdadm.conf
+testdir=
+devlist=
-testdir="tests"
-logdir="$testdir/logs"
-logsave=0
+savelogs=0
exitonerror=1
+prefix='[0-9][0-9]'
-echo "Testing on linux-$(uname -r) kernel"
-
-# Check whether to run multipath tests
-modprobe multipath 2> /dev/null
-if grep -s 'Personalities : .*multipath' > /dev/null /proc/mdstat ; then
- MULTIPATH="yes"
-fi
-INTEGRITY=yes
+# use loop devices by default if --dev is not specified
DEVTYPE=loop
+INTEGRITY=yes
LVM_VOLGROUP=mdtest
# make sure to test local mdmon, not system one
export MDADM_NO_SYSTEMCTL=1
# assume md0, md1, md2 exist in /dev
-md0=/dev/md0 md1=/dev/md1 md2=/dev/md2
+md0=/dev/md0
+md1=/dev/md1
+md2=/dev/md2
mdp0=/dev/md_d0
mdp1=/dev/md_d1
-# We test mdadm on loop-back block devices.
-# dir for storing files should be settable by command line maybe
-targetdir=/var/tmp
-size=20000
-# super0, round down to multiple of 64 and substract 64
-mdsize0=19904
-# super00 is nested, subtract 128
-mdsize00=19840
-# super1.0 round down to multiple of 2, subtract 8
-mdsize1=19992
-mdsize1a=19988
-mdsize12=19988
-# super1.2 for linear: round to multiple of 2, subtract 4
-mdsize1_l=19996
-mdsize2_l=19996
-# subtract another 4 for bitmaps
-mdsize1b=19988
-mdsize11=19992
-mdsize11a=19456
-mdsize12=19988
-
-# ddf needs bigger devices as 32Meg is reserved!
-ddfsize=65536
-
-config=/tmp/mdadm.conf
-
-cleanup() {
- udevadm settle
- $mdadm -Ssq 2> /dev/null
- case $DEVTYPE in
- loop)
- for d in 0 1 2 3 4 5 6 7 8 9 10 11 12 13
- do
- losetup -d /dev/loop$d ; # rm -f $targetdir/mdtest$d
- rm -f /dev/disk/by-path/loop*
- done
- ;;
- lvm)
- for d in 0 1 2 3 4 5 6 7 8 9 10 11 12 13
- do
- eval "lvremove --quiet -f \$dev$d"
- done
- ;;
- esac
+die() {
+ echo -e "\n\tERROR: $* \n"
+ save_log fail
+ exit 2
}
ctrl_c() {
exitonerror=1
}
-do_setup() {
- trap cleanup 0 1 3 15
- trap ctrl_c 2
-
- # make sure there are no loop devices remaining.
- # udev started things can sometimes prevent them being stopped
- # immediately
- while grep loop /proc/partitions > /dev/null 2>&1
- do
- mdadm -Ss
- losetup -d /dev/loop[0-9]* 2> /dev/null
- sleep 1
- done
- devlist=
- for d in 0 1 2 3 4 5 6 7 8 9 10 11 12 13
- do
- sz=$size
- if [ $d -gt 7 ]; then sz=$ddfsize ; fi
- case $DEVTYPE in
- loop)
- [ -f $targetdir/mdtest$d ] || dd if=/dev/zero of=$targetdir/mdtest$d count=$sz bs=1K > /dev/null 2>&1
- # make sure udev doesn't touch
- mdadm --zero $targetdir/mdtest$d 2> /dev/null
- [ -b /dev/loop$d ] || mknod /dev/loop$d b 7 $d
- if [ $d -eq 7 ]
- then
- losetup /dev/loop$d $targetdir/mdtest6 # for multipath use
- else
- losetup /dev/loop$d $targetdir/mdtest$d
- fi
- eval dev$d=/dev/loop$d
- eval file$d=$targetdir/mdtest$d
- ;;
- lvm)
- unset MULTIPATH
- eval dev$d=/dev/mapper/${LVM_VOLGROUP}-mdtest$d
- if ! lvcreate --quiet -L ${sz}K -n mdtest$d $LVM_VOLGROUP; then
- trap '' 0 # make sure lvremove is not called
- eval echo error creating \$dev$d
- exit 129
- fi
- ;;
- ram)
- unset MULTIPATH
- eval dev$d=/dev/ram$d
- ;;
- esac
- eval devlist=\"\$devlist \$dev$d\"
- eval devlist$d=\"\$devlist\"
- #" <-- add this quote to un-confuse vim syntax highlighting
- done
- path0=$dev6
- path1=$dev7
-
- ulimit -c unlimited
- [ -f /proc/mdstat ] || modprobe md_mod
- echo 2000 > /proc/sys/dev/raid/speed_limit_max
- echo 0 > /sys/module/md_mod/parameters/start_ro
-}
-
# mdadm always adds --quiet, and we want to see any unexpected messages
mdadm() {
- rm -f $targetdir/stderr
- case $* in
- *-S* ) udevadm settle
- p=`cat /proc/sys/dev/raid/speed_limit_max`
- echo 20000 > /proc/sys/dev/raid/speed_limit_max
- esac
- case $* in
- *-C* ) $mdadm 2> $targetdir/stderr --quiet "$@" --auto=yes;;
- * ) $mdadm 2> $targetdir/stderr --quiet "$@"
- esac
- rv=$?
- case $* in
- *-S* ) udevadm settle
- echo $p > /proc/sys/dev/raid/speed_limit_max
- esac
- cat >&2 $targetdir/stderr
- return $rv
-}
-
-# check various things
-check() {
- case $1 in
- spares )
- spares=`tr '] ' '\012\012' < /proc/mdstat | grep -c '(S)' || exit 0`
- if [ $spares -ne $2 ]
- then
- echo >&2 "ERROR expected $2 spares, found $spares"; exit 1;
- fi
- ;;
- raid* | linear )
- grep -s "active $1 " /proc/mdstat > /dev/null || {
- echo >&2 "ERROR active $1 not found" ; cat /proc/mdstat ; exit 1;}
- ;;
- algorithm )
- grep -s " algorithm $2 " /proc/mdstat > /dev/null || {
- echo >&2 "ERROR algorithm $2 not found"; cat /proc/mdstat; exit 1;}
- ;;
- resync | recovery | reshape)
- cnt=5
- while ! grep -s $1 /proc/mdstat > /dev/null
- do
- if [ $cnt -gt 0 ] && grep -v idle /sys/block/md*/md/sync_action > /dev/null
- then # Something isn't idle - wait a bit
- sleep 0.5
- cnt=$[cnt-1]
- else
- echo >&2 ERROR no $1 happening; cat /proc/mdstat; exit 1
- fi
- done
- ;;
-
- nosync )
- sleep 0.5
- # Since 4.2 we delay the close of recovery until there has been a chance for
- # spares to be activated. That means that a recovery that finds nothing
- # to do can still take a little longer than expected.
- # add an extra check: is sync_completed shows the end is reached, assume
- # there is no recovery.
- if grep -s -E '(resync|recovery|reshape) *=' > /dev/null /proc/mdstat ; then
- incomplete=`grep / /sys/block/md*/md/sync_completed 2> /dev/null | sed '/^ *\([0-9]*\) \/ \1/d'`
- if [ -n "$incomplete" ]; then
- echo >&2 "ERROR resync or recovery is happening!"; cat /proc/mdstat ; exit 1;
- fi
- fi
- ;;
-
- wait )
- p=`cat /proc/sys/dev/raid/speed_limit_max`
- echo 2000000 > /proc/sys/dev/raid/speed_limit_max
- sleep 0.1
- while grep -E '(resync|recovery|reshape|check|repair) *=' > /dev/null /proc/mdstat ||
- grep -v idle > /dev/null /sys/block/md*/md/sync_action
- do sleep 0.5;
- done
- echo $p > /proc/sys/dev/raid/speed_limit_max
- ;;
-
- state )
- grep -s "blocks.*\[$2\]\$" /proc/mdstat > /dev/null || {
- echo >&2 "ERROR state $2 not found!"; cat /proc/mdstat ; exit 1; }
- sleep 0.5
- ;;
-
- bitmap )
- grep -s bitmap > /dev/null /proc/mdstat || {
- echo >&2 ERROR no bitmap ; cat /proc/mdstat ; exit 1; }
- ;;
- nobitmap )
- if grep -s "bitmap" > /dev/null /proc/mdstat
- then
- echo >&2 ERROR bitmap present ; cat /proc/mdstat ; exit 1;
- fi
- ;;
-
- readonly )
- grep -s "read-only" > /dev/null /proc/mdstat || {
- echo >&2 "ERROR array is not read-only!"; cat /proc/mdstat ; exit 1; }
- ;;
-
- inactive )
- grep -s "inactive" > /dev/null /proc/mdstat || {
- echo >&2 "ERROR array is not inactive!"; cat /proc/mdstat ; exit 1; }
- ;;
- * ) echo >&2 ERROR unknown check $1 ; exit 1;
- esac
-}
-
-no_errors() {
- if [ -s $targetdir/stderr ]
- then echo Bad errors from mdadm: ; cat $targetdir/stderr; exit 2;
- fi
-}
-# basic device test
-
-testdev() {
- udevadm settle
- dev=$1
- cnt=$2
- dvsize=$3
- chunk=$4
- if [ -z "$5" ]; then
- mkfs.ext3 -F -j $dev > /dev/null 2>&1 && fsck -fn $dev >&2
- fi
- dsize=$[dvsize/chunk]
- dsize=$[dsize*chunk]
- rasize=$[dsize*2*cnt]
- # rasize is in sectors
- if [ -n "$DEV_ROUND_K" ]; then
- rasize=$[rasize/DEV_ROUND_K/2]
- rasize=$[rasize*DEV_ROUND_K*2]
- fi
- if [ `/sbin/blockdev --getsize $dev` -eq 0 ]; then sleep 2 ; fi
- _sz=`/sbin/blockdev --getsize $dev`
- if [ $rasize -lt $_sz -o $[rasize*4/5] -gt $_sz ]
- then
- echo "ERROR: size is wrong for $dev: $cnt * $dvsize (chunk=$chunk) = $rasize, not $_sz"
- exit 1
- fi
-}
-
-fast_sync() {
- echo 200000 > /proc/sys/dev/raid/speed_limit_max
-}
-
-rotest() {
- dev=$1
- fsck -fn $dev >&2
+ rm -f $targetdir/stderr
+ case $* in
+ *-S* )
+ udevadm settle
+ p=`cat /proc/sys/dev/raid/speed_limit_max`
+ echo 20000 > /proc/sys/dev/raid/speed_limit_max
+ ;;
+ esac
+ case $* in
+ *-C* | *--create* | *-B* | *--build* )
+ # clear superblock every time once creating or
+ # building arrays, because it's always creating
+ # and building array many times in a test case.
+ for args in $*
+ do
+ [[ $args =~ "/dev/" ]] && {
+ [[ $args =~ "md" ]] ||
+ $mdadm --zero $args > /dev/null
+ }
+ done
+ $mdadm 2> $targetdir/stderr --quiet "$@" --auto=yes
+ ;;
+ * )
+ $mdadm 2> $targetdir/stderr --quiet "$@"
+ ;;
+ esac
+ rv=$?
+ case $* in
+ *-S* )
+ udevadm settle
+ echo $p > /proc/sys/dev/raid/speed_limit_max
+ ;;
+ esac
+ cat >&2 $targetdir/stderr
+ return $rv
}
do_test() {
- _script=$1
- _basename=`basename $_script`
- if [ -f "$_script" ]
- then
- rm -f $targetdir/stderr
- # stop all arrays, just incase some script left an array active.
- $mdadm -Ssq 2> /dev/null
- mdadm --zero $devlist 2> /dev/null
- mdadm --zero $devlist 2> /dev/null
- # this might have been reset: restore the default.
- echo 2000 > /proc/sys/dev/raid/speed_limit_max
- # source script in a subshell, so it has access to our
- # namespace, but cannot change it.
- echo -ne "$_script... "
- if ( set -ex ; . $_script ) &> $targetdir/log
- then
- echo "succeeded"
- _fail=0
- else
- log=log
- cat $targetdir/stderr >> $targetdir/log
- echo "=======================dmesg=================" >> $targetdir/log
- dmesg | tail -n 200 >> $targetdir/log
- if [ $exitonerror == 0 ]; then
- log=log-`basename $_script`
- mv $targetdir/log $logdir/$log
- fi
- echo "FAILED - see $logdir/$log for details"
- _fail=1
- fi
- if [ "$savelogs" == "1" ]; then
- cp $targetdir/log $logdir/$_basename.log
- fi
- if [ "$_fail" == "1" -a "$exitonerror" == "1" ]; then
- exit 1
- fi
- fi
+ _script=$1
+ _basename=`basename $_script`
+ if [ -f "$_script" ]
+ then
+ rm -f $targetdir/stderr
+ # this might have been reset: restore the default.
+ echo 2000 > /proc/sys/dev/raid/speed_limit_max
+ do_clean
+ # source script in a subshell, so it has access to our
+ # namespace, but cannot change it.
+ echo -ne "$_script... "
+ if ( set -ex ; . $_script ) &> $targetdir/log
+ then
+ dmesg | grep -iq "error\|call trace\|segfault" &&
+ die "dmesg prints errors when testing $_basename!"
+ echo "succeeded"
+ _fail=0
+ else
+ save_log fail
+ _fail=1
+ fi
+ [ "$savelogs" == "1" ] &&
+ mv -f $targetdir/log $logdir/$_basename.log
+ [ "$_fail" == "1" -a "$exitonerror" == "1" ] && exit 1
+ fi
}
do_help() {
- echo "Usage: $0 [options]"
- echo " Options:"
- echo " --tests=<test1,test2,..> Comma separated list of tests to run"
- echo " --disable-multipath Disable any tests involving multipath"
- echo " --disable-integrity Disable slow tests of RAID[56] consistency"
- echo " --logdir=<directory> Directory to save logfiles in"
- echo " --save-logs Save all logs in <logdir>"
- echo " --keep-going Don't stop on error, ie. run all tests"
- echo " --dev=[loop|lvm|ram] Use loop devices (default), LVM, or RAM disk"
- echo " --volgroup=<name> LVM volume group for LVM test"
- echo " setup Setup test environment and exit"
- echo " cleanup Cleanup test environment"
- echo " <prefix> Run tests with <prefix>"
+ cat <<-EOF
+ Usage: $0 [options]
+ Example for disk mode: ./test --dev=disk --disks=/dev/sda{2..15}
+ Options:
+ --tests=test1,test2,... Comma separated list of tests to run
+ --testdir= Specify testdir as tests|clustermd_tests
+ --raidtype= raid0|linear|raid1|raid456|raid10|ddf|imsm
+ --disable-multipath Disable any tests involving multipath
+ --disable-integrity Disable slow tests of RAID[56] consistency
+ --logdir=directory Directory to save all logfiles in
+ --save-logs Usually use with --logdir together
+ --keep-going | --no-error Don't stop on error, ie. run all tests
+ --dev=loop|lvm|ram|disk Use loop devices (default), LVM, RAM or disk
+ --disks= Provide a bunch of physical devices for test
+ --volgroup=name LVM volume group for LVM test
+ setup Setup test environment and exit
+ cleanup Cleanup test environment
+ prefix Run tests with <prefix>
+ --help | -h Print this usage
+ EOF
}
parse_args() {
- for i in $*
- do
- case $i in
- [0-9]*)
- prefix=$i
- ;;
- setup)
- echo "mdadm test environment setup"
- do_setup
- trap 0; exit 0
- ;;
- cleanup)
- cleanup
- exit 0
- ;;
- --tests=*)
- TESTLIST=`expr "x$i" : 'x[^=]*=\(.*\)' | sed -e 's/,/ /g'`
- ;;
- --logdir=*)
- logdir=`expr "x$i" : 'x[^=]*=\(.*\)'`
- ;;
- --save-logs)
- savelogs=1
- ;;
- --keep-going | --no-error)
- exitonerror=0
- ;;
- --disable-multipath)
- unset MULTIPATH
- ;;
- --disable-integrity)
- unset INTEGRITY
- ;;
- --dev=loop)
- DEVTYPE=loop
- ;;
- --dev=lvm)
- DEVTYPE=lvm
- ;;
- --dev=ram)
- DEVTYPE=ram
- ;;
- --volgroup=*)
- LVM_VOLGROUP=`expr "x$i" : 'x[^=]*=\(.*\)'`
- ;;
- --help)
- do_help
- exit 0;
- ;;
- -*)
- echo " $0: Unknown argument: $i"
- do_help
- exit 0;
- ;;
- esac
-done
+ for i in $*
+ do
+ case $i in
+ --testdir=* )
+ case ${i##*=} in
+ tests )
+ testdir=tests
+ ;;
+ clustermd_tests )
+ testdir=clustermd_tests
+ CLUSTER_CONF="$PWD/$testdir/cluster_conf"
+ ;;
+ * )
+ echo "Unknown argument: $i"
+ do_help
+ exit 1
+ ;;
+ esac
+ ;;
+ esac
+ done
+ [ -z "$testdir" ] && testdir=tests
+ . $testdir/func.sh
+ for i in $*
+ do
+ case $i in
+ [0-9][0-9] )
+ prefix=$i
+ ;;
+ setup )
+ echo "mdadm test environment setup"
+ do_setup
+ trap 0
+ exit 0
+ ;;
+ cleanup )
+ cleanup
+ exit 0
+ ;;
+ --testdir=* )
+ ;;
+ --tests=* )
+ TESTLIST=($(echo ${i##*=} | sed -e 's/,/ /g'))
+ ;;
+ --raidtype=* )
+ case ${i##*=} in
+ raid0 )
+ TESTLIST=($(ls $testdir | grep "[0-9][0-9]r0\|raid0"))
+ ;;
+ linear )
+ TESTLIST=($(ls $testdir | grep "linear"))
+ ;;
+ raid1 )
+ TESTLIST=($(ls $testdir | grep "[0-9][0-9]r1\|raid1" | grep -vi "r10\|raid10"))
+ ;;
+ raid456 )
+ TESTLIST=($(ls $testdir | grep "[0-9][0-9]r[4-6]\|raid[4-6]"))
+ ;;
+ raid10 )
+ TESTLIST=($(ls $testdir | grep "[0-9][0-9]r10\|raid10"))
+ ;;
+ ddf )
+ TESTLIST=($(ls $testdir | grep "[0-9][0-9]ddf"))
+ ;;
+ imsm )
+ TESTLIST=($(ls $testdir | grep "[0-9][0-9]imsm"))
+ ;;
+ * )
+ echo "Unknown argument: $i"
+ do_help
+ exit 1
+ ;;
+ esac
+ ;;
+ --logdir=* )
+ logdir="${i##*=}"
+ ;;
+ --save-logs )
+ savelogs=1
+ ;;
+ --keep-going | --no-error )
+ exitonerror=0
+ ;;
+ --disable-multipath )
+ unset MULTIPATH
+ ;;
+ --disable-integrity )
+ unset INTEGRITY
+ ;;
+ --dev=* )
+ case ${i##*=} in
+ loop )
+ DEVTYPE=loop
+ ;;
+ lvm )
+ DEVTYPE=lvm
+ ;;
+ ram )
+ DEVTYPE=ram
+ ;;
+ disk )
+ DEVTYPE=disk
+ ;;
+ * )
+ echo "Unknown argument: $i"
+ do_help
+ exit 1
+ ;;
+ esac
+ ;;
+ --disks=* )
+ disks=(${disks[*]} ${i##*=})
+ ;;
+ --volgroup=* )
+ LVM_VOLGROUP=`expr "x$i" : 'x[^=]*=\(.*\)'`
+ ;;
+ --help | -h )
+ do_help
+ exit 0
+ ;;
+ * )
+ echo " $0: Unknown argument: $i"
+ do_help
+ exit 1
+ ;;
+ esac
+ done
}
-logdir=$targetdir
-parse_args $@
-
-do_setup
-mkdir -p $logdir
-
-if [ "$savelogs" == "1" ]; then
- echo "Saving logs to $logdir"
-fi
+main() {
+ do_setup
+
+ echo "Testing on linux-$(uname -r) kernel"
+ [ "$savelogs" == "1" ] &&
+ echo "Saving logs to $logdir"
+ if [ "x$TESTLIST" != "x" ]
+ then
+ for script in ${TESTLIST[@]}
+ do
+ do_test $testdir/$script
+ done
+ else
+ for script in $testdir/$prefix $testdir/$prefix*[^~]
+ do
+ do_test $script
+ done
+ fi
+
+ exit 0
+}
-if [ "x$TESTLIST" != "x" ]; then
- for script in $TESTLIST
- do
- do_test $testdir/$script
- done
-else
- for script in $testdir/$prefix $testdir/$prefix*[^~]
- do
- do_test $script
- done
-fi
-exit 0
+parse_args $@
+main
diff --git a/tests/00raid1 b/tests/00raid1
index c93465d..f6b8be1 100644
--- a/tests/00raid1
+++ b/tests/00raid1
@@ -3,6 +3,10 @@
# test version0, version1, and no super
# test resync and recovery.
+# It's just a sanity check. This command shouldn't run successfully
+mdadm -CR $md0 -l 1 -n2 missing missing
+check opposite_result
+
mdadm -CR $md0 -l 1 -n2 $dev0 $dev1
check resync
check raid1
diff --git a/tests/00readonly b/tests/00readonly
new file mode 100644
index 0000000..28b0fa1
--- /dev/null
+++ b/tests/00readonly
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+for metadata in 0.9 1.0 1.1 1.2
+do
+ for level in linear raid0 raid1 raid4 raid5 raid6 raid10
+ do
+ mdadm -CR $md0 -l $level -n 4 --metadata=$metadata \
+ $dev1 $dev2 $dev3 $dev4 --assume-clean
+ check nosync
+ check $level
+ mdadm -ro $md0
+ check readonly
+ state=$(cat /sys/block/md0/md/array_state)
+ [ "$state" == "readonly" ] ||
+ die "array_state should be 'readonly', but is $state"
+ mdadm -w $md0
+ check $level
+ mdadm -S $md0
+ done
+done
+
+exit 0
diff --git a/tests/02r5grow b/tests/02r5grow
index 386e82e..bb9bd6d 100644
--- a/tests/02r5grow
+++ b/tests/02r5grow
@@ -34,3 +34,20 @@ check nosync
sh tests/testdev $md0 3 $[size/2] 128
mdadm -S $md0
+
+# create a raid5 array and change the chunk
+mdadm -CR $md0 --level raid5 --metadata=1.1 --chunk=32 --raid-disks 3 --size $[size/2] $dev1 $dev2 $dev3
+check wait
+check state UUU
+check chunk 32
+
+mdadm $md0 --grow --chunk=64
+check reshape
+check wait
+check chunk 64
+
+mdadm -S $md0
+mdadm -A $md0 $dev1 $dev2 $dev3
+check state UUU
+check chunk 64
+mdadm -S $md0
diff --git a/tests/check b/tests/check
deleted file mode 100644
index f4ed6d5..0000000
--- a/tests/check
+++ /dev/null
@@ -1,35 +0,0 @@
-
-case $1 in
- raid* | linear )
- grep -s "active $1 " /proc/mdstat > /dev/null || {
- echo >&2 "ERROR active $1 not found" ; cat /proc/mdstat ; exit 1;}
- ;;
- resync | recovery )
- sleep 0.1
- grep -s $1 /proc/mdstat > /dev/null || {
- echo >&2 ERROR no $1 happening; cat /proc/mdstat; exit 1; }
- ;;
-
- nosync )
- sleep 0.5
- grep -s 're[synccovery]* =' > /dev/null /proc/mdstat && {
- echo >&2 "ERROR resync or recovery is happening!"; cat /proc/mdstat ; exit 1; }
- ;;
-
- wait )
- sleep 0.1
- while grep 're[synccovery]* =' > /dev/null /proc/mdstat
- do sleep 2;
- done
- ;;
-
- state )
- grep -s "blocks.*\[$2\]\$" /proc/mdstat > /dev/null || {
- echo >&2 "ERROR state $2 not found!"; cat /proc/mdstat ; exit 1; }
- sleep 0.5
- ;;
-
- * ) echo >&2 ERROR unknown check $1 ; exit 1;
-esac
-
-exit 0
diff --git a/tests/func.sh b/tests/func.sh
new file mode 100644
index 0000000..a6995f1
--- /dev/null
+++ b/tests/func.sh
@@ -0,0 +1,360 @@
+#!/bin/bash
+
+# We test mdadm on loop-back block devices.
+# dir for storing files should be settable by command line maybe
+size=20000
+# super0, round down to multiple of 64 and subtract 64
+mdsize0=19904
+# super00 is nested, subtract 128
+mdsize00=19840
+# super1.0 round down to multiple of 2, subtract 8
+mdsize1=19992
+mdsize1a=19988
+mdsize12=19988
+# super1.2 for linear: round to multiple of 2, subtract 4
+mdsize1_l=19996
+mdsize2_l=19996
+# subtract another 4 for bitmaps
+mdsize1b=19988
+mdsize11=19992
+mdsize11a=19456
+mdsize12=19988
+
+# ddf needs bigger devices as 32Meg is reserved!
+ddfsize=65536
+
+# $1 is optional parameter, it shows why to save log
+#
+# Gathers diagnostics for the test named by $_basename:
+#  - appends $targetdir/stderr to $targetdir/log and copies the result to
+#    $logdir/$_basename.log
+#  - writes dmesg, /proc/mdstat and, for loop/disk device types, the
+#    mdadm -D / -X / -E output to $logdir/$logfile
+# When $1 is "fail" a FAILED message naming both files is printed.
+save_log() {
+ status=$1
+ # the status word is glued directly in front of the test name
+ logfile="$status""$_basename".log
+
+ cat $targetdir/stderr >> $targetdir/log
+ cp $targetdir/log $logdir/$_basename.log
+ echo "## $HOSTNAME: saving dmesg." >> $logdir/$logfile
+ # -c prints the kernel log and clears it afterwards
+ dmesg -c >> $logdir/$logfile
+ echo "## $HOSTNAME: saving proc mdstat." >> $logdir/$logfile
+ cat /proc/mdstat >> $logdir/$logfile
+ # second space-separated field of every 'mdadm -Ds' line
+ array=($(mdadm -Ds | cut -d' ' -f2))
+ [ "$1" == "fail" ] &&
+ echo "FAILED - see $logdir/$_basename.log and $logdir/$logfile for details"
+ if [ $DEVTYPE == 'lvm' ]
+ then
+ # not supported lvm type yet
+ echo
+ elif [ "$DEVTYPE" == 'loop' -o "$DEVTYPE" == 'disk' ]
+ then
+ if [ ! -z "$array" -a ${#array[@]} -ge 1 ]
+ then
+ echo "## $HOSTNAME: mdadm -D ${array[@]}" >> $logdir/$logfile
+ $mdadm -D ${array[@]} >> $logdir/$logfile
+ # ignore saving external(external file, imsm...) bitmap
+ cat /proc/mdstat | grep -q "linear\|external" && return 0
+ # values of the '-D -Y' export lines that contain a /dev/ path
+ md_disks=($($mdadm -D -Y ${array[@]} | grep "/dev/" | cut -d'=' -f2))
+ # bitmap and superblock dumps are only taken when mdstat shows a bitmap
+ cat /proc/mdstat | grep -q "bitmap"
+ if [ $? -eq 0 ]
+ then
+ echo "## $HOSTNAME: mdadm -X ${md_disks[@]}" >> $logdir/$logfile
+ $mdadm -X ${md_disks[@]} >> $logdir/$logfile
+ echo "## $HOSTNAME: mdadm -E ${md_disks[@]}" >> $logdir/$logfile
+ $mdadm -E ${md_disks[@]} >> $logdir/$logfile
+ fi
+ else
+ echo "## $HOSTNAME: no array assembled!" >> $logdir/$logfile
+ fi
+ fi
+}
+
+cleanup() {
+ udevadm settle
+ $mdadm -Ssq 2> /dev/null
+ case $DEVTYPE in
+ loop )
+ for d in 0 1 2 3 4 5 6 7 8 9 10 11 12 13
+ do
+ losetup -d /dev/loop$d &> /dev/null
+ rm -f /dev/disk/by-path/loop*
+ rm -f /var/tmp/mdtest$d
+ done
+ ;;
+ lvm )
+ for d in 0 1 2 3 4 5 6 7 8 9 10 11 12 13
+ do
+ eval "lvremove --quiet -f \$dev$d"
+ done
+ ;;
+ disk )
+ $mdadm --zero ${disks[@]} &> /dev/null
+ ;;
+ esac
+}
+
+# Reset state between tests: stop all arrays, wipe the md superblock on
+# every device in $devlist and clear the kernel log (do_test greps dmesg
+# for errors after each script).
+do_clean()
+{
+ mdadm -Ss > /dev/null
+ mdadm --zero $devlist 2> /dev/null
+ dmesg -c > /dev/null
+}
+
+# Verify the preconditions of the test suite and exit 1 with a message if
+# any is not met: running as root, built binaries present, required
+# commands available, installed mdadm identical to the built one, an
+# ext* file system in the current directory and no RAID already running.
+# Finally sets MULTIPATH=yes when the multipath personality is available.
+check_env() {
+ user=$(id -un)
+ [ "X$user" != "Xroot" ] && {
+ echo "test: testing can only be done as 'root'."
+ exit 1
+ }
+ # raid6check is looked up in the current directory, $mdadm is $PWD/mdadm
+ [ -x "raid6check" -a -x $mdadm ] || {
+ echo "test: please run 'make everything' before perform testing."
+ exit 1
+ }
+ cmds=(mdadm lsblk df udevadm losetup mkfs.ext3 fsck seq)
+ for cmd in ${cmds[@]}
+ do
+ which $cmd > /dev/null || {
+ echo "$cmd command not found!"
+ exit 1
+ }
+ done
+ # compare the version banner of the built binary with the installed one
+ mdadm_src_ver="$($mdadm -V 2>&1)"
+ mdadm_sbin_ver="$($(which mdadm) -V 2>&1)"
+ if [ "$mdadm_src_ver" != "$mdadm_sbin_ver" ]
+ then
+ # it's necessary to 'make install' mdadm to /SBIN/DIR,
+ # such as systemd/mdadm-grow-continue@.service, would
+ # run as an instance by systemd when reshape happens,
+ # thus ensure that the correct mdadm is in testing.
+ echo "test: please run 'make install' before testing."
+ exit 1
+ fi
+ # grep -q prints nothing, so $(...) expands to an empty command and the
+ # 'if' sees the pipeline's exit status
+ if ! $(df -T . | grep -iq ext)
+ then
+ # 'external file' bitmap only supports with ext[2-4] file system
+ echo "test: please run test suite with ext[2-4] file system."
+ exit 1
+ fi
+ if $(lsblk -a | grep -iq raid)
+ then
+ # do not run mdadm -Ss directly if there are RAIDs working.
+ echo "test: please run test suite without running RAIDs environment."
+ exit 1
+ fi
+ # Check whether to run multipath tests
+ modprobe multipath 2> /dev/null
+ grep -sq 'Personalities : .*multipath' /proc/mdstat &&
+ MULTIPATH="yes"
+}
+
+do_setup() {
+ trap cleanup 0 1 3 15
+ trap ctrl_c 2
+
+ check_env
+ [ -d $logdir ] || mkdir -p $logdir
+
+ devlist=
+ if [ "$DEVTYPE" == "loop" ]
+ then
+ # make sure there are no loop devices remaining.
+ # udev started things can sometimes prevent them being stopped
+ # immediately
+ while grep loop /proc/partitions > /dev/null 2>&1
+ do
+ $mdadm -Ssq
+ losetup -d /dev/loop[0-9]* 2> /dev/null
+ sleep 0.2
+ done
+ elif [ "$DEVTYPE" == "disk" ]
+ then
+ if [ ! -z "$disks" ]
+ then
+ for d in $(seq 0 ${#disks[@]})
+ do
+ eval "dev$d=${disks[$d]}"
+ eval devlist=\"\$devlist \$dev$d\"
+ eval devlist$d=\"\$devlist\"
+ done
+ $mdadm --zero ${disks[@]} &> /dev/null
+ else
+ echo "Forget to provide physical devices for disk mode."
+ exit 1
+ fi
+ fi
+ for d in 0 1 2 3 4 5 6 7 8 9 10 11 12 13
+ do
+ sz=$size
+ [ $d -gt 7 ] && sz=$ddfsize
+ case $DEVTYPE in
+ loop)
+ [ -f $targetdir/mdtest$d ] ||
+ dd if=/dev/zero of=$targetdir/mdtest$d count=$sz bs=1K > /dev/null 2>&1
+ # make sure udev doesn't touch
+ mdadm --zero $targetdir/mdtest$d 2> /dev/null
+ [ -b /dev/loop$d ] || mknod /dev/loop$d b 7 $d
+ if [ $d -eq 7 ]
+ then
+ losetup /dev/loop$d $targetdir/mdtest6 # for multipath use
+ else
+ losetup /dev/loop$d $targetdir/mdtest$d
+ fi
+ eval dev$d=/dev/loop$d
+ eval file$d=$targetdir/mdtest$d
+ ;;
+ lvm)
+ unset MULTIPATH
+ eval dev$d=/dev/mapper/${LVM_VOLGROUP}-mdtest$d
+ if ! lvcreate --quiet -L ${sz}K -n mdtest$d $LVM_VOLGROUP
+ then
+ trap '' 0 # make sure lvremove is not called
+ eval echo error creating \$dev$d
+ exit 129
+ fi
+ ;;
+ ram)
+ unset MULTIPATH
+ eval dev$d=/dev/ram$d
+ ;;
+ esac
+ eval devlist=\"\$devlist \$dev$d\"
+ eval devlist$d=\"\$devlist\"
+ #" <-- add this quote to un-confuse vim syntax highlighting
+ done
+ path0=$dev6
+ path1=$dev7
+ ulimit -c unlimited
+ [ -f /proc/mdstat ] || modprobe md_mod
+ echo 2000 > /proc/sys/dev/raid/speed_limit_max
+ echo 0 > /sys/module/md_mod/parameters/start_ro
+}
+
+# check various things
+check() {
+ case $1 in
+ opposite_result )
+ if [ $? -eq 0 ]; then
+ die "This command shouldn't run successfully"
+ fi
+ spares )
+ spares=$(tr '] ' '\012\012' < /proc/mdstat | grep -c '(S)' || exit 0)
+ [ $spares -ne $2 ] &&
+ die "expected $2 spares, found $spares"
+ ;;
+ raid* | linear )
+ grep -sq "active $1 " /proc/mdstat ||
+ die "active $1 not found"
+ ;;
+ algorithm )
+ grep -sq " algorithm $2 " /proc/mdstat ||
+ die "algorithm $2 not found"
+ ;;
+ resync | recovery | reshape )
+ cnt=5
+ while ! grep -sq $1 /proc/mdstat
+ do
+ if [ $cnt -gt 0 ] && grep -v idle /sys/block/md*/md/sync_action > /dev/null
+ then # Something isn't idle - wait a bit
+ sleep 0.5
+ cnt=$[cnt-1]
+ else
+ die "no $1 happening"
+ fi
+ done
+ ;;
+ nosync )
+ sleep 0.5
+ # Since 4.2 we delay the close of recovery until there has been a chance for
+ # spares to be activated. That means that a recovery that finds nothing
+ # to do can still take a little longer than expected.
+ # add an extra check: is sync_completed shows the end is reached, assume
+ # there is no recovery.
+ if grep -sq -E '(resync|recovery|reshape) *=' /proc/mdstat
+ then
+ incomplete=`grep / /sys/block/md*/md/sync_completed 2> /dev/null | sed '/^ *\([0-9]*\) \/ \1/d'`
+ [ -n "$incomplete" ] &&
+ die "resync or recovery is happening!"
+ fi
+ ;;
+ wait )
+ p=`cat /proc/sys/dev/raid/speed_limit_max`
+ echo 2000000 > /proc/sys/dev/raid/speed_limit_max
+ sleep 0.1
+ while grep -Eq '(resync|recovery|reshape|check|repair) *=' /proc/mdstat ||
+ grep -v idle > /dev/null /sys/block/md*/md/sync_action
+ do
+ sleep 0.5
+ done
+ echo $p > /proc/sys/dev/raid/speed_limit_max
+ ;;
+ state )
+ grep -sq "blocks.*\[$2\]\$" /proc/mdstat ||
+ die "state $2 not found!"
+ sleep 0.5
+ ;;
+ bitmap )
+ grep -sq bitmap /proc/mdstat ||
+ die "no bitmap"
+ ;;
+ nobitmap )
+ grep -sq "bitmap" /proc/mdstat &&
+ die "bitmap present"
+ ;;
+ readonly )
+ grep -sq "read-only" /proc/mdstat ||
+ die "array is not read-only!"
+ ;;
+ inactive )
+ grep -sq "inactive" /proc/mdstat ||
+ die "array is not inactive!"
+ ;;
+ # It only can be used when there is only one raid
+ chunk )
+ chunk_size=`awk -F',' '/chunk/{print $2}' /proc/mdstat | awk -F'[a-z]' '{print $1}'`
+ if [ "$chunk_size" -ne "$2" ] ; then
+ die "chunksize should be $2, but it's $chunk_size"
+ fi
+ ;;
+ * )
+ die "unknown check $1"
+ ;;
+ esac
+}
+
+no_errors() {
+ if [ -s $targetdir/stderr ]
+ then
+ echo Bad errors from mdadm:
+ cat $targetdir/stderr
+ exit 2
+ fi
+}
+
+# basic device test
+#
+# Usage: testdev <dev> <cnt> <dvsize> <chunk> [<skip-mkfs>]
+# Checks that <dev> is a block device and that its size matches what an
+# array of <cnt> data devices of <dvsize> K with the given chunk size
+# should provide. Unless a fifth argument is given, an ext3 file system is
+# created on the device and checked with fsck first. The size check is
+# skipped entirely for DEVTYPE=disk.
+testdev() {
+ [ -b $1 ] || die "$1 isn't a block device."
+ [ "$DEVTYPE" == "disk" ] && return 0
+ udevadm settle
+ dev=$1
+ cnt=$2
+ dvsize=$3
+ chunk=$4
+ if [ -z "$5" ]
+ then
+ mkfs.ext3 -F -j $dev > /dev/null 2>&1 && fsck -fn $dev >&2
+ fi
+ # round the per-device size down to a whole number of chunks
+ dsize=$[dvsize/chunk]
+ dsize=$[dsize*chunk]
+ rasize=$[dsize*2*cnt]
+ # rasize is in sectors
+ if [ -n "$DEV_ROUND_K" ]
+ then
+ rasize=$[rasize/DEV_ROUND_K/2]
+ rasize=$[rasize*DEV_ROUND_K*2]
+ fi
+ # a size of 0 is given two seconds to settle before it is read again
+ [ `/sbin/blockdev --getsize $dev` -eq 0 ] && sleep 2
+ _sz=`/sbin/blockdev --getsize $dev`
+ # accept any real size between 4/5 of the expected size and the expected size
+ [ $rasize -lt $_sz -o $[rasize*4/5] -gt $_sz ] &&
+ die "size is wrong for $dev: $cnt * $dvsize (chunk=$chunk) = $rasize, not $_sz"
+ return 0
+}
+
+# Run a forced, no-changes fsck (-fn) on device $1, with its output sent
+# to stderr. Note that the global variable 'dev' is overwritten.
+rotest() {
+ dev=$1
+ fsck -fn $dev >&2
+}
diff --git a/tests/testdev b/tests/testdev
deleted file mode 100644
index 8b6e6f0..0000000
--- a/tests/testdev
+++ /dev/null
@@ -1,13 +0,0 @@
-dev=$1
-cnt=$2
-size=$3
-chunk=$4
-mkfs -j $dev > /dev/null 2>&1 && fsck -fn $dev >&2
-dsize=$[size/chunk]
-dsize=$[dsize*chunk]
-rasize=$[dsize*2*cnt]
-if [ $rasize -ne `/sbin/blockdev --getsize $dev` ]
-then
- echo "ERROR: size is wrong for $dev: $cnt * $size (chunk=$chunk) = $rasize, not `/sbin/blockdev --getsize $dev`"
- exit 1;
-fi
diff --git a/udev-md-clustered-confirm-device.rules b/udev-md-clustered-confirm-device.rules
new file mode 100644
index 0000000..3e5381e
--- /dev/null
+++ b/udev-md-clustered-confirm-device.rules
@@ -0,0 +1,21 @@
+# do not edit this file, it will be overwritten on update
+
+SUBSYSTEM!="block", GOTO="clustermd_end"
+
+# handle md arrays
+KERNEL!="md*", GOTO="clustermd_end"
+ENV{DEVTYPE}!="disk", GOTO="clustermd_end"
+ACTION!="change", GOTO="clustermd_end"
+ENV{EVENT}!="ADD_DEVICE", GOTO="clustermd_end"
+ENV{DEVICE_UUID}!="?*", GOTO="clustermd_end"
+ENV{RAID_DISK}!="?*", GOTO="clustermd_end"
+
+# Based on the received UUID, node confirms the device if
+# it is found by blkid, otherwise the node reports it is
+# missing.
+PROGRAM="BINDIR/blkid -o device -t UUID_SUB=$env{DEVICE_UUID}", ENV{.md.newdevice} = "$result"
+
+ENV{.md.newdevice}!="", RUN+="BINDIR/mdadm --manage $env{DEVNAME} --cluster-confirm $env{RAID_DISK}:$env{.md.newdevice}"
+ENV{.md.newdevice}=="", RUN+="BINDIR/mdadm --manage $env{DEVNAME} --cluster-confirm $env{RAID_DISK}:missing"
+
+LABEL="clustermd_end"
diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules
index 440febc..c95ec7b 100644
--- a/udev-md-raid-arrays.rules
+++ b/udev-md-raid-arrays.rules
@@ -35,7 +35,7 @@ ENV{ID_FS_USAGE}=="filesystem|other", ENV{ID_FS_LABEL_ENC}=="?*", SYMLINK+="disk
ENV{MD_LEVEL}=="raid[1-9]*", ENV{SYSTEMD_WANTS}+="mdmonitor.service"
# Tell systemd to run mdmon for our container, if we need it.
-ENV{MD_LEVEL}=="raid[1-9]*", ENV{MD_CONTAINER}=="?*", PROGRAM="/bin/readlink $env{MD_CONTAINER}", ENV{MD_MON_THIS}="%c"
+ENV{MD_LEVEL}=="raid[1-9]*", ENV{MD_CONTAINER}=="?*", PROGRAM="/usr/bin/readlink $env{MD_CONTAINER}", ENV{MD_MON_THIS}="%c"
ENV{MD_MON_THIS}=="?*", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@%c.service"
LABEL="md_end"
diff --git a/udev-md-raid-assembly.rules b/udev-md-raid-assembly.rules
index d0d440a..9f055ed 100644
--- a/udev-md-raid-assembly.rules
+++ b/udev-md-raid-assembly.rules
@@ -7,6 +7,9 @@ ENV{ANACONDA}=="?*", GOTO="md_inc_end"
SUBSYSTEM!="block", GOTO="md_inc_end"
+# skip non-initialized devices
+ENV{SYSTEMD_READY}=="0", GOTO="md_inc_end"
+
# handle potential components of arrays (the ones supported by md)
ENV{ID_FS_TYPE}=="linux_raid_member", GOTO="md_inc"
@@ -27,7 +30,7 @@ LABEL="md_inc"
# remember you can limit what gets auto/incrementally assembled by
# mdadm.conf(5)'s 'AUTO' and selectively whitelist using 'ARRAY'
-ACTION=="add|change", IMPORT{program}="BINDIR/mdadm --incremental --export $devnode --offroot ${DEVLINKS}"
+ACTION=="add|change", IMPORT{program}="BINDIR/mdadm --incremental --export $devnode --offroot $env{DEVLINKS}"
ACTION=="add|change", ENV{MD_STARTED}=="*unsafe*", ENV{MD_FOREIGN}=="no", ENV{SYSTEMD_WANTS}+="mdadm-last-resort@$env{MD_DEVICE}.timer"
ACTION=="remove", ENV{ID_PATH}=="?*", RUN+="BINDIR/mdadm -If $name --path $env{ID_PATH}"
ACTION=="remove", ENV{ID_PATH}!="?*", RUN+="BINDIR/mdadm -If $name"
diff --git a/udev-md-raid-creating.rules b/udev-md-raid-creating.rules
new file mode 100644
index 0000000..9bef8d1
--- /dev/null
+++ b/udev-md-raid-creating.rules
@@ -0,0 +1,7 @@
+# do not edit this file, it will be overwritten on update
+# While mdadm is creating an array, it creates a file
+# /run/mdadm/creating-mdXXX. If that file exists, then
+# the array is not "ready" and we should make sure the
+# content is ignored.
+
+KERNEL=="md*", TEST=="/run/mdadm/creating-$kernel", ENV{SYSTEMD_READY}="0"
diff --git a/udev-md-raid-safe-timeouts.rules b/udev-md-raid-safe-timeouts.rules
new file mode 100644
index 0000000..420c862
--- /dev/null
+++ b/udev-md-raid-safe-timeouts.rules
@@ -0,0 +1,61 @@
+# Copyright (C) 2017 by Jonathan G. Underwood
+# This file is part of mdraid-safe-timeouts.
+#
+# mdraid-safe-timeouts is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# Foobar is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with mdraid-safe-timeouts. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file causes block devices with Linux RAID (mdadm) signatures to
+# attempt to set safe timeouts for the drives involved
+# See udev(8) for syntax
+
+# Don't process any events if anaconda is running as anaconda brings up
+# raid devices manually
+ENV{ANACONDA}=="?*", GOTO="md_timeouts_end"
+
+SUBSYSTEM!="block|machinecheck", GOTO="md_timeouts_end"
+
+# "noiswmd" on kernel command line stops mdadm from handling
+# "isw" (aka IMSM - Intel RAID).
+# "nodmraid" on kernel command line stops mdadm from handling
+# "isw" or "ddf".
+IMPORT{cmdline}="nodmraid"
+ENV{nodmraid}=="?*", GOTO="md_timeouts_end"
+IMPORT{cmdline}="noiswmd"
+ENV{noiswmd}=="?*", GOTO="md_timeouts_end"
+
+# Set controller timeout for parent disk of each partition if the
+# partition is a mdraid partition of higher than raid 0, and the disk
+# doesn't have scterc turned on (i.e. if it's disabled or the disk
+# doesn't support it). We determine if the disk has SCTERC turned on
+# by examining the output of smartctl and seeing if it contains the
+# word "seconds". If the word "seconds" is found we take this to imply
+# SCTERC is turned on, and take no action. Otherwise we set the drive
+# controller timeout to 180 seconds. It would be better to check the
+# exit status code of smartctl rather than grepping for "seconds", but
+# it's not clear what that will be in the three cases (supported and
+# turned on, supported but disabled, not supported).
+
+ENV{DEVTYPE}!="partition", GOTO="md_timeouts_end"
+
+IMPORT{program}="/sbin/mdadm --examine --export $devnode"
+
+ACTION=="add|change", \
+ ENV{ID_FS_TYPE}=="linux_raid_member", \
+ ENV{MD_LEVEL}=="raid[1-9]*", \
+ TEST=="/sys/block/$parent/device/timeout", \
+ TEST=="/usr/sbin/smartctl", \
+ PROGRAM!="/usr/bin/sh -c '/usr/sbin/smartctl -l scterc /dev/$parent | grep -q seconds && exit 0 || exit 1'", \
+ RUN+="/usr/bin/sh -c '/usr/bin/echo 180 > /sys/block/$parent/device/timeout && /usr/bin/logger timeout for /dev/$parent set to 180 secs'"
+
+LABEL="md_timeouts_end"
diff --git a/util.c b/util.c
index f100972..4adbbff 100644
--- a/util.c
+++ b/util.c
@@ -30,6 +30,7 @@
#include <sys/un.h>
#include <sys/resource.h>
#include <sys/vfs.h>
+#include <sys/mman.h>
#include <linux/magic.h>
#include <poll.h>
#include <ctype.h>
@@ -89,7 +90,6 @@ int dlm_funs_ready(void)
return is_dlm_hooks_ready ? 1 : 0;
}
-#ifndef MDASSEMBLE
static struct dlm_hooks *dlm_hooks = NULL;
struct dlm_lock_resource *dlm_lock_res = NULL;
static int ast_called = 0;
@@ -128,11 +128,17 @@ static void dlm_ast(void *arg)
static char *cluster_name = NULL;
/* Create the lockspace, take bitmapXXX locks on all the bitmaps. */
-int cluster_get_dlmlock(int *lockid)
+int cluster_get_dlmlock(void)
{
int ret = -1;
char str[64];
int flags = LKF_NOQUEUE;
+ int retry_count = 0;
+
+ if (!dlm_funs_ready()) {
+ pr_err("Something wrong with dlm library\n");
+ return -1;
+ }
ret = get_cluster_name(&cluster_name);
if (ret) {
@@ -141,38 +147,57 @@ int cluster_get_dlmlock(int *lockid)
}
dlm_lock_res = xmalloc(sizeof(struct dlm_lock_resource));
- dlm_lock_res->ls = dlm_hooks->create_lockspace(cluster_name, O_RDWR);
+ dlm_lock_res->ls = dlm_hooks->open_lockspace(cluster_name);
if (!dlm_lock_res->ls) {
- pr_err("%s failed to create lockspace\n", cluster_name);
- return -ENOMEM;
+ dlm_lock_res->ls = dlm_hooks->create_lockspace(cluster_name, O_RDWR);
+ if (!dlm_lock_res->ls) {
+ pr_err("%s failed to create lockspace\n", cluster_name);
+ return -ENOMEM;
+ }
+ } else {
+ pr_err("open existed %s lockspace\n", cluster_name);
}
snprintf(str, 64, "bitmap%s", cluster_name);
- ret = dlm_hooks->ls_lock(dlm_lock_res->ls, LKM_PWMODE, &dlm_lock_res->lksb,
- flags, str, strlen(str), 0, dlm_ast,
- dlm_lock_res, NULL, NULL);
+retry:
+ ret = dlm_hooks->ls_lock(dlm_lock_res->ls, LKM_PWMODE,
+ &dlm_lock_res->lksb, flags, str, strlen(str),
+ 0, dlm_ast, dlm_lock_res, NULL, NULL);
if (ret) {
pr_err("error %d when get PW mode on lock %s\n", errno, str);
+ /* let's try several times if EAGAIN happened */
+ if (dlm_lock_res->lksb.sb_status == EAGAIN && retry_count < 10) {
+ sleep(10);
+ retry_count++;
+ goto retry;
+ }
dlm_hooks->release_lockspace(cluster_name, dlm_lock_res->ls, 1);
return ret;
}
/* Wait for it to complete */
poll_for_ast(dlm_lock_res->ls);
- *lockid = dlm_lock_res->lksb.sb_lkid;
- return dlm_lock_res->lksb.sb_status;
+ if (dlm_lock_res->lksb.sb_status) {
+ pr_err("failed to lock cluster\n");
+ return -1;
+ }
+ return 1;
}
-int cluster_release_dlmlock(int lockid)
+int cluster_release_dlmlock(void)
{
int ret = -1;
if (!cluster_name)
- return -1;
+ goto out;
- ret = dlm_hooks->ls_unlock(dlm_lock_res->ls, lockid, 0,
- &dlm_lock_res->lksb, dlm_lock_res);
+ if (!dlm_lock_res->lksb.sb_lkid)
+ goto out;
+
+ ret = dlm_hooks->ls_unlock_wait(dlm_lock_res->ls,
+ dlm_lock_res->lksb.sb_lkid, 0,
+ &dlm_lock_res->lksb);
if (ret) {
pr_err("error %d happened when unlock\n", errno);
/* XXX make sure the lock is unlocked eventually */
@@ -184,7 +209,8 @@ int cluster_release_dlmlock(int lockid)
errno = dlm_lock_res->lksb.sb_status;
if (errno != EUNLOCK) {
- pr_err("error %d happened in ast when unlock lockspace\n", errno);
+ pr_err("error %d happened in ast when unlock lockspace\n",
+ errno);
/* XXX make sure the lockspace is unlocked eventually */
goto out;
}
@@ -200,16 +226,85 @@ int cluster_release_dlmlock(int lockid)
out:
return ret;
}
-#else
-int cluster_get_dlmlock(int *lockid)
+
+int md_array_valid(int fd)
{
- return -1;
+ struct mdinfo *sra;
+ int ret;
+
+ sra = sysfs_read(fd, NULL, GET_ARRAY_STATE);
+ if (sra) {
+ if (sra->array_state != ARRAY_UNKNOWN_STATE)
+ ret = 0;
+ else
+ ret = -ENODEV;
+
+ free(sra);
+ } else {
+ /*
+ * Array state could not be read from sysfs, so fall back to
+ * checking whether the RAID_VERSION ioctl succeeds on fd
+ */
+ ret = ioctl(fd, RAID_VERSION);
+ }
+
+ return !ret;
}
-int cluster_release_dlmlock(int lockid)
+
+int md_array_active(int fd)
{
- return -1;
+ struct mdinfo *sra;
+ struct mdu_array_info_s array;
+ int ret = 0;
+
+ sra = sysfs_read(fd, NULL, GET_ARRAY_STATE);
+ if (sra) {
+ if (!md_array_is_active(sra))
+ ret = -ENODEV;
+
+ free(sra);
+ } else {
+ /*
+ * GET_ARRAY_INFO doesn't provide access to the proper state
+ * information, so fall back to a basic check for raid_disks != 0
+ */
+ ret = ioctl(fd, GET_ARRAY_INFO, &array);
+ }
+
+ return !ret;
+}
+
+int md_array_is_active(struct mdinfo *info)
+{
+ return (info->array_state != ARRAY_CLEAR &&
+ info->array_state != ARRAY_INACTIVE &&
+ info->array_state != ARRAY_UNKNOWN_STATE);
+}
+
+/*
+ * Get array info from the kernel. Longer term we want to deprecate the
+ * ioctl and get it from sysfs.
+ */
+int md_get_array_info(int fd, struct mdu_array_info_s *array)
+{
+ return ioctl(fd, GET_ARRAY_INFO, array);
+}
+
+/*
+ * Set array info
+ */
+int md_set_array_info(int fd, struct mdu_array_info_s *array)
+{
+ return ioctl(fd, SET_ARRAY_INFO, array);
+}
+
+/*
+ * Get disk info from the kernel.
+ */
+int md_get_disk_info(int fd, struct mdu_disk_info_s *disk)
+{
+ return ioctl(fd, GET_DISK_INFO, disk);
}
-#endif
/*
* Parse a 128 bit uuid in 4 integers
@@ -248,35 +343,6 @@ int parse_uuid(char *str, int uuid[4])
return 0;
}
-/*
- * Get the md version number.
- * We use the RAID_VERSION ioctl if it is supported
- * If not, but we have a block device with major '9', we assume
- * 0.36.0
- *
- * Return version number as 24 but number - assume version parts
- * always < 255
- */
-
-int md_get_version(int fd)
-{
- struct stat stb;
- mdu_version_t vers;
-
- if (fstat(fd, &stb)<0)
- return -1;
- if ((S_IFMT&stb.st_mode) != S_IFBLK)
- return -1;
-
- if (ioctl(fd, RAID_VERSION, &vers) == 0)
- return (vers.major*10000) + (vers.minor*100) + vers.patchlevel;
- if (errno == EACCES)
- return -1;
- if (major(stb.st_rdev) == MD_MAJOR)
- return (3600);
- return -1;
-}
-
int get_linux_version()
{
struct utsname name;
@@ -295,7 +361,6 @@ int get_linux_version()
return (a*1000000)+(b*1000)+c;
}
-#ifndef MDASSEMBLE
int mdadm_version(char *version)
{
int a, b, c;
@@ -357,6 +422,17 @@ unsigned long long parse_size(char *size)
return s;
}
+int is_near_layout_10(int layout)
+{
+ int fc, fo;
+
+ fc = (layout >> 8) & 255;
+ fo = layout & (1 << 16);
+ if (fc > 1 || fo > 0)
+ return 0;
+ return 1;
+}
+
int parse_layout_10(char *layout)
{
int copies, rv;
@@ -401,7 +477,6 @@ long parse_num(char *num)
else
return rv;
}
-#endif
int parse_cluster_confirm_arg(char *input, char **devname, int *slot)
{
@@ -532,38 +607,6 @@ int enough(int level, int raid_disks, int layout, int clean, char *avail)
}
}
-int enough_fd(int fd)
-{
- struct mdu_array_info_s array;
- struct mdu_disk_info_s disk;
- int i, rv;
- char *avail;
-
- if (ioctl(fd, GET_ARRAY_INFO, &array) != 0 ||
- array.raid_disks <= 0)
- return 0;
- avail = xcalloc(array.raid_disks, 1);
- for (i = 0; i < MAX_DISKS && array.nr_disks > 0; i++) {
- disk.number = i;
- if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
- continue;
- if (disk.major == 0 && disk.minor == 0)
- continue;
- array.nr_disks--;
-
- if (! (disk.state & (1<<MD_DISK_SYNC)))
- continue;
- if (disk.raid_disk < 0 || disk.raid_disk >= array.raid_disks)
- continue;
- avail[disk.raid_disk] = 1;
- }
- /* This is used on an active array, so assume it is clean */
- rv = enough(array.level, array.raid_disks, array.layout,
- 1, avail);
- free(avail);
- return rv;
-}
-
const int uuid_zero[4] = { 0, 0, 0, 0 };
int same_uuid(int a[4], int b[4], int swapuuid)
@@ -634,17 +677,18 @@ char *__fname_from_uuid(int id[4], int swap, char *buf, char sep)
}
-char *fname_from_uuid(struct supertype *st, struct mdinfo *info, char *buf, char sep)
+char *fname_from_uuid(struct supertype *st, struct mdinfo *info,
+ char *buf, char sep)
{
// dirty hack to work around an issue with super1 superblocks...
// super1 superblocks need swapuuid set in order for assembly to
// work, but can't have it set if we want this printout to match
// all the other uuid printouts in super1.c, so we force swapuuid
// to 1 to make our printout match the rest of super1
- return __fname_from_uuid(info->uuid, (st->ss == &super1) ? 1 : st->ss->swapuuid, buf, sep);
+ return __fname_from_uuid(info->uuid, (st->ss == &super1) ? 1 :
+ st->ss->swapuuid, buf, sep);
}
-#ifndef MDASSEMBLE
int check_ext2(int fd, char *name)
{
/*
@@ -729,6 +773,40 @@ int check_raid(int fd, char *name)
return 1;
}
+int fstat_is_blkdev(int fd, char *devname, dev_t *rdev)
+{
+ struct stat stb;
+
+ if (fstat(fd, &stb) != 0) {
+ pr_err("fstat failed for %s: %s\n", devname, strerror(errno));
+ return 0;
+ }
+ if ((S_IFMT & stb.st_mode) != S_IFBLK) {
+ pr_err("%s is not a block device.\n", devname);
+ return 0;
+ }
+ if (rdev)
+ *rdev = stb.st_rdev;
+ return 1;
+}
+
+int stat_is_blkdev(char *devname, dev_t *rdev)
+{
+ struct stat stb;
+
+ if (stat(devname, &stb) != 0) {
+ pr_err("stat failed for %s: %s\n", devname, strerror(errno));
+ return 0;
+ }
+ if ((S_IFMT & stb.st_mode) != S_IFBLK) {
+ pr_err("%s is not a block device.\n", devname);
+ return 0;
+ }
+ if (rdev)
+ *rdev = stb.st_rdev;
+ return 1;
+}
+
int ask(char *mesg)
{
char *add = "";
@@ -748,7 +826,6 @@ int ask(char *mesg)
pr_err("assuming 'no'\n");
return 0;
}
-#endif /* MDASSEMBLE */
int is_standard(char *dev, int *nump)
{
@@ -808,10 +885,9 @@ unsigned long calc_csum(void *super, int bytes)
return csum;
}
-#ifndef MDASSEMBLE
char *human_size(long long bytes)
{
- static char buf[30];
+ static char buf[47];
/* We convert bytes to either centi-M{ega,ibi}bytes or
* centi-G{igi,ibi}bytes, with appropriate rounding,
@@ -828,14 +904,12 @@ char *human_size(long long bytes)
long cMiB = (bytes * 200LL / (1LL<<20) + 1) / 2;
long cMB = (bytes / ( 1000000LL / 200LL ) +1) /2;
snprintf(buf, sizeof(buf), " (%ld.%02ld MiB %ld.%02ld MB)",
- cMiB/100 , cMiB % 100,
- cMB/100, cMB % 100);
+ cMiB/100, cMiB % 100, cMB/100, cMB % 100);
} else {
long cGiB = (bytes * 200LL / (1LL<<30) +1) / 2;
long cGB = (bytes / (1000000000LL/200LL ) +1) /2;
snprintf(buf, sizeof(buf), " (%ld.%02ld GiB %ld.%02ld GB)",
- cGiB/100 , cGiB % 100,
- cGB/100, cGB % 100);
+ cGiB/100, cGiB % 100, cGB/100, cGB % 100);
}
return buf;
}
@@ -862,22 +936,22 @@ char *human_size_brief(long long bytes, int prefix)
if (bytes < 2*1024LL*1024LL*1024LL) {
long cMiB = (bytes * 200LL / (1LL<<20) +1) /2;
snprintf(buf, sizeof(buf), "%ld.%02ldMiB",
- cMiB/100 , cMiB % 100);
+ cMiB/100, cMiB % 100);
} else {
long cGiB = (bytes * 200LL / (1LL<<30) +1) /2;
snprintf(buf, sizeof(buf), "%ld.%02ldGiB",
- cGiB/100 , cGiB % 100);
+ cGiB/100, cGiB % 100);
}
}
else if (prefix == JEDEC) {
if (bytes < 2*1024LL*1024LL*1024LL) {
long cMB = (bytes / ( 1000000LL / 200LL ) +1) /2;
snprintf(buf, sizeof(buf), "%ld.%02ldMB",
- cMB/100, cMB % 100);
+ cMB/100, cMB % 100);
} else {
long cGB = (bytes / (1000000000LL/200LL ) +1) /2;
snprintf(buf, sizeof(buf), "%ld.%02ldGB",
- cGB/100 , cGB % 100);
+ cGB/100, cGB % 100);
}
}
else
@@ -902,7 +976,6 @@ void print_r10_layout(int layout)
if (near*far == 1)
printf("NO REDUNDANCY");
}
-#endif
unsigned long long calc_array_size(int level, int raid_disks, int layout,
int chunksize, unsigned long long devsize)
@@ -969,7 +1042,6 @@ dev_t devnm2devid(char *devnm)
return 0;
}
-#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
char *get_md_name(char *devnm)
{
/* find /dev/md%d or /dev/md/%d or make a device /dev/.tmp.md%d */
@@ -985,21 +1057,18 @@ char *get_md_name(char *devnm)
if (strncmp(devnm, "md_", 3) == 0) {
snprintf(devname, sizeof(devname), "/dev/md/%s",
devnm + 3);
- if (stat(devname, &stb) == 0
- && (S_IFMT&stb.st_mode) == S_IFBLK
- && (stb.st_rdev == rdev))
+ if (stat(devname, &stb) == 0 &&
+ (S_IFMT&stb.st_mode) == S_IFBLK && (stb.st_rdev == rdev))
return devname;
}
snprintf(devname, sizeof(devname), "/dev/%s", devnm);
- if (stat(devname, &stb) == 0
- && (S_IFMT&stb.st_mode) == S_IFBLK
- && (stb.st_rdev == rdev))
+ if (stat(devname, &stb) == 0 && (S_IFMT&stb.st_mode) == S_IFBLK &&
+ (stb.st_rdev == rdev))
return devname;
snprintf(devname, sizeof(devname), "/dev/md/%s", devnm+2);
- if (stat(devname, &stb) == 0
- && (S_IFMT&stb.st_mode) == S_IFBLK
- && (stb.st_rdev == rdev))
+ if (stat(devname, &stb) == 0 && (S_IFMT&stb.st_mode) == S_IFBLK &&
+ (stb.st_rdev == rdev))
return devname;
dn = map_dev(major(rdev), minor(rdev), 0);
@@ -1010,9 +1079,8 @@ char *get_md_name(char *devnm)
if (errno != EEXIST)
return NULL;
- if (stat(devname, &stb) == 0
- && (S_IFMT&stb.st_mode) == S_IFBLK
- && (stb.st_rdev == rdev))
+ if (stat(devname, &stb) == 0 && (S_IFMT&stb.st_mode) == S_IFBLK &&
+ (stb.st_rdev == rdev))
return devname;
unlink(devname);
return NULL;
@@ -1023,7 +1091,6 @@ void put_md_name(char *name)
if (strncmp(name, "/dev/.tmp.md", 12) == 0)
unlink(name);
}
-#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
int get_maj_min(char *dev, int *major, int *minor)
{
@@ -1057,9 +1124,11 @@ int dev_open(char *dev, int flags)
}
if (fd < 0) {
/* Try /tmp as /dev appear to be read-only */
- snprintf(devname, sizeof(devname), "/tmp/.tmp.md.%d:%d:%d",
+ snprintf(devname, sizeof(devname),
+ "/tmp/.tmp.md.%d:%d:%d",
(int)getpid(), major, minor);
- if (mknod(devname, S_IFBLK|0600, makedev(major, minor)) == 0) {
+ if (mknod(devname, S_IFBLK|0600,
+ makedev(major, minor)) == 0) {
fd = open(devname, flags);
unlink(devname);
}
@@ -1093,7 +1162,7 @@ int open_dev_excl(char *devnm)
long delay = 1000;
sprintf(buf, "%d:%d", major(devid), minor(devid));
- for (i = 0 ; i < 25 ; i++) {
+ for (i = 0; i < 25; i++) {
int fd = dev_open(buf, flags|O_EXCL);
if (fd >= 0)
return fd;
@@ -1134,7 +1203,7 @@ void wait_for(char *dev, int fd)
(stb_want.st_mode & S_IFMT) != S_IFBLK)
return;
- for (i = 0 ; i < 25 ; i++) {
+ for (i = 0; i < 25; i++) {
struct stat stb;
if (stat(dev, &stb) == 0 &&
(stb.st_mode & S_IFMT) == S_IFBLK &&
@@ -1145,7 +1214,7 @@ void wait_for(char *dev, int fd)
delay *= 2;
}
if (i == 25)
- dprintf("timeout waiting for %s\n", dev);
+ pr_err("timeout waiting for %s\n", dev);
}
struct superswitch *superlist[] =
@@ -1153,9 +1222,8 @@ struct superswitch *superlist[] =
&super0, &super1,
&super_ddf, &super_imsm,
&mbr, &gpt,
- NULL };
-
-#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
+ NULL
+};
struct supertype *super_by_fd(int fd, char **subarrayp)
{
@@ -1177,7 +1245,7 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
minor = sra->array.minor_version;
verstr = sra->text_version;
} else {
- if (ioctl(fd, GET_ARRAY_INFO, &array))
+ if (md_get_array_info(fd, &array))
array.major_version = array.minor_version = 0;
vers = array.major_version;
minor = array.minor_version;
@@ -1205,7 +1273,7 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
verstr = "-no-metadata-";
}
- for (i = 0; st == NULL && superlist[i] ; i++)
+ for (i = 0; st == NULL && superlist[i]; i++)
st = superlist[i]->match_metadata_desc(verstr);
sysfs_free(sra);
@@ -1220,7 +1288,6 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
return st;
}
-#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
int dev_size_from_id(dev_t id, unsigned long long *size)
{
@@ -1239,6 +1306,23 @@ int dev_size_from_id(dev_t id, unsigned long long *size)
return 0;
}
+int dev_sector_size_from_id(dev_t id, unsigned int *size)
+{
+ char buf[20];
+ int fd;
+
+ sprintf(buf, "%d:%d", major(id), minor(id));
+ fd = dev_open(buf, O_RDONLY);
+ if (fd < 0)
+ return 0;
+ if (get_dev_sector_size(fd, NULL, size)) {
+ close(fd);
+ return 1;
+ }
+ close(fd);
+ return 0;
+}
+
struct supertype *dup_super(struct supertype *orig)
{
struct supertype *st;
@@ -1270,7 +1354,7 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type)
st = xcalloc(1, sizeof(*st));
st->container_devnm[0] = 0;
- for (i = 0 ; superlist[i]; i++) {
+ for (i = 0; superlist[i]; i++) {
int rv;
ss = superlist[i];
if (guess_type == guess_array && ss->add_to_super == NULL)
@@ -1354,9 +1438,14 @@ int get_dev_sector_size(int fd, char *dname, unsigned int *sectsizep)
*/
int must_be_container(int fd)
{
+ struct mdinfo *mdi;
unsigned long long size;
- if (md_get_version(fd) < 0)
+
+ mdi = sysfs_read(fd, NULL, GET_VERSION);
+ if (!mdi)
return 0;
+ sysfs_free(mdi);
+
if (get_dev_size(fd, NULL, &size) == 0)
return 1;
if (size == 0)
@@ -1755,7 +1844,7 @@ int add_disk(int mdfd, struct supertype *st,
{
/* Add a device to an array, in one of 2 ways. */
int rv;
-#ifndef MDASSEMBLE
+
if (st->ss->external) {
if (info->disk.state & (1<<MD_DISK_SYNC))
info->recovery_start = MaxSector;
@@ -1775,7 +1864,6 @@ int add_disk(int mdfd, struct supertype *st,
}
}
} else
-#endif
rv = ioctl(mdfd, ADD_NEW_DISK, &info->disk);
return rv;
}
@@ -1784,39 +1872,63 @@ int remove_disk(int mdfd, struct supertype *st,
struct mdinfo *sra, struct mdinfo *info)
{
int rv;
+
/* Remove the disk given by 'info' from the array */
-#ifndef MDASSEMBLE
if (st->ss->external)
rv = sysfs_set_str(sra, info, "slot", "none");
else
-#endif
rv = ioctl(mdfd, HOT_REMOVE_DISK, makedev(info->disk.major,
info->disk.minor));
return rv;
}
+int hot_remove_disk(int mdfd, unsigned long dev, int force)
+{
+ int cnt = force ? 500 : 5;
+ int ret;
+
+ /* HOT_REMOVE_DISK can fail with EBUSY if there are
+ * outstanding IO requests to the device.
+ * In this case, it can be helpful to wait a little while,
+ * up to 5 seconds if 'force' is set, or 50 msec if not.
+ */
+ while ((ret = ioctl(mdfd, HOT_REMOVE_DISK, dev)) == -1 &&
+ errno == EBUSY &&
+ cnt-- > 0)
+ usleep(10000);
+
+ return ret;
+}
+
+int sys_hot_remove_disk(int statefd, int force)
+{
+ int cnt = force ? 500 : 5;
+ int ret;
+
+ while ((ret = write(statefd, "remove", 6)) == -1 &&
+ errno == EBUSY &&
+ cnt-- > 0)
+ usleep(10000);
+ return ret == 6 ? 0 : -1;
+}
+
int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
{
/* Initialise kernel's knowledge of array.
* This varies between externally managed arrays
* and older kernels
*/
- int vers = md_get_version(mdfd);
+ mdu_array_info_t inf;
int rv;
-#ifndef MDASSEMBLE
if (st->ss->external)
- rv = sysfs_set_array(info, vers);
- else
-#endif
- if ((vers % 100) >= 1) { /* can use different versions */
- mdu_array_info_t inf;
- memset(&inf, 0, sizeof(inf));
- inf.major_version = info->array.major_version;
- inf.minor_version = info->array.minor_version;
- rv = ioctl(mdfd, SET_ARRAY_INFO, &inf);
- } else
- rv = ioctl(mdfd, SET_ARRAY_INFO, NULL);
+ return sysfs_set_array(info, 9003);
+
+ memset(&inf, 0, sizeof(inf));
+ inf.major_version = info->array.major_version;
+ inf.minor_version = info->array.minor_version;
+ rv = md_set_array_info(mdfd, &inf);
+
return rv;
}
@@ -1990,7 +2102,6 @@ use_random:
memcpy(buf, r, 16);
}
-#ifndef MDASSEMBLE
int flush_metadata_updates(struct supertype *st)
{
int sfd;
@@ -2032,7 +2143,6 @@ void append_metadata_update(struct supertype *st, void *buf, int len)
*st->update_tail = mu;
st->update_tail = &mu->next;
}
-#endif /* MDASSEMBLE */
#ifdef __TINYC__
/* tinyc doesn't optimize this check in ioctl.h out ... */
@@ -2055,7 +2165,7 @@ int experimental(void)
* if spare_group given add it to domains of each spare
* metadata allows to test domains using metadata of destination array */
struct mdinfo *container_choose_spares(struct supertype *st,
- unsigned long long min_size,
+ struct spare_criteria *criteria,
struct domainlist *domlist,
char *spare_group,
const char *metadata, int get_one)
@@ -2077,12 +2187,24 @@ struct mdinfo *container_choose_spares(struct supertype *st,
if (d->disk.state == 0) {
/* check if size is acceptable */
unsigned long long dev_size;
+ unsigned int dev_sector_size;
+ int size_valid = 0;
+ int sector_size_valid = 0;
+
dev_t dev = makedev(d->disk.major,d->disk.minor);
- if (!min_size ||
+ if (!criteria->min_size ||
(dev_size_from_id(dev, &dev_size) &&
- dev_size >= min_size))
- found = 1;
+ dev_size >= criteria->min_size))
+ size_valid = 1;
+
+ if (!criteria->sector_size ||
+ (dev_sector_size_from_id(dev, &dev_sector_size) &&
+ criteria->sector_size == dev_sector_size))
+ sector_size_valid = 1;
+
+ found = size_valid && sector_size_valid;
+
/* check if domain matches */
if (found && domlist) {
struct dev_policy *pol = devid_policy(dev);
@@ -2136,8 +2258,7 @@ void enable_fds(int devices)
{
unsigned int fds = 20 + devices;
struct rlimit lim;
- if (getrlimit(RLIMIT_NOFILE, &lim) != 0
- || lim.rlim_cur >= fds)
+ if (getrlimit(RLIMIT_NOFILE, &lim) != 0 || lim.rlim_cur >= fds)
return;
if (lim.rlim_max < fds)
lim.rlim_max = fds;
@@ -2172,7 +2293,6 @@ void reopen_mddev(int mdfd)
dup2(fd, mdfd);
}
-#ifndef MDASSEMBLE
static struct cmap_hooks *cmap_hooks = NULL;
static int is_cmap_hooks_ready = 0;
@@ -2183,8 +2303,10 @@ void set_cmap_hooks(void)
if (!cmap_hooks->cmap_handle)
return;
- cmap_hooks->initialize = dlsym(cmap_hooks->cmap_handle, "cmap_initialize");
- cmap_hooks->get_string = dlsym(cmap_hooks->cmap_handle, "cmap_get_string");
+ cmap_hooks->initialize =
+ dlsym(cmap_hooks->cmap_handle, "cmap_initialize");
+ cmap_hooks->get_string =
+ dlsym(cmap_hooks->cmap_handle, "cmap_get_string");
cmap_hooks->finalize = dlsym(cmap_hooks->cmap_handle, "cmap_finalize");
if (!cmap_hooks->initialize || !cmap_hooks->get_string ||
@@ -2227,16 +2349,22 @@ void set_dlm_hooks(void)
if (!dlm_hooks->dlm_handle)
return;
- dlm_hooks->create_lockspace = dlsym(dlm_hooks->dlm_handle, "dlm_create_lockspace");
- dlm_hooks->release_lockspace = dlsym(dlm_hooks->dlm_handle, "dlm_release_lockspace");
+ dlm_hooks->open_lockspace =
+ dlsym(dlm_hooks->dlm_handle, "dlm_open_lockspace");
+ dlm_hooks->create_lockspace =
+ dlsym(dlm_hooks->dlm_handle, "dlm_create_lockspace");
+ dlm_hooks->release_lockspace =
+ dlsym(dlm_hooks->dlm_handle, "dlm_release_lockspace");
dlm_hooks->ls_lock = dlsym(dlm_hooks->dlm_handle, "dlm_ls_lock");
- dlm_hooks->ls_unlock = dlsym(dlm_hooks->dlm_handle, "dlm_ls_unlock");
+ dlm_hooks->ls_unlock_wait =
+ dlsym(dlm_hooks->dlm_handle, "dlm_ls_unlock_wait");
dlm_hooks->ls_get_fd = dlsym(dlm_hooks->dlm_handle, "dlm_ls_get_fd");
dlm_hooks->dispatch = dlsym(dlm_hooks->dlm_handle, "dlm_dispatch");
- if (!dlm_hooks->create_lockspace || !dlm_hooks->ls_lock ||
- !dlm_hooks->ls_unlock || !dlm_hooks->release_lockspace ||
- !dlm_hooks->ls_get_fd || !dlm_hooks->dispatch)
+ if (!dlm_hooks->open_lockspace || !dlm_hooks->create_lockspace ||
+ !dlm_hooks->ls_lock || !dlm_hooks->ls_unlock_wait ||
+ !dlm_hooks->release_lockspace || !dlm_hooks->ls_get_fd ||
+ !dlm_hooks->dispatch)
dlclose(dlm_hooks->dlm_handle);
else
is_dlm_hooks_ready = 1;
@@ -2247,4 +2375,51 @@ void set_hooks(void)
set_dlm_hooks();
set_cmap_hooks();
}
-#endif
+
+int zero_disk_range(int fd, unsigned long long sector, size_t count)
+{
+ int ret = 0;
+ int fd_zero;
+ void *addr = NULL;
+ size_t written = 0;
+ size_t len = count * 512;
+ ssize_t n;
+
+ fd_zero = open("/dev/zero", O_RDONLY);
+ if (fd_zero < 0) {
+ pr_err("Cannot open /dev/zero\n");
+ return -1;
+ }
+
+ if (lseek64(fd, sector * 512, SEEK_SET) < 0) {
+ ret = -errno;
+ pr_err("Failed to seek offset for zeroing\n");
+ goto out;
+ }
+
+ addr = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd_zero, 0);
+
+ if (addr == MAP_FAILED) {
+ ret = -errno;
+ pr_err("Mapping /dev/zero failed\n");
+ goto out;
+ }
+
+ do {
+ n = write(fd, addr + written, len - written);
+ if (n < 0) {
+ if (errno == EINTR)
+ continue;
+ ret = -errno;
+ pr_err("Zeroing disk range failed\n");
+ break;
+ }
+ written += n;
+ } while (written != len);
+
+ munmap(addr, len);
+
+out:
+ close(fd_zero);
+ return ret;
+}