summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Assemble.c 54
-rw-r--r-- Create.c 362
-rw-r--r-- Grow.c 2
-rw-r--r-- Incremental.c 202
-rw-r--r-- Kill.c 8
-rw-r--r-- Makefile 27
-rw-r--r-- Manage.c 143
-rw-r--r-- Query.c 4
-rw-r--r-- ReadMe.c 1
-rw-r--r-- TODO 35
-rw-r--r-- bitmap.c 6
-rw-r--r-- crc32.c 340
-rw-r--r-- crc32.h 441
-rw-r--r-- kernel-patch-2.6.25 199
-rw-r--r-- managemon.c 524
-rw-r--r-- mapfile.c 27
-rw-r--r-- md.4 16
-rw-r--r-- mdadm.8 10
-rw-r--r-- mdadm.c 3
-rw-r--r-- mdadm.h 298
-rw-r--r-- mdmon.c 348
-rw-r--r-- mdmon.h 65
-rw-r--r-- mdstat.c 58
-rw-r--r-- monitor.c 527
-rw-r--r-- msg.c 185
-rw-r--r-- msg.h 31
-rw-r--r-- sg_io.c 42
-rw-r--r-- super-ddf.c 3227
-rw-r--r-- super-intel.c 2552
-rw-r--r-- super0.c 160
-rw-r--r-- super1.c 314
-rw-r--r-- sysfs.c 276
-rw-r--r-- test 2
-rw-r--r-- util.c 357
34 files changed, 10441 insertions, 405 deletions
diff --git a/Assemble.c b/Assemble.c
index 79f09126..7efa2b8c 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -542,8 +542,8 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
== devices[devcnt].i.events
&& (devices[best[i]].i.disk.minor
!= devices[devcnt].i.disk.minor)
- && st->ss->major == 0
- && info.array.level != -4) {
+ && st->ss == &super0
+ && info.array.level != LEVEL_MULTIPATH) {
/* two different devices with identical superblock.
* Could be a mis-detection caused by overlapping
* partitions. fail-safe.
@@ -845,11 +845,29 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
/* Almost ready to actually *do* something */
if (!old_linux) {
int rv;
+
+#ifndef MDASSEMBLE
+ struct mdinfo *sra;
+ if (st->ss->external) {
+ char ver[100];
+ strcat(strcpy(ver, "external:"), info.text_version);
+ sra = sysfs_read(mdfd, 0, 0);
+ if ((vers % 100) < 2 ||
+ sra == NULL ||
+ sysfs_set_str(sra, NULL, "metadata_version",
+ ver) < 0) {
+ fprintf(stderr, Name ": This kernel does not "
+ "support external metadata.\n");
+ return 1;
+ }
+ rv = sysfs_set_array(sra, &info);
+ } else
+#endif
if ((vers % 100) >= 1) { /* can use different versions */
mdu_array_info_t inf;
memset(&inf, 0, sizeof(inf));
- inf.major_version = st->ss->major;
- inf.minor_version = st->minor_version;
+ inf.major_version = info.array.major_version;
+ inf.minor_version = info.array.minor_version;
rv = ioctl(mdfd, SET_ARRAY_INFO, &inf);
} else
rv = ioctl(mdfd, SET_ARRAY_INFO, NULL);
@@ -895,8 +913,14 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
j = chosen_drive;
if (j >= 0 /* && devices[j].uptodate */) {
- if (ioctl(mdfd, ADD_NEW_DISK,
- &devices[j].i.disk)!=0) {
+#ifndef MDASSEMBLE
+ if (st->ss->external)
+ rv = sysfs_add_disk(sra, &devices[j].i);
+ else
+#endif
+ rv = ioctl(mdfd, ADD_NEW_DISK,
+ &devices[j].i.disk);
+ if (rv) {
fprintf(stderr, Name ": failed to add "
"%s to %s: %s\n",
devices[j].devname,
@@ -918,6 +942,21 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
i, mddev);
}
+ if (info.array.level == LEVEL_CONTAINER) {
+ if (verbose >= 0) {
+ fprintf(stderr, Name ": Container %s has been "
+ "assembled with %d drive%s",
+ mddev, okcnt, okcnt==1?"":"s");
+ if (okcnt < info.array.raid_disks)
+ fprintf(stderr, " (out of %d)",
+ info.array.raid_disks);
+ fprintf(stderr, "\n");
+ }
+ if (must_close)
+ close(mdfd);
+ return 0;
+ }
+
if (runstop == 1 ||
(runstop <= 0 &&
( enough(info.array.level, info.array.raid_disks,
@@ -940,7 +979,8 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
/* There is a nasty race with 'mdadm --monitor'.
* If it opens this device before we close it,
* it gets an incomplete open on which IO
- * doesn't work and the capacity if wrong.
+ * doesn't work and the capacity is
+ * wrong.
* If we reopen (to check for layered devices)
* before --monitor closes, we loose.
*
diff --git a/Create.c b/Create.c
index 7b1836a3..69192abb 100644
--- a/Create.c
+++ b/Create.c
@@ -66,12 +66,18 @@ int Create(struct supertype *st, char *mddev, int mdfd,
int second_missing = subdevs * 2;
int missing_disks = 0;
int insert_point = subdevs * 2; /* where to insert a missing drive */
+ int total_slots;
int pass;
int vers;
int rv;
int bitmap_fd;
+ int have_container = 0;
+ int container_fd;
+ int need_mdmon = 0;
unsigned long long bitmapsize;
- struct mdinfo info;
+ struct mdinfo *sra;
+ struct mdinfo info, *infos;
+ int did_default = 0;
int major_num = BITMAP_MAJOR_HI;
@@ -92,6 +98,14 @@ int Create(struct supertype *st, char *mddev, int mdfd,
}
}
if (level == UnSet) {
+ /* "ddf" and "imsm" metadata only supports one level - should possibly
+ * push this into metadata handler??
+ */
+ if (st && (st->ss == &super_ddf || st->ss == &super_imsm))
+ level = LEVEL_CONTAINER;
+ }
+
+ if (level == UnSet) {
fprintf(stderr,
Name ": a RAID level is needed to create an array.\n");
return 1;
@@ -116,11 +130,47 @@ int Create(struct supertype *st, char *mddev, int mdfd,
Name ": This level does not support spare devices\n");
return 1;
}
+
+ if (subdevs == 1 && strcmp(devlist->devname, "missing") != 0) {
+ /* If given a single device, it might be a container, and we can
+ * extract a device list from there
+ */
+ mdu_array_info_t inf;
+ int fd;
+
+ memset(&inf, 0, sizeof(inf));
+ fd = open(devlist->devname, O_RDONLY);
+ if (fd >= 0 &&
+ ioctl(fd, GET_ARRAY_INFO, &inf) == 0 &&
+ inf.raid_disks == 0) {
+ /* yep, looks like a container */
+ if (st) {
+ rv = st->ss->load_super(st, fd,
+ devlist->devname);
+ if (rv == 0)
+ have_container = 1;
+ } else {
+ st = guess_super(fd);
+ if (st && !(rv = st->ss->
+ load_super(st, fd,
+ devlist->devname)))
+ have_container = 1;
+ else
+ st = NULL;
+ }
+ }
+ if (fd >= 0)
+ close(fd);
+ if (have_container) {
+ subdevs = 0;
+ devlist = NULL;
+ }
+ }
if (subdevs > raiddisks+sparedisks) {
fprintf(stderr, Name ": You have listed more devices (%d) than are in the array(%d)!\n", subdevs, raiddisks+sparedisks);
return 1;
}
- if (subdevs < raiddisks+sparedisks) {
+ if (!have_container && subdevs < raiddisks+sparedisks) {
fprintf(stderr, Name ": You haven't given enough devices (real or missing) to create this array\n");
return 1;
}
@@ -182,6 +232,7 @@ int Create(struct supertype *st, char *mddev, int mdfd,
case 1:
case LEVEL_FAULTY:
case LEVEL_MULTIPATH:
+ case LEVEL_CONTAINER:
if (chunk) {
chunk = 0;
if (verbose > 0)
@@ -193,14 +244,17 @@ int Create(struct supertype *st, char *mddev, int mdfd,
return 1;
}
+ if (st && ! st->ss->validate_geometry(st, level, layout, raiddisks,
+ chunk, size, NULL, NULL, verbose>=0))
+ return 1;
+
/* now look at the subdevs */
info.array.active_disks = 0;
info.array.working_disks = 0;
dnum = 0;
for (dv=devlist; dv; dv=dv->next, dnum++) {
char *dname = dv->devname;
- unsigned long long ldsize, freesize;
- int fd;
+ unsigned long long freesize;
if (strcasecmp(dname, "missing")==0) {
if (first_missing > dnum)
first_missing = dnum;
@@ -212,18 +266,6 @@ int Create(struct supertype *st, char *mddev, int mdfd,
info.array.working_disks++;
if (dnum < raiddisks)
info.array.active_disks++;
- fd = open(dname, O_RDONLY|O_EXCL);
- if (fd <0 ) {
- fprintf(stderr, Name ": Cannot open %s: %s\n",
- dname, strerror(errno));
- fail=1;
- continue;
- }
- if (!get_dev_size(fd, dname, &ldsize)) {
- fail = 1;
- close(fd);
- continue;
- }
if (st == NULL) {
struct createinfo *ci = conf_get_create_info();
if (ci)
@@ -231,33 +273,42 @@ int Create(struct supertype *st, char *mddev, int mdfd,
}
if (st == NULL) {
/* Need to choose a default metadata, which is different
- * depending on the sizes of devices
+ * depending on geometry of array.
*/
int i;
char *name = "default";
- if (level >= 1 && ldsize > (0x7fffffffULL<<10))
- name = "default/large";
- for(i=0; !st && superlist[i]; i++)
+ for(i=0; !st && superlist[i]; i++) {
st = superlist[i]->match_metadata_desc(name);
+ if (st && !st->ss->validate_geometry
+ (st, level, layout, raiddisks,
+ chunk, size, dname, &freesize,
+ verbose > 0))
+ st = NULL;
+ }
if (!st) {
- fprintf(stderr, Name ": internal error - no default metadata style\n");
+ fprintf(stderr, Name ": device %s not suitable "
+ "for any style of array\n",
+ dname);
exit(2);
}
- if (st->ss->major != 0 ||
+ if (st->ss != &super0 ||
st->minor_version != 90)
- fprintf(stderr, Name ": Defaulting to version"
- " %d.%d metadata\n",
- st->ss->major,
- st->minor_version);
- }
- freesize = st->ss->avail_size(st, ldsize >> 9);
- if (freesize == 0) {
- fprintf(stderr, Name ": %s is too small: %luK\n",
- dname, (unsigned long)(ldsize>>10));
- fail = 1;
- close(fd);
- continue;
+ did_default = 1;
+ } else {
+ if (!st->ss->validate_geometry(st, level, layout,
+ raiddisks,
+ chunk, size, dname,
+ &freesize,
+ verbose > 0)) {
+
+ fprintf(stderr,
+ Name ": %s is not suitable for "
+ "this array.\n",
+ dname);
+ fail = 1;
+ continue;
+ }
}
freesize /= 2; /* convert to K */
@@ -268,9 +319,9 @@ int Create(struct supertype *st, char *mddev, int mdfd,
if (size && freesize < size) {
fprintf(stderr, Name ": %s is smaller that given size."
- " %lluK < %lluK + superblock\n", dname, freesize, size);
+ " %lluK < %lluK + metadata\n",
+ dname, freesize, size);
fail = 1;
- close(fd);
continue;
}
if (maxdisc == NULL || (maxdisc && freesize > maxsize)) {
@@ -282,24 +333,36 @@ int Create(struct supertype *st, char *mddev, int mdfd,
minsize = freesize;
}
if (runstop != 1 || verbose >= 0) {
+ int fd = open(dname, O_RDONLY);
+ if (fd <0 ) {
+ fprintf(stderr, Name ": Cannot open %s: %s\n",
+ dname, strerror(errno));
+ fail=1;
+ continue;
+ }
warn |= check_ext2(fd, dname);
warn |= check_reiser(fd, dname);
warn |= check_raid(fd, dname);
+ close(fd);
}
- close(fd);
}
if (fail) {
fprintf(stderr, Name ": create aborted\n");
return 1;
}
if (size == 0) {
- if (mindisc == NULL) {
+ if (mindisc == NULL && !have_container) {
fprintf(stderr, Name ": no size and no drives given - aborting create.\n");
return 1;
}
- if (level > 0 || level == LEVEL_MULTIPATH || level == LEVEL_FAULTY) {
+ if (level > 0 || level == LEVEL_MULTIPATH
+ || level == LEVEL_FAULTY
+ || st->ss->external ) {
/* size is meaningful */
- if (minsize > 0x100000000ULL && st->ss->major == 0) {
+ if (!st->ss->validate_geometry(st, level, layout,
+ raiddisks,
+ chunk, minsize,
+ NULL, NULL, 0)) {
fprintf(stderr, Name ": devices too large for RAID level %d\n", level);
return 1;
}
@@ -357,7 +420,7 @@ int Create(struct supertype *st, char *mddev, int mdfd,
missing_disks++;
}
- if (level <= 0 && first_missing != subdevs * 2) {
+ if (level <= 0 && first_missing < subdevs * 2) {
fprintf(stderr,
Name ": This level does not support missing devices\n");
return 1;
@@ -382,12 +445,16 @@ int Create(struct supertype *st, char *mddev, int mdfd,
( level == 6 && (insert_point < raiddisks
|| second_missing < raiddisks))
||
+ ( level <= 0 )
+ ||
assume_clean
- )
+ ) {
info.array.state = 1; /* clean, but one+ drive will be missing*/
- else
+ info.resync_start = ~0ULL;
+ } else {
info.array.state = 0; /* not clean, but no errors */
-
+ info.resync_start = 0;
+ }
if (level == 10) {
/* for raid10, the bitmap size is the capacity of the array,
* which is array.size * raid_disks / ncopies;
@@ -424,7 +491,6 @@ int Create(struct supertype *st, char *mddev, int mdfd,
+ info.array.failed_disks;
info.array.layout = layout;
info.array.chunk_size = chunk*1024;
- info.array.major_version = st->ss->major;
if (name == NULL || *name == 0) {
/* base name on mddev */
@@ -453,6 +519,31 @@ int Create(struct supertype *st, char *mddev, int mdfd,
if (!st->ss->init_super(st, &info.array, size, name, homehost, uuid))
return 1;
+ total_slots = info.array.nr_disks;
+ st->ss->getinfo_super(st, &info);
+
+ if (did_default && verbose >= 0) {
+ if (info.text_version[0] == '/') {
+ int dnum = devname2devnum(info.text_version+1);
+ char *path;
+ int mdp = get_mdp_major();
+ struct mdinfo *mdi;
+ if (dnum > 0)
+ path = map_dev(MD_MAJOR, dnum, 1);
+ else
+ path = map_dev(mdp, (-1-dnum)<< 6, 1);
+
+ mdi = sysfs_read(-1, dnum, GET_VERSION);
+
+ fprintf(stderr, Name ": Creating array inside "
+ "%s container %s\n",
+ mdi?mdi->text_version:"managed", path);
+ sysfs_free(mdi);
+ } else
+ fprintf(stderr, Name ": Defaulting to version"
+ " %s metadata\n", info.text_version);
+ }
+
if (bitmap_file && vers < 9003) {
major_num = BITMAP_MAJOR_HOSTENDIAN;
#ifdef __BIG_ENDIAN
@@ -476,12 +567,56 @@ int Create(struct supertype *st, char *mddev, int mdfd,
}
-
- if ((vers % 100) >= 1) { /* can use different versions */
+ sra = sysfs_read(mdfd, 0, 0);
+
+ if (st->ss->external) {
+ char ver[100];
+ strcat(strcpy(ver, "external:"),
+ info.text_version);
+ if (st->ss->external && st->subarray[0]) {
+ /* member */
+
+ /* When creating a member, we need to be careful
+ * to negotiate with mdmon properly.
+ * If it is already running, we cannot write to
+ * the devices and must ask it to do that part.
+ * If it isn't running, we write to the devices,
+ * and then start it.
+ * We hold an exclusive open on the container
+ * device to make sure mdmon doesn't exit after
+ * we checked that it is running.
+ *
+ * For now, fail if it is already running.
+ */
+ container_fd = open_dev_excl(st->container_dev);
+ if (container_fd < 0) {
+ fprintf(stderr, Name ": Cannot get exclusive "
+ "open on container - weird.\n");
+ return 1;
+ }
+ if (mdmon_running(st->container_dev)) {
+ if (verbose)
+ fprintf(stderr, Name ": reusing mdmon "
+ "for %s.\n",
+ devnum2devname(st->container_dev));
+ st->update_tail = &st->updates;
+ } else
+ need_mdmon = 1;
+ }
+ if ((vers % 100) < 2 ||
+ sra == NULL ||
+ sysfs_set_str(sra, NULL, "metadata_version",
+ ver) < 0) {
+ fprintf(stderr, Name ": This kernel does not "
+ "support external metadata.\n");
+ return 1;
+ }
+ rv = sysfs_set_array(sra, &info);
+ } else if ((vers % 100) >= 1) { /* can use different versions */
mdu_array_info_t inf;
memset(&inf, 0, sizeof(inf));
- inf.major_version = st->ss->major;
- inf.minor_version = st->minor_version;
+ inf.major_version = info.array.major_version;
+ inf.minor_version = info.array.minor_version;
rv = ioctl(mdfd, SET_ARRAY_INFO, &inf);
} else
rv = ioctl(mdfd, SET_ARRAY_INFO, NULL);
@@ -514,7 +649,7 @@ int Create(struct supertype *st, char *mddev, int mdfd,
}
}
-
+ infos = malloc(sizeof(*infos) * total_slots);
for (pass=1; pass <=2 ; pass++) {
mddev_dev_t moved_disk = NULL; /* the disk that was moved out of the insert point */
@@ -523,74 +658,123 @@ int Create(struct supertype *st, char *mddev, int mdfd,
dv=(dv->next)?(dv->next):moved_disk, dnum++) {
int fd;
struct stat stb;
+ struct mdinfo *inf = &infos[dnum];
- info.disk.number = dnum;
+ if (dnum >= total_slots)
+ abort();
if (dnum == insert_point) {
moved_disk = dv;
}
- info.disk.raid_disk = info.disk.number;
- if (info.disk.raid_disk < raiddisks)
- info.disk.state = (1<<MD_DISK_ACTIVE) |
+ if (dnum == insert_point ||
+ strcasecmp(dv->devname, "missing")==0)
+ continue;
+
+ switch(pass) {
+ case 1:
+ *inf = info;
+
+ inf->disk.number = dnum;
+ inf->disk.raid_disk = dnum;
+ if (inf->disk.raid_disk < raiddisks)
+ inf->disk.state = (1<<MD_DISK_ACTIVE) |
(1<<MD_DISK_SYNC);
- else
- info.disk.state = 0;
- if (dv->writemostly)
- info.disk.state |= (1<<MD_DISK_WRITEMOSTLY);
+ else
+ inf->disk.state = 0;
+
+ if (dv->writemostly)
+ inf->disk.state |= (1<<MD_DISK_WRITEMOSTLY);
+
+ if (st->ss->external && st->subarray[0])
+ fd = open(dv->devname, O_RDWR);
+ else
+ fd = open(dv->devname, O_RDWR|O_EXCL);
- if (dnum == insert_point ||
- strcasecmp(dv->devname, "missing")==0) {
- info.disk.major = 0;
- info.disk.minor = 0;
- info.disk.state = (1<<MD_DISK_FAULTY);
- } else {
- fd = open(dv->devname, O_RDONLY|O_EXCL);
if (fd < 0) {
- fprintf(stderr, Name ": failed to open %s after earlier success - aborting\n",
+ fprintf(stderr, Name ": failed to open %s "
+ "after earlier success - aborting\n",
dv->devname);
return 1;
}
fstat(fd, &stb);
- info.disk.major = major(stb.st_rdev);
- info.disk.minor = minor(stb.st_rdev);
+ inf->disk.major = major(stb.st_rdev);
+ inf->disk.minor = minor(stb.st_rdev);
+
remove_partitions(fd);
- close(fd);
- }
- switch(pass){
- case 1:
- st->ss->add_to_super(st, &info.disk);
+ st->ss->add_to_super(st, &inf->disk,
+ fd, dv->devname);
+ st->ss->getinfo_super(st, inf);
+
+ /* getinfo_super might have lost these ... */
+ inf->disk.major = major(stb.st_rdev);
+ inf->disk.minor = minor(stb.st_rdev);
break;
case 2:
- if (info.disk.state == 1) break;
- Kill(dv->devname, 0, 1); /* Just be sure it is clean */
- Kill(dv->devname, 0, 1); /* and again, there could be two superblocks */
- st->ss->write_init_super(st, &info.disk,
- dv->devname);
-
- if (ioctl(mdfd, ADD_NEW_DISK, &info.disk)) {
- fprintf(stderr, Name ": ADD_NEW_DISK for %s failed: %s\n",
+ inf->errors = 0;
+ rv = 0;
+
+ if (st->ss->external)
+ rv = sysfs_add_disk(sra, inf);
+ else
+ rv = ioctl(mdfd, ADD_NEW_DISK,
+ &inf->disk);
+
+ if (rv) {
+ fprintf(stderr,
+ Name ": ADD_NEW_DISK for %s "
+ "failed: %s\n",
dv->devname, strerror(errno));
st->ss->free_super(st);
return 1;
}
-
break;
}
if (dv == moved_disk && dnum != insert_point) break;
}
+ if (pass == 1) {
+ st->ss->write_init_super(st);
+ flush_metadata_updates(st);
+ }
}
+ free(infos);
st->ss->free_super(st);
/* param is not actually used */
- if (runstop == 1 || subdevs >= raiddisks) {
- mdu_param_t param;
- if (ioctl(mdfd, RUN_ARRAY, &param)) {
- fprintf(stderr, Name ": RUN_ARRAY failed: %s\n",
- strerror(errno));
- Manage_runstop(mddev, mdfd, -1, 0);
- return 1;
+ if (level == LEVEL_CONTAINER)
+ /* No need to start */
+ ;
+ else if (runstop == 1 || subdevs >= raiddisks) {
+ if (st->ss->external) {
+ switch(level) {
+ case LEVEL_LINEAR:
+ case LEVEL_MULTIPATH:
+ case 0:
+ sysfs_set_str(sra, NULL, "array_state",
+ "active");
+ need_mdmon = 0;
+ break;
+ default:
+ sysfs_set_str(sra, NULL, "array_state",
+ "readonly");
+ break;
+ }
+ } else {
+ mdu_param_t param;
+ if (ioctl(mdfd, RUN_ARRAY, &param)) {
+ fprintf(stderr, Name ": RUN_ARRAY failed: %s\n",
+ strerror(errno));
+ Manage_runstop(mddev, mdfd, -1, 0);
+ return 1;
+ }
}
if (verbose >= 0)
fprintf(stderr, Name ": array %s started.\n", mddev);
+ if (st->ss->external && st->subarray[0]) {
+ if (need_mdmon)
+ start_mdmon(st->container_dev);
+
+ ping_monitor(devnum2devname(st->container_dev));
+ close(container_fd);
+ }
} else {
fprintf(stderr, Name ": not starting array - not enough devices.\n");
}
diff --git a/Grow.c b/Grow.c
index a8194bf0..3a31ea54 100644
--- a/Grow.c
+++ b/Grow.c
@@ -69,7 +69,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
return 1;
}
- nfd = open(newdev, O_RDWR|O_EXCL);
+ nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT);
if (nfd < 0) {
fprintf(stderr, Name ": cannot open %s\n", newdev);
return 1;
diff --git a/Incremental.c b/Incremental.c
index 0fb9afd3..9b222206 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -40,7 +40,7 @@ int Incremental(char *devname, int verbose, int runstop,
struct supertype *st, char *homehost, int autof)
{
/* Add this device to an array, creating the array if necessary
- * and starting the array if sensibe or - if runstop>0 - if possible.
+ * and starting the array if sensible or - if runstop>0 - if possible.
*
* This has several steps:
*
@@ -140,9 +140,17 @@ int Incremental(char *devname, int verbose, int runstop,
close(dfd);
return 1;
}
- st->ss->getinfo_super(st, &info);
close (dfd);
+ if (st->ss->container_content) {
+ /* This is a pre-built container array, so we do something
+ * rather different.
+ */
+ return Incremental_container(st, devname, verbose, runstop,
+ autof);
+ }
+
+ st->ss->getinfo_super(st, &info);
/* 3/ Check if there is a match in mdadm.conf */
array_list = conf_get_ident(NULL);
@@ -229,6 +237,7 @@ int Incremental(char *devname, int verbose, int runstop,
/* Have to guess a bit. */
int use_partitions = 1;
char *np, *ep;
+ char *nm, nbuf[1024];
if ((autof&7) == 3 || (autof&7) == 5)
use_partitions = 0;
np = strchr(info.name, ':');
@@ -244,6 +253,24 @@ int Incremental(char *devname, int verbose, int runstop,
} else
devnum = -1;
+ if (match)
+ nm = match->devname;
+ else {
+ sprintf(nbuf, "/dev/md/%s", np);
+ nm = nbuf;
+ }
+ if (stat(nm, &stb) == 0 &&
+ S_ISBLK(stb.st_mode) &&
+ major(stb.st_rdev) == (use_partitions ?
+ get_mdp_major() : MD_MAJOR)) {
+ if (use_partitions)
+ devnum = minor(stb.st_rdev) >> MdpMinorShift;
+ else
+ devnum = minor(stb.st_rdev);
+ if (mddev_busy(use_partitions ? (-1-devnum) : devnum))
+ devnum = -1;
+ }
+
if (devnum < 0) {
/* Haven't found anything yet, choose something free */
devnum = find_free_devnum(use_partitions);
@@ -273,12 +300,11 @@ int Incremental(char *devname, int verbose, int runstop,
/* - add the device */
mdu_array_info_t ainf;
mdu_disk_info_t disk;
- char md[20];
struct mdinfo *sra;
memset(&ainf, 0, sizeof(ainf));
- ainf.major_version = st->ss->major;
- ainf.minor_version = st->minor_version;
+ ainf.major_version = info.array.major_version;
+ ainf.minor_version = info.array.minor_version;
if (ioctl(mdfd, SET_ARRAY_INFO, &ainf) != 0) {
fprintf(stderr, Name
": SET_ARRAY_INFO failed for %s: %s\b",
@@ -286,9 +312,8 @@ int Incremental(char *devname, int verbose, int runstop,
close(mdfd);
return 2;
}
- sprintf(md, "%d.%d\n", st->ss->major, st->minor_version);
sra = sysfs_read(mdfd, devnum, GET_VERSION);
- sysfs_set_str(sra, NULL, "metadata_version", md);
+ sysfs_set_str(sra, NULL, "metadata_version", info.text_version);
memset(&disk, 0, sizeof(disk));
disk.major = major(stb.st_rdev);
disk.minor = minor(stb.st_rdev);
@@ -325,29 +350,18 @@ int Incremental(char *devname, int verbose, int runstop,
int err;
struct mdinfo *sra;
struct supertype *st2;
- sra = sysfs_read(mdfd, devnum, (GET_VERSION | GET_DEVS |
- GET_STATE));
+ sra = sysfs_read(mdfd, devnum, (GET_DEVS | GET_STATE));
- if (sra->array.major_version != st->ss->major ||
- sra->array.minor_version != st->minor_version) {
- if (verbose >= 0)
- fprintf(stderr, Name
- ": %s has different metadata to chosen array %s %d.%d %d.%d.\n",
- devname, chosen_name,
- sra->array.major_version,
- sra->array.minor_version,
- st->ss->major, st->minor_version);
- close(mdfd);
- return 1;
- }
sprintf(dn, "%d:%d", sra->devs->disk.major,
sra->devs->disk.minor);
dfd2 = dev_open(dn, O_RDONLY);
st2 = dup_super(st);
- if (st2->ss->load_super(st2, dfd2, NULL)) {
+ if (st2->ss->load_super(st2, dfd2, NULL) ||
+ st->ss->compare_super(st, st2) != 0) {
fprintf(stderr, Name
- ": Strange error loading metadata for %s.\n",
- chosen_name);
+ ": metadata mismatch between %s and "
+ "chosen array %s\n",
+ devname, chosen_name);
close(mdfd);
close(dfd2);
return 2;
@@ -385,8 +399,7 @@ int Incremental(char *devname, int verbose, int runstop,
}
/* 6/ Make sure /var/run/mdadm.map contains this array. */
map_update(&map, devnum,
- info.array.major_version,
- info.array.minor_version,
+ info.text_version,
info.uuid, chosen_name);
/* 7/ Is there enough devices to possibly start the array? */
@@ -620,8 +633,8 @@ void RebuildMap(void)
path = map_dev(MD_MAJOR, md->devnum, 0);
else
path = map_dev(mdp, (-1-md->devnum)<< 6, 0);
- map_add(&map, md->devnum, st->ss->major,
- st->minor_version,
+ map_add(&map, md->devnum,
+ info.text_version,
info.uuid, path ? : "/unknown");
st->ss->free_super(st);
break;
@@ -708,3 +721,136 @@ int IncrementalScan(int verbose)
}
return rv;
}
+
+static char *container2devname(char *devname)
+{ /* Translate the device node at devname into a name via devnum2devname(fd2devnum(fd)). */
+ int fd = open(devname, O_RDONLY);
+ char *mdname = NULL;
+ /* mdname stays NULL when devname cannot be opened read-only */
+ if (fd >= 0) {
+ mdname = devnum2devname(fd2devnum(fd));
+ close(fd);
+ }
+ /* NOTE(review): presumably the caller owns the returned string - confirm against devnum2devname */
+ return mdname;
+}
+
+int Incremental_container(struct supertype *st, char *devname, int verbose,
+ int runstop, int autof)
+{
+ /* Collect the contents of this container and for each
+ * array, choose a device name and assemble the array.
+ * Returns 2 if the container's device name cannot be determined or a chosen md device cannot be opened, otherwise 0.
+ */
+ struct mdinfo *list = st->ss->container_content(st);
+ struct mdinfo *ra;
+ char *mdname = container2devname(devname);
+ /* NOTE(review): mdname is only tested for NULL below; nothing else in this function reads or frees it */
+ if (!mdname) {
+ fprintf(stderr, Name": failed to determine device name\n");
+ return 2;
+ }
+ /* one iteration per entry in the list returned by container_content() */
+ for (ra = list ; ra ; ra = ra->next) {
+ struct mdinfo *sra;
+ struct mdinfo *dev;
+ int devnum = -1;
+ int mdfd;
+ char chosen_name[1024];
+ int usepart = 1;
+ char *n;
+ int working = 0;
+ char ver[100];
+ /* (autof & 7) of 3 or 5 clears usepart, which selects MD_MAJOR rather than get_mdp_major() below */
+ if ((autof&7) == 3 || (autof&7) == 5)
+ usepart = 0;
+ /* a name of the form [d]<number>, optionally followed by a space, requests that number; a leading 'd' sets usepart */
+ n = ra->name;
+ if (*n == 'd')
+ n++;
+ if (*n) {
+ devnum = strtoul(n, &n, 10);
+ if (devnum >= 0 && (*n == 0 || *n == ' ')) {
+ /* Use this devnum */
+ usepart = (ra->name[0] == 'd');
+ if (mddev_busy(usepart ? (-1-devnum) : devnum))
+ devnum = -1;
+ } else
+ devnum = -1;
+ }
+ /* no usable number from the name: look for an existing /dev/md/<name> node, using the part of the name after ':' if present */
+ if (devnum < 0) {
+ char *nm = ra->name;
+ char nbuf[1024];
+ struct stat stb;
+ if (strchr(nm, ':'))
+ nm = strchr(nm, ':')+1;
+ sprintf(nbuf, "/dev/md/%s", nm);
+ /* NOTE(review): the sprintf above is unbounded; it relies on ra->name fitting in nbuf - confirm */
+ if (stat(nbuf, &stb) == 0 &&
+ S_ISBLK(stb.st_mode) &&
+ major(stb.st_rdev) == (usepart ?
+ get_mdp_major() : MD_MAJOR)){
+ if (usepart)
+ devnum = minor(stb.st_rdev)
+ >> MdpMinorShift;
+ else
+ devnum = minor(stb.st_rdev);
+ if (mddev_busy(usepart ? (-1-devnum) : devnum))
+ devnum = -1;
+ }
+ }
+ /* busy candidates were reset to -1 above; usepart devices are passed on encoded as -1-devnum */
+ if (devnum >= 0)
+ devnum = usepart ? (-1-devnum) : devnum;
+ else
+ devnum = find_free_devnum(usepart);
+ mdfd = open_mddev_devnum(NULL, devnum, ra->name,
+ chosen_name, autof>>3);
+
+ if (mdfd < 0) {
+ fprintf(stderr, Name ": failed to open %s: %s.\n",
+ chosen_name, strerror(errno));
+ return 2;
+ }
+ /* configure the array through sysfs, with metadata_version set to "external:<text_version>" */
+ sra = sysfs_read(mdfd, 0, 0);
+ /* NOTE(review): the sprintf below is unbounded; it relies on ra->text_version fitting in ver[100] - confirm */
+ sprintf(ver, "external:%s", ra->text_version);
+ sysfs_set_str(sra, NULL, "metadata_version", ver);
+
+ sysfs_set_array(sra, ra);
+ for (dev = ra->devs; dev; dev = dev->next)
+ if (sysfs_add_disk(sra, dev) == 0)
+ working++;
+ /* start when runstop > 0, or when at least ra->array.working_disks disks were added successfully */
+ if (runstop > 0 || working >= ra->array.working_disks) {
+ switch(ra->array.level) {
+ case LEVEL_LINEAR:
+ case LEVEL_MULTIPATH:
+ case 0:
+ sysfs_set_str(sra, NULL, "array_state",
+ "active");
+ break;
+ default:
+ sysfs_set_str(sra, NULL, "array_state",
+ "readonly");
+ /* start mdmon if needed. */
+ if (!mdmon_running(st->container_dev))
+ start_mdmon(st->container_dev);
+ ping_monitor(devnum2devname(st->container_dev));
+ break;
+ }
+ if (verbose >= 0)
+ printf("Started %s with %d devices\n",
+ chosen_name, working);
+ /* FIXME should have an O_EXCL and wait for read-auto */
+ } else
+ if (verbose >= 0)
+ printf("%s assembled with %d devices but "
+ "not started\n",
+ chosen_name, working);
+ close(mdfd);
+ }
+ return 0;
+}
diff --git a/Kill.c b/Kill.c
index 0a2763ea..d5c1e36d 100644
--- a/Kill.c
+++ b/Kill.c
@@ -34,7 +34,7 @@
#include "md_u.h"
#include "md_p.h"
-int Kill(char *dev, int force, int quiet)
+int Kill(char *dev, int force, int quiet, int noexcl)
{
/*
* Nothing fancy about Kill. It just zeroes out a superblock
@@ -44,7 +44,7 @@ int Kill(char *dev, int force, int quiet)
int fd, rv = 0;
struct supertype *st;
- fd = open(dev, O_RDWR|O_EXCL);
+ fd = open(dev, O_DIRECT | (noexcl ? O_RDWR : (O_RDWR|O_EXCL)));
if (fd < 0) {
if (!quiet)
fprintf(stderr, Name ": Couldn't open %s for write - not zeroing\n",
@@ -63,10 +63,8 @@ int Kill(char *dev, int force, int quiet)
if (force && rv >= 2)
rv = 0; /* ignore bad data in superblock */
if (rv== 0 || (force && rv >= 2)) {
- mdu_array_info_t info;
- info.major_version = -1; /* zero superblock */
st->ss->free_super(st);
- st->ss->init_super(st, &info, 0, "", NULL, NULL);
+ st->ss->init_super(st, NULL, 0, "", NULL, NULL);
if (st->ss->store_super(st, fd)) {
if (!quiet)
fprintf(stderr, Name ": Could not zero superblock on %s\n",
diff --git a/Makefile b/Makefile
index 52bd5505..4a19fa9a 100644
--- a/Makefile
+++ b/Makefile
@@ -69,19 +69,24 @@ MAN8DIR = $(MANDIR)/man8
OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \
Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
Incremental.o \
- mdopen.o super0.o super1.o bitmap.o restripe.o sysfs.o sha1.o \
- mapfile.o
+ mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
+ restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o
SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \
Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c \
Incremental.c \
- mdopen.c super0.c super1.c bitmap.c restripe.c sysfs.c sha1.c \
- mapfile.c
+ mdopen.c super0.c super1.c super-ddf.c super-intel.c bitmap.c \
+ restripe.c sysfs.c sha1.c mapfile.c crc32.c sg_io.c msg.c
+
+MON_OBJS = mdmon.o monitor.o managemon.o util.o mdstat.o sysfs.o config.o \
+ Kill.o sg_io.o dlink.o ReadMe.o super0.o super1.o super-intel.o \
+ super-ddf.o sha1.o crc32.o msg.o
+
STATICSRC = pwgr.c
STATICOBJS = pwgr.o
ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c dlink.c util.c \
- super0.c super1.c sha1.c
+ super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c
ASSEMBLE_AUTO_SRCS := mdopen.c mdstat.c sysfs.c
ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE
ifdef MDASSEMBLE_AUTO
@@ -89,7 +94,7 @@ ASSEMBLE_SRCS += $(ASSEMBLE_AUTO_SRCS)
ASSEMBLE_FLAGS += -DMDASSEMBLE_AUTO
endif
-all : mdadm mdadm.man md.man mdadm.conf.man
+all : mdadm mdmon mdadm.man md.man mdadm.conf.man
everything: all mdadm.static swap_super test_stripe \
mdassemble mdassemble.auto mdassemble.static mdassemble.man \
@@ -119,6 +124,10 @@ mdadm.Os : $(SRCS) mdadm.h
mdadm.O2 : $(SRCS) mdadm.h
gcc -o mdadm.O2 $(CFLAGS) -DHAVE_STDINT_H -O2 $(SRCS)
+mdmon : $(MON_OBJS)
+ $(CC) $(LDFLAGS) -o mdmon $(MON_OBJS) $(LDLIBS)
+msg.o: msg.c msg.h
+
test_stripe : restripe.c mdadm.h
$(CC) $(CXFLAGS) $(LDFLAGS) -o test_stripe -DMAIN restripe.c
@@ -161,8 +170,9 @@ $(OBJS) : mdadm.h bitmap.h
sha1.o : sha1.c sha1.h md5.h
$(CC) $(CFLAGS) -DHAVE_STDINT_H -o sha1.o -c sha1.c
-install : mdadm install-man
+install : mdadm mdmon install-man
$(INSTALL) -D $(STRIP) -m 755 mdadm $(DESTDIR)$(BINDIR)/mdadm
+ $(INSTALL) -D $(STRIP) -m 755 mdmon $(DESTDIR)$(BINDIR)/mdmon
install-static : mdadm.static install-man
$(INSTALL) -D $(STRIP) -m 755 mdadm.static $(DESTDIR)$(BINDIR)/mdadm
@@ -188,7 +198,8 @@ test: mdadm test_stripe swap_super
@echo "Please run 'sh ./test' as root"
clean :
- rm -f mdadm $(OBJS) $(STATICOBJS) core *.man mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt \
+ rm -f mdadm mdmon $(OBJS) $(MON_OBJS) $(STATICOBJS) core *.man \
+ mdadm.tcc mdadm.uclibc mdadm.static *.orig *.porig *.rej *.alt \
mdadm.Os mdadm.O2 \
mdassemble mdassemble.static mdassemble.auto mdassemble.uclibc \
mdassemble.klibc swap_super \
diff --git a/Manage.c b/Manage.c
index 8297708d..714a33b0 100644
--- a/Manage.c
+++ b/Manage.c
@@ -78,13 +78,18 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
{
/* Run or stop the array. array must already be configured
* required >= 0.90.0
+ * Only print failure messages if quiet == 0;
+ * quiet > 0 means really be quiet
+ * quiet < 0 means we will try again if it fails.
*/
mdu_param_t param; /* unused */
if (runstop == -1 && md_get_version(fd) < 9000) {
if (ioctl(fd, STOP_MD, 0)) {
- if (!quiet) fprintf(stderr, Name ": stopping device %s failed: %s\n",
- devname, strerror(errno));
+ if (quiet == 0) fprintf(stderr,
+ Name ": stopping device %s "
+ "failed: %s\n",
+ devname, strerror(errno));
return 1;
}
}
@@ -111,9 +116,51 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
} else if (runstop < 0){
struct map_ent *map = NULL;
struct stat stb;
- if (ioctl(fd, STOP_ARRAY, NULL)) {
- if (quiet==0) {
- fprintf(stderr, Name ": fail to stop array %s: %s\n",
+ struct mdinfo *mdi;
+ /* If this is an mdmon managed array, just write 'inactive'
+ * to the array state and let mdmon clear up.
+ */
+ mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
+ if (mdi &&
+ mdi->array.level > 0 &&
+ mdi->text_version[0] == '/') {
+ char *cp;
+
+ /* This is mdmon managed. */
+ close(fd);
+ if (sysfs_set_str(mdi, NULL,
+ "array_state", "inactive") < 0) {
+ if (quiet == 0)
+ fprintf(stderr, Name
+ ": failed to stop array %s: %s\n",
+ devname, strerror(errno));
+ return 1;
+ }
+
+ /* Give monitor a chance to act */
+ cp = strchr(mdi->text_version+1, '/');
+ if (*cp)
+ *cp = 0;
+ ping_monitor(mdi->text_version+1);
+
+ fd = open(devname, O_RDONLY);
+ } else if (mdi &&
+ mdi->array.major_version == -1 &&
+ mdi->array.minor_version == -2 &&
+ mdi->text_version[0] != '/') {
+ /* container, possibly mdmon-managed.
+ * Make sure mdmon isn't opening it, which
+ * would interfere with the 'stop'
+ */
+ ping_monitor(mdi->sys_name);
+ }
+ if (mdi)
+ sysfs_free(mdi);
+
+ if (fd >= 0 && ioctl(fd, STOP_ARRAY, NULL)) {
+ if (quiet == 0) {
+ fprintf(stderr, Name
+ ": failed to stop array %s: %s\n",
devname, strerror(errno));
if (errno == EBUSY)
fprintf(stderr, "Perhaps a running "
@@ -122,9 +169,10 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
}
return 1;
}
+
if (quiet <= 0)
fprintf(stderr, Name ": stopped %s\n", devname);
- if (fstat(fd, &stb) == 0) {
+ if (fd >= 0 && fstat(fd, &stb) == 0) {
int devnum;
if (major(stb.st_rdev) == MD_MAJOR)
devnum = minor(stb.st_rdev);
@@ -201,6 +249,7 @@ int Manage_subdevs(char *devname, int fd,
struct supertype *st, *tst;
int duuid[4];
int ouuid[4];
+ int lfd = -1;
if (ioctl(fd, GET_ARRAY_INFO, &array)) {
fprintf(stderr, Name ": cannot get array info for %s\n",
@@ -227,6 +276,7 @@ int Manage_subdevs(char *devname, int fd,
unsigned long long ldsize;
char dvname[20];
char *dnprintable = dv->devname;
+ int err;
next = dv->next;
jnext = 0;
@@ -311,9 +361,14 @@ int Manage_subdevs(char *devname, int fd,
return 1;
case 'a':
/* add the device */
-
+ if (tst->subarray[0]) {
+ fprintf(stderr, Name ": Cannot add disks to a"
+ " \'member\' array, perform this"
+ " operation on the parent container\n");
+ return 1;
+ }
/* Make sure it isn't in use (in 2.6 or later) */
- tfd = open(dv->devname, O_RDONLY|O_EXCL);
+ tfd = open(dv->devname, O_RDONLY|O_EXCL|O_DIRECT);
if (tfd < 0) {
fprintf(stderr, Name ": Cannot open %s: %s\n",
dv->devname, strerror(errno));
@@ -332,7 +387,9 @@ int Manage_subdevs(char *devname, int fd,
}
close(tfd);
- if (array.major_version == 0 &&
+
+ if (!tst->ss->external &&
+ array.major_version == 0 &&
md_get_version(fd)%100 < 2) {
if (ioctl(fd, HOT_ADD_DISK,
(unsigned long)stb.st_rdev)==0) {
@@ -451,11 +508,14 @@ int Manage_subdevs(char *devname, int fd,
disc.number =j;
disc.state = 0;
if (array.not_persistent==0) {
+ int dfd;
if (dv->writemostly)
disc.state |= 1 << MD_DISK_WRITEMOSTLY;
- tst->ss->add_to_super(tst, &disc);
- if (tst->ss->write_init_super(tst, &disc,
- dv->devname))
+ dfd = open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
+ tst->ss->add_to_super(tst, &disc, dfd,
+ dv->devname);
+ /* write_init_super will close 'dfd' */
+ if (tst->ss->write_init_super(tst))
return 1;
} else if (dv->re_add) {
/* this had better be raid1.
@@ -499,13 +559,70 @@ int Manage_subdevs(char *devname, int fd,
case 'r':
/* hot remove */
+ if (tst->subarray[0]) {
+ fprintf(stderr, Name ": Cannot remove disks from a"
+ " \'member\' array, perform this"
+ " operation on the parent container\n");
+ return 1;
+ }
+ if (tst->ss->external) {
+ /* To remove a device from a container, we must
+ * check that it isn't in use in an array.
+ * This involves looking in the 'holders'
+ * directory - there must be just one entry,
+ * the container.
+ * To ensure that it doesn't get used as a
+ * hot spare while we are checking, we
+ * get an O_EXCL open on the container
+ */
+ int dnum = fd2devnum(fd);
+ lfd = open_dev_excl(dnum);
+ if (lfd < 0) {
+ fprintf(stderr, Name
+ ": Cannot get exclusive access "
+ " to container - odd\n");
+ return 1;
+ }
+ if (!sysfs_unique_holder(dnum, stb.st_rdev)) {
+ fprintf(stderr, Name
+ ": %s is %s, cannot remove.\n",
+ dnprintable,
+ errno == EEXIST ? "still in use":
+ "not a member");
+ close(lfd);
+ return 1;
+ }
+ }
/* FIXME check that it is a current member */
- if (ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev)) {
+ err = ioctl(fd, HOT_REMOVE_DISK, (unsigned long)stb.st_rdev);
+ if (err && errno == ENODEV) {
+ /* Old kernels rejected this if no personality
+ * registered */
+ struct mdinfo *sra = sysfs_read(fd, 0, GET_DEVS);
+ struct mdinfo *dv = NULL;
+ if (sra)
+ dv = sra->devs;
+ for ( ; dv ; dv=dv->next)
+ if (dv->disk.major == major(stb.st_rdev) &&
+ dv->disk.minor == minor(stb.st_rdev))
+ break;
+ if (dv)
+ err = sysfs_set_str(sra, dv,
+ "state", "remove");
+ else
+ err = -1;
+ if (sra)
+ sysfs_free(sra);
+ }
+ if (err) {
fprintf(stderr, Name ": hot remove failed "
"for %s: %s\n", dnprintable,
strerror(errno));
+ if (lfd >= 0)
+ close(lfd);
return 1;
}
+ /* lfd is still -1 unless the container was opened
+ * exclusively above; only close a real descriptor,
+ * as the failure path does.
+ */
+ if (lfd >= 0)
+ close(lfd);
if (verbose >= 0)
fprintf(stderr, Name ": hot removed %s\n",
dnprintable);
diff --git a/Query.c b/Query.c
index 190ee298..dc69eb82 100644
--- a/Query.c
+++ b/Query.c
@@ -96,7 +96,7 @@ int Query(char *dev)
if (superror == 0) {
/* array might be active... */
st->ss->getinfo_super(st, &info);
- if (st->ss->major == 0) {
+ if (st->ss == &super0) {
mddev = get_md_name(info.array.md_minor);
disc.number = info.disk.number;
activity = "undetected";
@@ -121,7 +121,7 @@ int Query(char *dev)
activity,
map_num(pers, info.array.level),
mddev);
- if (st->ss->major == 0)
+ if (st->ss == &super0)
put_md_name(mddev);
}
return 0;
diff --git a/ReadMe.c b/ReadMe.c
index 03188943..12ed17f9 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -612,6 +612,7 @@ mapping_t pers[] = {
{ "raid10", 10},
{ "10", 10},
{ "faulty", LEVEL_FAULTY},
+ { "container", LEVEL_CONTAINER},
{ NULL, 0}
};
diff --git a/TODO b/TODO
index f79163b8..279d20db 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1,38 @@
+ - add 'name' field to metadata type and use it.
+ - use validate_geometry more
+ - metadata should be able to check/reject bitmap stuff.
+
+DDF:
+ Three new metadata types:
+ ddf - used only to create a container.
+ ddf-bvd - used to create an array in a container
+ ddf-svd - used to create a secondary array from bvds.
+
+ Usage:
+ mdadm -C /dev/ddf1 /dev/sd[abcdef]
+ mdadm -C /dev/md1 -e ddf /dev/sd[a-f]
+ mdadm -C /dev/md1 -l container /dev/sd[a-f]
+
+ Each of these creates a new ddf container using all those
+ devices. The name 'ddf*' signals that ddf metadata should be used.
+ '-e ddf' only supports one level - 'container'. 'container' is only
+ supported by ddf.
+
+ mdadm -C /dev/md1 -l0 -n4 /dev/ddf1 # or maybe not ???
+ mdadm -C /dev/md1 -l1 -n2 /dev/sda /dev/sdb
+ If exactly one device is given, and it is a container, we select
+ devices from that container.
+ If devices are given that are already in use, they must be in use by
+ a container, and the array is created in the container.
+ If devices given are bvds, we slip under the hood to make
+ the svd arrays.
+
+ mdadm -A /dev/ddf ......
+ base drives make a container. Anything in that container is started
+ auto-read-only.
+ if /dev/ddf is already assembled, we assemble bvds and svds inside it.
+
+
2005-dec-20
Want an incremental assembly mode to work nicely with udev.
Core usage would be something like
diff --git a/bitmap.c b/bitmap.c
index fdf8884d..86176696 100644
--- a/bitmap.c
+++ b/bitmap.c
@@ -122,11 +122,10 @@ bitmap_info_t *bitmap_fd_read(int fd, int brief)
*/
unsigned long long total_bits = 0, read_bits = 0, dirty_bits = 0;
bitmap_info_t *info;
- char *buf, *unaligned;
+ void *buf;
int n, skip;
- unaligned = malloc(8192*2);
- buf = (char*) ((unsigned long)unaligned | 8191)+1;
+ /* posix_memalign() reports failure through its return value and
+ * does not guarantee that buf is set, so give up before read()
+ * is handed the pointer.
+ */
+ if (posix_memalign(&buf, 512, 8192) != 0) {
+ fprintf(stderr, Name ": failed to allocate 8192 bytes\n");
+ return NULL;
+ }
n = read(fd, buf, 8192);
info = malloc(sizeof(*info));
@@ -145,7 +144,6 @@ bitmap_info_t *bitmap_fd_read(int fd, int brief)
fprintf(stderr, Name ": failed to read superblock of bitmap "
"file: %s\n", strerror(errno));
free(info);
- free(unaligned);
return NULL;
}
memcpy(&info->sb, buf, sizeof(info->sb));
diff --git a/crc32.c b/crc32.c
new file mode 100644
index 00000000..12d08e52
--- /dev/null
+++ b/crc32.c
@@ -0,0 +1,340 @@
+/* crc32.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-2003 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
+ * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
+ * tables for updating the shift register in one step with three exclusive-ors
+ * instead of four steps with four exclusive-ors. This results in about a factor
+ * of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
+ */
+
+/* @(#) $Id$ */
+
+/*
+ Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
+ protection on the static variables used to control the first-use generation
+ of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
+ first call get_crc_table() to initialize the tables before allowing more than
+ one thread to use crc32().
+ */
+
+#ifdef MAKECRCH
+# include <stdio.h>
+# ifndef DYNAMIC_CRC_TABLE
+# define DYNAMIC_CRC_TABLE
+# endif /* !DYNAMIC_CRC_TABLE */
+#endif /* MAKECRCH */
+
+/* #include "zutil.h" / * for STDC and FAR definitions */
+#define STDC
+#define FAR
+#define Z_NULL ((void*)0)
+#define OF(X) X
+#define ZEXPORT
+typedef long ptrdiff_t;
+#define NOBYFOUR
+
+#define local static
+
+/* Find a four-byte integer type for crc32_little() and crc32_big(). */
+#ifndef NOBYFOUR
+# ifdef STDC /* need ANSI C limits.h to determine sizes */
+# include <limits.h>
+# define BYFOUR
+# if (UINT_MAX == 0xffffffffUL)
+ typedef unsigned int u4;
+# else
+# if (ULONG_MAX == 0xffffffffUL)
+ typedef unsigned long u4;
+# else
+# if (USHRT_MAX == 0xffffffffUL)
+ typedef unsigned short u4;
+# else
+# undef BYFOUR /* can't find a four-byte integer type! */
+# endif
+# endif
+# endif
+# endif /* STDC */
+#endif /* !NOBYFOUR */
+
+/* Definitions for doing the crc four data bytes at a time. */
+#ifdef BYFOUR
+# define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \
+ (((w)&0xff00)<<8)+(((w)&0xff)<<24))
+ local unsigned long crc32_little OF((unsigned long,
+ const unsigned char FAR *, unsigned));
+ local unsigned long crc32_big OF((unsigned long,
+ const unsigned char FAR *, unsigned));
+# define TBLS 8
+#else
+# define TBLS 1
+#endif /* BYFOUR */
+
+#ifdef DYNAMIC_CRC_TABLE
+
+local volatile int crc_table_empty = 1;
+local unsigned long FAR crc_table[TBLS][256];
+local void make_crc_table OF((void));
+#ifdef MAKECRCH
+ local void write_table OF((FILE *, const unsigned long FAR *));
+#endif /* MAKECRCH */
+
+/*
+ Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
+ x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
+
+ Polynomials over GF(2) are represented in binary, one bit per coefficient,
+ with the lowest powers in the most significant bit. Then adding polynomials
+ is just exclusive-or, and multiplying a polynomial by x is a right shift by
+ one. If we call the above polynomial p, and represent a byte as the
+ polynomial q, also with the lowest power in the most significant bit (so the
+ byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
+ where a mod b means the remainder after dividing a by b.
+
+ This calculation is done using the shift-register method of multiplying and
+ taking the remainder. The register is initialized to zero, and for each
+ incoming bit, x^32 is added mod p to the register if the bit is a one (where
+ x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
+ x (which is shifting right by one and adding x^32 mod p if the bit shifted
+ out is a one). We start with the highest power (least significant bit) of
+ q and repeat for all eight bits of q.
+
+ The first table is simply the CRC of all possible eight bit values. This is
+ all the information needed to generate CRCs on data a byte at a time for all
+ combinations of CRC register values and incoming bytes. The remaining tables
+ allow for word-at-a-time CRC calculation for both big-endian and little-
+ endian machines, where a word is four bytes.
+*/
+local void make_crc_table()
+{
+ unsigned long c;
+ int n, k;
+ unsigned long poly; /* polynomial exclusive-or pattern */
+ /* terms of polynomial defining this crc (except x^32): */
+ static volatile int first = 1; /* flag to limit concurrent making */
+ static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
+
+ /* See if another task is already doing this (not thread-safe, but better
+ than nothing -- significantly reduces duration of vulnerability in
+ case the advice about DYNAMIC_CRC_TABLE is ignored) */
+ if (first) {
+ first = 0;
+
+ /* make exclusive-or pattern from polynomial (0xedb88320UL) */
+ poly = 0UL;
+ for (n = 0; n < sizeof(p)/sizeof(unsigned char); n++)
+ poly |= 1UL << (31 - p[n]);
+
+ /* generate a crc for every 8-bit value */
+ for (n = 0; n < 256; n++) {
+ c = (unsigned long)n;
+ for (k = 0; k < 8; k++)
+ c = c & 1 ? poly ^ (c >> 1) : c >> 1;
+ crc_table[0][n] = c;
+ }
+
+#ifdef BYFOUR
+ /* generate crc for each value followed by one, two, and three zeros,
+ and then the byte reversal of those as well as the first table */
+ for (n = 0; n < 256; n++) {
+ c = crc_table[0][n];
+ crc_table[4][n] = REV(c);
+ for (k = 1; k < 4; k++) {
+ c = crc_table[0][c & 0xff] ^ (c >> 8);
+ crc_table[k][n] = c;
+ crc_table[k + 4][n] = REV(c);
+ }
+ }
+#endif /* BYFOUR */
+
+ crc_table_empty = 0;
+ }
+ else { /* not first */
+ /* wait for the other guy to finish (not efficient, but rare) */
+ while (crc_table_empty)
+ ;
+ }
+
+#ifdef MAKECRCH
+ /* write out CRC tables to crc32.h */
+ {
+ FILE *out;
+
+ out = fopen("crc32.h", "w");
+ if (out == NULL) return;
+ fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
+ fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
+ fprintf(out, "local const unsigned long FAR ");
+ fprintf(out, "crc_table[TBLS][256] =\n{\n {\n");
+ write_table(out, crc_table[0]);
+# ifdef BYFOUR
+ fprintf(out, "#ifdef BYFOUR\n");
+ for (k = 1; k < 8; k++) {
+ fprintf(out, " },\n {\n");
+ write_table(out, crc_table[k]);
+ }
+ fprintf(out, "#endif\n");
+# endif /* BYFOUR */
+ fprintf(out, " }\n};\n");
+ fclose(out);
+ }
+#endif /* MAKECRCH */
+}
+
+#ifdef MAKECRCH
+local void write_table(out, table)
+ FILE *out;
+ const unsigned long FAR *table;
+{
+ int n;
+
+ for (n = 0; n < 256; n++)
+ fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", table[n],
+ n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", "));
+}
+#endif /* MAKECRCH */
+
+#else /* !DYNAMIC_CRC_TABLE */
+/* ========================================================================
+ * Tables of CRC-32s of all single-byte values, made by make_crc_table().
+ */
+#include "crc32.h"
+#endif /* DYNAMIC_CRC_TABLE */
+
+/* =========================================================================
+ * This function can be used by asm versions of crc32()
+ */
+const unsigned long FAR * ZEXPORT get_crc_table(void)
+{
+#ifdef DYNAMIC_CRC_TABLE
+ /* build the tables on first use when they are generated at run time */
+ if (crc_table_empty)
+ make_crc_table();
+#endif /* DYNAMIC_CRC_TABLE */
+ /* crc_table is a [TBLS][256] array; hand it back as a flat pointer */
+ return (const unsigned long FAR *)crc_table;
+}
+
+/* ========================================================================= */
+#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
+#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
+
+/* ========================================================================= */
+/*
+ * Fold len bytes starting at buf into the running value crc and return
+ * the result; returns 0UL when buf is Z_NULL.
+ * NOTE: in the byte-wise path below the 0xffffffffUL pre- and
+ * post-conditioning XORs are commented out, so the value passed in and
+ * the value returned are the raw register contents.
+ */
+unsigned long ZEXPORT crc32(
+ unsigned long crc,
+ const unsigned char FAR *buf,
+ unsigned len)
+{
+ if (buf == Z_NULL) return 0UL;
+
+#ifdef DYNAMIC_CRC_TABLE
+ /* build the lookup tables on first use */
+ if (crc_table_empty)
+ make_crc_table();
+#endif /* DYNAMIC_CRC_TABLE */
+
+#ifdef BYFOUR
+ /* Word-at-a-time variants, chosen by a run-time endianness probe.
+ * NOBYFOUR is defined near the top of this file, which keeps the
+ * block that defines BYFOUR from being compiled.
+ */
+ if (sizeof(void *) == sizeof(ptrdiff_t)) {
+ u4 endian;
+
+ endian = 1;
+ if (*((unsigned char *)(&endian)))
+ return crc32_little(crc, buf, len);
+ else
+ return crc32_big(crc, buf, len);
+ }
+#endif /* BYFOUR */
+/* crc = crc ^ 0xffffffffUL;*/
+ /* eight table lookups per pass while at least 8 bytes remain */
+ while (len >= 8) {
+ DO8;
+ len -= 8;
+ }
+ /* then the remaining 0-7 bytes one at a time */
+ if (len) do {
+ DO1;
+ } while (--len);
+ return crc /* ^ 0xffffffffUL*/;
+}
+
+#ifdef BYFOUR
+
+/* ========================================================================= */
+#define DOLIT4 c ^= *buf4++; \
+ c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
+ crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
+#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
+
+/* ========================================================================= */
+local unsigned long crc32_little(crc, buf, len)
+ unsigned long crc;
+ const unsigned char FAR *buf;
+ unsigned len;
+{
+ register u4 c;
+ register const u4 FAR *buf4;
+
+ c = (u4)crc;
+ c = ~c;
+ while (len && ((ptrdiff_t)buf & 3)) {
+ c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+ len--;
+ }
+
+ buf4 = (const u4 FAR *)buf;
+ while (len >= 32) {
+ DOLIT32;
+ len -= 32;
+ }
+ while (len >= 4) {
+ DOLIT4;
+ len -= 4;
+ }
+ buf = (const unsigned char FAR *)buf4;
+
+ if (len) do {
+ c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+ } while (--len);
+ c = ~c;
+ return (unsigned long)c;
+}
+
+/* ========================================================================= */
+#define DOBIG4 c ^= *++buf4; \
+ c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
+ crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
+#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
+
+/* ========================================================================= */
+local unsigned long crc32_big(crc, buf, len)
+ unsigned long crc;
+ const unsigned char FAR *buf;
+ unsigned len;
+{
+ register u4 c;
+ register const u4 FAR *buf4;
+
+ c = REV((u4)crc);
+ c = ~c;
+ while (len && ((ptrdiff_t)buf & 3)) {
+ c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
+ len--;
+ }
+
+ buf4 = (const u4 FAR *)buf;
+ buf4--;
+ while (len >= 32) {
+ DOBIG32;
+ len -= 32;
+ }
+ while (len >= 4) {
+ DOBIG4;
+ len -= 4;
+ }
+ buf4++;
+ buf = (const unsigned char FAR *)buf4;
+
+ if (len) do {
+ c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
+ } while (--len);
+ c = ~c;
+ return (unsigned long)(REV(c));
+}
+
+#endif /* BYFOUR */
diff --git a/crc32.h b/crc32.h
new file mode 100644
index 00000000..8053b611
--- /dev/null
+++ b/crc32.h
@@ -0,0 +1,441 @@
+/* crc32.h -- tables for rapid CRC calculation
+ * Generated automatically by crc32.c
+ */
+
+local const unsigned long FAR crc_table[TBLS][256] =
+{
+ {
+ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+ 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+ 0x2d02ef8dUL
+#ifdef BYFOUR
+ },
+ {
+ 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL,
+ 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL,
+ 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL,
+ 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL,
+ 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL,
+ 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL,
+ 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL,
+ 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL,
+ 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL,
+ 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL,
+ 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL,
+ 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL,
+ 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL,
+ 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL,
+ 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL,
+ 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL,
+ 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL,
+ 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL,
+ 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL,
+ 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL,
+ 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL,
+ 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL,
+ 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL,
+ 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL,
+ 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL,
+ 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL,
+ 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL,
+ 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL,
+ 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL,
+ 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL,
+ 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL,
+ 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL,
+ 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL,
+ 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL,
+ 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL,
+ 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL,
+ 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL,
+ 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL,
+ 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL,
+ 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL,
+ 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL,
+ 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL,
+ 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL,
+ 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL,
+ 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL,
+ 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL,
+ 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL,
+ 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL,
+ 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL,
+ 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL,
+ 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL,
+ 0x9324fd72UL
+ },
+ {
+ 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL,
+ 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL,
+ 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL,
+ 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL,
+ 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL,
+ 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL,
+ 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL,
+ 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL,
+ 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL,
+ 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL,
+ 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL,
+ 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL,
+ 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL,
+ 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL,
+ 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL,
+ 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL,
+ 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL,
+ 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL,
+ 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL,
+ 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL,
+ 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL,
+ 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL,
+ 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL,
+ 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL,
+ 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL,
+ 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL,
+ 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL,
+ 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL,
+ 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL,
+ 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL,
+ 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL,
+ 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL,
+ 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL,
+ 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL,
+ 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL,
+ 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL,
+ 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL,
+ 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL,
+ 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL,
+ 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL,
+ 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL,
+ 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL,
+ 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL,
+ 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL,
+ 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL,
+ 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL,
+ 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL,
+ 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL,
+ 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL,
+ 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL,
+ 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL,
+ 0xbe9834edUL
+ },
+ {
+ 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL,
+ 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL,
+ 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL,
+ 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL,
+ 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL,
+ 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL,
+ 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL,
+ 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL,
+ 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL,
+ 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL,
+ 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL,
+ 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL,
+ 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL,
+ 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL,
+ 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL,
+ 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL,
+ 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL,
+ 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL,
+ 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL,
+ 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL,
+ 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL,
+ 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL,
+ 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL,
+ 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL,
+ 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL,
+ 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL,
+ 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL,
+ 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL,
+ 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL,
+ 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL,
+ 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL,
+ 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL,
+ 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL,
+ 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL,
+ 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL,
+ 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL,
+ 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL,
+ 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL,
+ 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL,
+ 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL,
+ 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL,
+ 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL,
+ 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL,
+ 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL,
+ 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL,
+ 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL,
+ 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL,
+ 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL,
+ 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL,
+ 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
+ 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
+ 0xde0506f1UL
+ },
+ {
+ 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL,
+ 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL,
+ 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL,
+ 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL,
+ 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL,
+ 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL,
+ 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL,
+ 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL,
+ 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL,
+ 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL,
+ 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL,
+ 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL,
+ 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL,
+ 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL,
+ 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL,
+ 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL,
+ 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL,
+ 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL,
+ 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL,
+ 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL,
+ 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL,
+ 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL,
+ 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL,
+ 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL,
+ 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL,
+ 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL,
+ 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL,
+ 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL,
+ 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL,
+ 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL,
+ 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL,
+ 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL,
+ 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL,
+ 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL,
+ 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL,
+ 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL,
+ 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL,
+ 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL,
+ 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL,
+ 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL,
+ 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL,
+ 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL,
+ 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL,
+ 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL,
+ 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL,
+ 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL,
+ 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL,
+ 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL,
+ 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL,
+ 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL,
+ 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL,
+ 0x8def022dUL
+ },
+ {
+ 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL,
+ 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL,
+ 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL,
+ 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL,
+ 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL,
+ 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL,
+ 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL,
+ 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL,
+ 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL,
+ 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL,
+ 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL,
+ 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL,
+ 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL,
+ 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL,
+ 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL,
+ 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL,
+ 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL,
+ 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL,
+ 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL,
+ 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL,
+ 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL,
+ 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL,
+ 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL,
+ 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL,
+ 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL,
+ 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL,
+ 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL,
+ 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL,
+ 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL,
+ 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL,
+ 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL,
+ 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL,
+ 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL,
+ 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL,
+ 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL,
+ 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL,
+ 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL,
+ 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL,
+ 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL,
+ 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL,
+ 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL,
+ 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL,
+ 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL,
+ 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL,
+ 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL,
+ 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL,
+ 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL,
+ 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL,
+ 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL,
+ 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL,
+ 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL,
+ 0x72fd2493UL
+ },
+ {
+ 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL,
+ 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL,
+ 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL,
+ 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL,
+ 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL,
+ 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL,
+ 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL,
+ 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL,
+ 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL,
+ 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL,
+ 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL,
+ 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL,
+ 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL,
+ 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL,
+ 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL,
+ 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL,
+ 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL,
+ 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL,
+ 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL,
+ 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL,
+ 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL,
+ 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL,
+ 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL,
+ 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL,
+ 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL,
+ 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL,
+ 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL,
+ 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL,
+ 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL,
+ 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL,
+ 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL,
+ 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL,
+ 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL,
+ 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL,
+ 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL,
+ 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL,
+ 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL,
+ 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL,
+ 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL,
+ 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL,
+ 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL,
+ 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL,
+ 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL,
+ 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL,
+ 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL,
+ 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL,
+ 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL,
+ 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL,
+ 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL,
+ 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL,
+ 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL,
+ 0xed3498beUL
+ },
+ {
+ 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL,
+ 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL,
+ 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL,
+ 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL,
+ 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL,
+ 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL,
+ 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL,
+ 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL,
+ 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL,
+ 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL,
+ 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL,
+ 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL,
+ 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL,
+ 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL,
+ 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL,
+ 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL,
+ 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL,
+ 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL,
+ 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL,
+ 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL,
+ 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL,
+ 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL,
+ 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL,
+ 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL,
+ 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL,
+ 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL,
+ 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL,
+ 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL,
+ 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL,
+ 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL,
+ 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL,
+ 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL,
+ 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL,
+ 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL,
+ 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL,
+ 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL,
+ 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL,
+ 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL,
+ 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL,
+ 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL,
+ 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL,
+ 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL,
+ 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL,
+ 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL,
+ 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL,
+ 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL,
+ 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL,
+ 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL,
+ 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL,
+ 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL,
+ 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL,
+ 0xf10605deUL
+#endif
+ }
+};
diff --git a/kernel-patch-2.6.25 b/kernel-patch-2.6.25
new file mode 100644
index 00000000..23290078
--- /dev/null
+++ b/kernel-patch-2.6.25
@@ -0,0 +1,199 @@
+Status: ok
+
+Support adding a spare to a live md array with external metadata.
+
+i.e. extend the 'md/dev-XXX/slot' attribute so that you can
+tell a device to fill a vacant slot in an md array.
+
+
+Signed-off-by: Neil Brown <neilb@suse.de>
+
+### Diffstat output
+ ./drivers/md/md.c | 44 ++++++++++++++++++++++++++++++++++++++++----
+ ./drivers/md/multipath.c | 7 ++++++-
+ ./drivers/md/raid1.c | 7 ++++++-
+ ./drivers/md/raid10.c | 10 ++++++++--
+ ./drivers/md/raid5.c | 10 ++++++++--
+ 5 files changed, 68 insertions(+), 10 deletions(-)
+
+diff .prev/drivers/md/md.c ./drivers/md/md.c
+--- .prev/drivers/md/md.c 2008-06-05 09:19:56.000000000 +1000
++++ ./drivers/md/md.c 2008-06-10 10:41:21.000000000 +1000
+@@ -1932,7 +1932,7 @@ slot_store(mdk_rdev_t *rdev, const char
+ slot = -1;
+ else if (e==buf || (*e && *e!= '\n'))
+ return -EINVAL;
+- if (rdev->mddev->pers) {
++ if (rdev->mddev->pers && slot == -1) {
+ /* Setting 'slot' on an active array requires also
+ * updating the 'rd%d' link, and communicating
+ * with the personality with ->hot_*_disk.
+@@ -1940,8 +1940,6 @@ slot_store(mdk_rdev_t *rdev, const char
+ * failed/spare devices. This normally happens automatically,
+ * but not when the metadata is externally managed.
+ */
+- if (slot != -1)
+- return -EBUSY;
+ if (rdev->raid_disk == -1)
+ return -EEXIST;
+ /* personality does all needed checks */
+@@ -1955,6 +1953,44 @@ slot_store(mdk_rdev_t *rdev, const char
+ sysfs_remove_link(&rdev->mddev->kobj, nm);
+ set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+ md_wakeup_thread(rdev->mddev->thread);
++ } else if (rdev->mddev->pers) {
++ mdk_rdev_t *rdev2;
++ struct list_head *tmp;
++ /* Activating a spare .. or possibly reactivating
++ * if we ever get bitmaps working here.
++ */
++
++ if (rdev->raid_disk != -1)
++ return -EBUSY;
++
++ if (rdev->mddev->pers->hot_add_disk == NULL)
++ return -EINVAL;
++
++ rdev_for_each(rdev2, tmp, rdev->mddev)
++ if (rdev2->raid_disk == slot)
++ return -EEXIST;
++
++ rdev->raid_disk = slot;
++ if (test_bit(In_sync, &rdev->flags))
++ rdev->saved_raid_disk = slot;
++ else
++ rdev->saved_raid_disk = -1;
++ err = rdev->mddev->pers->
++ hot_add_disk(rdev->mddev, rdev);
++ if (err != 1) {
++ rdev->raid_disk = -1;
++ if (err == 0)
++ return -EEXIST;
++ return err;
++ }
++ sprintf(nm, "rd%d", rdev->raid_disk);
++ if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
++ printk(KERN_WARNING
++ "md: cannot register "
++ "%s for %s\n",
++ nm, mdname(rdev->mddev));
++
++ /* don't wakeup anyone, leave that to userspace. */
+ } else {
+ if (slot >= rdev->mddev->raid_disks)
+ return -ENOSPC;
+@@ -4205,7 +4241,7 @@ static int add_new_disk(mddev_t * mddev,
+ super_types[mddev->major_version].
+ validate_super(mddev, rdev);
+ err = mddev->pers->hot_add_disk(mddev, rdev);
+- if (err)
++ if (err < 0)
+ unbind_rdev_from_array(rdev);
+ }
+ if (err)
+
+diff .prev/drivers/md/multipath.c ./drivers/md/multipath.c
+--- .prev/drivers/md/multipath.c 2008-05-30 14:49:31.000000000 +1000
++++ ./drivers/md/multipath.c 2008-06-10 10:35:03.000000000 +1000
+@@ -284,10 +284,15 @@ static int multipath_add_disk(mddev_t *m
+ int found = 0;
+ int path;
+ struct multipath_info *p;
++ int first = 0;
++ int last = mddev->raid_disks - 1;
++
++ if (rdev->raid_disk >= 0)
++ first = last = rdev->raid_disk;
+
+ print_multipath_conf(conf);
+
+- for (path=0; path<mddev->raid_disks; path++)
++ for (path = first; path <= last; path++)
+ if ((p=conf->multipaths+path)->rdev == NULL) {
+ q = rdev->bdev->bd_disk->queue;
+ blk_queue_stack_limits(mddev->queue, q);
+
+diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
+--- .prev/drivers/md/raid10.c 2008-05-30 14:49:31.000000000 +1000
++++ ./drivers/md/raid10.c 2008-06-10 10:28:53.000000000 +1000
+@@ -1116,6 +1116,8 @@ static int raid10_add_disk(mddev_t *mdde
+ int found = 0;
+ int mirror;
+ mirror_info_t *p;
++ int first = 0;
++ int last = mddev->raid_disks - 1;
+
+ if (mddev->recovery_cp < MaxSector)
+ /* only hot-add to in-sync arrays, as recovery is
+@@ -1125,12 +1127,16 @@ static int raid10_add_disk(mddev_t *mdde
+ if (!enough(conf))
+ return 0;
+
++	if (rdev->raid_disk >= 0)
++		first = last = rdev->raid_disk;
++
+ if (rdev->saved_raid_disk >= 0 &&
++ rdev->saved_raid_disk >= first &&
+ conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
+ mirror = rdev->saved_raid_disk;
+ else
+- mirror = 0;
+- for ( ; mirror < mddev->raid_disks; mirror++)
++ mirror = first;
++ for ( ; mirror <= last ; mirror++)
+ if ( !(p=conf->mirrors+mirror)->rdev) {
+
+ blk_queue_stack_limits(mddev->queue,
+
+diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
+--- .prev/drivers/md/raid1.c 2008-05-30 14:49:31.000000000 +1000
++++ ./drivers/md/raid1.c 2008-06-10 10:41:00.000000000 +1000
+@@ -1103,8 +1103,13 @@ static int raid1_add_disk(mddev_t *mddev
+ int found = 0;
+ int mirror = 0;
+ mirror_info_t *p;
++ int first = 0;
++ int last = mddev->raid_disks - 1;
+
+- for (mirror=0; mirror < mddev->raid_disks; mirror++)
++ if (rdev->raid_disk >= 0)
++ first = last = rdev->raid_disk;
++
++ for (mirror = first; mirror <= last; mirror++)
+ if ( !(p=conf->mirrors+mirror)->rdev) {
+
+ blk_queue_stack_limits(mddev->queue,
+
+diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
+--- .prev/drivers/md/raid5.c 2008-05-30 14:49:35.000000000 +1000
++++ ./drivers/md/raid5.c 2008-06-10 10:27:51.000000000 +1000
+@@ -4399,21 +4399,27 @@ static int raid5_add_disk(mddev_t *mddev
+ int found = 0;
+ int disk;
+ struct disk_info *p;
++ int first = 0;
++ int last = conf->raid_disks - 1;
+
+ if (mddev->degraded > conf->max_degraded)
+ /* no point adding a device */
+ return 0;
+
++ if (rdev->raid_disk >= 0)
++ first = last = rdev->raid_disk;
++
+ /*
+ * find the disk ... but prefer rdev->saved_raid_disk
+ * if possible.
+ */
+ if (rdev->saved_raid_disk >= 0 &&
++ rdev->saved_raid_disk >= first &&
+ conf->disks[rdev->saved_raid_disk].rdev == NULL)
+ disk = rdev->saved_raid_disk;
+ else
+- disk = 0;
+- for ( ; disk < conf->raid_disks; disk++)
++ disk = first;
++ for ( ; disk <= last ; disk++)
+ if ((p=conf->disks + disk)->rdev == NULL) {
+ clear_bit(In_sync, &rdev->flags);
+ rdev->raid_disk = disk;
diff --git a/managemon.c b/managemon.c
new file mode 100644
index 00000000..c947552e
--- /dev/null
+++ b/managemon.c
@@ -0,0 +1,524 @@
+
+/*
+ * The management thread for monitoring active md arrays.
+ * This thread does things which might block such as memory
+ * allocation.
+ * In particular:
+ *
+ * - Find out about new arrays in this container.
+ * Allocate the data structures and open the files.
+ *
+ * For this we watch /proc/mdstat and find new arrays with
+ * metadata type that confirms sharing. e.g. "md4"
+ * When we find a new array we slip it into the list of
+ * arrays and signal 'monitor' by writing to a pipe.
+ *
+ * - Respond to reshape requests by allocating new data structures
+ * and opening new files.
+ *
+ * These come as a change to raid_disks. We allocate a new
+ * version of the data structures and slip it into the list.
+ * 'monitor' will notice and release the old version.
+ * Changes to level, chunksize, layout.. do not need re-allocation.
+ * Reductions in raid_disks don't really either, but we handle
+ * them the same way for consistency.
+ *
+ * - When a device is added to the container, we add it to the metadata
+ * as a spare.
+ *
+ * - Deal with degraded array
+ * We only do this when first noticing the array is degraded.
+ * This can be when we first see the array, when sync completes or
+ * when recovery completes.
+ *
+ * Check if number of failed devices suggests recovery is needed, and
+ * skip if not.
+ * Ask metadata to allocate a spare device
+ * Add device as not in_sync and give a role
+ * Update metadata.
+ * Open sysfs files and pass to monitor.
+ * Make sure that monitor Starts recovery....
+ *
+ * - Pass on metadata updates from external programs such as
+ * mdadm creating a new array.
+ *
+ * This is most-messy.
+ * It might involve adding a new array or changing the status of
+ * a spare, or any reconfig that the kernel doesn't get involved in.
+ *
+ * The required updates are received via a named pipe. There will
+ * be one named pipe for each container. Each message contains a
+ * sync marker: 0x5a5aa5a5, A byte count, and the message. This is
+ * passed to the metadata handler which will interpret and process it.
+ * For 'DDF' messages are internal data blocks with the leading
+ * 'magic number' signifying what sort of data it is.
+ *
+ */
+
+/*
+ * We select on /proc/mdstat and the named pipe.
+ * We create new arrays or updated version of arrays and slip
+ * them into the head of the list, then signal 'monitor' via a pipe write.
+ * 'monitor' will notice and place the old array on a return list.
+ * Metadata updates are placed on a queue just like they arrive
+ * from the named pipe.
+ *
+ * When new arrays are found based on correct metadata string, we
+ * need to identify them with an entry in the metadata. Maybe we require
+ * the metadata to be mdX/NN when NN is the index into an appropriate table.
+ *
+ */
+
+/*
+ * List of tasks:
+ * - Watch for spares to be added to the container, and write updated
+ * metadata to them.
+ * - Watch for new arrays using this container, confirm they match metadata
+ * and if so, start monitoring them
+ * - Watch for spares being added to monitored arrays. This shouldn't
+ * happen, as we should do all the adding. Just remove them.
+ * - Watch for change in raid-disks, chunk-size, etc. Update metadata and
+ * start a reshape.
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include "mdadm.h"
+#include "mdmon.h"
+#include <sys/syscall.h>
+#include <sys/socket.h>
+#include <signal.h>
+
+/*
+ * Close every file descriptor held by an active array: the 'state'
+ * fd of each device on the list first, then the array's action,
+ * state and resync_start fds.
+ */
+static void close_aa(struct active_array *aa)
+{
+	struct mdinfo *dev = aa->info.devs;
+
+	while (dev) {
+		close(dev->state_fd);
+		dev = dev->next;
+	}
+
+	close(aa->action_fd);
+	close(aa->info.state_fd);
+	close(aa->resync_start_fd);
+}
+
+/*
+ * Release an active_array together with its device list.
+ *
+ * The file descriptors are only closed when ->container is NULL.
+ * When ->container is set the fds may still be in use by a clone
+ * of this array, so they are left open here.
+ */
+static void free_aa(struct active_array *aa)
+{
+	struct mdinfo *dev, *following;
+
+	dprintf("%s: devnum: %d\n", __func__, aa->devnum);
+
+	if (aa->container == NULL)
+		close_aa(aa);
+
+	for (dev = aa->info.devs; dev; dev = following) {
+		following = dev->next;
+		free(dev);
+	}
+	free(aa);
+}
+
+/*
+ * Make a private copy of an active_array.
+ *
+ * The array structure is copied by value, the list linkage (->next,
+ * ->replaces, ->info.next) is cleared, and every device that has an
+ * open state_fd (state_fd >= 0) is copied onto a fresh device list in
+ * the same order.  Devices with state_fd < 0 are dropped from the copy.
+ * File descriptors are shared with the original, not duplicated.
+ *
+ * Returns the new array, or NULL if memory could not be allocated; in
+ * that case nothing is leaked and 'aa' is left untouched.
+ */
+static struct active_array *duplicate_aa(struct active_array *aa)
+{
+	struct active_array *newa = malloc(sizeof(*newa));
+	struct mdinfo **dp1, **dp2;
+
+	if (!newa)
+		return NULL;
+
+	*newa = *aa;
+	newa->next = NULL;
+	newa->replaces = NULL;
+	newa->info.next = NULL;
+	/* Start from an empty list so a partial copy can be unwound
+	 * without ever touching the nodes that belong to 'aa'.
+	 */
+	newa->info.devs = NULL;
+
+	dp2 = &newa->info.devs;
+
+	for (dp1 = &aa->info.devs; *dp1; dp1 = &(*dp1)->next) {
+		struct mdinfo *d;
+		if ((*dp1)->state_fd < 0)
+			continue;
+
+		d = malloc(sizeof(*d));
+		if (!d) {
+			/* Free only our copies; the fds are shared with
+			 * 'aa' and must stay open.
+			 */
+			while (newa->info.devs) {
+				d = newa->info.devs;
+				newa->info.devs = d->next;
+				free(d);
+			}
+			free(newa);
+			return NULL;
+		}
+		*d = **dp1;
+		/* The struct copy left ->next pointing into the original
+		 * list; terminate the new list at every step.
+		 */
+		d->next = NULL;
+		*dp2 = d;
+		dp2 = & d->next;
+	}
+	*dp2 = NULL;
+
+	return newa;
+}
+
+/*
+ * Send SIGUSR1 to the thread identified by mon_tid within this
+ * process.  Equivalent to tgkill(getpid(), mon_tid, SIGUSR1), issued
+ * through syscall().
+ */
+static void wakeup_monitor(void)
+{
+	syscall(SYS_tgkill, getpid(), mon_tid, SIGUSR1);
+}
+
+/*
+ * If an array has been parked on 'discard_this', free it, clear
+ * 'pending_discard' if it referred to the same array, and wake the
+ * monitor.  Does nothing when 'discard_this' is empty.
+ */
+static void remove_old(void)
+{
+	if (discard_this == NULL)
+		return;
+
+	discard_this->next = NULL;
+	free_aa(discard_this);
+	if (pending_discard == discard_this)
+		pending_discard = NULL;
+	discard_this = NULL;
+	wakeup_monitor();
+}
+
+/*
+ * Publish 'new' at the head of container->arrays as the replacement
+ * for 'old', then wake the monitor.
+ *
+ * 'old' may be NULL: manage() passes NULL via manage_new() when the
+ * array was not previously on the list.
+ * Only one replacement can be outstanding at a time, so this blocks
+ * (polling once a second) until any earlier 'pending_discard' has been
+ * handed back on 'discard_this' and freed.
+ */
+static void replace_array(struct supertype *container,
+ struct active_array *old,
+ struct active_array *new)
+{
+ /* To replace an array, we add it to the top of the list
+ * marked with ->replaces to point to the original.
+ * 'monitor' will take the original out of the list
+ * and put it on 'discard_this'. We take it from there
+ * and discard it.
+ */
+ /* Reap anything that has already been handed back */
+ remove_old();
+ /* Wait for the previous replacement, if any, to be handed back */
+ while (pending_discard) {
+ while (discard_this == NULL)
+ sleep(1);
+ remove_old();
+ }
+ pending_discard = old;
+ new->replaces = old;
+ /* Link 'new' fully before making it visible via container->arrays */
+ new->next = container->arrays;
+ container->arrays = new;
+ wakeup_monitor();
+}
+
+/* Metadata update queues:
+ *  update_queue         - the batch currently published; wait_update_handled()
+ *                         polls until this becomes NULL
+ *  update_queue_handled - updates that are finished with;
+ *                         check_update_queue() frees them
+ *  update_queue_pending - updates collected by queue_metadata_update();
+ *                         check_update_queue() moves them to update_queue
+ *                         once that is empty
+ */
+struct metadata_update *update_queue = NULL;
+struct metadata_update *update_queue_handled = NULL;
+struct metadata_update *update_queue_pending = NULL;
+
+/*
+ * Housekeeping for the metadata update queues.
+ *
+ * Frees every update found on update_queue_handled (its buffer, its
+ * optional ->space and the update itself).  Then, if update_queue is
+ * empty and updates are pending, publishes the pending list as
+ * update_queue and wakes the monitor.
+ * 'container' is currently unused.
+ */
+void check_update_queue(struct supertype *container)
+{
+	struct metadata_update *done;
+
+	while ((done = update_queue_handled) != NULL) {
+		update_queue_handled = done->next;
+		free(done->buf);
+		/* free(NULL) is a no-op, so ->space needs no guard */
+		free(done->space);
+		free(done);
+	}
+
+	if (update_queue != NULL || update_queue_pending == NULL)
+		return;
+
+	update_queue = update_queue_pending;
+	update_queue_pending = NULL;
+	wakeup_monitor();
+}
+
+/*
+ * Append 'mu' to the tail of update_queue_pending.
+ * 'mu' is stored as-is, so it may be a single update, a chain of
+ * updates, or NULL (in which case the list is unchanged).
+ */
+static void queue_metadata_update(struct metadata_update *mu)
+{
+	struct metadata_update **tail;
+
+	for (tail = &update_queue_pending; *tail; tail = &(*tail)->next)
+		;
+	*tail = mu;
+}
+
+/*
+ * Block until update_queue is NULL, i.e. until any published update
+ * has been handled by monitor.  Polls every 100ms.
+ */
+void wait_update_handled(void)
+{
+	for (;;) {
+		if (update_queue == NULL)
+			return;
+		usleep(100 * 1000);
+	}
+}
+
+/*
+ * Handle the mdstat entry for the container itself.
+ *
+ * The only thing of interest is whether a device has been added to
+ * (or removed from) the container.  For now a change in the device
+ * count is merely recorded; acting on it is still to be written.
+ * FIXME should we look for compatible metadata and take hints
+ * about spare assignment.... probably not.
+ */
+static void manage_container(struct mdstat_ent *mdstat,
+			     struct supertype *container)
+{
+	if (mdstat->devcnt == container->devcnt)
+		return;
+
+	/* read /sys/block/NAME/md/dev-??/block/dev to find out
+	 * what is there, and compare with container->info.devs
+	 * to see what is removed and what is added.
+	 * These need to be removed from, or added to, the array
+	 */
+	// FIXME
+	container->devcnt = mdstat->devcnt;
+}
+
+/*
+ * Handle the mdstat entry for a member array that is already being
+ * monitored.
+ *
+ * Copies raid_disks and chunk_size from mdstat into the array info and,
+ * when a->check_degraded is set, asks the metadata handler for spares.
+ * If any are offered, a duplicate of the array with the new devices
+ * added is swapped in via replace_array().
+ */
+static void manage_member(struct mdstat_ent *mdstat,
+ struct active_array *a)
+{
+ /* Compare mdstat info with known state of member array.
+ * We do not need to look for device state changes here, that
+ * is dealt with by the monitor.
+ *
+ * We just look for changes which suggest that a reshape is
+ * being requested.
+ * Unfortunately decreases in raid_disks don't show up in
+ * mdstat until the reshape completes FIXME.
+ *
+ * Actually, we also want to handle degraded arrays here by
+ * trying to find and assign a spare.
+ * We do that whenever the monitor tells us to.
+ */
+ // FIXME
+ a->info.array.raid_disks = mdstat->raid_disks;
+ a->info.array.chunk_size = mdstat->chunk_size;
+ // MORE
+
+ if (a->check_degraded) {
+ struct metadata_update *updates = NULL;
+ struct mdinfo *newdev;
+ struct active_array *newa;
+ /* Let any published update drain before asking for spares */
+ wait_update_handled();
+ a->check_degraded = 0;
+
+ /* The array may not be degraded, this is just a good time
+ * to check.
+ */
+ newdev = a->container->ss->activate_spare(a, &updates);
+ if (newdev) {
+ struct mdinfo *d;
+ /* Cool, we can add a device or several. */
+ /* NOTE(review): the result of duplicate_aa() is used
+ * without a NULL check - confirm it cannot fail. */
+ newa = duplicate_aa(a);
+ /* suspend recovery - maybe not needed */
+
+ /* Add device to array and set offset/size/slot.
+ * and open files for each newdev */
+ for (d = newdev; d ; d = d->next) {
+ struct mdinfo *newd;
+ if (sysfs_add_disk(&newa->info, d) < 0)
+ continue;
+ /* assumes sysfs_add_disk() links the new device at the
+ * head of newa->info.devs - TODO confirm */
+ newd = newa->info.devs;
+ newd->state_fd = sysfs_open(a->devnum,
+ newd->sys_name,
+ "state");
+ newd->prev_state
+ = read_dev_state(newd->state_fd);
+ newd->curr_state = newd->prev_state;
+ }
+ /* NOTE(review): the 'newdev' list is not freed here -
+ * confirm who owns it. */
+ queue_metadata_update(updates);
+ replace_array(a->container, a, newa);
+ /* 'a' is only freed later by remove_old(), so a->info is
+ * still usable here */
+ sysfs_set_str(&a->info, NULL, "sync_action", "repair");
+ }
+ }
+}
+
+/*
+ * Start tracking a member array that has appeared in this container.
+ *
+ *  mdstat    - the /proc/mdstat entry for the new array
+ *  container - the container it belongs to
+ *  victim    - the entry currently on container->arrays for this devnum
+ *              that is to be replaced, or NULL if there is none
+ *
+ * A new active_array is built from sysfs, its state files are opened,
+ * and it is handed to the monitor with replace_array().  If the array
+ * cannot be read, or the metadata handler rejects it, the new entry is
+ * still published but with ->container == NULL so that it is ignored;
+ * manage() will try again on a later pass.
+ */
+static void manage_new(struct mdstat_ent *mdstat,
+		       struct supertype *container,
+		       struct active_array *victim)
+{
+	/* A new array has appeared in this container.
+	 * Hopefully it is already recorded in the metadata.
+	 * Check, then create the new array to report it to
+	 * the monitor.
+	 */
+
+	struct active_array *new;
+	struct mdinfo *mdi, *di;
+	char *inst;
+	int i;
+
+	new = malloc(sizeof(*new));
+	if (!new)
+		/* Nothing has been published, so manage() will simply
+		 * call us again the next time mdstat is read.
+		 */
+		return;
+
+	memset(new, 0, sizeof(*new));
+
+	/* An entry with ->container == NULL has its fds closed by
+	 * free_aa().  Zero is a valid descriptor, so mark the fds that
+	 * have not been opened yet as -1; otherwise the early error
+	 * path below would eventually close fd 0.
+	 */
+	new->action_fd = -1;
+	new->info.state_fd = -1;
+	new->resync_start_fd = -1;
+
+	new->devnum = mdstat->devnum;
+	strcpy(new->info.sys_name, devnum2devname(new->devnum));
+
+	new->prev_state = new->curr_state = new->next_state = inactive;
+	new->prev_action= new->curr_action= new->next_action= idle;
+
+	new->container = container;
+
+	/* metadata_version is "external:/<container>/<inst>"; manage()
+	 * has already checked the prefix, so skip straight to <inst>.
+	 */
+	inst = &mdstat->metadata_version[10+strlen(container->devname)+1];
+
+	mdi = sysfs_read(-1, new->devnum,
+			 GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
+			 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);
+	if (!mdi) {
+		/* Eeek. Cannot monitor this array.
+		 * Mark it to be ignored by setting container to NULL
+		 */
+		new->container = NULL;
+		replace_array(container, victim, new);
+		return;
+	}
+
+	new->info.array = mdi->array;
+	new->info.component_size = mdi->component_size;
+
+	/* Build one entry per slot, present or not */
+	for (i = 0; i < new->info.array.raid_disks; i++) {
+		struct mdinfo *newd = malloc(sizeof(*newd));
+
+		if (!newd) {
+			/* Give up on this array for now.  With container
+			 * cleared, free_aa() will close whatever state
+			 * files were opened so far.
+			 */
+			sysfs_free(mdi);
+			new->container = NULL;
+			replace_array(container, victim, new);
+			return;
+		}
+
+		for (di = mdi->devs; di; di = di->next)
+			if (i == di->disk.raid_disk)
+				break;
+
+		if (di) {
+			memcpy(newd, di, sizeof(*newd));
+
+			/* must use the sysfs name copied from 'di' before
+			 * sys_name is overwritten below
+			 */
+			newd->state_fd = sysfs_open(new->devnum,
+						    newd->sys_name,
+						    "state");
+
+			newd->prev_state = read_dev_state(newd->state_fd);
+			newd->curr_state = newd->prev_state;
+		} else {
+			/* Placeholder for a missing device: start from
+			 * zeroes rather than uninitialised heap.
+			 */
+			memset(newd, 0, sizeof(*newd));
+			newd->state_fd = -1;
+			newd->disk.raid_disk = i;
+			newd->prev_state = DS_REMOVE;
+			newd->curr_state = DS_REMOVE;
+		}
+		sprintf(newd->sys_name, "rd%d", i);
+		newd->next = new->info.devs;
+		new->info.devs = newd;
+	}
+	new->action_fd = sysfs_open(new->devnum, NULL, "sync_action");
+	new->info.state_fd = sysfs_open(new->devnum, NULL, "array_state");
+	new->resync_start_fd = sysfs_open(new->devnum, NULL, "resync_start");
+	get_resync_start(new);
+	dprintf("%s: inst: %d action: %d state: %d\n", __func__, atoi(inst),
+		new->action_fd, new->info.state_fd);
+
+	sysfs_free(mdi);
+	// finds and compares.
+	if (container->ss->open_new(container, new, inst) < 0) {
+		// FIXME close all those files
+		new->container = NULL;
+		replace_array(container, victim, new);
+		return;
+	}
+	replace_array(container, victim, new);
+	return;
+}
+
+/*
+ * Compare a freshly read mdstat list with the arrays we know about.
+ *
+ * The container's own entry goes to manage_container().  Entries whose
+ * metadata_version is not "external:/<container->devname>/..." are
+ * ignored.  A member that is already on container->arrays with its
+ * ->container set goes to manage_member(); anything else (unknown, or
+ * known but marked ignored with ->container == NULL) goes to
+ * manage_new().
+ */
+void manage(struct mdstat_ent *mdstat, struct supertype *container)
+{
+	struct mdstat_ent *ent;
+
+	for (ent = mdstat; ent; ent = ent->next) {
+		struct active_array *a;
+		const char *ver = ent->metadata_version;
+		size_t namelen;
+
+		if (ent->devnum == container->devnum) {
+			manage_container(ent, container);
+			continue;
+		}
+
+		/* Not for this array unless the version string is
+		 * "external:/" + container name + "/"
+		 */
+		if (ver == NULL)
+			continue;
+		if (strncmp(ver, "external:/", 10) != 0)
+			continue;
+		namelen = strlen(container->devname);
+		if (strncmp(ver + 10, container->devname, namelen) != 0)
+			continue;
+		if (ver[10 + namelen] != '/')
+			continue;
+
+		/* Looks like a member of this container */
+		a = container->arrays;
+		while (a && a->devnum != ent->devnum)
+			a = a->next;
+
+		if (a && a->container)
+			manage_member(ent, a);
+		else
+			manage_new(ent, container, a);
+	}
+}
+
+/*
+ * Act on one message received from the socket.
+ *
+ * A zero-length message is a synchronisation request: wake the monitor,
+ * wait for the published update queue to drain, then wait until
+ * monitor_loop_cnt has advanced far enough.
+ * Any other message is wrapped in a new metadata_update, which takes
+ * over msg->buf (msg->buf is set to NULL), is optionally passed to the
+ * metadata handler's ->prepare_update, and is appended to the pending
+ * queue.
+ */
+static void handle_message(struct supertype *container, struct metadata_update *msg)
+{
+ /* queue this metadata update through to the monitor */
+
+ struct metadata_update *mu;
+
+ if (msg->len == 0) {
+ int cnt = monitor_loop_cnt;
+ /* NOTE(review): the parity of monitor_loop_cnt presumably says
+ * whether the monitor is inside pselect - confirm in monitor.c */
+ if (cnt & 1)
+ cnt += 2; /* wait until next pselect */
+ else
+ cnt += 3; /* wait for 2 pselects */
+ wakeup_monitor();
+ wait_update_handled();
+ /* compare by difference rather than directly */
+ while (monitor_loop_cnt - cnt < 0)
+ usleep(10 * 1000);
+ } else {
+ /* NOTE(review): malloc() result is not checked */
+ mu = malloc(sizeof(*mu));
+ mu->len = msg->len;
+ mu->buf = msg->buf;
+ /* ownership of the buffer moves to 'mu' */
+ msg->buf = NULL;
+ mu->space = NULL;
+ mu->next = NULL;
+ if (container->ss->prepare_update)
+ container->ss->prepare_update(container, mu);
+ queue_metadata_update(mu);
+ }
+}
+
+/*
+ * Accept one connection on the container's socket and service it.
+ *
+ * The connection is switched to non-blocking mode, then messages are
+ * received, handled and acknowledged one after another until either
+ * receiving a message or sending the ack fails, at which point the
+ * connection is closed.  Returns immediately if accept() fails.
+ */
+void read_sock(struct supertype *container)
+{
+	struct metadata_update msg;
+	int tmo = 3; /* 3 second timeout before hanging up the socket */
+	long flags;
+	int conn;
+
+	conn = accept(container->sock, NULL, NULL);
+	if (conn < 0)
+		return;
+
+	flags = fcntl(conn, F_GETFL, 0);
+	flags |= O_NONBLOCK;
+	fcntl(conn, F_SETFL, flags);
+
+	for (;;) {
+		msg.buf = NULL;
+
+		/* read and validate the message */
+		if (receive_message(conn, &msg, tmo) != 0)
+			break;
+
+		handle_message(container, &msg);
+		if (ack(conn, tmo) < 0)
+			break;
+	}
+
+	close(conn);
+}
+
+/* exit_now: when non-zero, do_manager() calls exit(0) at the top of
+ * its next iteration.
+ * manager_ready: set to 1 by do_manager() once it has completed a
+ * first full pass.
+ */
+int exit_now = 0;
+int manager_ready = 0;
+/*
+ * Main loop of the manager.  Never returns; leaves only through
+ * exit(0) when exit_now is set.
+ *
+ * Each pass reads mdstat, reconciles it with the known arrays, services
+ * one socket connection if there is one, reaps discarded arrays,
+ * advances the update queues, and then waits in mdstat_wait_fd().
+ */
+void do_manager(struct supertype *container)
+{
+ struct mdstat_ent *mdstat;
+ sigset_t set;
+
+ /* A NULL 'set' argument changes nothing: this only fetches the
+ * current signal mask.  SIGUSR1 is then removed from the copy that
+ * is handed to mdstat_wait_fd() below.
+ */
+ sigprocmask(SIG_UNBLOCK, NULL, &set);
+ sigdelset(&set, SIGUSR1);
+
+ do {
+
+ if (exit_now)
+ exit(0);
+
+ mdstat = mdstat_read(1, 0);
+
+ manage(mdstat, container);
+
+ read_sock(container);
+
+ free_mdstat(mdstat);
+
+ remove_old();
+
+ check_update_queue(container);
+
+ manager_ready = 1;
+
+ mdstat_wait_fd(container->sock, &set);
+ } while(1);
+}
diff --git a/mapfile.c b/mapfile.c
index 746073d0..cf2ca2bf 100644
--- a/mapfile.c
+++ b/mapfile.c
@@ -33,8 +33,8 @@
* also allows the array device name to be easily found.
*
* The map file is line based with space separated fields. The fields are:
- * Device id - mdX or mdpX where is a number.
- * metadata - 0.90 1.0 1.1 1.2
+ * Device id - mdX or mdpX where X is a number.
+ * metadata - 0.90 1.0 1.1 1.2 ddf ...
* UUID - uuid of the array
* path - path where device created: /dev/md/home
*
@@ -62,7 +62,7 @@ int map_write(struct map_ent *mel)
fprintf(f, "mdp%d ", -1-mel->devnum);
else
fprintf(f, "md%d ", mel->devnum);
- fprintf(f, "%d.%d ", mel->major, mel->minor);
+ fprintf(f, "%s ", mel->metadata);
fprintf(f, "%08x:%08x:%08x:%08x ", mel->uuid[0],
mel->uuid[1], mel->uuid[2], mel->uuid[3]);
fprintf(f, "%s\n", mel->path);
@@ -87,13 +87,12 @@ int map_write(struct map_ent *mel)
}
void map_add(struct map_ent **melp,
- int devnum, int major, int minor, int uuid[4], char *path)
+ int devnum, char *metadata, int uuid[4], char *path)
{
struct map_ent *me = malloc(sizeof(*me));
me->devnum = devnum;
- me->major = major;
- me->minor = minor;
+ strcpy(me->metadata, metadata);
memcpy(me->uuid, uuid, 16);
me->path = strdup(path);
me->next = *melp;
@@ -105,7 +104,8 @@ void map_read(struct map_ent **melp)
FILE *f;
char buf[8192];
char path[200];
- int devnum, major, minor, uuid[4];
+ int devnum, uuid[4];
+ char metadata[30];
char nam[4];
*melp = NULL;
@@ -117,12 +117,12 @@ void map_read(struct map_ent **melp)
return;
while (fgets(buf, sizeof(buf), f)) {
- if (sscanf(buf, " md%1[p]%d %d.%d %x:%x:%x:%x %200s",
- nam, &devnum, &major, &minor, uuid, uuid+1,
+ if (sscanf(buf, " md%1[p]%d %s %x:%x:%x:%x %200s",
+ nam, &devnum, metadata, uuid, uuid+1,
uuid+2, uuid+3, path) == 9) {
if (nam[0] == 'p')
devnum = -1 - devnum;
- map_add(melp, devnum, major, minor, uuid, path);
+ map_add(melp, devnum, metadata, uuid, path);
}
}
fclose(f);
@@ -138,7 +138,7 @@ void map_free(struct map_ent *map)
}
}
-int map_update(struct map_ent **mpp, int devnum, int major, int minor,
+int map_update(struct map_ent **mpp, int devnum, char *metadata,
int *uuid, char *path)
{
struct map_ent *map, *mp;
@@ -151,15 +151,14 @@ int map_update(struct map_ent **mpp, int devnum, int major, int minor,
for (mp = map ; mp ; mp=mp->next)
if (mp->devnum == devnum) {
- mp->major = major;
- mp->minor = minor;
+ strcpy(mp->metadata, metadata);
memcpy(mp->uuid, uuid, 16);
free(mp->path);
mp->path = strdup(path);
break;
}
if (!mp)
- map_add(&map, devnum, major, minor, uuid, path);
+ map_add(&map, devnum, metadata, uuid, path);
*mpp = NULL;
rv = map_write(map);
map_free(map);
diff --git a/md.4 b/md.4
index dfd287f1..ea12eaff 100644
--- a/md.4
+++ b/md.4
@@ -526,10 +526,22 @@ Finally, "idle" can be written to stop the check/repair process.
.B md/stripe_cache_size
This is only available on RAID5 and RAID6. It records the size (in
pages per device) of the stripe cache which is used for synchronising
-all read and write operations to the array. The default is 128.
+all write operations to the array and all read operations if the array
+is degraded. The default is 256. Valid values are 17 to 32768.
Increasing this number can increase performance in some situations, at
-some cost in system memory.
+some cost in system memory. Note, setting this value too high can
+result in an "out of memory" condition for the system.
+memory_consumed = system_page_size * nr_disks * stripe_cache_size
+
+.TP
+.B md/preread_bypass_threshold
+This is only available on RAID5 and RAID6. This variable sets the
+number of times MD will service a full-stripe-write before servicing a
+stripe that requires some "prereading". For fairness this defaults to
+1. Valid values are 0 to stripe_cache_size. Setting this to 0
+maximizes sequential-write throughput at the cost of fairness to threads
+doing small or random writes.
.SS KERNEL PARAMETERS
diff --git a/mdadm.8 b/mdadm.8
index be8568d1..3c283ca9 100644
--- a/mdadm.8
+++ b/mdadm.8
@@ -1937,6 +1937,16 @@ that no metadata updates are made and no attempt at resync or recovery
happens. Further devices that are found before the first write can
still be added safely.
+
+.SH ENVIRONMENT
+This section describes environment variables that affect how mdadm
+operates.
+
+.TP
+.B MDADM_NO_MDMON
+Setting this value to 1 will prevent mdadm from automatically launching
+mdmon. This variable is intended primarily for debugging mdadm/mdmon.
+
.SH EXAMPLES
.B " mdadm \-\-query /dev/name-of-device"
diff --git a/mdadm.c b/mdadm.c
index 3aa3b132..b7865ef7 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -1272,7 +1272,8 @@ int main(int argc, char *argv[])
export, test, homehost);
continue;
case 'K': /* Zero superblock */
- rv |= Kill(dv->devname, force, quiet); continue;
+ rv |= Kill(dv->devname, force, quiet,0);
+ continue;
case 'Q':
rv |= Query(dv->devname); continue;
case 'X':
diff --git a/mdadm.h b/mdadm.h
index 5c18d15e..52d94352 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -76,6 +76,7 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#include "md_u.h"
#include "md_p.h"
#include "bitmap.h"
+#include "msg.h"
#include <endian.h>
/* Redhat don't like to #include <asm/byteorder.h>, and
@@ -106,6 +107,13 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#define __le16_to_cpu(_x) (_x)
#define __le32_to_cpu(_x) (_x)
#define __le64_to_cpu(_x) (_x)
+
+#define __cpu_to_be16(_x) bswap_16(_x)
+#define __cpu_to_be32(_x) bswap_32(_x)
+#define __cpu_to_be64(_x) bswap_64(_x)
+#define __be16_to_cpu(_x) bswap_16(_x)
+#define __be32_to_cpu(_x) bswap_32(_x)
+#define __be64_to_cpu(_x) bswap_64(_x)
#elif BYTE_ORDER == BIG_ENDIAN
#define __cpu_to_le16(_x) bswap_16(_x)
#define __cpu_to_le32(_x) bswap_32(_x)
@@ -113,6 +121,13 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#define __le16_to_cpu(_x) bswap_16(_x)
#define __le32_to_cpu(_x) bswap_32(_x)
#define __le64_to_cpu(_x) bswap_64(_x)
+
+#define __cpu_to_be16(_x) (_x)
+#define __cpu_to_be32(_x) (_x)
+#define __cpu_to_be64(_x) (_x)
+#define __be16_to_cpu(_x) (_x)
+#define __be32_to_cpu(_x) (_x)
+#define __be64_to_cpu(_x) (_x)
#else
# error "unknown endianness."
#endif
@@ -128,18 +143,36 @@ struct mdinfo {
int uuid[4];
char name[33];
unsigned long long data_offset;
- unsigned long long component_size;
+ unsigned long long component_size; /* same as array.size, except in
+ * sectors and up to 64bits.
+ */
int reshape_active;
unsigned long long reshape_progress;
+ unsigned long long resync_start;
int new_level, delta_disks, new_layout, new_chunk;
int errors;
int cache_size; /* size of raid456 stripe cache*/
int mismatch_cnt;
char text_version[50];
+ int container_member; /* for assembling external-metadata arrays
+ * This is to be used internally by metadata
+ * handler only */
+
char sys_name[20];
struct mdinfo *devs;
struct mdinfo *next;
+
+ /* Device info for mdmon: */
+ int state_fd;
+ #define DS_FAULTY 1
+ #define DS_INSYNC 2
+ #define DS_WRITE_MOSTLY 4
+ #define DS_SPARE 8
+ #define DS_BLOCKED 16
+ #define DS_REMOVE 1024
+ int prev_state, curr_state, next_state;
+
};
struct createinfo {
@@ -252,22 +285,27 @@ struct mdstat_ent {
char *pattern; /* U or up, _ for down */
int percent; /* -1 if no resync */
int resync; /* 1 if resync, 0 if recovery */
+ int devcnt;
+ int raid_disks;
+ int chunk_size;
+ char * metadata_version;
struct mdstat_ent *next;
};
extern struct mdstat_ent *mdstat_read(int hold, int start);
extern void free_mdstat(struct mdstat_ent *ms);
extern void mdstat_wait(int seconds);
+extern void mdstat_wait_fd(int fd, const sigset_t *sigmask);
extern int mddev_busy(int devnum);
struct map_ent {
struct map_ent *next;
int devnum;
- int major,minor;
+ char metadata[20];
int uuid[4];
char *path;
};
-extern int map_update(struct map_ent **mpp, int devnum, int major, int minor,
+extern int map_update(struct map_ent **mpp, int devnum, char *metadata,
int uuid[4], char *path);
extern struct map_ent *map_by_uuid(struct map_ent **map, int uuid[4]);
extern void map_read(struct map_ent **melp);
@@ -275,7 +313,7 @@ extern int map_write(struct map_ent *mel);
extern void map_delete(struct map_ent **mapp, int devnum);
extern void map_free(struct map_ent *map);
extern void map_add(struct map_ent **melp,
- int devnum, int major, int minor, int uuid[4], char *path);
+ int devnum, char *metadata, int uuid[4], char *path);
/* various details can be requested */
#define GET_LEVEL 1
@@ -285,6 +323,7 @@ extern void map_add(struct map_ent **melp,
#define GET_CACHE 16
#define GET_MISMATCH 32
#define GET_VERSION 64
+#define GET_DISKS 128
#define GET_DEVS 1024 /* gets role, major, minor */
#define GET_OFFSET 2048
@@ -295,6 +334,7 @@ extern void map_add(struct map_ent **melp,
/* If fd >= 0, get the array it is open on,
* else use devnum. >=0 -> major9. <0.....
*/
+extern int sysfs_open(int devnum, char *devname, char *attr);
extern void sysfs_free(struct mdinfo *sra);
extern struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options);
extern int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
@@ -303,6 +343,11 @@ extern int sysfs_set_num(struct mdinfo *sra, struct mdinfo *dev,
char *name, unsigned long long val);
extern int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
char *name, unsigned long long *val);
+extern int sysfs_set_array(struct mdinfo *sra,
+ struct mdinfo *info);
+extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd);
+extern int sysfs_disk_to_scsi_id(int fd, __u32 *id);
+extern int sysfs_unique_holder(int devnum, long rdev);
extern int save_stripes(int *source, unsigned long long *offsets,
@@ -326,28 +371,126 @@ extern mapping_t r5layout[], pers[], modes[], faultylayout[];
extern char *map_dev(int major, int minor, int create);
+struct active_array;
+struct metadata_update;
+/* A superswitch provides the entry points to a metadata handler.
+ *
+ * The super_switch primarily operates on some "metadata" that
+ * is accessed via the 'supertype'.
+ * This metadata has one of three possible sources.
+ * 1/ It is read from a single device. In this case it may not completely
+ * describe the array or arrays as some information might be on other
+ * devices.
+ * 2/ It is read from all devices in a container. In this case all
+ * information is present.
+ * 3/ It is created by ->init_super / ->add_to_super. In this case it will
+ * be complete once enough ->add_to_super calls have completed.
+ *
+ * When creating an array inside a container, the metadata will be
+ * formed by a combination of 2 and 3. The metadata or the array is read,
+ * then new information is added.
+ *
+ * The metadata must sometimes have a concept of a 'current' array
+ * and a 'current' device.
+ * The 'current' array is set by init_super to be the newly created array,
+ * or is set by super_by_fd when it finds it is looking at an array inside
+ * a container.
+ *
+ * The 'current' device is either the device that the metadata was read from
+ * in case 1, or the last device added by add_to_super in case 3.
+ * Case 2 does not identify a 'current' device.
+ */
extern struct superswitch {
+
+ /* Used to report details of metadata read from a component
+ * device. ->load_super has been called.
+ */
void (*examine_super)(struct supertype *st, char *homehost);
void (*brief_examine_super)(struct supertype *st);
void (*export_examine_super)(struct supertype *st);
+
+ /* Used to report details of an active array.
+ * ->load_super was possibly given a 'component' string.
+ */
void (*detail_super)(struct supertype *st, char *homehost);
void (*brief_detail_super)(struct supertype *st);
void (*export_detail_super)(struct supertype *st);
+
+ /* Used:
+ * to get the uuid to store in bitmap metadata
+ * and 'reshape' backup-data metadata
+ * To see if a device is being re-added to an array it was part of.
+ */
void (*uuid_from_super)(struct supertype *st, int uuid[4]);
+
+ /* Extra generic details from metadata. This could be details about
+ * the container, or about an individual array within the container.
+ * The determination is made either by:
+ * load_super being given a 'component' string.
+ * validate_geometry determining what to create.
+ * The info includes both array information and device information.
+ * The particular device should be:
+ * The last device added by add_to_super
+ * The device the metadata was loaded from by load_super
+ */
void (*getinfo_super)(struct supertype *st, struct mdinfo *info);
+
+ /* Check if the given metadata is flagged as belonging to "this"
+ * host. For arrays that don't determine a minor-number, this
+ * can always be true (??)
+ */
int (*match_home)(struct supertype *st, char *homehost);
+
+ /* Make one of several generic modifications to metadata
+ * prior to assembly (or other times).
+ * sparc2.2 - first bug in early 0.90 metadata
+ * super-minor - change name of 0.90 metadata
+ * summaries - 'correct' any redundant data
+ * resync - mark array as dirty to trigger a resync.
+ * uuid - set new uuid - only 0.90 or 1.x
+ * name - change the name of the array (where supported)
+ * homehost - change which host this array is tied to.
+ * devicesize - If metadata is at start of device, change recorded
+ * device size to match actual device size
+ * byteorder - swap bytes for 0.90 metadata
+ *
+ * force-one - mark that device as uptodate, not old or failed.
+ * force-array - mark array as clean if it would not otherwise
+ * assemble
+ * assemble - not sure how this is different from force-one...
+ * linear-grow-new - add a new device to a linear array, but don't
+ * change the size: so superblock still matches
+ * linear-grow-update - now change the size of the array.
+ */
int (*update_super)(struct supertype *st, struct mdinfo *info,
char *update,
char *devname, int verbose,
int uuid_set, char *homehost);
+
+ /* Create new metadata for new array as described. This could
+ * be a new container, or an array in a pre-existing container.
+ * Also used to zero metadata prior to writing it to invalidate old
+ * metadata.
+ */
int (*init_super)(struct supertype *st, mdu_array_info_t *info,
unsigned long long size, char *name,
char *homehost, int *uuid);
- void (*add_to_super)(struct supertype *st, mdu_disk_info_t *dinfo);
+
+ /* update the metadata to include new device, either at create or
+ * when hot-adding a spare.
+ */
+ void (*add_to_super)(struct supertype *st, mdu_disk_info_t *dinfo,
+ int fd, char *devname);
+
+ /* Write metadata to one device when fixing problems or adding
+ * a new device.
+ */
int (*store_super)(struct supertype *st, int fd);
- int (*write_init_super)(struct supertype *st, mdu_disk_info_t *dinfo,
- char *devname);
+
+ /* Write all metadata for this array.
+ */
+ int (*write_init_super)(struct supertype *st);
int (*compare_super)(struct supertype *st, struct supertype *tst);
int (*load_super)(struct supertype *st, int fd, char *devname);
struct supertype * (*match_metadata_desc)(char *arg);
@@ -358,15 +501,108 @@ extern struct superswitch {
void (*locate_bitmap)(struct supertype *st, int fd);
int (*write_bitmap)(struct supertype *st, int fd);
void (*free_super)(struct supertype *st);
- int major;
+
+ /* validate_geometry is called with an st returned by
+ * match_metadata_desc.
+ * It should check that the geometry described is compatible with
+ * the metadata type. It will be called repeatedly as devices are
+ * added to validate changing size and new devices. If there are
+ * inter-device dependencies, it should record sufficient details
+ * so these can be validated.
+ */
+ int (*validate_geometry)(struct supertype *st, int level, int layout,
+ int raiddisks,
+ int chunk, unsigned long long size,
+ char *subdev, unsigned long long *freesize,
+ int verbose);
+
+ struct mdinfo *(*container_content)(struct supertype *st);
+
+/* for mdmon */
+ int (*open_new)(struct supertype *c, struct active_array *a,
+ char *inst);
+
+ /* Tell the metadata handler the current state of the array.
+ * This covers whether it is known to be consistent (no pending writes)
+ * and how far along a resync is known to have progressed
+ * (in a->resync_start).
+ * resync status is really irrelevant if the array is not consistent,
+ * but some metadata (DDF!) have a place to record the distinction.
+ */
+ void (*set_array_state)(struct active_array *a, int consistent);
+
+ /* When the state of a device might have changed, we call set_disk to
+ * tell the metadata what the current state is.
+ * Typically this happens on spare->in_sync and (spare|in_sync)->faulty
+ * transitions.
+ * set_disk might be called when the state of the particular disk has
+ * not in fact changed.
+ */
+ void (*set_disk)(struct active_array *a, int n, int state);
+ void (*sync_metadata)(struct supertype *st);
+ void (*process_update)(struct supertype *st,
+ struct metadata_update *update);
+ void (*prepare_update)(struct supertype *st,
+ struct metadata_update *update);
+
+ /* activate_spare will check if the array is degraded and, if it
+ * is, try to find some spare space in the container.
+ * On success, it adds appropriate updates (for process_update)
+ * to the 'updates' list and returns a list of 'mdinfo' identifying
+ * the device, or devices as there might be multiple missing
+ * devices and multiple spares available.
+ */
+ struct mdinfo *(*activate_spare)(struct active_array *a,
+ struct metadata_update **updates);
+
int swapuuid; /* true if uuid is bigending rather than hostendian */
-} super0, super1, *superlist[];
+ int external;
+} super0, super1, super_ddf, *superlist[];
+extern struct superswitch super_imsm;
+
+/* One queued metadata update, chained to the next one through 'next'.
+ * NOTE(review): 'buf'/'len' are presumably the opaque update blob and its
+ * size in bytes - confirm against the superswitch handlers.
+ */
+struct metadata_update {
+int len;
+char *buf;
+void *space; /* allocated space that monitor will use */
+struct metadata_update *next;
+};
+
+/* A supertype holds a particular collection of metadata.
+ * It identifies the metadata type by the superswitch, and the particular
+ * sub-version of that metadata type.
+ * metadata read in or created is stored in 'sb' and 'info'.
+ * There are also fields used by mdmon to track containers.
+ *
+ * A supertype is created by:
+ * super_by_fd
+ * guess_super
+ * dup_super
+ */
struct supertype {
struct superswitch *ss;
int minor_version;
int max_devs;
+int container_dev; /* devnum of container */
+char subarray[32]; /* name of array inside container */
void *sb;
+void *info;
+
+/* NOTE(review): presumably the pending-update list and its tail pointer
+ * used by append_metadata_update()/flush_metadata_updates() - confirm.
+ */
+struct metadata_update *updates;
+struct metadata_update **update_tail;
+
+/* extra stuff used by mdmon */
+struct active_array *arrays;
+int sock; /* listen to external programs */
+int devnum;
+char *devname; /* e.g. md0. This appears in metadata_version:
+ * external:/md0/12
+ */
+int devcnt;
+char *device_name; /* e.g. /dev/md/whatever */
+
+struct mdinfo *devs;
+
};
extern struct supertype *super_by_fd(int fd);
@@ -459,11 +695,13 @@ extern int Monitor(mddev_dev_t devlist,
int period, int daemonise, int scan, int oneshot,
int dosyslog, int test, char *pidfile);
-extern int Kill(char *dev, int force, int quiet);
+extern int Kill(char *dev, int force, int quiet, int noexcl);
extern int Wait(char *dev);
extern int Incremental(char *devname, int verbose, int runstop,
struct supertype *st, char *homehost, int autof);
+extern int Incremental_container(struct supertype *st, char *devname,
+ int verbose, int runstop, int autof);
extern void RebuildMap(void);
extern int IncrementalScan(int verbose);
@@ -484,6 +722,7 @@ extern int check_raid(int fd, char *name);
extern int get_mdp_major(void);
extern int dev_open(char *dev, int flags);
+extern int open_dev_excl(int devnum);
extern int is_standard(char *dev, int *nump);
extern int parse_auto(char *str, char *msg, int config);
@@ -509,6 +748,10 @@ extern int enough(int level, int raid_disks, int layout, int clean,
extern int ask(char *mesg);
extern unsigned long long get_component_size(int fd);
extern void remove_partitions(int fd);
+extern unsigned long long calc_array_size(int level, int raid_disks, int layout,
+ int chunksize, unsigned long long devsize);
+extern int flush_metadata_updates(struct supertype *st);
+extern void append_metadata_update(struct supertype *st, void *buf, int len);
extern char *human_size(long long bytes);
@@ -525,12 +768,45 @@ extern char DefaultConfFile[];
extern int open_mddev(char *dev, int autof);
extern int open_mddev_devnum(char *devname, int devnum, char *name,
char *chosen_name, int parts);
-
+extern int open_container(int fd);
+
+extern int mdmon_running(int devnum);
+extern int signal_mdmon(int devnum);
+extern int env_no_mdmon(void);
+extern int start_mdmon(int devnum);
+
+extern char *devnum2devname(int num);
+extern int devname2devnum(char *name);
+extern int fd2devnum(int fd);
+
+/* Major number for md device number 'd': MD_MAJOR when d is
+ * non-negative, otherwise whatever get_mdp_major() reports.
+ */
+static inline int dev2major(int d)
+{
+	return (d < 0) ? get_mdp_major() : MD_MAJOR;
+}
+
+/* Minor number for md device number 'd': d itself when non-negative,
+ * otherwise (-1-d) shifted left by MdpMinorShift.
+ */
+static inline int dev2minor(int d)
+{
+	return (d < 0) ? ((-1-d) << MdpMinorShift) : d;
+}
+
+/* Round 'a' up to the next multiple of 'base' using integer division. */
+static inline int ROUND_UP(int a, int base)
+{
+	int chunks = (a + base - 1) / base;
+
+	return chunks * base;
+}
#define LEVEL_MULTIPATH (-4)
#define LEVEL_LINEAR (-1)
#define LEVEL_FAULTY (-5)
+/* kernel module doesn't know about these */
+#define LEVEL_CONTAINER (-100)
+#define LEVEL_UNSUPPORTED (-200)
+
/* faulty stuff */
diff --git a/mdmon.c b/mdmon.c
new file mode 100644
index 00000000..85f44bc2
--- /dev/null
+++ b/mdmon.c
@@ -0,0 +1,348 @@
+
+/*
+ * md array manager.
+ * When md arrays have user-space managed metadata, this is the program
+ * that does the managing.
+ *
+ * Given one argument: the name of the array (e.g. /dev/md0) that is
+ * the container.
+ * We fork off a helper that runs high priority and mlocked. It responds to
+ * device failures and other events that might stop writeout, or that are
+ * trivial to deal with.
+ * The main thread then watches for new arrays being created in the container
+ * and starts monitoring them too ... along with a few other tasks.
+ *
+ * The main thread communicates with the priority thread by writing over
+ * a pipe.
+ * Separate programs can communicate with the main thread via Unix-domain
+ * socket.
+ * The two threads share address space and open file table.
+ *
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <signal.h>
+
+#include <sched.h>
+
+#include "mdadm.h"
+#include "mdmon.h"
+
+struct active_array *discard_this;
+struct active_array *pending_discard;
+
+int mon_tid, mgr_tid;
+
+/* Thread entry point handed to clone(): 'v' is the container supertype.
+ * Runs the monitor loop and returns 0 once do_monitor() returns.
+ */
+int run_child(void *v)
+{
+	do_monitor((struct supertype *)v);
+	return 0;
+}
+
+/* Start the monitor thread with clone(), running run_child() on a small
+ * static stack, and record the thread ids of the monitor (mon_tid) and of
+ * the calling manager thread (mgr_tid).
+ * Returns the value clone() returned (negative on failure).
+ */
+int clone_monitor(struct supertype *container)
+{
+	static char stack[4096];
+	const int flags =
+		CLONE_FS|CLONE_FILES|CLONE_VM|CLONE_SIGHAND|CLONE_THREAD;
+	/* start 64 bytes below the end of the stack area */
+	char *stack_top = stack + sizeof(stack) - 64;
+
+	mon_tid = clone(run_child, stack_top, flags, container);
+
+	mgr_tid = syscall(SYS_gettid);
+
+	return mon_tid;
+}
+
+/* Map the external metadata name read from sysfs ("ddf" or "imsm") to
+ * its superswitch. Returns NULL for any other name.
+ */
+static struct superswitch *find_metadata_methods(char *vers)
+{
+	static const struct {
+		const char *name;
+		struct superswitch *ss;
+	} known[] = {
+		{ "ddf", &super_ddf },
+		{ "imsm", &super_imsm },
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(known)/sizeof(known[0]); i++)
+		if (strcmp(vers, known[i].name) == 0)
+			return known[i].ss;
+	return NULL;
+}
+
+
+/* Write our pid to /var/run/mdadm/<devname>.pid.
+ * 'o_excl' is OR-ed into the open() flags: pass O_EXCL to fail if the
+ * file already exists, or 0 to reuse an existing file.
+ * Returns 0 on success, -1 if the path does not fit in the buffer, the
+ * file cannot be opened, or the pid cannot be written completely.
+ */
+static int make_pidfile(char *devname, int o_excl)
+{
+	char path[100];
+	char pid[16];
+	int fd;
+	int n, len;
+	int rv = 0;
+
+	/* bounded formatting: refuse rather than overflow on a long name */
+	n = snprintf(path, sizeof(path), "/var/run/mdadm/%s.pid", devname);
+	if (n < 0 || n >= (int)sizeof(path))
+		return -1;
+
+	fd = open(path, O_RDWR|O_CREAT|o_excl, 0600);
+	if (fd < 0)
+		return -1;
+	len = snprintf(pid, sizeof(pid), "%d\n", (int)getpid());
+	/* a short or failed write leaves an unusable pidfile: report it */
+	if (write(fd, pid, len) != len)
+		rv = -1;
+	close(fd);
+	return rv;
+}
+
+/* Best-effort removal of a stale monitor for 'devname': read the pid from
+ * /var/run/mdadm/<devname>.pid and send it SIGTERM, but only if
+ * /proc/<pid>/cmdline mentions "mdmon". Any failure simply returns.
+ */
+static void try_kill_monitor(char *devname)
+{
+	char buf[100];
+	int fd;
+	int n;
+	pid_t pid;
+
+	snprintf(buf, sizeof(buf), "/var/run/mdadm/%s.pid", devname);
+	fd = open(buf, O_RDONLY);
+	if (fd < 0)
+		return;
+
+	/* leave room for the terminator that strtoul() needs */
+	n = read(fd, buf, sizeof(buf) - 1);
+	close(fd);
+	if (n <= 0)
+		return;
+	buf[n] = 0;
+	pid = strtoul(buf, NULL, 10);
+	if (pid <= 0)
+		return;
+
+	/* kill this process if it is mdmon */
+	snprintf(buf, sizeof(buf), "/proc/%lu/cmdline", (unsigned long) pid);
+	fd = open(buf, O_RDONLY);
+	if (fd < 0)
+		return;
+
+	n = read(fd, buf, sizeof(buf) - 1);
+	/* always release the descriptor, whether or not the read worked */
+	close(fd);
+	if (n <= 0)
+		return;
+	buf[n] = 0;
+
+	if (strstr(buf, "mdmon") != NULL)
+		kill(pid, SIGTERM);
+}
+
+/* Unlink /var/run/mdadm/<devname>.pid; the result of unlink() is ignored. */
+void remove_pidfile(char *devname)
+{
+	char pidpath[100];
+
+	sprintf(pidpath, "/var/run/mdadm/%s.pid", devname);
+	unlink(pidpath);
+}
+
+/* Create the non-blocking, listening Unix-domain control socket at
+ * /var/run/mdadm/<devname>.sock, removing any stale socket file first.
+ * Returns the socket descriptor, or -1 on any failure (the descriptor is
+ * closed before returning an error).
+ */
+static int make_control_sock(char *devname)
+{
+	char path[100];
+	int sfd;
+	long fl;
+	int n;
+	struct sockaddr_un addr;
+
+	n = snprintf(path, sizeof(path), "/var/run/mdadm/%s.sock", devname);
+	/* the path must fit both our buffer and sun_path */
+	if (n < 0 || n >= (int)sizeof(path) ||
+	    n >= (int)sizeof(addr.sun_path))
+		return -1;
+	unlink(path);
+	sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
+	if (sfd < 0)
+		return -1;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = PF_LOCAL;
+	strcpy(addr.sun_path, path);
+	if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
+	    listen(sfd, 10) < 0) {
+		close(sfd);
+		return -1;
+	}
+	fl = fcntl(sfd, F_GETFL, 0);
+	if (fl < 0 || fcntl(sfd, F_SETFL, fl | O_NONBLOCK) < 0) {
+		close(sfd);
+		return -1;
+	}
+	return sfd;
+}
+
+/* Handler installed for SIGUSR1 in main(): it does nothing itself. */
+static void wake_me(int sig)
+{
+	(void)sig;
+}
+
+/* Tell main() whether to fork. Returns 0 only in a DEBUG build when
+ * env_no_mdmon() is true (mdmon is being started by hand); otherwise 1.
+ */
+static int do_fork(void)
+{
+	int should_fork = 1;
+
+#ifdef DEBUG
+	if (env_no_mdmon())
+		should_fork = 0;
+#endif
+
+	return should_fork;
+}
+
+
+
+/* mdmon entry point. argv[1] names the container device to manage.
+ *
+ * Unless do_fork() says otherwise we fork; the parent waits on a pipe for
+ * the child's status word and exits with it. The child creates the pidfile
+ * and control socket, checks through sysfs that the device is a container
+ * with external metadata, loads that metadata, then starts the monitor
+ * thread and runs the manager loop.
+ *
+ * Exit codes: 2 for usage or clone failure, 1 for open/fork failures,
+ * 3 for setup failures.
+ */
+int main(int argc, char *argv[])
+{
+	int mdfd;
+	struct mdinfo *mdi, *di;
+	struct supertype *container;
+	sigset_t set;
+	struct sigaction act;
+	int pfd[2];
+	int status;
+
+	if (argc != 2) {
+		fprintf(stderr, "Usage: md-manage /device/name/for/container\n");
+		exit(2);
+	}
+	mdfd = open(argv[1], O_RDWR);
+	if (mdfd < 0) {
+		fprintf(stderr, "md-manage: %s: %s\n", argv[1],
+			strerror(errno));
+		exit(1);
+	}
+	if (md_get_version(mdfd) < 0) {
+		fprintf(stderr, "md-manage: %s: Not an md device\n",
+			argv[1]);
+		exit(1);
+	}
+
+	/* Fork, and have the child tell us when they are ready */
+	if (do_fork()) {
+		if (pipe(pfd) < 0) {
+			fprintf(stderr, "mdmon: failed to create pipe: %s\n",
+				strerror(errno));
+			exit(1);
+		}
+		switch(fork()) {
+		case -1:
+			fprintf(stderr, "mdmon: failed to fork: %s\n",
+				strerror(errno));
+			exit(1);
+		case 0: /* child */
+			close(pfd[0]);
+			break;
+		default: /* parent */
+			close(pfd[1]);
+			if (read(pfd[0], &status, sizeof(status)) != sizeof(status)) {
+				/* child died before reporting: use its exit status */
+				wait(&status);
+				status = WEXITSTATUS(status);
+			}
+			exit(status);
+		}
+	} else
+		pfd[0] = pfd[1] = -1;
+	/* hopefully it is a container - we'll check later */
+
+	/* zeroed so that fields not set below start out NULL/0 */
+	container = calloc(1, sizeof(*container));
+	if (container == NULL) {
+		fprintf(stderr, "mdmon: out of memory\n");
+		exit(3);
+	}
+	container->devnum = fd2devnum(mdfd);
+	container->devname = devnum2devname(container->devnum);
+	container->device_name = argv[1];
+
+	/* If this fails, we hope it already exists */
+	mkdir("/var/run/mdadm", 0600);
+	/* pid file lives in /var/run/mdadm/mdXX.pid */
+	if (make_pidfile(container->devname, O_EXCL) < 0) {
+		if (ping_monitor(container->devname) == 0) {
+			fprintf(stderr, "mdmon: %s already managed\n",
+				container->devname);
+			exit(3);
+		} else {
+			/* cleanup the old monitor, this one is taking over */
+			try_kill_monitor(container->devname);
+			if (make_pidfile(container->devname, 0) < 0) {
+				fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
+					container->devname);
+				exit(3);
+			}
+		}
+	}
+
+	container->sock = make_control_sock(container->devname);
+	if (container->sock < 0) {
+		fprintf(stderr, "mdmon: Cannot create socket in /var/run/mdadm\n");
+		exit(3);
+	}
+	container->arrays = NULL;
+
+	mdi = sysfs_read(mdfd, container->devnum,
+			 GET_VERSION|GET_LEVEL|GET_DEVS);
+
+	if (!mdi) {
+		fprintf(stderr, "mdmon: failed to load sysfs info for %s\n",
+			container->devname);
+		exit(3);
+	}
+	if (mdi->array.level != UnSet) {
+		fprintf(stderr, "mdmon: %s is not a container - cannot monitor\n",
+			argv[1]);
+		exit(3);
+	}
+	if (mdi->array.major_version != -1 ||
+	    mdi->array.minor_version != -2) {
+		fprintf(stderr, "mdmon: %s does not use external metadata - cannot monitor\n",
+			argv[1]);
+		exit(3);
+	}
+
+	container->ss = find_metadata_methods(mdi->text_version);
+	if (container->ss == NULL) {
+		fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n",
+			argv[1], mdi->text_version);
+		exit(3);
+	}
+
+	/* Take private copies of the device entries: 'mdi' and its list
+	 * are released by sysfs_free() just below.
+	 */
+	container->devs = NULL;
+	for (di = mdi->devs; di; di = di->next) {
+		struct mdinfo *cd = malloc(sizeof(*cd));
+
+		if (cd == NULL) {
+			fprintf(stderr, "mdmon: out of memory\n");
+			exit(3);
+		}
+		*cd = *di;
+		cd->next = container->devs;
+		container->devs = cd;
+	}
+	sysfs_free(mdi);
+
+
+	if (container->ss->load_super(container, mdfd, argv[1])) {
+		fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
+			argv[1]);
+		exit(3);
+	}
+
+	/* Ok, this is close enough. We can say goodbye to our parent now.
+	 */
+	status = 0;
+	if (pfd[1] >= 0) {
+		write(pfd[1], &status, sizeof(status));
+		close(pfd[1]);
+	}
+
+	chdir("/");
+	setsid();
+	close(0);
+	open("/dev/null", O_RDWR);
+	close(1);
+	dup(0);
+#ifndef DEBUG
+	close(2);
+	dup(0);
+#endif
+
+	mlockall(MCL_FUTURE);
+
+	/* SIGUSR is sent between parent and child. So both block it
+	 * and enable it only with pselect.
+	 */
+	sigemptyset(&set);
+	sigaddset(&set, SIGUSR1);
+	sigprocmask(SIG_BLOCK, &set, NULL);
+	/* sa_mask must be initialised before handing 'act' to sigaction() */
+	memset(&act, 0, sizeof(act));
+	sigemptyset(&act.sa_mask);
+	act.sa_handler = wake_me;
+	act.sa_flags = 0;
+	sigaction(SIGUSR1, &act, NULL);
+	act.sa_handler = SIG_IGN;
+	sigaction(SIGPIPE, &act, NULL);
+
+	if (clone_monitor(container) < 0) {
+		fprintf(stderr, "md-manage: failed to start monitor process: %s\n",
+			strerror(errno));
+		exit(2);
+	}
+
+	do_manager(container);
+
+	exit(0);
+}
diff --git a/mdmon.h b/mdmon.h
new file mode 100644
index 00000000..6c1961ad
--- /dev/null
+++ b/mdmon.h
@@ -0,0 +1,65 @@
+#ifdef DEBUG
+#define dprintf(fmt, arg...) \
+ fprintf(stderr, fmt, ##arg)
+#else
+#define dprintf(fmt, arg...) \
+ ({ if (0) fprintf(stderr, fmt, ##arg); 0; })
+#endif
+
+enum array_state { clear, inactive, suspended, readonly, read_auto,
+ clean, active, write_pending, active_idle, bad_word};
+
+enum sync_action { idle, reshape, resync, recover, check, repair, bad_action };
+
+
+/* Per-array state for one array inside a monitored container. */
+struct active_array {
+struct mdinfo info;
+/* set to NULL by the monitor once the array has been deactivated */
+struct supertype *container;
+/* 'replaces', when set, names an older entry that the monitor unlinks
+ * from the list and hands over through discard_this
+ */
+struct active_array *next, *replaces;
+
+/* descriptors the monitor reads sync_action and resync_start from */
+int action_fd;
+int resync_start_fd;
+
+/* curr_* is what was just read, prev_* the value from the previous pass,
+ * next_* what the monitor intends to write back
+ */
+enum array_state prev_state, curr_state, next_state;
+enum sync_action prev_action, curr_action, next_action;
+
+int check_degraded; /* flag set by mon, read by manage */
+
+int devnum;
+
+/* last value parsed from resync_start_fd by get_resync_start() */
+unsigned long long resync_start;
+};
+
+/*
+ * Metadata updates are handled by the monitor thread,
+ * as it has exclusive access to the metadata.
+ * When the manager wants to update metadata, either
+ * for its own reasons (e.g. committing a spare) or
+ * on behalf of mdadm, it creates a metadata_update
+ * structure and queues it to the monitor.
+ * Updates are created and processed by code under the
+ * superswitch. All common code sees them as opaque
+ * blobs.
+ */
+extern struct metadata_update *update_queue, *update_queue_handled;
+
+#define MD_MAJOR 9
+
+extern struct active_array *container;
+extern struct active_array *discard_this;
+extern struct active_array *pending_discard;
+extern struct md_generic_cmd *active_cmd;
+
+
+void remove_pidfile(char *devname);
+void do_monitor(struct supertype *container);
+void do_manager(struct supertype *container);
+
+int read_dev_state(int fd);
+int get_resync_start(struct active_array *a);
+
+struct mdstat_ent *mdstat_read(int hold, int start);
+
+extern int exit_now, manager_ready;
+extern int mon_tid, mgr_tid;
+extern int monitor_loop_cnt;
diff --git a/mdstat.c b/mdstat.c
index a8f7ce75..4bb29d85 100644
--- a/mdstat.c
+++ b/mdstat.c
@@ -86,6 +86,7 @@
#include "mdadm.h"
#include "dlink.h"
#include <sys/select.h>
+#include <ctype.h>
void free_mdstat(struct mdstat_ent *ms)
{
@@ -94,6 +95,7 @@ void free_mdstat(struct mdstat_ent *ms)
if (ms->dev) free(ms->dev);
if (ms->level) free(ms->level);
if (ms->pattern) free(ms->pattern);
+ if (ms->metadata_version) free(ms->metadata_version);
t = ms;
ms = ms->next;
free(t);
@@ -158,6 +160,10 @@ struct mdstat_ent *mdstat_read(int hold, int start)
ent->percent = -1;
ent->active = -1;
ent->resync = 0;
+ ent->metadata_version = NULL;
+ ent->raid_disks = 0;
+ ent->chunk_size = 0;
+ ent->devcnt = 0;
ent->dev = strdup(line);
ent->devnum = devnum;
@@ -176,22 +182,28 @@ struct mdstat_ent *mdstat_read(int hold, int start)
in_devs = 1;
} else if (in_devs && strcmp(w, "blocks")==0)
in_devs = 0;
- else if (in_devs && strncmp(w, "md", 2)==0) {
- /* This has an md device as a component.
- * If that device is already in the list,
- * make sure we insert before there.
- */
- struct mdstat_ent **ih;
- int dn2;
- if (strncmp(w, "md_d", 4)==0)
- dn2 = -1-strtoul(w+4, &ep, 10);
- else
- dn2 = strtoul(w+2, &ep, 10);
- ih = &all;
- while (ih != insert_here && *ih &&
- (*ih)->devnum != dn2)
- ih = & (*ih)->next;
- insert_here = ih;
+ else if (in_devs) {
+ ent->devcnt++;
+ if (strncmp(w, "md", 2)==0) {
+ /* This has an md device as a component.
+ * If that device is already in the
+ * list, make sure we insert before
+ * there.
+ */
+ struct mdstat_ent **ih;
+ int dn2 = devname2devnum(w);
+ ih = &all;
+ while (ih != insert_here && *ih &&
+ (*ih)->devnum != dn2)
+ ih = & (*ih)->next;
+ insert_here = ih;
+ }
+ } else if (strcmp(w, "super") == 0 &&
+ dl_next(w) != line) {
+ w = dl_next(w);
+ ent->metadata_version = strdup(w);
+ } else if (w[0] == '[' && isdigit(w[1])) {
+ ent->raid_disks = atoi(w+1);
} else if (!ent->pattern &&
w[0] == '[' &&
(w[1] == 'U' || w[1] == '_')) {
@@ -256,6 +268,20 @@ void mdstat_wait(int seconds)
select(mdstat_fd >2 ? mdstat_fd+1:3, NULL, NULL, &fds, &tm);
}
+/* Block in pselect() until 'fd' is readable or, if /proc/mdstat is open,
+ * an exceptional condition is reported on it. 'sigmask' is the signal
+ * mask pselect() installs while waiting.
+ */
+void mdstat_wait_fd(int fd, const sigset_t *sigmask)
+{
+	fd_set fds, rfds;
+	int maxfd = fd;
+
+	FD_ZERO(&fds);
+	FD_ZERO(&rfds);
+	if (mdstat_fd >= 0)
+		FD_SET(mdstat_fd, &fds);
+	FD_SET(fd, &rfds);
+
+	/* nfds must cover both descriptors, not just mdstat_fd */
+	if (mdstat_fd > maxfd)
+		maxfd = mdstat_fd;
+
+	pselect(maxfd + 1, &rfds, NULL, &fds,
+		NULL, sigmask);
+}
+
int mddev_busy(int devnum)
{
struct mdstat_ent *mdstat = mdstat_read(0, 0);
diff --git a/monitor.c b/monitor.c
new file mode 100644
index 00000000..7cce5a8b
--- /dev/null
+++ b/monitor.c
@@ -0,0 +1,527 @@
+
+#include "mdadm.h"
+#include "mdmon.h"
+#include <sys/syscall.h>
+#include <sys/select.h>
+#include <signal.h>
+
+/* Text forms of the sysfs array_state and sync_action values.
+ * match_word() returns an index into these tables which is then cast to
+ * enum array_state / enum sync_action, so the order here must stay in step
+ * with those enums; the NULL terminator lines up with bad_word/bad_action.
+ */
+static char *array_states[] = {
+"clear", "inactive", "suspended", "readonly", "read-auto",
+"clean", "active", "write-pending", "active-idle", NULL };
+static char *sync_actions[] = {
+"idle", "reshape", "resync", "recover", "check", "repair", NULL
+};
+
+/* Write the string 'attr' (without its terminator) to 'fd'.
+ * Returns whatever write() returns.
+ */
+static int write_attr(char *attr, int fd)
+{
+	size_t len = strlen(attr);
+
+	return write(fd, attr, len);
+}
+
+/* Add 'fd' to 'fds' and raise *maxfd if needed; negative descriptors
+ * are ignored.
+ */
+static void add_fd(fd_set *fds, int *maxfd, int fd)
+{
+	if (fd >= 0) {
+		if (*maxfd < fd)
+			*maxfd = fd;
+		FD_SET(fd, fds);
+	}
+}
+
+/* Re-read an attribute file from its start into 'buf' (capacity 'len').
+ * The result is NUL-terminated and a trailing newline is dropped.
+ * Returns the number of bytes read, or 0 with buf set to "" when 'fd' is
+ * negative or nothing could be read.
+ */
+static int read_attr(char *buf, int len, int fd)
+{
+	int got = 0;
+
+	if (fd >= 0) {
+		lseek(fd, 0, 0);
+		got = read(fd, buf, len - 1);
+	}
+	if (got <= 0) {
+		buf[0] = 0;
+		return 0;
+	}
+
+	buf[got] = 0;
+	if (buf[got-1] == '\n')
+		buf[got-1] = 0;
+	return got;
+}
+
+
+/* Refresh a->resync_start from a->resync_start_fd.
+ * Returns 1 when a value was read and parsed; otherwise returns the
+ * read_attr() result and leaves a->resync_start untouched.
+ */
+int get_resync_start(struct active_array *a)
+{
+	char text[30];
+	int got = read_attr(text, sizeof(text), a->resync_start_fd);
+
+	if (got > 0) {
+		a->resync_start = strtoull(text, NULL, 10);
+		return 1;
+	}
+	return got;
+}
+
+/* Return 1 if 'attr' (text read from a sysfs file) matches 'str':
+ * 'str' must be matched completely, and 'attr' must then either end or
+ * continue with a comma or newline. Return 0 otherwise.
+ */
+static int attr_match(const char *attr, const char *str)
+{
+	size_t i = 0;
+
+	while (attr[i] && str[i] && attr[i] == str[i])
+		i++;
+
+	if (str[i])
+		return 0;
+	return attr[i] == '\0' || attr[i] == ',' || attr[i] == '\n';
+}
+
+/* Return the index of the first entry in the NULL-terminated 'list'
+ * that attr_match()es 'word', or the index of the NULL terminator when
+ * nothing matches.
+ */
+static int match_word(const char *word, char **list)
+{
+	int idx = 0;
+
+	while (list[idx] && !attr_match(word, list[idx]))
+		idx++;
+	return idx;
+}
+
+/* Read an array_state attribute from 'fd' and translate it to the enum;
+ * bad_word is returned when nothing could be read.
+ */
+static enum array_state read_state(int fd)
+{
+	char word[20];
+
+	if (read_attr(word, sizeof(word), fd) <= 0)
+		return bad_word;
+	return (enum array_state) match_word(word, array_states);
+}
+
+/* Read a sync_action attribute from 'fd' and translate it to the enum;
+ * bad_action is returned when nothing could be read.
+ */
+static enum sync_action read_action( int fd)
+{
+	char word[20];
+
+	if (read_attr(word, sizeof(word), fd) <= 0)
+		return bad_action;
+	return (enum sync_action) match_word(word, sync_actions);
+}
+
+/* Read a device 'state' attribute from 'fd' - a comma separated list of
+ * words - and return the OR of the DS_* flags for every recognised word.
+ * Returns 0 when nothing could be read.
+ */
+int read_dev_state(int fd)
+{
+	static const struct {
+		const char *word;
+		int flag;
+	} flags[] = {
+		{ "faulty", DS_FAULTY },
+		{ "in_sync", DS_INSYNC },
+		{ "write_mostly", DS_WRITE_MOSTLY },
+		{ "spare", DS_SPARE },
+		{ "blocked", DS_BLOCKED },
+	};
+	char buf[60];
+	char *cp;
+	unsigned int i;
+	int rv = 0;
+
+	if (read_attr(buf, sizeof(buf), fd) <= 0)
+		return 0;
+
+	cp = buf;
+	while (cp) {
+		for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++)
+			if (attr_match(cp, flags[i].word))
+				rv |= flags[i].flag;
+		/* step to the word after the next comma, if any */
+		cp = strchr(cp, ',');
+		if (cp)
+			cp++;
+	}
+	return rv;
+}
+
+/* Send SIGUSR1 to the manager thread (mgr_tid) of this process. */
+static void signal_manager(void)
+{
+	/* tgkill(getpid(), mgr_tid, SIGUSR1); */
+	syscall(SYS_tgkill, getpid(), mgr_tid, SIGUSR1);
+}
+
+/* Monitor a set of active md arrays - all of which share the
+ * same metadata - and respond to events that require
+ * metadata update.
+ *
+ * New arrays are detected by another thread which allocates
+ * required memory and attaches the data structure to our list.
+ *
+ * Events:
+ * Array stops.
+ * This is detected by array_state going to 'clear' or 'inactive'.
+ * while we thought it was active.
+ * Response is to mark metadata as clean and 'clear' the array(??)
+ * write-pending
+ * array_state is 'write-pending'
+ * We mark metadata as 'dirty' then set array to 'active'.
+ * active_idle
+ * Either ignore, or mark clean, then mark metadata as clean.
+ *
+ * device fails
+ * detected by rd-N/state reporting "faulty"
+ * mark device as 'failed' in metadata, let the kernel release the
+ * device by writing '-blocked' to rd/state, and finally write 'remove' to
+ * rd/state. Before a disk can be replaced it must be failed and removed
+ * from all container members, this will be preemptive for the other
+ * arrays... safe?
+ *
+ * sync completes
+ * sync_action was 'resync' and becomes 'idle' and resync_start becomes
+ * MaxSector
+ * Notify metadata that sync is complete.
+ *
+ * recovery completes
+ * sync_action changes from 'recover' to 'idle'
+ * Check each device state and mark metadata if 'faulty' or 'in_sync'.
+ *
+ * deal with resync
+ * This only happens on finding a new array... mdadm will have set
+ * 'resync_start' to the correct value. If 'resync_start' indicates that a
+ * resync needs to occur set the array to the 'active' state rather than the
+ * initial read-auto state.
+ *
+ *
+ *
+ * We wait for a change (poll/select) on array_state, sync_action, and
+ * each rd-X/state file.
+ * When we get any change, we check everything. So read each state file,
+ * then decide what to do.
+ *
+ * The core action is to write new metadata to all devices in the array.
+ * This is done at most once on any wakeup.
+ * After that we might:
+ * - update the array_state
+ * - set the role of some devices.
+ * - request a sync_action
+ *
+ */
+
+/* One monitoring pass over array 'a': read the array state, sync action
+ * and every device state from sysfs, tell the metadata handler about any
+ * relevant transitions, sync the metadata, then write the resulting state
+ * changes back to sysfs. When a degraded check is wanted the manager is
+ * flagged and signalled. A stopped array gets a->container cleared so the
+ * caller will discard it.
+ * Always returns 1.
+ */
+static int read_and_act(struct active_array *a)
+{
+	int check_degraded = 0;
+	int deactivate = 0;
+	struct mdinfo *mdi;
+
+	a->next_state = bad_word;
+	a->next_action = bad_action;
+
+	a->curr_state = read_state(a->info.state_fd);
+	a->curr_action = read_action(a->action_fd);
+	for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
+		mdi->next_state = 0;
+		if (mdi->state_fd >= 0)
+			mdi->curr_state = read_dev_state(mdi->state_fd);
+	}
+
+	if (a->curr_state <= inactive &&
+	    a->prev_state > inactive) {
+		/* array has been stopped */
+		get_resync_start(a);
+		a->container->ss->set_array_state(a, 1);
+		a->next_state = clear;
+		deactivate = 1;
+	}
+	if (a->curr_state == write_pending) {
+		get_resync_start(a);
+		a->container->ss->set_array_state(a, 0);
+		a->next_state = active;
+	}
+	if (a->curr_state == active_idle) {
+		/* Set array to 'clean' FIRST, then
+		 * a->ss->mark_clean(a, ~0ULL);
+		 * just ignore for now.
+		 */
+	}
+
+	if (a->curr_state == readonly) {
+		/* Well, I'm ready to handle things, so
+		 * read-auto is OK. FIXME what if we really want
+		 * readonly ???
+		 */
+		get_resync_start(a);
+		if (a->resync_start == ~0ULL)
+			a->next_state = read_auto; /* array is clean */
+		else {
+			a->container->ss->set_array_state(a, 0);
+			a->next_state = active;
+		}
+	}
+
+	if (!deactivate &&
+	    a->curr_action == idle &&
+	    a->prev_action == resync) {
+		/* A resync has finished. The endpoint is recorded in
+		 * 'sync_start'. We don't update the metadata
+		 * until the array goes inactive or readonly though.
+		 * Just check if we need to fiddle spares.
+		 */
+		get_resync_start(a);
+		a->container->ss->set_array_state(a, 0);
+		check_degraded = 1;
+	}
+
+	if (!deactivate &&
+	    a->curr_action == idle &&
+	    a->prev_action == recover) {
+		/* A recovery has finished. Some disks may be in sync now,
+		 * and the array may no longer be degraded
+		 */
+		for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
+			a->container->ss->set_disk(a, mdi->disk.raid_disk,
+						   mdi->curr_state);
+			if (! (mdi->curr_state & DS_INSYNC))
+				check_degraded = 1;
+		}
+	}
+
+	for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
+		if (mdi->curr_state & DS_FAULTY) {
+			a->container->ss->set_disk(a, mdi->disk.raid_disk,
+						   mdi->curr_state);
+			check_degraded = 1;
+			mdi->next_state = DS_REMOVE;
+		}
+	}
+
+	a->container->ss->sync_metadata(a->container);
+	dprintf("%s: update[%d]: (", __func__, a->info.container_member);
+
+	/* Effect state changes in the array */
+	if (a->next_state != bad_word) {
+		dprintf(" state:%s", array_states[a->next_state]);
+		write_attr(array_states[a->next_state], a->info.state_fd);
+	}
+	if (a->next_action != bad_action) {
+		write_attr(sync_actions[a->next_action], a->action_fd);
+		/* report the action just written, not the array state */
+		dprintf(" action:%s", sync_actions[a->next_action]);
+	}
+	for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
+		if (mdi->next_state == DS_REMOVE && mdi->state_fd >= 0) {
+			int remove_result;
+
+			write_attr("-blocked", mdi->state_fd);
+			/* the kernel may not be able to immediately remove the
+			 * disk, we can simply wait until the next event to try
+			 * again.
+			 */
+			dprintf(" %d:-blocked", mdi->disk.raid_disk);
+			remove_result = write_attr("remove", mdi->state_fd);
+			if (remove_result > 0) {
+				dprintf(" %d:removed", mdi->disk.raid_disk);
+				close(mdi->state_fd);
+				mdi->state_fd = -1;
+			}
+		}
+		if (mdi->next_state & DS_INSYNC) {
+			write_attr("+in_sync", mdi->state_fd);
+			dprintf(" %d:+in_sync", mdi->disk.raid_disk);
+		}
+	}
+	dprintf(" )\n");
+
+	/* move curr_ to prev_ */
+	a->prev_state = a->curr_state;
+
+	a->prev_action = a->curr_action;
+
+	for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
+		mdi->prev_state = mdi->curr_state;
+		mdi->next_state = 0;
+	}
+
+	if (check_degraded) {
+		/* manager will do the actual check */
+		a->check_degraded = 1;
+		signal_manager();
+	}
+
+	if (deactivate)
+		a->container = NULL;
+
+	return 1;
+}
+
+/* Return the device of array 'a' whose disk major/minor numbers match,
+ * or NULL if the array has no such device.
+ */
+static struct mdinfo *
+find_device(struct active_array *a, int major, int minor)
+{
+	struct mdinfo *dev = a->info.devs;
+
+	while (dev && !(dev->disk.major == major && dev->disk.minor == minor))
+		dev = dev->next;
+	return dev;
+}
+
+/* For every still-active array in the list starting at 'aa' that also
+ * contains the device 'failed' (matched by major/minor), write "faulty"
+ * to that device's state file unless it is already marked faulty there.
+ */
+static void reconcile_failed(struct active_array *aa, struct mdinfo *failed)
+{
+	struct active_array *arr;
+
+	for (arr = aa; arr; arr = arr->next) {
+		struct mdinfo *victim;
+
+		/* skip arrays that have been deactivated */
+		if (!arr->container)
+			continue;
+		victim = find_device(arr, failed->disk.major,
+				     failed->disk.minor);
+		if (victim && !(victim->curr_state & DS_FAULTY))
+			write_attr("faulty", victim->state_fd);
+	}
+}
+
+#ifdef DEBUG
+/* Debug aid: for each descriptor set in 'fds', print its number and the
+ * last path component of the /proc/<pid>/fd/<n> link target to stderr.
+ */
+static void dprint_wake_reasons(fd_set *fds)
+{
+	char proc_path[256];
+	char link[256];
+	int fd;
+
+	fprintf(stderr, "monitor: wake ( ");
+	for (fd = 0; fd < FD_SETSIZE; fd++) {
+		char *slash;
+		int len;
+
+		if (!FD_ISSET(fd, fds))
+			continue;
+
+		sprintf(proc_path, "/proc/%d/fd/%d",
+			(int) getpid(), fd);
+
+		len = readlink(proc_path, link, sizeof(link) - 1);
+		if (len < 0) {
+			fprintf(stderr, "%d:unknown ", fd);
+			continue;
+		}
+		link[len] = '\0';
+		slash = strrchr(link, '/');
+		fprintf(stderr, "%d:%s ",
+			fd, slash ? slash + 1 : link);
+	}
+	fprintf(stderr, ")\n");
+}
+#endif
+
+int monitor_loop_cnt;
+
+/* One iteration of the monitor loop for 'container'.
+ *
+ * Deactivated arrays are handed to the manager through discard_this, the
+ * sysfs descriptors of the remaining arrays are collected, and - unless
+ * 'nowait' is set - we sleep in pselect() with SIGUSR1 unblocked. After
+ * waking, queued metadata updates are processed, each active array is
+ * examined with read_and_act(), and device failures are propagated to the
+ * other arrays in the container.
+ *
+ * Exits the process when the manager is ready and no arrays remain and
+ * the container device can be opened O_EXCL (or fails with anything other
+ * than EBUSY).
+ *
+ * Returns the pselect() result (0 for EINTR or when 'nowait' is set) plus
+ * the sum of the read_and_act() results.
+ */
+static int wait_and_act(struct supertype *container, int nowait)
+{
+	fd_set rfds;
+	int maxfd = 0;
+	struct active_array **aap = &container->arrays;
+	struct active_array *a, **ap;
+	/* must start at 0: with 'nowait' set pselect() never assigns it */
+	int rv = 0;
+	struct mdinfo *mdi;
+
+	FD_ZERO(&rfds);
+
+	for (ap = aap ; *ap ;) {
+		a = *ap;
+		/* once an array has been deactivated we want to
+		 * ask the manager to discard it.
+		 */
+		if (!a->container) {
+			if (discard_this) {
+				/* hand-off slot is busy: retry next time */
+				ap = &(*ap)->next;
+				continue;
+			}
+			*ap = a->next;
+			a->next = NULL;
+			discard_this = a;
+			signal_manager();
+			continue;
+		}
+
+		add_fd(&rfds, &maxfd, a->info.state_fd);
+		add_fd(&rfds, &maxfd, a->action_fd);
+		for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
+			add_fd(&rfds, &maxfd, mdi->state_fd);
+
+		ap = &(*ap)->next;
+	}
+
+	if (manager_ready && *aap == NULL) {
+		/* No interesting arrays. Lets see about exiting.
+		 * Note that blocking at this point is not a problem
+		 * as there are no active arrays, there is nothing that
+		 * we need to be ready to do.
+		 */
+		int fd = open(container->device_name, O_RDONLY|O_EXCL);
+		if (fd >= 0 || errno != EBUSY) {
+			/* OK, we are safe to leave */
+			dprintf("no arrays to monitor... exiting\n");
+			remove_pidfile(container->devname);
+			exit_now = 1;
+			signal_manager();
+			exit(0);
+		}
+	}
+
+	if (!nowait) {
+		sigset_t set;
+		sigprocmask(SIG_UNBLOCK, NULL, &set);
+		sigdelset(&set, SIGUSR1);
+		monitor_loop_cnt |= 1;
+		rv = pselect(maxfd+1, &rfds, NULL, NULL, NULL, &set);
+		monitor_loop_cnt += 1;
+		if (rv == -1 && errno == EINTR)
+			rv = 0;
+#ifdef DEBUG
+		dprint_wake_reasons(&rfds);
+#endif
+
+	}
+
+	if (update_queue) {
+		struct metadata_update *this;
+
+		for (this = update_queue; this ; this = this->next)
+			container->ss->process_update(container, this);
+
+		update_queue_handled = update_queue;
+		update_queue = NULL;
+		signal_manager();
+		container->ss->sync_metadata(container);
+	}
+
+	for (a = *aap; a ; a = a->next) {
+		if (a->replaces && !discard_this) {
+			struct active_array **rp;
+
+			/* unlink the array being replaced, if still listed */
+			for (rp = &a->next; *rp && *rp != a->replaces;
+			     rp = & (*rp)->next)
+				;
+			if (*rp)
+				*rp = (*rp)->next;
+			discard_this = a->replaces;
+			a->replaces = NULL;
+			/* FIXME check if device->state_fd need to be cleared?*/
+			signal_manager();
+		}
+		if (a->container)
+			rv += read_and_act(a);
+	}
+
+	/* propagate failures across container members */
+	for (a = *aap; a ; a = a->next) {
+		if (!a->container)
+			continue;
+		for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
+			if (mdi->curr_state & DS_FAULTY)
+				reconcile_failed(*aap, mdi);
+	}
+
+	return rv;
+}
+
+/* Monitor thread main loop: call wait_and_act() repeatedly - without
+ * waiting on the first pass only - until it returns a negative value.
+ */
+void do_monitor(struct supertype *container)
+{
+	int nowait = 1;
+
+	while (wait_and_act(container, nowait) >= 0)
+		nowait = 0;
+}
diff --git a/msg.c b/msg.c
new file mode 100644
index 00000000..78fd7f7e
--- /dev/null
+++ b/msg.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2008 Intel Corporation
+ *
+ * mdmon socket / message handling
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include "mdadm.h"
+#include "mdmon.h"
+
+static const __u32 start_magic = 0x5a5aa5a5;
+static const __u32 end_magic = 0xa5a55a5a;
+
+/*
+ * Write exactly 'len' bytes from 'buf' to 'fd'.
+ *
+ * Before every write() we wait with select() until 'fd' is writable.
+ * 'tmo' is a timeout in seconds; 0 means wait forever.  The same timeval
+ * is handed to every select() call, so where select() updates it with the
+ * time remaining (documented Linux behaviour) 'tmo' bounds the whole
+ * transfer rather than each chunk.
+ *
+ * Returns 0 once everything was written, -1 on timeout or any error.
+ */
+static int send_buf(int fd, const void* buf, int len, int tmo)
+{
+	/* byte cursor: ISO C does not allow arithmetic on 'void *' */
+	const char *pos = buf;
+	fd_set set;
+	int rv;
+	struct timeval timeout = {tmo, 0};
+	struct timeval *ptmo = tmo ? &timeout : NULL;
+
+	while (len) {
+		FD_ZERO(&set);
+		FD_SET(fd, &set);
+		rv = select(fd+1, NULL, &set, NULL, ptmo);
+		if (rv <= 0)
+			return -1;
+		rv = write(fd, pos, len);
+		if (rv <= 0)
+			return -1;
+		/* short write: advance and go round again */
+		len -= rv;
+		pos += rv;
+	}
+	return 0;
+}
+
+/*
+ * Read exactly 'len' bytes from 'fd' into 'buf'.
+ *
+ * Before every read() we wait with select() until 'fd' is readable.
+ * 'tmo' is a timeout in seconds; 0 means wait forever.  The same timeval
+ * is handed to every select() call, so where select() updates it with the
+ * time remaining (documented Linux behaviour) 'tmo' bounds the whole
+ * transfer rather than each chunk.
+ *
+ * Returns 0 once 'len' bytes have arrived, -1 on timeout, end-of-file or
+ * any error.
+ */
+static int recv_buf(int fd, void* buf, int len, int tmo)
+{
+	/* byte cursor: ISO C does not allow arithmetic on 'void *' */
+	char *pos = buf;
+	fd_set set;
+	int rv;
+	struct timeval timeout = {tmo, 0};
+	struct timeval *ptmo = tmo ? &timeout : NULL;
+
+	while (len) {
+		FD_ZERO(&set);
+		FD_SET(fd, &set);
+		rv = select(fd+1, &set, NULL, NULL, ptmo);
+		if (rv <= 0)
+			return -1;
+		rv = read(fd, pos, len);
+		if (rv <= 0)
+			return -1;
+		/* short read: advance and go round again */
+		len -= rv;
+		pos += rv;
+	}
+	return 0;
+}
+
+
+/*
+ * Send one framed message on 'fd':
+ *   start_magic, 32-bit length, 'length' payload bytes, end_magic.
+ * 'tmo' is passed unchanged to send_buf() for every part.
+ *
+ * Returns 0 when the complete frame was sent, -1 as soon as any part
+ * fails.  Nothing more is written after a failure: previously the result
+ * of sending end_magic overwrote earlier errors, so a truncated frame
+ * could be reported as success.
+ */
+int send_message(int fd, struct metadata_update *msg, int tmo)
+{
+	__u32 len = msg->len;
+	int rv;
+
+	rv = send_buf(fd, &start_magic, 4, tmo);
+	if (rv == 0)
+		rv = send_buf(fd, &len, 4, tmo);
+	/* an empty message has no payload part at all */
+	if (rv == 0 && len)
+		rv = send_buf(fd, msg->buf, msg->len, tmo);
+	if (rv == 0)
+		rv = send_buf(fd, &end_magic, 4, tmo);
+
+	return rv;
+}
+
+/*
+ * Receive one framed message from 'fd' (see send_message() for the frame
+ * layout).  'tmo' is passed unchanged to recv_buf() for every part.
+ *
+ * On success returns 0 with msg->len set to the payload length and
+ * msg->buf pointing at a malloc()ed copy of the payload (NULL when the
+ * length is 0); the caller owns that buffer.
+ *
+ * On failure (timeout, bad magic, length above MSG_MAX_LEN, allocation
+ * failure) returns -1 and leaves *msg untouched, so the caller is never
+ * handed a pointer to memory that has already been freed.
+ */
+int receive_message(int fd, struct metadata_update *msg, int tmo)
+{
+	__u32 magic;
+	__u32 len;
+	void *payload = NULL;
+
+	if (recv_buf(fd, &magic, 4, tmo) < 0 || magic != start_magic)
+		return -1;
+	if (recv_buf(fd, &len, 4, tmo) < 0 || len > MSG_MAX_LEN)
+		return -1;
+	if (len) {
+		payload = malloc(len);
+		if (payload == NULL)
+			return -1;
+		if (recv_buf(fd, payload, len, tmo) < 0)
+			goto fail;
+	}
+	if (recv_buf(fd, &magic, 4, tmo) < 0 || magic != end_magic)
+		goto fail;
+
+	/* only publish the result once the whole frame has been validated */
+	msg->buf = payload;
+	msg->len = len;
+	return 0;
+
+fail:
+	free(payload);
+	return -1;
+}
+
+/*
+ * Send an empty (zero length) message on 'fd'.
+ * Returns whatever send_message() returns.
+ */
+int ack(int fd, int tmo)
+{
+	struct metadata_update empty = { .len = 0 };
+	int rv;
+
+	rv = send_message(fd, &empty, tmo);
+	return rv;
+}
+
+/*
+ * Wait for one message on 'fd' and discard its content.
+ * Returns whatever receive_message() returns.
+ *
+ * receive_message() allocates a buffer when the reply carries a payload;
+ * it is released here so that a non-empty reply does not leak memory.
+ */
+int wait_reply(int fd, int tmo)
+{
+	/* zero-initialised, so msg.buf starts out as NULL */
+	struct metadata_update msg = { .len = 0 };
+	int rv;
+
+	rv = receive_message(fd, &msg, tmo);
+	if (rv == 0)
+		free(msg.buf);
+	return rv;
+}
+
+/*
+ * Connect to the unix socket "/var/run/mdadm/<devname>.sock" and switch
+ * the connection to non-blocking mode.
+ *
+ * Returns the connected socket descriptor, or -1 if the socket path does
+ * not fit in sockaddr_un.sun_path, or if socket() or connect() fails.
+ */
+int connect_monitor(char *devname)
+{
+	int sfd;
+	int n;
+	long fl;
+	struct sockaddr_un addr;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = PF_LOCAL;
+	/* build the path in place and refuse names that would be truncated */
+	n = snprintf(addr.sun_path, sizeof(addr.sun_path),
+		     "/var/run/mdadm/%s.sock", devname);
+	if (n < 0 || (size_t)n >= sizeof(addr.sun_path))
+		return -1;
+
+	sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
+	if (sfd < 0)
+		return -1;
+
+	if (connect(sfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+		close(sfd);
+		return -1;
+	}
+
+	/* best effort: a failed F_GETFL must not be fed back into F_SETFL */
+	fl = fcntl(sfd, F_GETFL, 0);
+	if (fl >= 0)
+		fcntl(sfd, F_SETFL, fl | O_NONBLOCK);
+
+	return sfd;
+}
+
+/*
+ * Check that something is answering on the socket for 'devname':
+ * connect, send an empty message and wait for a reply, using a timeout
+ * value of 20 for each step.
+ *
+ * Returns the (negative) result of connect_monitor() if the connection
+ * fails, -1 if the exchange fails, 0 otherwise.
+ */
+int ping_monitor(char *devname)
+{
+	int result = -1;
+	int sfd = connect_monitor(devname);
+
+	if (sfd < 0)
+		return sfd;
+
+	/* the reply is only awaited when the ping itself went out */
+	if (ack(sfd, 20) == 0 && wait_reply(sfd, 20) == 0)
+		result = 0;
+
+	close(sfd);
+	return result;
+}
diff --git a/msg.h b/msg.h
new file mode 100644
index 00000000..4dc805e5
--- /dev/null
+++ b/msg.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2008 Intel Corporation
+ *
+ * mdmon socket / message handling
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+
+struct mdinfo;
+struct metadata_update;
+
+extern int receive_message(int fd, struct metadata_update *msg, int tmo);
+extern int send_message(int fd, struct metadata_update *msg, int tmo);
+extern int ack(int fd, int tmo);
+extern int wait_reply(int fd, int tmo);
+extern int connect_monitor(char *devname);
+extern int ping_monitor(char *devname);
+
+#define MSG_MAX_LEN (4*1024*1024)
diff --git a/sg_io.c b/sg_io.c
new file mode 100644
index 00000000..4ae5d927
--- /dev/null
+++ b/sg_io.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2007 Intel Corporation
+ *
+ * Retrieve drive serial numbers for scsi disks
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <string.h>
+#include <scsi/scsi.h>
+#include <scsi/sg.h>
+#include <sys/ioctl.h>
+
+/*
+ * Issue a SCSI INQUIRY for page 0x80 through the SG_IO ioctl on 'fd' and
+ * place the returned data in 'buf' (at most 'buf_len' bytes).
+ *
+ * The allocation length field used in this 6-byte command is a single
+ * byte, so no more than 255 bytes are ever requested.  Larger 'buf_len'
+ * values are clamped instead of being silently truncated modulo 256
+ * while still advertising the full size as the transfer length.
+ *
+ * Returns the result of ioctl(): -1 on failure.
+ * NOTE(review): the SCSI status fields of the sg header are not examined
+ * here - confirm callers do not need them.
+ */
+int scsi_get_serial(int fd, void *buf, size_t buf_len)
+{
+	unsigned char alloc_len = buf_len > 255 ? 255 : (unsigned char)buf_len;
+	unsigned char inq_cmd[] = {INQUIRY, 1, 0x80, 0, alloc_len, 0};
+	unsigned char sense[32];
+	struct sg_io_hdr io_hdr;
+
+	memset(&io_hdr, 0, sizeof(io_hdr));
+	io_hdr.interface_id = 'S';
+	io_hdr.cmdp = inq_cmd;
+	io_hdr.cmd_len = sizeof(inq_cmd);
+	io_hdr.dxferp = buf;
+	/* keep the transfer length in step with what the command asks for */
+	io_hdr.dxfer_len = alloc_len;
+	io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+	io_hdr.sbp = sense;
+	io_hdr.mx_sb_len = sizeof(sense);
+	io_hdr.timeout = 5000;
+
+	return ioctl(fd, SG_IO, &io_hdr);
+}
diff --git a/super-ddf.c b/super-ddf.c
new file mode 100644
index 00000000..5d387504
--- /dev/null
+++ b/super-ddf.c
@@ -0,0 +1,3227 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2006-2007 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neil@brown.name>
+ *
+ * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
+ * (July 28 2006). Reused by permission of SNIA.
+ */
+
+#define HAVE_STDINT_H 1
+#include "mdadm.h"
+#include "mdmon.h"
+#include "sha1.h"
+#include <values.h>
+
+/* a non-official T10 name for creation GUIDs */
+static char T10[] = "Linux-MD";
+
+/* DDF timestamps are 1980 based, so we need to add
+ * second-in-decade-of-seventies to convert to linux timestamps.
+ * 10 years with 2 leap years.
+ */
+#define DECADE (3600*24*(365*10+2))
+unsigned long crc32(
+ unsigned long crc,
+ const unsigned char *buf,
+ unsigned len);
+
+/* The DDF metadata handling.
+ * DDF metadata lives at the end of the device.
+ * The last 512 byte block provides an 'anchor' which is used to locate
+ * the rest of the metadata which usually lives immediately behind the anchor.
+ *
+ * Note:
+ * - all multibyte numeric fields are bigendian.
+ * - all strings are space padded.
+ *
+ */
+
+/* Primary Raid Level (PRL) */
+#define DDF_RAID0 0x00
+#define DDF_RAID1 0x01
+#define DDF_RAID3 0x03
+#define DDF_RAID4 0x04
+#define DDF_RAID5 0x05
+#define DDF_RAID1E 0x11
+#define DDF_JBOD 0x0f
+#define DDF_CONCAT 0x1f
+#define DDF_RAID5E 0x15
+#define DDF_RAID5EE 0x25
+#define DDF_RAID6 0x06
+
+/* Raid Level Qualifier (RLQ) */
+#define DDF_RAID0_SIMPLE 0x00
+#define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
+#define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
+#define DDF_RAID3_0 0x00 /* parity in first extent */
+#define DDF_RAID3_N 0x01 /* parity in last extent */
+#define DDF_RAID4_0 0x00 /* parity in first extent */
+#define DDF_RAID4_N 0x01 /* parity in last extent */
+/* these apply to raid5e and raid5ee as well */
+#define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
+#define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
+#define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
+#define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
+
+#define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
+#define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
+
+/* Secondary RAID Level (SRL) */
+#define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
+#define DDF_2MIRRORED 0x01
+#define DDF_2CONCAT 0x02
+#define DDF_2SPANNED 0x03 /* This is also weird - be careful */
+
+/* Magic numbers */
+#define DDF_HEADER_MAGIC __cpu_to_be32(0xDE11DE11)
+#define DDF_CONTROLLER_MAGIC __cpu_to_be32(0xAD111111)
+#define DDF_PHYS_RECORDS_MAGIC __cpu_to_be32(0x22222222)
+#define DDF_PHYS_DATA_MAGIC __cpu_to_be32(0x33333333)
+#define DDF_VIRT_RECORDS_MAGIC __cpu_to_be32(0xDDDDDDDD)
+#define DDF_VD_CONF_MAGIC __cpu_to_be32(0xEEEEEEEE)
+#define DDF_SPARE_ASSIGN_MAGIC __cpu_to_be32(0x55555555)
+#define DDF_VU_CONF_MAGIC __cpu_to_be32(0x88888888)
+#define DDF_VENDOR_LOG_MAGIC __cpu_to_be32(0x01dBEEF0)
+#define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C)
+
+#define DDF_GUID_LEN 24
+#define DDF_REVISION_0 "01.00.00"
+#define DDF_REVISION_2 "01.02.00"
+
+struct ddf_header {
+ __u32 magic; /* DDF_HEADER_MAGIC */
+ __u32 crc;
+ char guid[DDF_GUID_LEN];
+ char revision[8]; /* 01.02.00 */
+ __u32 seq; /* starts at '1' */
+ __u32 timestamp;
+ __u8 openflag;
+ __u8 foreignflag;
+ __u8 enforcegroups;
+ __u8 pad0; /* 0xff */
+ __u8 pad1[12]; /* 12 * 0xff */
+ /* 64 bytes so far */
+ __u8 header_ext[32]; /* reserved: fill with 0xff */
+ __u64 primary_lba;
+ __u64 secondary_lba;
+ __u8 type;
+ __u8 pad2[3]; /* 0xff */
+ __u32 workspace_len; /* sectors for vendor space -
+ * at least 32768(sectors) */
+ __u64 workspace_lba;
+ __u16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
+ __u16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
+ __u16 max_partitions; /* i.e. max num of configuration
+ record entries per disk */
+ __u16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
+ *12/512) */
+ __u16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
+ __u8 pad3[54]; /* 0xff */
+ /* 192 bytes so far */
+ __u32 controller_section_offset;
+ __u32 controller_section_length;
+ __u32 phys_section_offset;
+ __u32 phys_section_length;
+ __u32 virt_section_offset;
+ __u32 virt_section_length;
+ __u32 config_section_offset;
+ __u32 config_section_length;
+ __u32 data_section_offset;
+ __u32 data_section_length;
+ __u32 bbm_section_offset;
+ __u32 bbm_section_length;
+ __u32 diag_space_offset;
+ __u32 diag_space_length;
+ __u32 vendor_offset;
+ __u32 vendor_length;
+ /* 256 bytes so far */
+ __u8 pad4[256]; /* 0xff */
+};
+
+/* type field */
+#define DDF_HEADER_ANCHOR 0x00
+#define DDF_HEADER_PRIMARY 0x01
+#define DDF_HEADER_SECONDARY 0x02
+
+/* The content of the 'controller section' - global scope */
+struct ddf_controller_data {
+ __u32 magic; /* DDF_CONTROLLER_MAGIC */
+ __u32 crc;
+ char guid[DDF_GUID_LEN];
+ struct controller_type {
+ __u16 vendor_id;
+ __u16 device_id;
+ __u16 sub_vendor_id;
+ __u16 sub_device_id;
+ } type;
+ char product_id[16];
+ __u8 pad[8]; /* 0xff */
+ __u8 vendor_data[448];
+};
+
+/* The content of phys_section - global scope */
+struct phys_disk {
+ __u32 magic; /* DDF_PHYS_RECORDS_MAGIC */
+ __u32 crc;
+ __u16 used_pdes;
+ __u16 max_pdes;
+ __u8 pad[52];
+ struct phys_disk_entry {
+ char guid[DDF_GUID_LEN];
+ __u32 refnum;
+ __u16 type;
+ __u16 state;
+ __u64 config_size; /* DDF structures must be after here */
+ char path[18]; /* another horrible structure really */
+ __u8 pad[6];
+ } entries[0];
+};
+
+/* phys_disk_entry.type is a bitmap - bigendian remember */
+#define DDF_Forced_PD_GUID 1
+#define DDF_Active_in_VD 2
+#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
+#define DDF_Spare 8 /* overrides Global_spare */
+#define DDF_Foreign 16
+#define DDF_Legacy 32 /* no DDF on this device */
+
+#define DDF_Interface_mask 0xf00
+#define DDF_Interface_SCSI 0x100
+#define DDF_Interface_SAS 0x200
+#define DDF_Interface_SATA 0x300
+#define DDF_Interface_FC 0x400
+
+/* phys_disk_entry.state is a bigendian bitmap */
+#define DDF_Online 1
+#define DDF_Failed 2 /* overrides 1,4,8 */
+#define DDF_Rebuilding 4
+#define DDF_Transition 8
+#define DDF_SMART 16
+#define DDF_ReadErrors 32
+#define DDF_Missing 64
+
+/* The content of the virt_section global scope */
+struct virtual_disk {
+ __u32 magic; /* DDF_VIRT_RECORDS_MAGIC */
+ __u32 crc;
+ __u16 populated_vdes;
+ __u16 max_vdes;
+ __u8 pad[52];
+ struct virtual_entry {
+ char guid[DDF_GUID_LEN];
+ __u16 unit;
+ __u16 pad0; /* 0xffff */
+ __u16 guid_crc;
+ __u16 type;
+ __u8 state;
+ __u8 init_state;
+ __u8 pad1[14];
+ char name[16];
+ } entries[0];
+};
+
+/* virtual_entry.type is a bitmap - bigendian */
+#define DDF_Shared 1
+#define DDF_Enforce_Groups 2
+#define DDF_Unicode 4
+#define DDF_Owner_Valid 8
+
+/* virtual_entry.state is a bigendian bitmap */
+#define DDF_state_mask 0x7
+#define DDF_state_optimal 0x0
+#define DDF_state_degraded 0x1
+#define DDF_state_deleted 0x2
+#define DDF_state_missing 0x3
+#define DDF_state_failed 0x4
+#define DDF_state_part_optimal 0x5
+
+#define DDF_state_morphing 0x8
+#define DDF_state_inconsistent 0x10
+
+/* virtual_entry.init_state is a bigendian bitmap */
+#define DDF_initstate_mask 0x03
+#define DDF_init_not 0x00
+#define DDF_init_quick 0x01 /* initialisation in progress.
+ * i.e. 'state_inconsistent' */
+#define DDF_init_full 0x02
+
+#define DDF_access_mask 0xc0
+#define DDF_access_rw 0x00
+#define DDF_access_ro 0x80
+#define DDF_access_blocked 0xc0
+
+/* The content of the config_section - local scope
+ * It has multiple records each config_record_len sectors
+ * They can be vd_config or spare_assign
+ */
+
+struct vd_config {
+ __u32 magic; /* DDF_VD_CONF_MAGIC */
+ __u32 crc;
+ char guid[DDF_GUID_LEN];
+ __u32 timestamp;
+ __u32 seqnum;
+ __u8 pad0[24];
+ __u16 prim_elmnt_count;
+ __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
+ __u8 prl;
+ __u8 rlq;
+ __u8 sec_elmnt_count;
+ __u8 sec_elmnt_seq;
+ __u8 srl;
+ __u64 blocks; /* blocks per component could be different
+ * on different component devices...(only
+ * for concat I hope) */
+ __u64 array_blocks; /* blocks in array */
+ __u8 pad1[8];
+ __u32 spare_refs[8];
+ __u8 cache_pol[8];
+ __u8 bg_rate;
+ __u8 pad2[3];
+ __u8 pad3[52];
+ __u8 pad4[192];
+ __u8 v0[32]; /* reserved- 0xff */
+ __u8 v1[32]; /* reserved- 0xff */
+ __u8 v2[16]; /* reserved- 0xff */
+ __u8 v3[16]; /* reserved- 0xff */
+ __u8 vendor[32];
+ __u32 phys_refnum[0]; /* refnum of each disk in sequence */
+ /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
+ bvd are always the same size */
+};
+
+/* vd_config.cache_pol[7] is a bitmap */
+#define DDF_cache_writeback 1 /* else writethrough */
+#define DDF_cache_wadaptive 2 /* only applies if writeback */
+#define DDF_cache_readahead 4
+#define DDF_cache_radaptive 8 /* only if doing read-ahead */
+#define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
+#define DDF_cache_wallowed 32 /* enable write caching */
+#define DDF_cache_rallowed 64 /* enable read caching */
+
+struct spare_assign {
+ __u32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
+ __u32 crc;
+ __u32 timestamp;
+ __u8 reserved[7];
+ __u8 type;
+ __u16 populated; /* SAEs used */
+ __u16 max; /* max SAEs */
+ __u8 pad[8];
+ struct spare_assign_entry {
+ char guid[DDF_GUID_LEN];
+ __u16 secondary_element;
+ __u8 pad[6];
+ } spare_ents[0];
+};
+/* spare_assign.type is a bitmap */
+#define DDF_spare_dedicated 0x1 /* else global */
+#define DDF_spare_revertible 0x2 /* else committable */
+#define DDF_spare_active 0x4 /* else not active */
+#define DDF_spare_affinity 0x8 /* enclosure affinity */
+
+/* The data_section contents - local scope */
+struct disk_data {
+ __u32 magic; /* DDF_PHYS_DATA_MAGIC */
+ __u32 crc;
+ char guid[DDF_GUID_LEN];
+ __u32 refnum; /* crc of some magic drive data ... */
+ __u8 forced_ref; /* set when above was not result of magic */
+ __u8 forced_guid; /* set if guid was forced rather than magic */
+ __u8 vendor[32];
+ __u8 pad[442];
+};
+
+/* bbm_section content */
+struct bad_block_log {
+ __u32 magic;
+ __u32 crc;
+ __u16 entry_count;
+ __u32 spare_count; /* NOTE(review): the struct is not packed, so a compiler will normally insert padding before this 4-byte field - confirm this matches the on-disk layout */
+ __u8 pad[10];
+ __u64 first_spare;
+ struct mapped_block {
+ __u64 defective_start;
+ __u32 replacement_start;
+ __u16 remap_count;
+ __u8 pad[2];
+ } entries[0];
+};
+
+/* Struct for internally holding ddf structures */
+/* The DDF structure stored on each device is potentially
+ * quite different, as some data is global and some is local.
+ * The global data is:
+ * - ddf header
+ * - controller_data
+ * - Physical disk records
+ * - Virtual disk records
+ * The local data is:
+ * - Configuration records
+ * - Physical Disk data section
+ * ( and Bad block and vendor which I don't care about yet).
+ *
+ * The local data is parsed into separate lists as it is read
+ * and reconstructed for writing. This means that we only need
+ * to make config changes once and they are automatically
+ * propagated to all devices.
+ * Note that the ddf_super has space for the conf and disk data
+ * for this disk and also for a list of all such data.
+ * The list is only used for the superblock that is being
+ * built in Create or Assemble to describe the whole array.
+ */
+struct ddf_super {
+ struct ddf_header anchor, primary, secondary;
+ struct ddf_controller_data controller;
+ struct ddf_header *active;
+ struct phys_disk *phys;
+ struct virtual_disk *virt;
+ int pdsize, vdsize;
+ int max_part, mppe, conf_rec_len;
+ int currentdev;
+ int updates_pending;
+ struct vcl {
+ union {
+ char space[512];
+ struct {
+ struct vcl *next;
+ __u64 *lba_offset; /* location in 'conf' of
+ * the lba table */
+ int vcnum; /* index into ->virt */
+ __u64 *block_sizes; /* NULL if all the same */
+ };
+ };
+ struct vd_config conf;
+ } *conflist, *currentconf;
+ struct dl {
+ union {
+ char space[512];
+ struct {
+ struct dl *next;
+ int major, minor;
+ char *devname;
+ int fd;
+ unsigned long long size; /* sectors */
+ int pdnum; /* index in ->phys */
+ struct spare_assign *spare;
+ };
+ };
+ struct disk_data disk;
+ struct vcl *vlist[0]; /* max_part in size */
+ } *dlist;
+};
+
+#ifndef offsetof
+#define offsetof(t,f) ((size_t)&(((t*)0)->f))
+#endif
+
+
+/*
+ * Compute the checksum of a DDF structure of 'len' bytes at 'buf'.
+ *
+ * Every checksummed structure keeps its crc at the same position as
+ * struct ddf_header does.  That field is set to 0xffffffff while the
+ * checksum is calculated and put back afterwards, so 'buf' is unchanged
+ * on return.
+ *
+ * NOTE(review): the value returned is whatever crc32() produced, i.e. in
+ * host byte order, while callers compare it with the raw ->crc field and
+ * the file states that all multibyte fields are bigendian - confirm the
+ * byte order handling on little-endian hosts.
+ */
+static int calc_crc(void *buf, int len)
+{
+	struct ddf_header *hdr = buf;
+	__u32 saved = hdr->crc;
+	__u32 sum;
+
+	hdr->crc = 0xffffffff;
+	sum = crc32(0, buf, len);
+	hdr->crc = saved;
+
+	return sum;
+}
+
+/*
+ * Read one 512 byte DDF header from sector 'lba' of 'fd' into 'hdr' and
+ * check it against the already loaded 'anchor'.
+ *
+ * 'size' is the device size in sectors; 'type' is the header type that
+ * is expected at this location.
+ *
+ * The header is accepted when the read succeeds, magic and crc are good,
+ * guid, revision and both header LBAs equal those of the anchor, the type
+ * is the expected one, and everything from pad2 to the end of the block
+ * is identical to the anchor.
+ *
+ * Returns 1 if the header is acceptable, 0 otherwise.
+ */
+static int load_ddf_header(int fd, unsigned long long lba,
+			   unsigned long long size,
+			   int type,
+			   struct ddf_header *hdr, struct ddf_header *anchor)
+{
+	const size_t tail = 512 - offsetof(struct ddf_header, pad2);
+
+	/* fetch the block */
+	if (lba >= size-1 ||
+	    lseek64(fd, lba<<9, 0) < 0 ||
+	    read(fd, hdr, 512) != 512)
+		return 0;
+
+	/* it has to be a header, and an intact one */
+	if (hdr->magic != DDF_HEADER_MAGIC ||
+	    calc_crc(hdr, 512) != hdr->crc)
+		return 0;
+
+	/* and it has to agree with the anchor */
+	if (hdr->type != type)
+		return 0;
+	if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0)
+		return 0;
+	if (memcmp(anchor->revision, hdr->revision, 8) != 0)
+		return 0;
+	if (anchor->primary_lba != hdr->primary_lba)
+		return 0;
+	if (anchor->secondary_lba != hdr->secondary_lba)
+		return 0;
+	if (memcmp(anchor->pad2, hdr->pad2, tail) != 0)
+		return 0;
+
+	/* Looks good enough to me... */
+	return 1;
+}
+
+/*
+ * Read one metadata section from 'fd'.
+ *
+ * 'offset_be' and 'len_be' are the big-endian offset and length (both in
+ * 512 byte sectors) as stored in the active header.  The offset is
+ * relative to the LBA of the active header (primary or secondary).
+ *
+ * If 'buf' is not NULL the section is read into it and must be exactly
+ * one sector long.  If 'buf' is NULL a 512 byte aligned buffer is
+ * allocated, and the caller must free() it.
+ *
+ * If 'check' is set the length must be one of 2, 8, 32, 128 or 512
+ * sectors.  Sections longer than 1024 sectors are always rejected.
+ *
+ * Returns the buffer holding the section, or NULL on any failure
+ * (including allocation failure, which used to go unnoticed).
+ */
+static void *load_section(int fd, struct ddf_super *super, void *buf,
+			  __u32 offset_be, __u32 len_be, int check)
+{
+	unsigned long long offset = __be32_to_cpu(offset_be);
+	unsigned long long len = __be32_to_cpu(len_be);
+	unsigned long long bytes;
+	int dofree = (buf == NULL);
+	ssize_t got;
+
+	if (check)
+		if (len != 2 && len != 8 && len != 32
+		    && len != 128 && len != 512)
+			return NULL;
+
+	if (len > 1024)
+		return NULL;
+	bytes = len << 9;
+
+	if (buf) {
+		/* All pre-allocated sections are a single block */
+		if (len != 1)
+			return NULL;
+	} else {
+		/* 'buf' is not meaningful if the allocation failed */
+		if (posix_memalign(&buf, 512, bytes) != 0)
+			return NULL;
+	}
+
+	if (!buf)
+		return NULL;
+
+	if (super->active->type == DDF_HEADER_PRIMARY)
+		offset += __be64_to_cpu(super->active->primary_lba);
+	else
+		offset += __be64_to_cpu(super->active->secondary_lba);
+
+	if (lseek64(fd, offset<<9, 0) != (offset<<9))
+		goto fail;
+
+	got = read(fd, buf, bytes);
+	if (got < 0 || (unsigned long long)got != bytes)
+		goto fail;
+
+	return buf;
+
+fail:
+	/* only release what was allocated here */
+	if (dofree)
+		free(buf);
+	return NULL;
+}
+
+/*
+ * Load the anchor, primary and secondary DDF headers of 'fd' into
+ * 'super' and choose which of primary/secondary is the active one.
+ *
+ * The anchor is the last 512 byte block of the device.  The primary
+ * header must be valid.  The secondary is preferred only if it is valid,
+ * not marked open, and either has a higher sequence number or has the
+ * same sequence number while the primary is marked open.
+ *
+ * 'devname' is only used for error messages; none are printed if NULL.
+ *
+ * Returns 0 on success, 1 if the device size cannot be determined or the
+ * anchor cannot be read, 2 if the metadata found is not acceptable.
+ */
+static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
+{
+	unsigned long long dsize;
+
+	/* a failed size query leaves 'dsize' unset; a device smaller than
+	 * one block would make 'dsize-512' wrap around */
+	if (get_dev_size(fd, NULL, &dsize) == 0 || dsize < 512)
+		return 1;
+
+	if (lseek64(fd, dsize-512, 0) < 0) {
+		if (devname)
+			fprintf(stderr,
+				Name": Cannot seek to anchor block on %s: %s\n",
+				devname, strerror(errno));
+		return 1;
+	}
+	if (read(fd, &super->anchor, 512) != 512) {
+		if (devname)
+			fprintf(stderr,
+				Name ": Cannot read anchor block on %s: %s\n",
+				devname, strerror(errno));
+		return 1;
+	}
+	if (super->anchor.magic != DDF_HEADER_MAGIC) {
+		if (devname)
+			fprintf(stderr, Name ": no DDF anchor found on %s\n",
+				devname);
+		return 2;
+	}
+	if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
+		if (devname)
+			fprintf(stderr, Name ": bad CRC on anchor on %s\n",
+				devname);
+		return 2;
+	}
+	if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
+	    memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
+		if (devname)
+			fprintf(stderr, Name ": can only support super revision"
+				" %.8s and earlier, not %.8s on %s\n",
+				DDF_REVISION_2, super->anchor.revision,devname);
+		return 2;
+	}
+	if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
+			    dsize >> 9, 1,
+			    &super->primary, &super->anchor) == 0) {
+		if (devname)
+			fprintf(stderr,
+				Name ": Failed to load primary DDF header "
+				"on %s\n", devname);
+		return 2;
+	}
+	super->active = &super->primary;
+	if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
+			    dsize >> 9, 2,
+			    &super->secondary, &super->anchor)) {
+		/* a usable secondary exists; see whether it is the better one */
+		if ((__be32_to_cpu(super->primary.seq)
+		     < __be32_to_cpu(super->secondary.seq) &&
+		     !super->secondary.openflag)
+		    || (__be32_to_cpu(super->primary.seq)
+			== __be32_to_cpu(super->secondary.seq) &&
+			super->primary.openflag && !super->secondary.openflag)
+			)
+			super->active = &super->secondary;
+	}
+	return 0;
+}
+
+/*
+ * Load the sections that describe the whole DDF set - controller data,
+ * physical disk records and virtual disk records - using the locations
+ * recorded in the active header, and cache a few limits from that header
+ * in 'super'.
+ *
+ * Returns 0 on success.  If any of the three sections cannot be loaded
+ * the phys/virt buffers are released and 2 is returned.
+ * 'devname' is not used.
+ */
+static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
+{
+	struct ddf_header *hdr = super->active;
+	void *ctrl;
+
+	/* the controller section goes into space inside 'super' ... */
+	ctrl = load_section(fd, super, &super->controller,
+			    hdr->controller_section_offset,
+			    hdr->controller_section_length,
+			    0);
+
+	/* ... the record tables are allocated to fit */
+	super->phys = load_section(fd, super, NULL,
+				   hdr->phys_section_offset,
+				   hdr->phys_section_length,
+				   1);
+	super->pdsize = __be32_to_cpu(hdr->phys_section_length) * 512;
+
+	super->virt = load_section(fd, super, NULL,
+				   hdr->virt_section_offset,
+				   hdr->virt_section_length,
+				   1);
+	super->vdsize = __be32_to_cpu(hdr->virt_section_length) * 512;
+
+	if (ctrl && super->phys && super->virt) {
+		super->conflist = NULL;
+		super->dlist = NULL;
+
+		super->max_part = __be16_to_cpu(hdr->max_partitions);
+		super->mppe = __be16_to_cpu(hdr->max_primary_element_entries);
+		super->conf_rec_len = __be16_to_cpu(hdr->config_record_len);
+		return 0;
+	}
+
+	free(super->phys);
+	free(super->virt);
+	super->phys = NULL;
+	super->virt = NULL;
+	return 2;
+}
+
+/*
+ * Load the per-device parts of the metadata from 'fd' and merge them
+ * into 'super':
+ *  - a new 'struct dl' holding the disk data section is put at the head
+ *    of super->dlist ('fd' is remembered in it only if 'keep' is set);
+ *  - every virtual disk configuration record found on the device is
+ *    added to super->conflist (or replaces the copy already there if it
+ *    has a higher sequence number) and is referenced from dl->vlist[];
+ *  - the first spare assignment record is copied to dl->spare.
+ *
+ * Returns 0 on success, 1 if memory could not be allocated or the
+ * configuration section could not be read.  In the latter case the new
+ * 'struct dl' may already be linked into super->dlist.
+ */
+static int load_ddf_local(int fd, struct ddf_super *super,
+			  char *devname, int keep)
+{
+	struct dl *dl;
+	struct stat stb;
+	char *conf;
+	int i;
+	int confsec;
+	int conf_len;
+	int rec_len = super->conf_rec_len;
+	int vnum;
+	int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
+	unsigned long long dsize;
+
+	/* First the local disk info */
+	if (posix_memalign((void**)&dl, 512,
+			   sizeof(*dl) +
+			   (super->max_part) * sizeof(dl->vlist[0])) != 0) {
+		fprintf(stderr, Name ": %s could not allocate disk info buffer\n",
+			__func__);
+		return 1;
+	}
+	/* start from a known state in case the data section is unreadable */
+	memset(dl, 0, sizeof(*dl));
+
+	load_section(fd, super, &dl->disk,
+		     super->active->data_section_offset,
+		     super->active->data_section_length,
+		     0);
+	dl->devname = devname ? strdup(devname) : NULL;
+
+	if (fstat(fd, &stb) == 0) {
+		dl->major = major(stb.st_rdev);
+		dl->minor = minor(stb.st_rdev);
+	}
+	dl->next = super->dlist;
+	dl->fd = keep ? fd : -1;
+
+	dl->size = 0;
+	if (get_dev_size(fd, devname, &dsize))
+		dl->size = dsize >> 9;
+	dl->spare = NULL;
+	for (i=0 ; i < super->max_part ; i++)
+		dl->vlist[i] = NULL;
+	super->dlist = dl;
+	dl->pdnum = -1;
+	for (i=0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
+		if (memcmp(super->phys->entries[i].guid,
+			   dl->disk.guid, DDF_GUID_LEN) == 0)
+			dl->pdnum = i;
+
+	/* Now the config list. */
+	/* 'conf' is an array of config entries, some of which are
+	 * probably invalid.  Those which are good need to be copied into
+	 * the conflist
+	 */
+
+	conf = load_section(fd, super, NULL,
+			    super->active->config_section_offset,
+			    super->active->config_section_length,
+			    0);
+	if (conf == NULL)
+		return 1;
+	if (rec_len <= 0) {
+		/* a zero record length would never advance through 'conf' */
+		free(conf);
+		return 1;
+	}
+	/* load_section() only succeeds for lengths up to 1024 sectors */
+	conf_len = __be32_to_cpu(super->active->config_section_length);
+
+	vnum = 0;
+	/* only look at records that lie completely inside the section */
+	for (confsec = 0;
+	     confsec + rec_len <= conf_len;
+	     confsec += rec_len) {
+		struct vd_config *vd =
+			(struct vd_config *)((char*)conf + confsec*512);
+		struct vcl *vcl;
+
+		if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
+			if (dl->spare)
+				continue;
+			if (posix_memalign((void**)&dl->spare, 512,
+					   rec_len*512) != 0) {
+				dl->spare = NULL;
+				free(conf);
+				return 1;
+			}
+			memcpy(dl->spare, vd, rec_len*512);
+			continue;
+		}
+		if (vd->magic != DDF_VD_CONF_MAGIC)
+			continue;
+		if (vnum >= super->max_part)
+			/* dl->vlist[] has room for max_part entries only */
+			continue;
+		for (vcl = super->conflist; vcl; vcl = vcl->next) {
+			if (memcmp(vcl->conf.guid,
+				   vd->guid, DDF_GUID_LEN) == 0)
+				break;
+		}
+
+		if (vcl) {
+			dl->vlist[vnum++] = vcl;
+			if (__be32_to_cpu(vd->seqnum) <=
+			    __be32_to_cpu(vcl->conf.seqnum))
+				continue;
+		} else {
+			if (posix_memalign((void**)&vcl, 512,
+					   (rec_len*512 +
+					    offsetof(struct vcl, conf))) != 0) {
+				free(conf);
+				return 1;
+			}
+			vcl->next = super->conflist;
+			vcl->block_sizes = NULL; /* FIXME not for CONCAT */
+			vcl->vcnum = -1; /* until found in ->virt below */
+			super->conflist = vcl;
+			dl->vlist[vnum++] = vcl;
+		}
+		memcpy(&vcl->conf, vd, rec_len*512);
+		vcl->lba_offset = (__u64*)
+			&vcl->conf.phys_refnum[super->mppe];
+
+		/* 'i' is free for this search: the record position is kept
+		 * in 'confsec', so the outer loop is not disturbed */
+		for (i=0; i < max_virt_disks ; i++)
+			if (memcmp(super->virt->entries[i].guid,
+				   vcl->conf.guid, DDF_GUID_LEN)==0)
+				break;
+		if (i < max_virt_disks)
+			vcl->vcnum = i;
+	}
+	free(conf);
+
+	return 0;
+}
+
+#ifndef MDASSEMBLE
+static int load_super_ddf_all(struct supertype *st, int fd,
+ void **sbp, char *devname, int keep_fd);
+#endif
+/*
+ * Load DDF metadata from 'fd' into st->sb.
+ *
+ * Unless built for MDASSEMBLE, 'fd' is first tried as a container via
+ * load_super_ddf_all().  Otherwise it is treated as a single device:
+ * it must be larger than 32M and a whole number of 512 byte sectors,
+ * and the headers and the global sections must load.
+ *
+ * 'devname' is only used for messages.  It no longer decides whether a
+ * device of unsuitable size is rejected: previously such a device was
+ * accepted whenever 'devname' was NULL.
+ *
+ * Returns 0 on success, non-zero on failure (1, or the value reported by
+ * load_ddf_headers() / load_ddf_global()).
+ */
+static int load_super_ddf(struct supertype *st, int fd,
+			  char *devname)
+{
+	unsigned long long dsize;
+	struct ddf_super *super;
+	int rv;
+
+#ifndef MDASSEMBLE
+	/* if 'fd' is a container, load metadata from all the devices */
+	if (load_super_ddf_all(st, fd, &st->sb, devname, 1) == 0)
+		return 0;
+#endif
+	if (st->subarray[0])
+		return 1; /* FIXME Is this correct */
+
+	if (get_dev_size(fd, devname, &dsize) == 0)
+		return 1;
+
+	/* 32M is a lower bound */
+	if (dsize <= 32*1024*1024) {
+		if (devname)
+			fprintf(stderr,
+				Name ": %s is too small for ddf: "
+				"size is %llu sectors.\n",
+				devname, dsize>>9);
+		return 1;
+	}
+	if (dsize & 511) {
+		if (devname)
+			fprintf(stderr,
+				Name ": %s is an odd size for ddf: "
+				"size is %llu bytes.\n",
+				devname, dsize);
+		return 1;
+	}
+
+	if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
+		fprintf(stderr, Name ": malloc of %zu failed.\n",
+			sizeof(*super));
+		return 1;
+	}
+	memset(super, 0, sizeof(*super));
+
+	rv = load_ddf_headers(fd, super, devname);
+	if (rv) {
+		free(super);
+		return rv;
+	}
+
+	/* Have valid headers and have chosen the best. Let's read in the rest*/
+
+	rv = load_ddf_global(fd, super, devname);
+
+	if (rv) {
+		if (devname)
+			fprintf(stderr,
+				Name ": Failed to load all information "
+				"sections on %s\n", devname);
+		free(super);
+		return rv;
+	}
+
+	/* the result of loading the local sections is not acted upon */
+	load_ddf_local(fd, super, devname, 0);
+
+	/* Should possibly check the sections .... */
+
+	st->sb = super;
+	if (st->ss == NULL) {
+		st->ss = &super_ddf;
+		st->minor_version = 0;
+		st->max_devs = 512;
+	}
+	return 0;
+
+}
+
+/*
+ * Release everything hanging off st->sb: the phys and virt tables, the
+ * configuration list (with any block_sizes arrays) and the device list
+ * (closing descriptors that are >= 0 and freeing spare records), then
+ * the ddf_super itself.  st->sb is set to NULL.  Does nothing if st->sb
+ * is already NULL.
+ */
+static void free_super_ddf(struct supertype *st)
+{
+	struct ddf_super *ddf = st->sb;
+	struct vcl *conf;
+	struct dl *dev;
+
+	if (!ddf)
+		return;
+
+	free(ddf->phys);
+	free(ddf->virt);
+
+	while ((conf = ddf->conflist) != NULL) {
+		ddf->conflist = conf->next;
+		/* free(NULL) is harmless, no need to test first */
+		free(conf->block_sizes);
+		free(conf);
+	}
+
+	while ((dev = ddf->dlist) != NULL) {
+		ddf->dlist = dev->next;
+		if (dev->fd >= 0)
+			close(dev->fd);
+		free(dev->spare);
+		free(dev);
+	}
+
+	free(ddf);
+	st->sb = NULL;
+}
+
+/*
+ * If 'arg' names this metadata format ("ddf" or "default"), return a
+ * newly allocated, zeroed supertype set up for DDF: ss = &super_ddf,
+ * max_devs = 512, minor_version = 0, sb = NULL.
+ *
+ * Returns NULL if 'arg' does not match or if memory cannot be allocated
+ * (previously an allocation failure led to writing through NULL).
+ */
+static struct supertype *match_metadata_desc_ddf(char *arg)
+{
+	/* 'ddf' only support containers */
+	struct supertype *st;
+	if (strcmp(arg, "ddf") != 0 &&
+	    strcmp(arg, "default") != 0
+		)
+		return NULL;
+
+	st = calloc(1, sizeof(*st));
+	if (st == NULL)
+		return NULL;
+	st->ss = &super_ddf;
+	st->max_devs = 512;
+	st->minor_version = 0;
+	st->sb = NULL;
+	return st;
+}
+
+
+#ifndef MDASSEMBLE
+
+static mapping_t ddf_state[] = {
+ { "Optimal", 0},
+ { "Degraded", 1},
+ { "Deleted", 2},
+ { "Missing", 3},
+ { "Failed", 4},
+ { "Partially Optimal", 5},
+ { "-reserved-", 6},
+ { "-reserved-", 7},
+ { NULL, 0}
+};
+
+static mapping_t ddf_init_state[] = {
+ { "Not Initialised", 0},
+ { "QuickInit in Progress", 1},
+ { "Fully Initialised", 2},
+ { "*UNKNOWN*", 3},
+ { NULL, 0}
+};
+static mapping_t ddf_access[] = {
+ { "Read/Write", 0},
+ { "Reserved", 1},
+ { "Read Only", 2},
+ { "Blocked (no access)", 3},
+ { NULL ,0}
+};
+
+static mapping_t ddf_level[] = {
+ { "RAID0", DDF_RAID0},
+ { "RAID1", DDF_RAID1},
+ { "RAID3", DDF_RAID3},
+ { "RAID4", DDF_RAID4},
+ { "RAID5", DDF_RAID5},
+ { "RAID1E",DDF_RAID1E},
+ { "JBOD", DDF_JBOD},
+ { "CONCAT",DDF_CONCAT},
+ { "RAID5E",DDF_RAID5E},
+ { "RAID5EE",DDF_RAID5EE},
+ { "RAID6", DDF_RAID6},
+ { NULL, 0}
+};
+static mapping_t ddf_sec_level[] = {
+ { "Striped", DDF_2STRIPED},
+ { "Mirrored", DDF_2MIRRORED},
+ { "Concat", DDF_2CONCAT},
+ { "Spanned", DDF_2SPANNED},
+ { NULL, 0}
+};
+#endif
+
+struct num_mapping {
+ int num1, num2;
+};
+static struct num_mapping ddf_level_num[] = {
+ { DDF_RAID0, 0 },
+ { DDF_RAID1, 1 },
+ { DDF_RAID3, LEVEL_UNSUPPORTED },
+ { DDF_RAID4, 4 },
+ { DDF_RAID5, 5 },
+ { DDF_RAID1E, LEVEL_UNSUPPORTED },
+ { DDF_JBOD, LEVEL_UNSUPPORTED },
+ { DDF_CONCAT, LEVEL_LINEAR },
+ { DDF_RAID5E, LEVEL_UNSUPPORTED },
+ { DDF_RAID5EE, LEVEL_UNSUPPORTED },
+ { DDF_RAID6, 6},
+ { MAXINT, MAXINT }
+};
+
+/*
+ * Look 'num' up in the num1 column of 'map' and return the num2 value of
+ * the matching row.  The table ends with a row whose num1 is MAXINT; if
+ * nothing matches, the num2 of that final row is returned.
+ */
+static int map_num1(struct num_mapping *map, int num)
+{
+	struct num_mapping *row = map;
+
+	while (row->num1 != MAXINT && row->num1 != num)
+		row++;
+	return row->num2;
+}
+
+#ifndef MDASSEMBLE
+/*
+ * Print a DDF GUID on stdout.
+ *
+ * GUIDs are part (or all) ASCII and part binary, and they tend to be
+ * space padded.  The GUID is printed in HEX with a ':' after every four
+ * bytes, followed in parentheses by any leading run of printable ASCII
+ * and, if 'tstamp' is non-zero, the time stamp held big-endian in bytes
+ * 16-19, converted from the 1980 based DDF epoch.
+ *
+ * 'guid' must point to DDF_GUID_LEN bytes.
+ */
+static void print_guid(char *guid, int tstamp)
+{
+	int l = DDF_GUID_LEN;
+	int i;
+
+	for (i=0 ; i<DDF_GUID_LEN ; i++) {
+		if ((i&3)==0 && i != 0) printf(":");
+		printf("%02X", guid[i]&255);
+	}
+
+	printf(" (");
+	/* ignore the trailing padding */
+	while (l && guid[l-1] == ' ')
+		l--;
+	for (i=0 ; i<l ; i++) {
+		if (guid[i] >= 0x20 && guid[i] < 0x7f)
+			fputc(guid[i], stdout);
+		else
+			break;
+	}
+	if (tstamp) {
+		__u32 stamp_be;
+		time_t then;
+		char tbuf[100];
+		struct tm *tm;
+
+		/* copy the bytes out rather than casting the char pointer:
+		 * nothing guarantees 'guid' is aligned for a 32 bit load */
+		memcpy(&stamp_be, guid+16, sizeof(stamp_be));
+		then = (time_t)__be32_to_cpu(stamp_be) + DECADE;
+		tm = localtime(&then);
+		/* print nothing rather than junk if conversion fails */
+		if (tm && strftime(tbuf, 100, " %D %T",tm) > 0)
+			fputs(tbuf, stdout);
+	}
+	printf(")");
+}
+
+/* Print the details of every configuration record in sb->conflist whose
+ * GUID equals 'guid'.  'n' is only used as the index shown in each
+ * output label.  Records whose stored crc differs from calc_crc() over
+ * conf_rec_len*512 bytes are skipped without any message.
+ */
+static void examine_vd(int n, struct ddf_super *sb, char *guid)
+{
+ int crl = sb->conf_rec_len;
+ struct vcl *vcl;
+
+ for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
+ struct vd_config *vc = &vcl->conf;
+
+ if (calc_crc(vc, crl*512) != vc->crc)
+ continue;
+ if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
+ continue;
+
+ /* Ok, we know about this VD, let's give more details */
+ printf(" Raid Devices[%d] : %d\n", n,
+ __be16_to_cpu(vc->prim_elmnt_count));
+ printf(" Chunk Size[%d] : %d sectors\n", n,
+ 1 << vc->chunk_shift);
+ printf(" Raid Level[%d] : %s\n", n,
+ map_num(ddf_level, vc->prl)?:"-unknown-");
+ /* secondary-level details are only shown when there is more
+  * than one secondary element */
+ if (vc->sec_elmnt_count != 1) {
+ printf(" Secondary Position[%d] : %d of %d\n", n,
+ vc->sec_elmnt_seq, vc->sec_elmnt_count);
+ printf(" Secondary Level[%d] : %s\n", n,
+ map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
+ }
+ /* both sizes are halved for display - presumably 512-byte
+  * sectors shown as KiB; TODO confirm */
+ printf(" Device Size[%d] : %llu\n", n,
+ __be64_to_cpu(vc->blocks)/2);
+ printf(" Array Size[%d] : %llu\n", n,
+ __be64_to_cpu(vc->array_blocks)/2);
+ }
+}
+
+/* Print the virtual disk table of 'sb': the populated_vdes count, then
+ * for each of the first 'cnt' entries its GUID (with time stamp), unit,
+ * state, init state, access mode and name, followed by any matching
+ * configuration records (examine_vd()).
+ * NOTE(review): this walks entries[0..cnt-1]; it assumes populated
+ * entries are packed at the front of the table - confirm.
+ */
+static void examine_vds(struct ddf_super *sb)
+{
+ int cnt = __be16_to_cpu(sb->virt->populated_vdes);
+ int i;
+ printf(" Virtual Disks : %d\n", cnt);
+
+ for (i=0; i<cnt; i++) {
+ struct virtual_entry *ve = &sb->virt->entries[i];
+ printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
+ printf("\n");
+ printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
+ /* state: bits 0-2 name the state, bit 3 = morphing,
+  * bit 4 = not consistent */
+ printf(" state[%d] : %s, %s%s\n", i,
+ map_num(ddf_state, ve->state & 7),
+ (ve->state & 8) ? "Morphing, ": "",
+ (ve->state & 16)? "Not Consistent" : "Consistent");
+ /* init_state: bits 0-1 = initialisation, bits 6-7 = access */
+ printf(" init state[%d] : %s\n", i,
+ map_num(ddf_init_state, ve->init_state&3));
+ printf(" access[%d] : %s\n", i,
+ map_num(ddf_access, (ve->init_state>>6) & 3));
+ printf(" Name[%d] : %.16s\n", i, ve->name);
+ examine_vd(i, sb, ve->guid);
+ }
+ if (cnt) printf("\n");
+}
+
+/* Print the physical disk table of 'sb': the used_pdes count, then for
+ * each of the first 'cnt' entries its GUID, reference number, type
+ * flags, state flags and config_size, plus the device name of every
+ * disk in sb->dlist that carries the same refnum (when map_dev() can
+ * name it).
+ */
+static void examine_pds(struct ddf_super *sb)
+{
+ int cnt = __be16_to_cpu(sb->phys->used_pdes);
+ int i;
+ struct dl *dl;
+ printf(" Physical Disks : %d\n", cnt);
+
+ for (i=0 ; i<cnt ; i++) {
+ struct phys_disk_entry *pd = &sb->phys->entries[i];
+ int type = __be16_to_cpu(pd->type);
+ int state = __be16_to_cpu(pd->state);
+
+ printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
+ printf("\n");
+ printf(" ref[%d] : %08x\n", i,
+ __be32_to_cpu(pd->refnum));
+ /* one word per type bit that is set; note that only
+  * "foreign" carries its own ", " separator */
+ printf(" mode[%d] : %s%s%s%s%s\n", i,
+ (type&2) ? "active":"",
+ (type&4) ? "Global Spare":"",
+ (type&8) ? "spare" : "",
+ (type&16)? ", foreign" : "",
+ (type&32)? "pass-through" : "");
+ /* bit 0 selects Online/Offline, the other bits append flags */
+ printf(" state[%d] : %s%s%s%s%s%s%s\n", i,
+ (state&1)? "Online": "Offline",
+ (state&2)? ", Failed": "",
+ (state&4)? ", Rebuilding": "",
+ (state&8)? ", in-transition": "",
+ (state&16)? ", SMART errors": "",
+ (state&32)? ", Unrecovered Read Errors": "",
+ (state&64)? ", Missing" : "");
+ printf(" Avail Size[%d] : %llu K\n", i,
+ __be64_to_cpu(pd->config_size)>>1);
+ /* refnums are compared in their stored form (no byte swap
+  * on either side) */
+ for (dl = sb->dlist; dl ; dl = dl->next) {
+ if (dl->disk.refnum == pd->refnum) {
+ char *dv = map_dev(dl->major, dl->minor, 0);
+ if (dv)
+ printf(" Device[%d] : %s\n",
+ i, dv);
+ }
+ }
+ printf("\n");
+ }
+}
+
+/* Print a full description of the DDF metadata held in st->sb:
+ * anchor magic and revision, controller and container GUIDs, the
+ * sequence number of the active header, whether the secondary header
+ * carries the DDF header magic, then the virtual and physical disk
+ * tables.  'homehost' is not used.
+ */
+static void examine_super_ddf(struct supertype *st, char *homehost)
+{
+ struct ddf_super *sb = st->sb;
+
+ printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
+ printf(" Version : %.8s\n", sb->anchor.revision);
+ printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
+ printf("\n");
+ printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
+ printf("\n");
+ printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
+ printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
+ ?"yes" : "no");
+ examine_vds(sb);
+ examine_pds(sb);
+}
+
+/* Print a one-line generic "ARRAY" entry for the container in st->sb.
+ * The UUID shown is the anchor GUID as upper-case hex: DDF_GUID_LEN
+ * bytes in groups of four bytes separated by ':'.
+ */
+static void brief_examine_super_ddf(struct supertype *st)
+{
+	struct ddf_super *ddf = st->sb;
+	int pos = 0;
+
+	printf("ARRAY /dev/ddf metadata=ddf UUID=");
+	while (pos < DDF_GUID_LEN) {
+		/* separator before every group except the first */
+		if (pos > 0 && pos % 4 == 0)
+			printf(":");
+		printf("%02X", 255&ddf->anchor.guid[pos]);
+		pos++;
+	}
+	printf("\n");
+}
+
+/* Placeholder: currently prints nothing and uses neither argument. */
+static void detail_super_ddf(struct supertype *st, char *homehost)
+{
+ /* FIXME later
+ * Could print DDF GUID
+ * Need to find which array
+ * If whole, briefly list all arrays
+ * If one, give name
+ */
+}
+
+/* Placeholder: currently prints nothing and does not use 'st'. */
+static void brief_detail_super_ddf(struct supertype *st)
+{
+ /* FIXME I really need to know which array we are detailing.
+ * Can that be stored in ddf_super??
+ */
+// struct ddf_super *ddf = st->sb;
+}
+#endif
+
+/* Return non-zero when the metadata in st->sb belongs to 'homehost':
+ * the first 8 bytes of the controller GUID must equal T10 and the
+ * controller vendor_data must hold exactly the NUL-terminated
+ * 'homehost' string.  Returns 0 otherwise.
+ */
+static int match_home_ddf(struct supertype *st, char *homehost)
+{
+	struct ddf_super *ddf = st->sb;
+	int len = strlen(homehost);
+
+	if (memcmp(ddf->controller.guid, T10, 8) != 0)
+		return 0;
+	/* the name plus its terminator must fit in vendor_data */
+	if (!(len < sizeof(ddf->controller.vendor_data)))
+		return 0;
+	if (memcmp(ddf->controller.vendor_data, homehost,len) != 0)
+		return 0;
+	return ddf->controller.vendor_data[len] == 0;
+}
+
+/* Return the configuration record of the first entry in ddf->conflist
+ * whose vcnum equals 'inst', or NULL when there is none.
+ */
+static struct vd_config *find_vdcr(struct ddf_super *ddf, int inst)
+{
+	struct vcl *cur = ddf->conflist;
+
+	while (cur && cur->vcnum != inst)
+		cur = cur->next;
+	return cur ? &cur->conf : NULL;
+}
+
+/* Return the index of the first entry in the physical disk table whose
+ * refnum equals 'phys_refnum' (compared as stored, without byte
+ * swapping), or -1 when none of the max_pdes entries matches.
+ */
+static int find_phys(struct ddf_super *ddf, __u32 phys_refnum)
+{
+	int limit = __be16_to_cpu(ddf->phys->max_pdes);
+	int idx = 0;
+
+	while (idx < limit) {
+		if (ddf->phys->entries[idx].refnum == phys_refnum)
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
+/* Fill uuid[0..3] (16 bytes) with an identifier for the current array.
+ *
+ * The uuid returned here is used for:
+ *  uuid to put into bitmap file (Create, Grow)
+ *  uuid for backup header when saving critical section (Grow)
+ *  comparing uuids when re-adding a device into an array
+ * For each of these we can make do with a truncated
+ * or hashed uuid rather than the original, as long as
+ * everyone agrees.
+ * In each case the uuid required is that of the data-array,
+ * not the device-set.
+ * In the case of SVD we assume the BVD is of interest,
+ * though that might not hold if a bitmap were made for
+ * a mirrored SVD - worry about that later.
+ * So we need to find the VD configuration record for the
+ * relevant BVD and extract the GUID and Secondary_Element_Seq.
+ * The first 16 bytes of the sha1 of these is used.
+ *
+ * When no configuration is selected (ddf->currentconf is NULL) the
+ * uuid is set to all zeros.
+ */
+static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
+{
+	struct ddf_super *ddf = st->sb;
+	struct vcl *vcl = ddf->currentconf;
+	/* 'uuid' is really a pointer here, so sizeof(uuid) would only be
+	 * the size of a pointer; spell out the size of the 4-int array.
+	 */
+	const size_t uuid_bytes = 4 * sizeof(uuid[0]);
+
+	if (!vcl)
+		memset(uuid, 0, uuid_bytes);
+	else {
+		char buf[20];	/* a sha1 digest is 20 bytes */
+		struct sha1_ctx ctx;
+		sha1_init_ctx(&ctx);
+		sha1_process_bytes(&vcl->conf.guid, DDF_GUID_LEN, &ctx);
+		/* the sequence number only matters when the VD has
+		 * several secondary elements */
+		if (vcl->conf.sec_elmnt_count > 1)
+			sha1_process_bytes(&vcl->conf.sec_elmnt_seq, 1, &ctx);
+		sha1_finish_ctx(&ctx, buf);
+		memcpy(uuid, buf, uuid_bytes);
+	}
+}
+
+static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info);
+
+/* Fill 'info' from the DDF metadata in st->sb.
+ * When a configuration is selected (ddf->currentconf) the work is
+ * delegated to getinfo_super_ddf_bvd(); otherwise 'info' describes the
+ * container itself (level LEVEL_CONTAINER, text_version "ddf").
+ * info->uuid and info->name are not filled in for the container.
+ */
+static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info)
+{
+ struct ddf_super *ddf = st->sb;
+
+ if (ddf->currentconf) {
+ getinfo_super_ddf_bvd(st, info);
+ return;
+ }
+
+ info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
+ info->array.level = LEVEL_CONTAINER;
+ info->array.layout = 0;
+ info->array.md_minor = -1;
+ /* creation time comes from the time stamp in bytes 16-19 of the
+  * anchor GUID */
+ info->array.ctime = DECADE + __be32_to_cpu(*(__u32*)
+ (ddf->anchor.guid+16));
+ info->array.utime = 0;
+ info->array.chunk_size = 0;
+
+
+ info->disk.major = 0;
+ info->disk.minor = 0;
+ if (ddf->dlist) {
+ /* describe the first device on the list */
+ info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
+ /* NOTE(review): find_phys() returns -1 when the refnum is not
+  * in the table, and that value is used as an index just
+  * below - confirm this cannot happen here */
+ info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
+
+ info->data_offset = __be64_to_cpu(ddf->phys->
+ entries[info->disk.raid_disk].
+ config_size);
+ info->component_size = ddf->dlist->size - info->data_offset;
+ } else {
+ info->disk.number = -1;
+// info->disk.raid_disk = find refnum in the table and use index;
+ }
+ info->disk.state = (1 << MD_DISK_SYNC);
+
+
+ info->reshape_active = 0;
+
+ strcpy(info->text_version, "ddf");
+
+// uuid_from_super_ddf(info->uuid, sbv);
+
+// info->name[] ?? ;
+}
+
+static int rlq_to_layout(int rlq, int prl, int raiddisks);
+
+/* Fill 'info' for the currently selected configuration
+ * (ddf->currentconf, which must not be NULL): array geometry, times,
+ * data offset and component size of the current device, resync state,
+ * uuid, and a text_version of the form "/<container>/<subarray>".
+ */
+static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
+{
+ struct ddf_super *ddf = st->sb;
+ struct vcl *vc = ddf->currentconf;
+ int cd = ddf->currentdev;
+
+ /* FIXME this returns BVD info - what if we want SVD ?? */
+
+ info->array.raid_disks = __be16_to_cpu(vc->conf.prim_elmnt_count);
+ info->array.level = map_num1(ddf_level_num, vc->conf.prl);
+ info->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
+ info->array.raid_disks);
+ info->array.md_minor = -1;
+ /* creation time comes from bytes 16-19 of the VD GUID */
+ info->array.ctime = DECADE +
+ __be32_to_cpu(*(__u32*)(vc->conf.guid+16));
+ info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
+ /* chunk_shift is a power of two count of 512-byte units */
+ info->array.chunk_size = 512 << vc->conf.chunk_shift;
+
+ /* data_offset/component_size are only set when currentdev is a
+  * valid slot number */
+ if (cd >= 0 && cd < ddf->mppe) {
+ info->data_offset = __be64_to_cpu(vc->lba_offset[cd]);
+ if (vc->block_sizes)
+ info->component_size = vc->block_sizes[cd];
+ else
+ info->component_size = __be64_to_cpu(vc->conf.blocks);
+ }
+
+ info->disk.major = 0;
+ info->disk.minor = 0;
+// info->disk.number = __be32_to_cpu(ddf->disk.refnum);
+// info->disk.raid_disk = find refnum in the table and use index;
+// info->disk.state = ???;
+
+ info->container_member = ddf->currentconf->vcnum;
+
+ /* report "fully in sync" only when the virtual entry is not marked
+  * inconsistent and is fully initialised; otherwise resync from 0 */
+ info->resync_start = 0;
+ if (!(ddf->virt->entries[info->container_member].state
+ & DDF_state_inconsistent) &&
+ (ddf->virt->entries[info->container_member].init_state
+ & DDF_initstate_mask)
+ == DDF_init_full)
+ info->resync_start = ~0ULL;
+
+ uuid_from_super_ddf(st, info->uuid);
+
+ /* NOTE(review): container_member was already set from vcnum above;
+  * this second assignment wins - confirm the two always agree */
+ info->container_member = atoi(st->subarray);
+ /* NOTE(review): ownership of the string returned by
+  * devnum2devname() is not visible here - check for a leak */
+ sprintf(info->text_version, "/%s/%s",
+ devnum2devname(st->container_dev),
+ st->subarray);
+
+// info->name[] ?? ;
+}
+
+
+/* Apply the named 'update' to the metadata.
+ * In this version every recognised update is a no-op: nothing in the
+ * metadata is changed and the function always returns 0.
+ */
+static int update_super_ddf(struct supertype *st, struct mdinfo *info,
+ char *update,
+ char *devname, int verbose,
+ int uuid_set, char *homehost)
+{
+ /* For 'assemble' and 'force' we need to return non-zero if any
+ * change was made. For others, the return value is ignored.
+ * Update options are:
+ * force-one : This device looks a bit old but needs to be included,
+ * update age info appropriately.
+ * assemble: clear any 'faulty' flag to allow this device to
+ * be assembled.
+ * force-array: Array is degraded but being forced, mark it clean
+ * if that will be needed to assemble it.
+ *
+ * newdev: not used ????
+ * grow: Array has gained a new device - this is currently for
+ * linear only
+ * resync: mark as dirty so a resync will happen.
+ * uuid: Change the uuid of the array to match what is given
+ * homehost: update the recorded homehost
+ * name: update the name - preserving the homehost
+ * _reshape_progress: record new reshape_progress position.
+ *
+ * Following are not relevant for this version:
+ * sparc2.2 : update from old dodgy metadata
+ * super-minor: change the preferred_minor number
+ * summaries: update redundant counters.
+ */
+ int rv = 0;
+// struct ddf_super *ddf = st->sb;
+// struct vd_config *vd = find_vdcr(ddf, info->container_member);
+// struct virtual_entry *ve = find_ve(ddf);
+
+ /* we don't need to handle "force-*" or "assemble" as
+ * there is no need to 'trick' the kernel. When the metadata is
+ * first updated to activate the array, all the implied modifications
+ * will just happen.
+ */
+
+ if (strcmp(update, "grow") == 0) {
+ /* FIXME */
+ }
+ if (strcmp(update, "resync") == 0) {
+// info->resync_checkpoint = 0;
+ }
+ /* We ignore UUID updates as they make even less sense
+ * with DDF
+ */
+ if (strcmp(update, "homehost") == 0) {
+ /* homehost is stored in controller->vendor_data,
+ * or it is when we are the vendor
+ */
+// if (info->vendor_is_local)
+// strcpy(ddf->controller.vendor_data, homehost);
+ }
+ if (strcmp(update, "name") == 0) {
+ /* name is stored in virtual_entry->name */
+// memset(ve->name, ' ', 16);
+// strncpy(ve->name, info->name, 16);
+ }
+ if (strcmp(update, "_reshape_progress") == 0) {
+ /* We don't support reshape yet */
+ }
+
+// update_all_csum(ddf);
+
+ return rv;
+}
+
+/* Create a DDF Header or Virtual Disk GUID in guid[0..23].
+ *
+ * 24 bytes of fiction are required:
+ *   bytes  0-7   the 'vendor-id' copied from T10
+ *   bytes  8-15  controller type: 0xDEADBEEF then 0, big-endian
+ *   bytes 16-19  time stamp (seconds since DECADE), big-endian
+ *   bytes 20-23  four bytes read from /dev/urandom, or random() when
+ *                that cannot be opened or read
+ */
+static void make_header_guid(char *guid)
+{
+	__u32 word;
+	int rfd;
+
+	memcpy(guid, T10, sizeof(T10));
+
+	word = __cpu_to_be32(0xdeadbeef);
+	memcpy(guid+8, &word, 4);
+	word = __cpu_to_be32(0);
+	memcpy(guid+12, &word, 4);
+
+	word = __cpu_to_be32(time(0) - DECADE);
+	memcpy(guid+16, &word, 4);
+
+	rfd = open("/dev/urandom", O_RDONLY);
+	if (rfd < 0)
+		word = random();
+	else {
+		if (read(rfd, &word, 4) != 4)
+			word = random();
+		close(rfd);
+	}
+	memcpy(guid+20, &word, 4);
+}
+
+static int init_super_ddf_bvd(struct supertype *st,
+ mdu_array_info_t *info,
+ unsigned long long size,
+ char *name, char *homehost,
+ int *uuid);
+
+/* Create the in-memory DDF metadata for a new container.
+ *
+ * - info == NULL: st->sb is set to NULL and 0 is returned.
+ * - st->sb already set: a BVD is being created inside an existing
+ *   container; the result of init_super_ddf_bvd() is returned.
+ * - otherwise a fresh ddf_super with anchor/primary/secondary headers,
+ *   controller data and empty physical and virtual disk tables is
+ *   built, stored in st->sb and 1 is returned.
+ *
+ * NOTE(review): the results of posix_memalign() are not checked.
+ */
+static int init_super_ddf(struct supertype *st,
+ mdu_array_info_t *info,
+ unsigned long long size, char *name, char *homehost,
+ int *uuid)
+{
+ /* This is primarily called by Create when creating a new array.
+ * We will then get add_to_super called for each component, and then
+ * write_init_super called to write it out to each device.
+ * For DDF, Create can create on fresh devices or on a pre-existing
+ * array.
+ * To create on a pre-existing array a different method will be called.
+ * This one is just for fresh drives.
+ *
+ * We need to create the entire 'ddf' structure which includes:
+ * DDF headers - these are easy.
+ * Controller data - a Sector describing this controller .. not that
+ * this is a controller exactly.
+ * Physical Disk Record - one entry per device, so
+ * leave plenty of space.
+ * Virtual Disk Records - again, just leave plenty of space.
+ * This just lists VDs, doesn't give details
+ * Config records - describes the VDs that use this disk
+ * DiskData - describes 'this' device.
+ * BadBlockManagement - empty
+ * Diag Space - empty
+ * Vendor Logs - Could we put bitmaps here?
+ *
+ */
+ struct ddf_super *ddf;
+ char hostname[17];
+ int hostlen;
+ int max_phys_disks, max_virt_disks;
+ unsigned long long sector;
+ int clen;
+ int i;
+ int pdsize, vdsize;
+ struct phys_disk *pd;
+ struct virtual_disk *vd;
+
+ if (!info) {
+ st->sb = NULL;
+ return 0;
+ }
+ if (st->sb)
+ return init_super_ddf_bvd(st, info, size, name, homehost,
+ uuid);
+
+ posix_memalign((void**)&ddf, 512, sizeof(*ddf));
+ memset(ddf, 0, sizeof(*ddf));
+ ddf->dlist = NULL; /* no physical disks yet */
+ ddf->conflist = NULL; /* No virtual disks yet */
+
+ /* At least 32MB *must* be reserved for the ddf. So let's just
+ * start 32MB from the end, and put the primary header there.
+ * Don't do secondary for now.
+ * We don't know exactly where that will be yet as it could be
+ * different on each device. So just set up the lengths.
+ *
+ */
+
+ ddf->anchor.magic = DDF_HEADER_MAGIC;
+ make_header_guid(ddf->anchor.guid);
+
+ memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
+ ddf->anchor.seq = __cpu_to_be32(1);
+ ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
+ ddf->anchor.openflag = 0xFF;
+ ddf->anchor.foreignflag = 0;
+ ddf->anchor.enforcegroups = 0; /* Is this best?? */
+ ddf->anchor.pad0 = 0xff;
+ memset(ddf->anchor.pad1, 0xff, 12);
+ memset(ddf->anchor.header_ext, 0xff, 32);
+ /* the LBAs are not known yet: all-ones until a device is written */
+ ddf->anchor.primary_lba = ~(__u64)0;
+ ddf->anchor.secondary_lba = ~(__u64)0;
+ ddf->anchor.type = DDF_HEADER_ANCHOR;
+ memset(ddf->anchor.pad2, 0xff, 3);
+ ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
+ ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
+ of 32M reserved.. */
+ max_phys_disks = 1023; /* Should be enough */
+ ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
+ max_virt_disks = 255;
+ ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
+ ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
+ ddf->max_part = 64;
+ ddf->mppe = 256;
+ /* one sector of fixed fields plus mppe entries of (4+8) bytes:
+  * a 4-byte refnum and an 8-byte LBA offset per element */
+ ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
+ ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
+ ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
+ memset(ddf->anchor.pad3, 0xff, 54);
+ /* controller sections is one sector long immediately
+ * after the ddf header */
+ sector = 1;
+ ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
+ ddf->anchor.controller_section_length = __cpu_to_be32(1);
+ sector += 1;
+
+ /* phys is 8 sectors after that */
+ pdsize = ROUND_UP(sizeof(struct phys_disk) +
+ sizeof(struct phys_disk_entry)*max_phys_disks,
+ 512);
+ /* only these section lengths (in sectors) are accepted; anything
+  * else means the structure sizes are not what this code expects */
+ switch(pdsize/512) {
+ case 2: case 8: case 32: case 128: case 512: break;
+ default: abort();
+ }
+ ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
+ ddf->anchor.phys_section_length =
+ __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
+ sector += pdsize/512;
+
+ /* virt is another 32 sectors */
+ vdsize = ROUND_UP(sizeof(struct virtual_disk) +
+ sizeof(struct virtual_entry) * max_virt_disks,
+ 512);
+ switch(vdsize/512) {
+ case 2: case 8: case 32: case 128: case 512: break;
+ default: abort();
+ }
+ ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
+ ddf->anchor.virt_section_length =
+ __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
+ sector += vdsize/512;
+
+ /* room for max_part config records plus one more */
+ clen = ddf->conf_rec_len * (ddf->max_part+1);
+ ddf->anchor.config_section_offset = __cpu_to_be32(sector);
+ ddf->anchor.config_section_length = __cpu_to_be32(clen);
+ sector += clen;
+
+ ddf->anchor.data_section_offset = __cpu_to_be32(sector);
+ ddf->anchor.data_section_length = __cpu_to_be32(1);
+ sector += 1;
+
+ /* the optional sections are absent: length 0, offset all-ones */
+ ddf->anchor.bbm_section_length = __cpu_to_be32(0);
+ ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
+ ddf->anchor.diag_space_length = __cpu_to_be32(0);
+ ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
+ ddf->anchor.vendor_length = __cpu_to_be32(0);
+ ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
+
+ memset(ddf->anchor.pad4, 0xff, 256);
+
+ /* primary and secondary start as copies of the anchor and differ
+  * only in openflag and type */
+ memcpy(&ddf->primary, &ddf->anchor, 512);
+ memcpy(&ddf->secondary, &ddf->anchor, 512);
+
+ ddf->primary.openflag = 1; /* I guess.. */
+ ddf->primary.type = DDF_HEADER_PRIMARY;
+
+ ddf->secondary.openflag = 1; /* I guess.. */
+ ddf->secondary.type = DDF_HEADER_SECONDARY;
+
+ ddf->active = &ddf->primary;
+
+ ddf->controller.magic = DDF_CONTROLLER_MAGIC;
+
+ /* 24 more bytes of fiction required.
+ * first 8 are a 'vendor-id' - "Linux-MD"
+ * Remaining 16 are serial number.... maybe a hostname would do?
+ */
+ memcpy(ddf->controller.guid, T10, sizeof(T10));
+ gethostname(hostname, sizeof(hostname));
+ hostname[sizeof(hostname) - 1] = 0;
+ hostlen = strlen(hostname);
+ /* hostname (at most 16 bytes) is right-aligned in the GUID; the gap
+  * between the vendor-id and the hostname is filled with spaces */
+ memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
+ for (i = strlen(T10) ; i+hostlen < 24; i++)
+ ddf->controller.guid[i] = ' ';
+
+ ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
+ ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
+ ddf->controller.type.sub_vendor_id = 0;
+ ddf->controller.type.sub_device_id = 0;
+ memcpy(ddf->controller.product_id, "What Is My PID??", 16);
+ memset(ddf->controller.pad, 0xff, 8);
+ memset(ddf->controller.vendor_data, 0xff, 448);
+
+ posix_memalign((void**)&pd, 512, pdsize);
+ ddf->phys = pd;
+ ddf->pdsize = pdsize;
+
+ /* entries start as all-ones; only the table header is zeroed */
+ memset(pd, 0xff, pdsize);
+ memset(pd, 0, sizeof(*pd));
+ pd->magic = DDF_PHYS_DATA_MAGIC;
+ pd->used_pdes = __cpu_to_be16(0);
+ pd->max_pdes = __cpu_to_be16(max_phys_disks);
+ memset(pd->pad, 0xff, 52);
+
+ posix_memalign((void**)&vd, 512, vdsize);
+ ddf->virt = vd;
+ ddf->vdsize = vdsize;
+ memset(vd, 0, vdsize);
+ vd->magic = DDF_VIRT_RECORDS_MAGIC;
+ vd->populated_vdes = __cpu_to_be16(0);
+ vd->max_vdes = __cpu_to_be16(max_virt_disks);
+ memset(vd->pad, 0xff, 52);
+
+ /* an all-ones entry is a free slot (see all_ff() and its use in
+  * init_super_ddf_bvd()) */
+ for (i=0; i<max_virt_disks; i++)
+ memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
+
+ st->sb = ddf;
+ ddf->updates_pending = 1;
+ return 1;
+}
+
+/* Return 1 when all DDF_GUID_LEN bytes of 'guid' are 0xff, else 0. */
+static int all_ff(char *guid)
+{
+	char *p = guid;
+	char *end = guid + DDF_GUID_LEN;
+
+	while (p < end)
+		if (*p++ != (char)0xff)
+			return 0;
+	return 1;
+}
+/* Convert a chunk size in bytes to a shift count relative to 512-byte
+ * units: the 0-based position of the lowest set bit of chunksize/512,
+ * or -1 when chunksize/512 is 0.
+ */
+static int chunk_to_shift(int chunksize)
+{
+	int units = chunksize / 512;
+
+	return ffs(units) - 1;
+}
+
+/* Map an md RAID level to the DDF primary RAID level constant.
+ * Returns -1 for levels that have no mapping here.
+ */
+static int level_to_prl(int level)
+{
+	if (level == LEVEL_LINEAR)
+		return DDF_CONCAT;
+	if (level == 0)
+		return DDF_RAID0;
+	if (level == 1)
+		return DDF_RAID1;
+	if (level == 4)
+		return DDF_RAID4;
+	if (level == 5)
+		return DDF_RAID5;
+	if (level == 6)
+		return DDF_RAID6;
+	return -1;
+}
+/* Map an md (level, layout, raiddisks) combination to the DDF RAID
+ * level qualifier.  Returns -1 for every combination that has no
+ * mapping here.
+ */
+static int layout_to_rlq(int level, int layout, int raiddisks)
+{
+	if (level == 0)
+		return DDF_RAID0_SIMPLE;
+
+	if (level == 1) {
+		/* only 2- and 3-disk mirrors have a qualifier */
+		if (raiddisks == 2)
+			return DDF_RAID1_SIMPLE;
+		if (raiddisks == 3)
+			return DDF_RAID1_MULTI;
+		return -1;
+	}
+
+	if (level == 4)
+		return layout == 0 ? DDF_RAID4_N : -1;
+
+	if (level == 5 || level == 6) {
+		if (layout == ALGORITHM_LEFT_ASYMMETRIC)
+			return DDF_RAID5_N_RESTART;
+		if (layout == ALGORITHM_RIGHT_ASYMMETRIC)
+			return level == 5 ? DDF_RAID5_0_RESTART
+					  : DDF_RAID6_0_RESTART;
+		if (layout == ALGORITHM_LEFT_SYMMETRIC)
+			return DDF_RAID5_N_CONTINUE;
+		/* ALGORITHM_RIGHT_SYMMETRIC is not mentioned in the
+		 * standard, so it ends up as -1 like any other layout */
+	}
+	return -1;
+}
+
+/* Map a DDF RAID level qualifier 'rlq' for primary level 'prl' to the
+ * md layout number.  Returns -1 for unsupported combinations (FIXME:
+ * callers do not check for this).  'raiddisks' is not used.
+ */
+static int rlq_to_layout(int rlq, int prl, int raiddisks)
+{
+	/* RAID0/RAID1 have a single md layout; hopefully rlq is
+	 * DDF_RAID0_SIMPLE, or SIMPLE/MULTI depending on raiddisks */
+	if (prl == DDF_RAID0 || prl == DDF_RAID1)
+		return 0;
+
+	if (prl == DDF_RAID4)
+		return rlq == DDF_RAID4_N ? 0 : -1;
+
+	if (prl == DDF_RAID5 || prl == DDF_RAID6) {
+		/* RAID5 and RAID6 share two qualifiers and differ only
+		 * in the one meaning "right asymmetric" */
+		int zero_restart = (prl == DDF_RAID5) ? DDF_RAID5_0_RESTART
+						      : DDF_RAID6_0_RESTART;
+
+		if (rlq == DDF_RAID5_N_RESTART)
+			return ALGORITHM_LEFT_ASYMMETRIC;
+		if (rlq == zero_restart)
+			return ALGORITHM_RIGHT_ASYMMETRIC;
+		if (rlq == DDF_RAID5_N_CONTINUE)
+			return ALGORITHM_LEFT_SYMMETRIC;
+		return -1;
+	}
+	return -1;
+}
+
+/* A run of 'size' units beginning at 'start' on a device. */
+struct extent {
+	unsigned long long start, size;
+};
+/* qsort() comparison: order extents by ascending start.
+ * Returns -1, 0 or 1; size is not considered.
+ */
+static int cmp_extent(const void *av, const void *bv)
+{
+	const struct extent *lhs = av;
+	const struct extent *rhs = bv;
+
+	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
+}
+
+/* Build the list of extents already in use on physical device 'dl'.
+ *
+ * For each configuration record in dl->vlist[] in which this device's
+ * refnum appears, one extent is recorded (start = the lba_offset of
+ * that slot, size = conf.blocks).  The extents are sorted by start and
+ * followed by a terminating entry of size 0 whose start is the
+ * device's config_size from the physical disk table.
+ *
+ * Returns a malloc()ed array that the caller must free(), or NULL when
+ * the allocation fails.
+ *
+ * FIXME ignore DDF_Legacy devices?
+ */
+static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
+{
+	struct extent *rv;
+	int n = 0;
+	int i, j;
+
+	/* at most one extent per vlist slot, plus the terminator */
+	rv = malloc(sizeof(struct extent) * (ddf->max_part + 2));
+	if (!rv)
+		return NULL;
+
+	for (i = 0; i < ddf->max_part; i++) {
+		struct vcl *v = dl->vlist[i];
+		int elmnts;
+
+		if (v == NULL)
+			continue;
+		/* prim_elmnt_count is stored big-endian; using the raw
+		 * value as the bound would walk far past the end of
+		 * phys_refnum[] on little-endian hosts.
+		 */
+		elmnts = __be16_to_cpu(v->conf.prim_elmnt_count);
+		for (j = 0; j < elmnts; j++)
+			if (v->conf.phys_refnum[j] == dl->disk.refnum) {
+				/* This device plays role 'j' in 'v'. */
+				rv[n].start = __be64_to_cpu(v->lba_offset[j]);
+				rv[n].size = __be64_to_cpu(v->conf.blocks);
+				n++;
+				break;
+			}
+	}
+	qsort(rv, n, sizeof(*rv), cmp_extent);
+
+	/* terminator: marks the end of the usable space */
+	rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
+	rv[n].size = 0;
+	return rv;
+}
+
+/* Create a new BVD inside a pre-existing container (st->sb is already
+ * set): claim a free virtual_entry, build a matching vd_config record,
+ * put it at the head of ddf->conflist and make it the current
+ * configuration.  st->subarray is set to the decimal number of the
+ * claimed entry.
+ *
+ * Returns 1 on success, or 0 (after printing a message) when the
+ * virtual disk table is full or has no free slot.
+ * 'size', 'homehost' and 'uuid' are not used; the per-device size is
+ * taken from info->size.
+ */
+static int init_super_ddf_bvd(struct supertype *st,
+			      mdu_array_info_t *info,
+			      unsigned long long size,
+			      char *name, char *homehost,
+			      int *uuid)
+{
+	struct ddf_super *ddf = st->sb;
+	int venum;
+	int i;
+	struct virtual_entry *ve;
+	struct vcl *vcl;
+	struct vd_config *vc;
+
+	if (__be16_to_cpu(ddf->virt->populated_vdes)
+	    >= __be16_to_cpu(ddf->virt->max_vdes)) {
+		fprintf(stderr, Name": This ddf already has the "
+			"maximum of %d virtual devices\n",
+			__be16_to_cpu(ddf->virt->max_vdes));
+		return 0;
+	}
+
+	/* a free slot is one whose GUID is still all 0xff */
+	for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
+		if (all_ff(ddf->virt->entries[venum].guid))
+			break;
+	if (venum == __be16_to_cpu(ddf->virt->max_vdes)) {
+		fprintf(stderr, Name ": Cannot find spare slot for "
+			"virtual disk - DDF is corrupt\n");
+		return 0;
+	}
+	ve = &ddf->virt->entries[venum];
+
+	/* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
+	 * timestamp, random number
+	 */
+	make_header_guid(ve->guid);
+	ve->unit = __cpu_to_be16(info->md_minor);
+	ve->pad0 = 0xFFFF;
+	ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
+	ve->type = 0;
+	ve->state = DDF_state_degraded; /* Will be modified as devices are added */
+	if (info->state & 1) /* clean */
+		ve->init_state = DDF_init_full;
+	else
+		ve->init_state = DDF_init_not;
+
+	memset(ve->pad1, 0xff, 14);
+	memset(ve->name, ' ', 16);
+	if (name)
+		strncpy(ve->name, name, 16);
+	ddf->virt->populated_vdes =
+		__cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
+
+	/* Now create a new vd_config */
+	posix_memalign((void**)&vcl, 512,
+		       (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512));
+	/* the lba_offset table directly follows the mppe refnum slots */
+	vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
+	vcl->vcnum = venum;
+	sprintf(st->subarray, "%d", venum);
+	vcl->block_sizes = NULL; /* FIXME not for CONCAT */
+
+	vc = &vcl->conf;
+
+	vc->magic = DDF_VD_CONF_MAGIC;
+	memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
+	vc->timestamp = __cpu_to_be32(time(0)-DECADE);
+	vc->seqnum = __cpu_to_be32(1);
+	memset(vc->pad0, 0xff, 24);
+	vc->prim_elmnt_count = __cpu_to_be16(info->raid_disks);
+	vc->chunk_shift = chunk_to_shift(info->chunk_size);
+	vc->prl = level_to_prl(info->level);
+	vc->rlq = layout_to_rlq(info->level, info->layout, info->raid_disks);
+	vc->sec_elmnt_count = 1;
+	vc->sec_elmnt_seq = 0;
+	vc->srl = 0;
+	/* info->size is doubled to get the on-disk block count */
+	vc->blocks = __cpu_to_be64(info->size * 2);
+	vc->array_blocks = __cpu_to_be64(
+		calc_array_size(info->level, info->raid_disks, info->layout,
+				info->chunk_size, info->size*2));
+	memset(vc->pad1, 0xff, 8);
+	/* no dedicated spares: all eight references are all-ones */
+	for (i = 0; i < 8; i++)
+		vc->spare_refs[i] = 0xffffffff;
+	memset(vc->cache_pol, 0, 8);
+	vc->bg_rate = 0x80;
+	memset(vc->pad2, 0xff, 3);
+	memset(vc->pad3, 0xff, 52);
+	memset(vc->pad4, 0xff, 192);
+	memset(vc->v0, 0xff, 32);
+	memset(vc->v1, 0xff, 32);
+	memset(vc->v2, 0xff, 16);
+	memset(vc->v3, 0xff, 16);
+	memset(vc->vendor, 0xff, 32);
+
+	/* every element slot starts unused (refnum all-ones) with a zero
+	 * offset.  The offsets live in the lba_offset table that follows
+	 * the mppe refnums; zero it through that pointer so the memset
+	 * stays inside the allocated record.
+	 */
+	memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
+	memset(vcl->lba_offset, 0x00, 8*ddf->mppe);
+
+	vcl->next = ddf->conflist;
+	ddf->conflist = vcl;
+	ddf->currentconf = vcl;
+	ddf->updates_pending = 1;
+	return 1;
+}
+
+/* Add the container member device identified by dk->major/dk->minor to
+ * slot dk->raid_disk of the BVD being created (ddf->currentconf).
+ * Returns without any message when the device is not in ddf->dlist,
+ * is not marked MD_DISK_SYNC, no extent list can be allocated, the
+ * device has no free gap of the required size, or all of its vlist
+ * slots are taken.
+ */
+static void add_to_super_ddf_bvd(struct supertype *st,
+ mdu_disk_info_t *dk, int fd, char *devname)
+{
+ /* fd and devname identify a device with-in the ddf container (st).
+ * dk identifies a location in the new BVD.
+ * We need to find suitable free space in that device and update
+ * the phys_refnum and lba_offset for the newly created vd_config.
+ * We might also want to update the type in the phys_disk
+ * section.
+ */
+ struct dl *dl;
+ struct ddf_super *ddf = st->sb;
+ struct vd_config *vc;
+ __u64 *lba_offset;
+ int working;
+ int i;
+ unsigned long long blocks, pos, esize;
+ struct extent *ex;
+
+ for (dl = ddf->dlist; dl ; dl = dl->next)
+ if (dl->major == dk->major &&
+ dl->minor == dk->minor)
+ break;
+ if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
+ return;
+
+ vc = &ddf->currentconf->conf;
+ lba_offset = ddf->currentconf->lba_offset;
+
+ ex = get_extents(ddf, dl);
+ if (!ex)
+ return;
+
+ i = 0; pos = 0;
+ blocks = __be64_to_cpu(vc->blocks);
+ /* NOTE(review): block_sizes[] is used here without a byte swap,
+  * unlike vc->blocks - confirm which byte order it holds */
+ if (ddf->currentconf->block_sizes)
+ blocks = ddf->currentconf->block_sizes[dk->raid_disk];
+
+ /* First fit: walk the sorted used extents and take the first gap
+  * (from the end of the previous extent to the start of the next)
+  * that can hold 'blocks'.  The list ends with a size-0 entry whose
+  * start is the end of the usable space, so the final gap is
+  * examined before the loop stops.
+  */
+ do {
+ esize = ex[i].start - pos;
+ if (esize >= blocks)
+ break;
+ pos = ex[i].start + ex[i].size;
+ i++;
+ } while (ex[i-1].size);
+
+ free(ex);
+ if (esize < blocks)
+ return;
+
+ ddf->currentdev = dk->raid_disk;
+ vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
+ lba_offset[dk->raid_disk] = __cpu_to_be64(pos);
+
+ /* record this config in the first free vlist slot of the device.
+  * NOTE(review): when no slot is free we return with the refnum and
+  * offset above already stored - confirm that is intended */
+ for (i=0; i < ddf->max_part ; i++)
+ if (dl->vlist[i] == NULL)
+ break;
+ if (i == ddf->max_part)
+ return;
+ dl->vlist[i] = ddf->currentconf;
+
+ dl->fd = fd;
+ dl->devname = devname;
+
+ /* Check how many working raid_disks, and if we can mark
+ * array as optimal yet
+ */
+ working = 0;
+
+ /* a slot counts as working once its refnum is no longer all-ones */
+ for (i=0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
+ if (vc->phys_refnum[i] != 0xffffffff)
+ working++;
+
+ /* Find which virtual_entry */
+ i = ddf->currentconf->vcnum;
+ if (working == __be16_to_cpu(vc->prim_elmnt_count))
+ ddf->virt->entries[i].state =
+ (ddf->virt->entries[i].state & ~DDF_state_mask)
+ | DDF_state_optimal;
+
+ /* RAID6 with exactly one device missing is "partially optimal" */
+ if (vc->prl == DDF_RAID6 &&
+ working+1 == __be16_to_cpu(vc->prim_elmnt_count))
+ ddf->virt->entries[i].state =
+ (ddf->virt->entries[i].state & ~DDF_state_mask)
+ | DDF_state_part_optimal;
+
+ /* the device is now part of a VD rather than a global spare */
+ ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
+ ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
+ ddf->updates_pending = 1;
+}
+
+/* add a device to a container, either while creating it or while
+ * expanding a pre-existing container
+ *
+ * When a configuration is selected (ddf->currentconf) the device is
+ * instead added to that BVD via add_to_super_ddf_bvd().
+ * Otherwise a new 'struct dl' is allocated for the device open on
+ * 'fd', given a fresh GUID and a reference number not already present
+ * in the physical disk table, put at the head of ddf->dlist, and
+ * described by the next unused phys_disk_entry.
+ *
+ * NOTE(review): the results of fstat(), posix_memalign() and
+ * get_dev_size() are not checked, and used_pdes is not compared
+ * against max_pdes before the new entry is taken.
+ */
+static void add_to_super_ddf(struct supertype *st,
+ mdu_disk_info_t *dk, int fd, char *devname)
+{
+ struct ddf_super *ddf = st->sb;
+ struct dl *dd;
+ time_t now;
+ struct tm *tm;
+ unsigned long long size;
+ struct phys_disk_entry *pde;
+ int n, i;
+ struct stat stb;
+
+ if (ddf->currentconf) {
+ add_to_super_ddf_bvd(st, dk, fd, devname);
+ return;
+ }
+
+ /* This is device numbered dk->number. We need to create
+ * a phys_disk entry and a more detailed disk_data entry.
+ */
+ fstat(fd, &stb);
+ /* the dl is followed by max_part vlist[] pointers */
+ posix_memalign((void**)&dd, 512,
+ sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part);
+ dd->major = major(stb.st_rdev);
+ dd->minor = minor(stb.st_rdev);
+ dd->devname = devname;
+ dd->next = ddf->dlist;
+ dd->fd = fd;
+ dd->spare = NULL;
+
+ dd->disk.magic = DDF_PHYS_DATA_MAGIC;
+ now = time(0);
+ tm = localtime(&now);
+ /* GUID: T10 then today's date as YYYYMMDD (16 characters), then
+  * two random 32-bit words that overwrite bytes 16-23 */
+ sprintf(dd->disk.guid, "%8s%04d%02d%02d",
+ T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
+ *(__u32*)(dd->disk.guid + 16) = random();
+ *(__u32*)(dd->disk.guid + 20) = random();
+
+ /* pick random reference numbers until one is found that no entry
+  * of the physical disk table already uses */
+ do {
+ /* Cannot be bothered finding a CRC of some irrelevant details*/
+ dd->disk.refnum = random();
+ for (i = __be16_to_cpu(ddf->active->max_pd_entries) - 1;
+ i >= 0; i--)
+ if (ddf->phys->entries[i].refnum == dd->disk.refnum)
+ break;
+ } while (i >= 0);
+
+ dd->disk.forced_ref = 1;
+ dd->disk.forced_guid = 1;
+ memset(dd->disk.vendor, ' ', 32);
+ memcpy(dd->disk.vendor, "Linux", 5);
+ memset(dd->disk.pad, 0xff, 442);
+ for (i = 0; i < ddf->max_part ; i++)
+ dd->vlist[i] = NULL;
+
+ /* the new device takes the next unused physical disk entry */
+ n = __be16_to_cpu(ddf->phys->used_pdes);
+ pde = &ddf->phys->entries[n];
+ dd->pdnum = n;
+
+ n++;
+ ddf->phys->used_pdes = __cpu_to_be16(n);
+
+ memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
+ pde->refnum = dd->disk.refnum;
+ pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
+ pde->state = __cpu_to_be16(DDF_Online);
+ get_dev_size(fd, NULL, &size);
+ /* We are required to reserve 32Meg, and record the size in sectors */
+ pde->config_size = __cpu_to_be64( (size - 32*1024*1024) / 512);
+ sprintf(pde->path, "%17.17s","Information: nil") ;
+ memset(pde->pad, 0xff, 6);
+
+ /* dd->size is kept in 512-byte sectors */
+ dd->size = size >> 9;
+ ddf->dlist = dd;
+ ddf->updates_pending = 1;
+}
+
+/*
+ * This is the write_init_super method for a ddf container. It is
+ * called when creating a container or adding another device to a
+ * container.
+ */
+
+#ifndef MDASSEMBLE
+
+/* Scratch source of 0xff filler for empty config-record slots.  It is
+ * 512 bytes larger than the 4096 bytes actually written so that a
+ * 512-byte-aligned window can be taken from inside it.
+ */
+static unsigned char null_conf[4096+512];
+
+/*
+ * Write the in-memory ddf metadata to every device in ddf->dlist that
+ * still has an open fd (d->fd >= 0).
+ *
+ * For each device the header LBAs are derived from that device's size,
+ * then the primary header, controller data, physical-disk records,
+ * virtual-disk records, all config-record slots and the per-disk data
+ * are written sequentially starting at the primary header position.
+ * The anchor is written last, into the final sector of the device.
+ *
+ * do_close: when non-zero each fd is closed after writing and d->fd is
+ *           set to -1.
+ * Returns 1 unconditionally; the results of lseek64()/write() are not
+ * checked.
+ */
+static int __write_init_super_ddf(struct supertype *st, int do_close)
+{
+
+ struct ddf_super *ddf = st->sb;
+ int i;
+ struct dl *d;
+ int n_config;
+ int conf_size;
+
+ unsigned long long size, sector;
+
+ for (d = ddf->dlist; d; d=d->next) {
+ int fd = d->fd;
+
+ if (fd < 0)
+ continue;
+
+ /* We need to fill in the primary, (secondary) and workspace
+ * lba's in the headers, set their checksums,
+ * Also checksum phys, virt....
+ *
+ * Then write everything out, finally the anchor is written.
+ */
+ get_dev_size(fd, NULL, &size);
+ /* from here on 'size' is in 512-byte sectors */
+ size /= 512;
+ /* workspace starts 32*1024*2 sectors (32MiB) before the end,
+ * the primary header 16*1024*2 sectors (16MiB) before the end */
+ ddf->anchor.workspace_lba = __cpu_to_be64(size - 32*1024*2);
+ ddf->anchor.primary_lba = __cpu_to_be64(size - 16*1024*2);
+ ddf->anchor.seq = __cpu_to_be32(1);
+ /* primary and secondary start as copies of the anchor taken
+ * *before* the anchor-only openflag/seq values are set below */
+ memcpy(&ddf->primary, &ddf->anchor, 512);
+ memcpy(&ddf->secondary, &ddf->anchor, 512);
+
+ ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
+ ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
+ ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
+
+ ddf->primary.openflag = 0;
+ ddf->primary.type = DDF_HEADER_PRIMARY;
+
+ ddf->secondary.openflag = 0;
+ ddf->secondary.type = DDF_HEADER_SECONDARY;
+
+ ddf->primary.crc = calc_crc(&ddf->primary, 512);
+ ddf->secondary.crc = calc_crc(&ddf->secondary, 512);
+
+ /* seek to primary_lba; everything up to the disk data is
+ * written back-to-back from here */
+ sector = size - 16*1024*2;
+ lseek64(fd, sector<<9, 0);
+ write(fd, &ddf->primary, 512);
+
+ ddf->controller.crc = calc_crc(&ddf->controller, 512);
+ write(fd, &ddf->controller, 512);
+
+ ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
+
+ write(fd, ddf->phys, ddf->pdsize);
+
+ ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
+ write(fd, ddf->virt, ddf->vdsize);
+
+ /* Now write lots of config records. */
+ n_config = ddf->max_part;
+ conf_size = ddf->conf_rec_len * 512;
+ /* max_part slots from d->vlist[], plus one extra slot
+ * (i == n_config) that holds the spare record d->spare */
+ for (i = 0 ; i <= n_config ; i++) {
+ struct vcl *c = d->vlist[i];
+ if (i == n_config)
+ c = (struct vcl*)d->spare;
+
+ if (c) {
+ c->conf.crc = calc_crc(&c->conf, conf_size);
+ write(fd, &c->conf, conf_size);
+ } else {
+ /* empty slot: emit conf_size bytes of 0xff in chunks
+ * taken from the aligned part of null_conf, which is
+ * filled on first use */
+ char *null_aligned = (char*)((((unsigned long)null_conf)+511)&~511UL);
+ if (null_conf[0] != 0xff)
+ memset(null_conf, 0xff, sizeof(null_conf));
+ int togo = conf_size;
+ while (togo > sizeof(null_conf)-512) {
+ write(fd, null_aligned, sizeof(null_conf)-512);
+ togo -= sizeof(null_conf)-512;
+ }
+ write(fd, null_aligned, togo);
+ }
+ }
+ d->disk.crc = calc_crc(&d->disk, 512);
+ write(fd, &d->disk, 512);
+
+ /* Maybe do the same for secondary */
+ /* (the secondary header is prepared and checksummed above
+ * but is not written anywhere in this function) */
+
+ /* the anchor lives in the last sector of the device */
+ lseek64(fd, (size-1)*512, SEEK_SET);
+ write(fd, &ddf->anchor, 512);
+ if (do_close) {
+ close(fd);
+ d->fd = -1;
+ }
+ }
+ return 1;
+}
+
+/*
+ * write_init_super method.
+ *
+ * Without an update queue (st->update_tail unset) the metadata is
+ * written straight to the devices and their fds are closed.
+ * With an update queue, nothing is written here: the virtual-disk entry
+ * and the vd_config of ddf->currentconf are queued as two metadata
+ * updates and 0 is returned.
+ */
+static int write_init_super_ddf(struct supertype *st)
+{
+	struct ddf_super *ddf;
+	struct virtual_disk *vd_update;
+	struct vd_config *conf_update;
+	int vd_len;
+	int conf_len;
+
+	if (!st->update_tail)
+		return __write_init_super_ddf(st, 1);
+
+	ddf = st->sb;
+
+	/* First update: a slightly fake virtual_disk header carrying a
+	 * single entry; populated_vdes is reused to carry the entry number.
+	 */
+	vd_len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
+	vd_update = malloc(vd_len);
+	*vd_update = *ddf->virt;
+	vd_update->entries[0] = ddf->virt->entries[ddf->currentconf->vcnum];
+	vd_update->populated_vdes = __cpu_to_be16(ddf->currentconf->vcnum);
+	append_metadata_update(st, vd_update, vd_len);
+
+	/* Second update: the complete config record */
+	conf_len = ddf->conf_rec_len * 512;
+	conf_update = malloc(conf_len);
+	memcpy(conf_update, &ddf->currentconf->conf, conf_len);
+	append_metadata_update(st, conf_update, conf_len);
+
+	/* FIXME I need to close the fds! */
+	return 0;
+}
+
+#endif
+
+/*
+ * Usable size of a device once the trailing 32Meg (32*1024*2 sectors)
+ * has been reserved.  Yields 0 when the device is no bigger than the
+ * reservation.
+ */
+static __u64 avail_size_ddf(struct supertype *st, __u64 devsize)
+{
+	const __u64 reserved = 32*1024*2;
+
+	return devsize > reserved ? devsize - reserved : 0;
+}
+
+#ifndef MDASSEMBLE
+/* Helpers called by validate_geometry_ddf(); both are defined after it. */
+static int validate_geometry_ddf_container(struct supertype *st, int level,
+					   int layout, int raiddisks, int chunk,
+					   unsigned long long size, char *dev,
+					   unsigned long long *freesize,
+					   int verbose);
+
+static int validate_geometry_ddf_bvd(struct supertype *st, int level,
+				     int layout, int raiddisks, int chunk,
+				     unsigned long long size, char *dev,
+				     unsigned long long *freesize,
+				     int verbose);
+
+/*
+ * validate_geometry method for ddf.
+ *
+ * Dispatches on what is being created:
+ *  - level == LEVEL_CONTAINER: handled by validate_geometry_ddf_container.
+ *  - st->sb already loaded:    handled by validate_geometry_ddf_bvd.
+ *  - no device given:          only checks that 'level' appears in
+ *                              ddf_level_num.
+ *  - otherwise 'dev' is the first device offered; if it is a busy member
+ *    of a ddf container, that container is loaded into st->sb and the
+ *    request is passed to validate_geometry_ddf_bvd.
+ *
+ * Returns 1 if the geometry is acceptable, 0 otherwise.
+ */
+static int validate_geometry_ddf(struct supertype *st,
+				 int level, int layout, int raiddisks,
+				 int chunk, unsigned long long size,
+				 char *dev, unsigned long long *freesize,
+				 int verbose)
+{
+	int fd;
+	struct mdinfo *sra;
+	int cfd;
+	struct ddf_super *ddf;
+
+	/* ddf potentially supports lots of things, but it depends on
+	 * what devices are offered (and maybe kernel version?)
+	 * If given unused devices, we will make a container.
+	 * If given devices in a container, we will make a BVD.
+	 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
+	 */
+
+	if (level == LEVEL_CONTAINER) {
+		/* Must be a fresh device to add to a container */
+		return validate_geometry_ddf_container(st, level, layout,
+						       raiddisks, chunk,
+						       size, dev, freesize,
+						       verbose);
+	}
+
+	if (st->sb) {
+		/* A container has already been opened, so we are
+		 * creating in there.  Maybe a BVD, maybe an SVD.
+		 * Should make a distinction one day.
+		 */
+		return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
+						 chunk, size, dev, freesize,
+						 verbose);
+	}
+	if (!dev) {
+		/* Initial sanity check.  Exclude illegal levels. */
+		int i;
+		for (i=0; ddf_level_num[i].num1 != MAXINT; i++)
+			if (ddf_level_num[i].num2 == level)
+				break;
+		if (ddf_level_num[i].num1 == MAXINT)
+			return 0;
+		/* Should check layout? etc */
+		return 1;
+	}
+
+	/* This is the first device for the array.
+	 * If it is a container, we read it in and do automagic allocations,
+	 * no other devices should be given.
+	 * Otherwise it must be a member device of a container, and we
+	 * do manual allocation.
+	 * Later we should check for a BVD and make an SVD.
+	 */
+	fd = open(dev, O_RDONLY|O_EXCL, 0);
+	if (fd >= 0) {
+		/* Exclusive open succeeded, so nobody else holds the device */
+		sra = sysfs_read(fd, 0, GET_VERSION);
+		close(fd);
+		if (sra && sra->array.major_version == -1 &&
+		    strcmp(sra->text_version, "ddf") == 0) {
+
+			/* load super */
+			/* find space for 'n' devices. */
+			/* remember the devices */
+			/* Somehow return the fact that we have enough */
+		}
+
+		/* The container case above is not implemented yet, so an
+		 * exclusively-openable device is always rejected.
+		 */
+		if (verbose)
+			fprintf(stderr,
+				Name ": ddf: Cannot create this array "
+				"on device %s\n",
+				dev);
+		return 0;
+	}
+	if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
+		if (verbose)
+			fprintf(stderr, Name ": ddf: Cannot open %s: %s\n",
+				dev, strerror(errno));
+		return 0;
+	}
+	/* Well, it is in use by someone, maybe a 'ddf' container. */
+	cfd = open_container(fd);
+	if (cfd < 0) {
+		close(fd);
+		if (verbose)
+			fprintf(stderr, Name ": ddf: Cannot use %s: %s\n",
+				dev, strerror(EBUSY));
+		return 0;
+	}
+	sra = sysfs_read(cfd, 0, GET_VERSION);
+	close(fd);
+	if (!(sra && sra->array.major_version == -1 &&
+	      strcmp(sra->text_version, "ddf") == 0)) {
+		/* device may belong to a different container.
+		 * Release the container fd; it used to be leaked here.
+		 */
+		close(cfd);
+		return 0;
+	}
+
+	/* This is a member of a ddf container.  Load the container
+	 * and try to create a bvd
+	 */
+	if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL, 1) == 0) {
+		st->sb = ddf;
+		st->container_dev = fd2devnum(cfd);
+		close(cfd);
+		return validate_geometry_ddf_bvd(st, level, layout,
+						 raiddisks, chunk, size,
+						 dev, freesize,
+						 verbose);
+	}
+	close(cfd);
+
+	/* NOTE(review): a failed container load still reports success here,
+	 * as the original code did - confirm this is intended.
+	 */
+	return 1;
+}
+
+/*
+ * Geometry check for creating a container (level must be
+ * LEVEL_CONTAINER).  With no device it only confirms the level; with a
+ * device it must be openable exclusively, and *freesize is set to the
+ * size (in sectors) left after the ddf reservation.
+ * Returns 1 when acceptable, 0 otherwise.
+ */
+static int
+validate_geometry_ddf_container(struct supertype *st,
+				int level, int layout, int raiddisks,
+				int chunk, unsigned long long size,
+				char *dev, unsigned long long *freesize,
+				int verbose)
+{
+	int fd;
+	int have_size;
+	unsigned long long bytes;
+
+	if (level != LEVEL_CONTAINER)
+		return 0;
+	if (dev == NULL)
+		return 1;
+
+	fd = open(dev, O_RDONLY|O_EXCL, 0);
+	if (fd < 0) {
+		if (verbose)
+			fprintf(stderr, Name ": ddf: Cannot open %s: %s\n",
+				dev, strerror(errno));
+		return 0;
+	}
+
+	/* The fd is only needed for the size query */
+	have_size = get_dev_size(fd, dev, &bytes);
+	close(fd);
+	if (!have_size)
+		return 0;
+
+	*freesize = avail_size_ddf(st, bytes >> 9);
+	return 1;
+}
+
+/*
+ * Geometry check for creating a BVD inside an already-loaded container
+ * (st->sb must be set).
+ *
+ * dev == NULL: general test - count the devices in the container that
+ *              have a free gap of at least 'size' (or 8 when size is 0)
+ *              and require at least 'raiddisks' of them.
+ * dev != NULL: the device must be a block device that is a member of
+ *              this container; *freesize is set to its largest free gap.
+ *
+ * Free gaps are derived from the list returned by get_extents(): each
+ * gap runs from the end of the previous extent to the start of the
+ * next, and an entry with size 0 terminates the list.
+ *
+ * Returns 1 when acceptable, 0 otherwise.
+ */
+static int validate_geometry_ddf_bvd(struct supertype *st,
+				     int level, int layout, int raiddisks,
+				     int chunk, unsigned long long size,
+				     char *dev, unsigned long long *freesize,
+				     int verbose)
+{
+	struct stat stb;
+	struct ddf_super *ddf = st->sb;
+	struct dl *dl;
+	unsigned long long pos = 0;
+	unsigned long long maxsize;
+	struct extent *e;
+	int i;
+	/* ddf/bvd supports lots of things, but not containers */
+	if (level == LEVEL_CONTAINER)
+		return 0;
+	/* We must have the container info already read in. */
+	if (!ddf)
+		return 0;
+
+	if (!dev) {
+		/* General test:  make sure there is space for
+		 * 'raiddisks' device extents of size 'size'.
+		 */
+		unsigned long long minsize = size;
+		int dcnt = 0;
+		if (minsize == 0)
+			minsize = 8;
+		for (dl = ddf->dlist; dl ; dl = dl->next)
+		{
+			int found = 0;
+			pos = 0;
+
+			i = 0;
+			e = get_extents(ddf, dl);
+			if (!e) continue;
+			do {
+				unsigned long long esize;
+				esize = e[i].start - pos;
+				if (esize >= minsize)
+					found = 1;
+				pos = e[i].start + e[i].size;
+				i++;
+			} while (e[i-1].size);
+			if (found)
+				dcnt++;
+			free(e);
+		}
+		if (dcnt < raiddisks) {
+			if (verbose)
+				fprintf(stderr,
+					Name ": ddf: Not enough devices with "
+					"space for this array (%d < %d)\n",
+					dcnt, raiddisks);
+			return 0;
+		}
+		return 1;
+	}
+	/* This device must be a member of the set */
+	if (stat(dev, &stb) < 0)
+		return 0;
+	if ((S_IFMT & stb.st_mode) != S_IFBLK)
+		return 0;
+	for (dl = ddf->dlist ; dl ; dl = dl->next) {
+		if (dl->major == major(stb.st_rdev) &&
+		    dl->minor == minor(stb.st_rdev))
+			break;
+	}
+	if (!dl) {
+		if (verbose)
+			fprintf(stderr, Name ": ddf: %s is not in the "
+				"same DDF set\n",
+				dev);
+		return 0;
+	}
+	e = get_extents(ddf, dl);
+	maxsize = 0;
+	i = 0;
+	if (e) do {
+		unsigned long long esize;
+		esize = e[i].start - pos;
+		if (esize >= maxsize)
+			maxsize = esize;
+		pos = e[i].start + e[i].size;
+		i++;
+	} while (e[i-1].size);
+	/* Release the extent list, as the general test above does;
+	 * it used to be leaked on this path.  free(NULL) is a no-op.
+	 */
+	free(e);
+	*freesize = maxsize;
+	// FIXME here I am
+
+	return 1;
+}
+
+/*
+ * Load the ddf metadata of a whole container.
+ *
+ * fd      : open fd on the container; its member devices are found via
+ *           sysfs_read().
+ * sbp     : on success receives the newly allocated struct ddf_super.
+ * devname : not used in this function.
+ * keep_fd : when non-zero the member devices are opened O_RDWR and the
+ *           fds are left open (and passed on to load_ddf_local());
+ *           otherwise they are opened O_RDONLY and closed again.
+ *
+ * Returns 0 on success.  Returns 1 when the container is not ddf,
+ * allocation fails, no member yields usable headers, or st->subarray
+ * names a configuration that is not found.  Returns 2 when a member
+ * device cannot be opened.
+ *
+ * NOTE(review): the error returns after the allocation do not release
+ * 'super' (nor 'sra') - looks like a leak, confirm.
+ */
+static int load_super_ddf_all(struct supertype *st, int fd,
+ void **sbp, char *devname, int keep_fd)
+{
+ struct mdinfo *sra;
+ struct ddf_super *super;
+ struct mdinfo *sd, *best = NULL;
+ int bestseq = 0;
+ int seq;
+ char nm[20];
+ int dfd;
+
+ sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
+ if (!sra)
+ return 1;
+ /* only accept a container whose version is reported as -1/-2 "ddf" */
+ if (sra->array.major_version != -1 ||
+ sra->array.minor_version != -2 ||
+ strcmp(sra->text_version, "ddf") != 0)
+ return 1;
+
+ if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
+ return 1;
+ memset(super, 0, sizeof(*super));
+
+ /* first, try each device, and choose the best ddf */
+ for (sd = sra->devs ; sd ; sd = sd->next) {
+ int rv;
+ sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
+ dfd = dev_open(nm, O_RDONLY);
+ if (dfd < 0)
+ return 2;
+ rv = load_ddf_headers(dfd, super, NULL);
+ close(dfd);
+ if (rv == 0) {
+ /* highest sequence number wins; a header still marked
+ * open counts as one older */
+ seq = __be32_to_cpu(super->active->seq);
+ if (super->active->openflag)
+ seq--;
+ if (!best || seq > bestseq) {
+ bestseq = seq;
+ best = sd;
+ }
+ }
+ }
+ if (!best)
+ return 1;
+ /* OK, load this ddf */
+ /* (headers are re-read because 'super' currently holds whatever
+ * device was probed last, not necessarily the best one) */
+ sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
+ dfd = dev_open(nm, O_RDONLY);
+ if (dfd < 0)
+ return 1;
+ load_ddf_headers(dfd, super, NULL);
+ load_ddf_global(dfd, super, NULL);
+ close(dfd);
+ /* Now we need the device-local bits */
+ for (sd = sra->devs ; sd ; sd = sd->next) {
+ sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
+ dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
+ if (dfd < 0)
+ return 2;
+ /* the value stored in 'seq' here is not used afterwards */
+ seq = load_ddf_local(dfd, super, NULL, keep_fd);
+ if (!keep_fd) close(dfd);
+ }
+ /* when a subarray was requested, select its config record */
+ if (st->subarray[0]) {
+ struct vcl *v;
+
+ for (v = super->conflist; v; v = v->next)
+ if (v->vcnum == atoi(st->subarray))
+ super->currentconf = v;
+ if (!super->currentconf)
+ return 1;
+ }
+ *sbp = super;
+ /* fill in the supertype only if the caller had not chosen one */
+ if (st->ss == NULL) {
+ st->ss = &super_ddf;
+ st->minor_version = 0;
+ st->max_devs = 512;
+ st->container_dev = fd2devnum(fd);
+ }
+ return 0;
+}
+#endif
+
+/*
+ * container_content method.
+ *
+ * Given a container loaded by load_super_ddf_all, build one mdinfo per
+ * entry of ddf->conflist and return them as a linked list (NULL if
+ * there are none).  Each mdinfo gets a ->devs list holding one entry
+ * per phys_refnum of the config that matches a device in ddf->dlist.
+ *
+ * The results of malloc() are not checked.
+ */
+static struct mdinfo *container_content_ddf(struct supertype *st)
+{
+	/* Given a container loaded by load_super_ddf_all,
+	 * extract information about all the arrays into
+	 * an mdinfo tree.
+	 *
+	 * For each vcl in conflist: create an mdinfo, fill it in,
+	 *  then look for matching devices (phys_refnum) in dlist
+	 *  and create appropriate device mdinfo.
+	 */
+	struct ddf_super *ddf = st->sb;
+	struct mdinfo *rest = NULL;
+	struct vcl *vc;
+
+	for (vc = ddf->conflist ; vc ; vc=vc->next)
+	{
+		int i;
+		struct mdinfo *this;
+		this = malloc(sizeof(*this));
+		memset(this, 0, sizeof(*this));
+		this->next = rest;
+		rest = this;
+
+		this->array.level = map_num1(ddf_level_num, vc->conf.prl);
+		this->array.raid_disks =
+			__be16_to_cpu(vc->conf.prim_elmnt_count);
+		this->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
+						   this->array.raid_disks);
+		this->array.md_minor      = -1;
+		this->array.ctime         = DECADE +
+			__be32_to_cpu(*(__u32*)(vc->conf.guid+16));
+		this->array.utime	  = DECADE +
+			__be32_to_cpu(vc->conf.timestamp);
+		this->array.chunk_size	  = 512 << vc->conf.chunk_shift;
+
+		i = vc->vcnum;
+		/* Anything other than "consistent and fully initialised"
+		 * is reported as needing a resync from the start.
+		 */
+		if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
+		    (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
+		    DDF_init_full) {
+			this->array.state = 0;
+			this->resync_start = 0;
+		} else {
+			this->array.state = 1;
+			this->resync_start = ~0ULL;
+		}
+		/* 32 bytes are copied, so the terminator belongs at index 32.
+		 * The original wrote index 33, one byte past the terminator
+		 * position.
+		 */
+		memcpy(this->name, ddf->virt->entries[i].name, 32);
+		this->name[32]=0;
+
+		memset(this->uuid, 0, sizeof(this->uuid));
+		this->component_size = __be64_to_cpu(vc->conf.blocks);
+		this->array.size = this->component_size / 2;
+		this->container_member = i;
+
+		sprintf(this->text_version, "/%s/%d",
+			devnum2devname(st->container_dev),
+			this->container_member);
+
+		for (i=0 ; i < ddf->mppe ; i++) {
+			struct mdinfo *dev;
+			struct dl *d;
+
+			/* all-ones marks an unused slot */
+			if (vc->conf.phys_refnum[i] == 0xFFFFFFFF)
+				continue;
+
+			this->array.working_disks++;
+
+			for (d = ddf->dlist; d ; d=d->next)
+				if (d->disk.refnum == vc->conf.phys_refnum[i])
+					break;
+			if (d == NULL)
+				/* NOTE(review): this abandons the remaining
+				 * slots of this array, not just this one -
+				 * confirm 'break' rather than 'continue' is
+				 * intended.
+				 */
+				break;
+
+			dev = malloc(sizeof(*dev));
+			memset(dev, 0, sizeof(*dev));
+			dev->next = this->devs;
+			this->devs = dev;
+
+			dev->disk.number = __be32_to_cpu(d->disk.refnum);
+			dev->disk.major = d->major;
+			dev->disk.minor = d->minor;
+			dev->disk.raid_disk = i;
+			dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
+
+			dev->events = __be32_to_cpu(ddf->primary.seq);
+			dev->data_offset = __be64_to_cpu(vc->lba_offset[i]);
+			dev->component_size = __be64_to_cpu(vc->conf.blocks);
+			if (d->devname)
+				strcpy(dev->name, d->devname);
+		}
+	}
+	return rest;
+}
+
+/*
+ * store_super method: erase the ddf metadata on 'fd' by overwriting the
+ * last 512 bytes of the device (where the anchor is kept) with zeros.
+ *
+ * Returns 0 on success, 1 if the device size cannot be determined, the
+ * aligned buffer cannot be allocated, or the seek/write fails.
+ */
+static int store_zero_ddf(struct supertype *st, int fd)
+{
+	unsigned long long dsize;
+	void *buf;
+	int rv = 1;
+
+	if (!get_dev_size(fd, NULL, &dsize))
+		return 1;
+
+	/* on failure posix_memalign leaves 'buf' unusable, so bail out */
+	if (posix_memalign(&buf, 512, 512) != 0)
+		return 1;
+	memset(buf, 0, 512);
+
+	/* only report success when the whole sector really was written */
+	if (lseek64(fd, dsize-512, SEEK_SET) >= 0 &&
+	    write(fd, buf, 512) == 512)
+		rv = 0;
+
+	free(buf);
+	return rv;
+}
+
+/*
+ * compare_super method.
+ *
+ * return:
+ *  0 same, or first was empty, and second was copied
+ *    (ownership of tst->sb moves to st and tst->sb becomes NULL)
+ *  2 wrong uuid (the anchor GUIDs differ)
+ *
+ * Values 1 (wrong number) and 3 (wrong other info) belong to the same
+ * convention but are never produced here.
+ */
+static int compare_super_ddf(struct supertype *st, struct supertype *tst)
+{
+	struct ddf_super *have = st->sb;
+	struct ddf_super *candidate = tst->sb;
+
+	if (have == NULL) {
+		st->sb = candidate;
+		tst->sb = NULL;
+		return 0;
+	}
+
+	/* FIXME should I look at anything else? */
+	return memcmp(have->anchor.guid, candidate->anchor.guid,
+		      DDF_GUID_LEN) ? 2 : 0;
+}
+
+/*
+ * A new array 'a' has been started which claims to be instance 'inst'
+ * within container 'c'.
+ * We need to confirm that the array matches the metadata in 'c' so
+ * that we don't corrupt any metadata.
+ */
+/* Record which container member array 'a' is, taken from the decimal
+ * string 'inst'.  No validation against the metadata in 'c' is done
+ * yet; always returns 0.
+ */
+static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
+{
+	int member = atoi(inst);
+
+	dprintf("ddf: open_new %s\n", inst);
+	a->info.container_member = member;
+	return 0;
+}
+
+/*
+ * The array 'a' is to be marked clean in the metadata.
+ * If '->resync_start' is not ~(unsigned long long)0, then the array is only
+ * clean up to the point (in sectors). If that cannot be recorded in the
+ * metadata, then leave it as dirty.
+ *
+ * For DDF, we need to clear the DDF_state_inconsistent bit in the
+ * !global! virtual_disk.virtual_entry structure.
+ */
+/*
+ * set_array_state method: record whether array 'a' is consistent, and
+ * how far it has been initialised, in its virtual_entry.
+ * ddf->updates_pending is raised only if either field actually changed.
+ */
+static void ddf_set_array_state(struct active_array *a, int consistent)
+{
+	struct ddf_super *ddf = a->container->sb;
+	int inst = a->info.container_member;
+	struct virtual_entry *ve = &ddf->virt->entries[inst];
+	int prev_state = ve->state;
+	int prev_init = ve->init_state;
+
+	/* consistency bit */
+	if (consistent)
+		ve->state &= ~DDF_state_inconsistent;
+	else
+		ve->state |= DDF_state_inconsistent;
+
+	/* initialisation level, derived from the resync position:
+	 * ~0 means fully done, 0 means not started, anything else partial
+	 */
+	ve->init_state &= ~DDF_initstate_mask;
+	if (a->resync_start == ~0ULL)
+		ve->init_state |= DDF_init_full;
+	else if (a->resync_start == 0)
+		ve->init_state |= DDF_init_not;
+	else
+		ve->init_state |= DDF_init_quick;
+
+	if (prev_state != ve->state || prev_init != ve->init_state)
+		ddf->updates_pending = 1;
+
+	dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
+		a->resync_start);
+}
+
+/*
+ * The state of each disk is stored in the global phys_disk structure
+ * in phys_disk.entries[n].state.
+ * This makes various combinations awkward.
+ * - When a device fails in any array, it must be failed in all arrays
+ * that include a part of this device.
+ * - When a component is rebuilding, we cannot include it officially in the
+ * array unless this is the only array that uses the device.
+ *
+ * So: when transitioning:
+ * Online -> failed, just set failed flag. monitor will propagate
+ * spare -> online, the device might need to be added to the array.
+ * spare -> failed, just set failed. Don't worry if in array or not.
+ */
+/*
+ * set_disk method: slot 'n' of array 'a' has changed to 'state'
+ * (DS_* flags).
+ *
+ * Updates the global phys_disk entry of the device in that slot, then
+ * recomputes the virtual disk's state (optimal / degraded /
+ * part-optimal / failed) from the number of slots whose device is
+ * Online and neither Failed nor Rebuilding.  ddf->updates_pending is
+ * raised when anything changed.
+ */
+static void ddf_set_disk(struct active_array *a, int n, int state)
+{
+	struct ddf_super *ddf = a->container->sb;
+	int inst = a->info.container_member;
+	struct vd_config *vc = find_vdcr(ddf, inst);
+	int pd;
+	int i, st, working;
+
+	if (vc == NULL) {
+		dprintf("ddf: cannot find instance %d!!\n", inst);
+		return;
+	}
+	/* Only look at vc->phys_refnum once vc is known to be valid; the
+	 * lookup used to sit in the declarations, ahead of the NULL check.
+	 */
+	pd = find_phys(ddf, vc->phys_refnum[n]);
+	if (pd < 0) {
+		/* disk doesn't currently exist. If it is now in_sync,
+		 * insert it. */
+		if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
+			/* Find dev 'n' in a->info->devs, determine the
+			 * ddf refnum, and set vc->phys_refnum and update
+			 * phys->entries[]
+			 */
+			/* FIXME */
+		}
+	} else {
+		int old = ddf->phys->entries[pd].state;
+		if (state & DS_FAULTY)
+			ddf->phys->entries[pd].state  |= __cpu_to_be16(DDF_Failed);
+		if (state & DS_INSYNC) {
+			ddf->phys->entries[pd].state  |= __cpu_to_be16(DDF_Online);
+			ddf->phys->entries[pd].state  &= __cpu_to_be16(~DDF_Rebuilding);
+		}
+		if (old != ddf->phys->entries[pd].state)
+			ddf->updates_pending = 1;
+	}
+
+	dprintf("ddf: set_disk %d to %x\n", n, state);
+
+	/* Now we need to check the state of the array and update
+	 * virtual_disk.entries[n].state.
+	 * It needs to be one of "optimal", "degraded", "failed".
+	 * I don't understand 'deleted' or 'missing'.
+	 */
+	working = 0;
+	for (i=0; i < a->info.array.raid_disks; i++) {
+		pd = find_phys(ddf, vc->phys_refnum[i]);
+		if (pd < 0)
+			continue;
+		st = __be16_to_cpu(ddf->phys->entries[pd].state);
+		if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
+		    == DDF_Online)
+			working++;
+	}
+	/* 'state' is reused from here on for the DDF_state_* value */
+	state = DDF_state_degraded;
+	if (working == a->info.array.raid_disks)
+		state = DDF_state_optimal;
+	else switch(vc->prl) {
+	case DDF_RAID0:
+	case DDF_CONCAT:
+	case DDF_JBOD:
+		/* no redundancy: any missing device is fatal */
+		state = DDF_state_failed;
+		break;
+	case DDF_RAID1:
+		if (working == 0)
+			state = DDF_state_failed;
+		break;
+	case DDF_RAID4:
+	case DDF_RAID5:
+		if (working < a->info.array.raid_disks-1)
+			state = DDF_state_failed;
+		break;
+	case DDF_RAID6:
+		if (working < a->info.array.raid_disks-2)
+			state = DDF_state_failed;
+		else if (working == a->info.array.raid_disks-1)
+			state = DDF_state_part_optimal;
+		break;
+	}
+
+	if (ddf->virt->entries[inst].state !=
+	    ((ddf->virt->entries[inst].state & ~DDF_state_mask)
+	     | state)) {
+
+		ddf->virt->entries[inst].state =
+			(ddf->virt->entries[inst].state & ~DDF_state_mask)
+			| state;
+		ddf->updates_pending = 1;
+	}
+
+}
+
+/*
+ * sync_metadata method: if any update is pending, clear the flag and
+ * write all metadata to all devices, leaving their fds open.
+ *
+ * Later, we might be able to track whether only local changes
+ * have been made, or whether any global data has been changed,
+ * but ddf is sufficiently weird that it probably always
+ * changes global data ....
+ */
+static void ddf_sync_metadata(struct supertype *st)
+{
+	struct ddf_super *ddf = st->sb;
+
+	if (ddf->updates_pending) {
+		ddf->updates_pending = 0;
+		__write_init_super_ddf(st, 0);
+		dprintf("ddf: sync_metadata\n");
+	}
+}
+
+/*
+ * process_update method: apply one queued metadata update to the
+ * in-memory ddf metadata.  The update kind is selected by the 32-bit
+ * magic at the start of update->buf.  Updates with an unexpected
+ * length, an out-of-range entry number, or aimed at an entry that is
+ * already in use are silently ignored.  On a successful change
+ * ddf->updates_pending is set.
+ */
+static void ddf_process_update(struct supertype *st,
+ struct metadata_update *update)
+{
+ /* Apply this update to the metadata.
+ * The first 4 bytes are a DDF_*_MAGIC which guides
+ * our actions.
+ * Possible update are:
+ * DDF_PHYS_RECORDS_MAGIC
+ * Add a new physical device. Changes to this record
+ * only happen implicitly.
+ * used_pdes is the device number.
+ * DDF_VIRT_RECORDS_MAGIC
+ * Add a new VD. Possibly also change the 'access' bits.
+ * populated_vdes is the entry number.
+ * DDF_VD_CONF_MAGIC
+ * New or updated VD. the VIRT_RECORD must already
+ * exist. For an update, phys_refnum and lba_offset
+ * (at least) are updated, and the VD_CONF must
+ * be written to precisely those devices listed with
+ * a phys_refnum.
+ * DDF_SPARE_ASSIGN_MAGIC
+ * replacement Spare Assignment Record... but for which device?
+ *
+ * So, e.g.:
+ * - to create a new array, we send a VIRT_RECORD and
+ * a VD_CONF. Then assemble and start the array.
+ * - to activate a spare we send a VD_CONF to add the phys_refnum
+ * and offset. This will also mark the spare as active with
+ * a spare-assignment record.
+ */
+ struct ddf_super *ddf = st->sb;
+ __u32 *magic = (__u32*)update->buf;
+ struct phys_disk *pd;
+ struct virtual_disk *vd;
+ struct vd_config *vc;
+ struct vcl *vcl;
+ struct dl *dl;
+ int mppe;
+ int ent;
+
+ dprintf("Process update %x\n", *magic);
+
+ switch (*magic) {
+ case DDF_PHYS_RECORDS_MAGIC:
+
+ /* the update is a header followed by exactly one entry */
+ if (update->len != (sizeof(struct phys_disk) +
+ sizeof(struct phys_disk_entry)))
+ return;
+ pd = (struct phys_disk*)update->buf;
+
+ /* used_pdes in the update carries the target entry number */
+ ent = __be16_to_cpu(pd->used_pdes);
+ if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
+ return;
+ /* only fill a slot whose guid is still all 0xff (unused) */
+ if (!all_ff(ddf->phys->entries[ent].guid))
+ return;
+ ddf->phys->entries[ent] = pd->entries[0];
+ ddf->phys->used_pdes = __cpu_to_be16(1 +
+ __be16_to_cpu(ddf->phys->used_pdes));
+ ddf->updates_pending = 1;
+ break;
+
+ case DDF_VIRT_RECORDS_MAGIC:
+
+ /* the update is a header followed by exactly one entry */
+ if (update->len != (sizeof(struct virtual_disk) +
+ sizeof(struct virtual_entry)))
+ return;
+ vd = (struct virtual_disk*)update->buf;
+
+ /* populated_vdes in the update carries the target entry number */
+ ent = __be16_to_cpu(vd->populated_vdes);
+ if (ent >= __be16_to_cpu(ddf->virt->max_vdes))
+ return;
+ /* only fill a slot whose guid is still all 0xff (unused) */
+ if (!all_ff(ddf->virt->entries[ent].guid))
+ return;
+ ddf->virt->entries[ent] = vd->entries[0];
+ ddf->virt->populated_vdes = __cpu_to_be16(1 +
+ __be16_to_cpu(ddf->virt->populated_vdes));
+ ddf->updates_pending = 1;
+ break;
+
+ case DDF_VD_CONF_MAGIC:
+ dprintf("len %d %d\n", update->len, ddf->conf_rec_len);
+
+ mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
+ /* the update must be exactly one config record, in bytes */
+ if (update->len != ddf->conf_rec_len * 512)
+ return;
+ vc = (struct vd_config*)update->buf;
+ /* look for an existing config with the same guid */
+ for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
+ if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
+ break;
+ dprintf("vcl = %p\n", vcl);
+ if (vcl) {
+ /* An update, just copy the phys_refnum and lba_offset
+ * fields
+ */
+ /* (one copy covers both: mppe 32-bit refnums followed
+ * by mppe 64-bit offsets) */
+ memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
+ mppe * (sizeof(__u32) + sizeof(__u64)));
+ } else {
+ /* A new VD_CONF */
+ /* take over the buffer found in update->space and link
+ * it at the head of conflist */
+ vcl = update->space;
+ update->space = NULL;
+ vcl->next = ddf->conflist;
+ memcpy(&vcl->conf, vc, update->len);
+ vcl->lba_offset = (__u64*)
+ &vcl->conf.phys_refnum[mppe];
+ ddf->conflist = vcl;
+ }
+ /* Now make sure vlist is correct for each dl. */
+ for (dl = ddf->dlist; dl; dl = dl->next) {
+ int dn;
+ int vn = 0;
+ /* collect every config that references this device */
+ /* NOTE(review): vn is not bounded by max_part while
+ * filling - confirm this cannot overflow vlist */
+ for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
+ for (dn=0; dn < ddf->mppe ; dn++)
+ if (vcl->conf.phys_refnum[dn] ==
+ dl->disk.refnum) {
+ dprintf("dev %d has %p at %d\n",
+ dl->pdnum, vcl, vn);
+ dl->vlist[vn++] = vcl;
+ break;
+ }
+ while (vn < ddf->max_part)
+ dl->vlist[vn++] = NULL;
+ /* then bring the device's type flags in line with
+ * what it is now used for */
+ if (dl->vlist[0]) {
+ ddf->phys->entries[dl->pdnum].type &=
+ ~__cpu_to_be16(DDF_Global_Spare);
+ ddf->phys->entries[dl->pdnum].type |=
+ __cpu_to_be16(DDF_Active_in_VD);
+ }
+ if (dl->spare) {
+ ddf->phys->entries[dl->pdnum].type &=
+ ~__cpu_to_be16(DDF_Global_Spare);
+ ddf->phys->entries[dl->pdnum].type |=
+ __cpu_to_be16(DDF_Spare);
+ }
+ if (!dl->vlist[0] && !dl->spare) {
+ ddf->phys->entries[dl->pdnum].type |=
+ __cpu_to_be16(DDF_Global_Spare);
+ ddf->phys->entries[dl->pdnum].type &=
+ ~__cpu_to_be16(DDF_Spare |
+ DDF_Active_in_VD);
+ }
+ }
+ ddf->updates_pending = 1;
+ break;
+ case DDF_SPARE_ASSIGN_MAGIC:
+ /* spare-assignment updates are not handled yet */
+ default: break;
+ }
+}
+
+/*
+ * prepare_update method.
+ *
+ * This update arrived at managemon and is about to be passed to
+ * monitor, so any allocation it needs is done here.  Only
+ * DDF_VD_CONF_MAGIC updates need space: a 512-byte-aligned buffer big
+ * enough for a struct vcl whose config record is conf_rec_len sectors.
+ * The result of posix_memalign() is not checked.
+ */
+static void ddf_prepare_update(struct supertype *st,
+			       struct metadata_update *update)
+{
+	struct ddf_super *ddf = st->sb;
+	__u32 *magic = (__u32*)update->buf;
+
+	if (*magic != DDF_VD_CONF_MAGIC)
+		return;
+
+	posix_memalign(&update->space, 512,
+		       offsetof(struct vcl, conf)
+		       + ddf->conf_rec_len * 512);
+}
+
+/*
+ * Check if the array 'a' is degraded but not failed.
+ * If it is, find as many spares as are available and needed and
+ * arrange for their inclusion.
+ * We only choose devices which are not already in the array,
+ * and prefer those with a spare-assignment to this array.
+ * otherwise we choose global spares - assuming always that
+ * there is enough room.
+ * For each spare that we assign, we return an 'mdinfo' which
+ * describes the position for the device in the array.
+ * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
+ * the new phys_refnum and lba_offset values.
+ *
+ * Only worry about BVDs at the moment.
+ */
+/*
+ * activate_spare method.
+ *
+ * If array 'a' is degraded but not failed, pick a spare for each slot
+ * that has no working device.  Devices dedicated to this array are
+ * preferred; global spares are only considered once the dedicated ones
+ * are exhausted.  A candidate must have a free gap of at least
+ * a->info.component_size sectors.
+ *
+ * Returns a list of mdinfo (one per assigned spare) describing slot,
+ * device numbers and data offset, or NULL when nothing is to be done
+ * or nothing suitable is found.  When the list is non-empty, a
+ * DDF_VD_CONF update carrying the new phys_refnum/lba_offset values is
+ * pushed onto *updates.
+ *
+ * Fixes relative to the previous version:
+ *  - the extent scan used the slot counter 'i' instead of its own
+ *    index 'j', corrupting the slot number and reading wrong extents;
+ *  - mu->len was in sectors although ddf_process_update compares it
+ *    against conf_rec_len * 512;
+ *  - the refnum of each chosen spare was taken from 'dl' after the
+ *    search loop, where it may be NULL or refer to a different device;
+ *    it now comes from the pdnum saved in di->container_member.
+ *
+ * The results of malloc()/posix_memalign() are not checked.
+ */
+static struct mdinfo *ddf_activate_spare(struct active_array *a,
+					 struct metadata_update **updates)
+{
+	int working = 0;
+	struct mdinfo *d;
+	struct ddf_super *ddf = a->container->sb;
+	int global_ok = 0;
+	struct mdinfo *rv = NULL;
+	struct mdinfo *di;
+	struct metadata_update *mu;
+	struct dl *dl;
+	int i;
+	struct vd_config *vc;
+	__u64 *lba;
+
+	for (d = a->info.devs ; d ; d = d->next) {
+		if ((d->curr_state & DS_FAULTY) &&
+			d->state_fd >= 0)
+			/* wait for Removal to happen */
+			return NULL;
+		if (d->state_fd >= 0)
+			working ++;
+	}
+
+	dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
+		a->info.array.level);
+	if (working == a->info.array.raid_disks)
+		return NULL; /* array not degraded */
+	switch (a->info.array.level) {
+	case 1:
+		if (working == 0)
+			return NULL; /* failed */
+		break;
+	case 4:
+	case 5:
+		if (working < a->info.array.raid_disks - 1)
+			return NULL; /* failed */
+		break;
+	case 6:
+		if (working < a->info.array.raid_disks - 2)
+			return NULL; /* failed */
+		break;
+	default: /* concat or stripe */
+		return NULL; /* failed */
+	}
+
+	/* For each slot, if it is not working, find a spare */
+	dl = ddf->dlist;
+	for (i = 0; i < a->info.array.raid_disks; i++) {
+		for (d = a->info.devs ; d ; d = d->next)
+			if (d->disk.raid_disk == i)
+				break;
+		dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
+		if (d && (d->state_fd >= 0))
+			continue;
+
+		/* OK, this device needs recovery.  Find a spare */
+	again:
+		for ( ; dl ; dl = dl->next) {
+			unsigned long long esize;
+			unsigned long long pos;
+			struct mdinfo *d2;
+			int is_global = 0;
+			int is_dedicated = 0;
+			struct extent *ex;
+			int j;
+			/* If in this array, skip */
+			for (d2 = a->info.devs ; d2 ; d2 = d2->next)
+				if (d2->disk.major == dl->major &&
+				    d2->disk.minor == dl->minor) {
+					dprintf("%x:%x already in array\n", dl->major, dl->minor);
+					break;
+				}
+			if (d2)
+				continue;
+			if (ddf->phys->entries[dl->pdnum].type &
+			    __cpu_to_be16(DDF_Spare)) {
+				/* Check spare assign record */
+				if (dl->spare) {
+					if (dl->spare->type & DDF_spare_dedicated) {
+						/* check spare_ents for guid */
+						for (j = 0 ;
+						     j < __be16_to_cpu(dl->spare->populated);
+						     j++) {
+							if (memcmp(dl->spare->spare_ents[j].guid,
+								   ddf->virt->entries[a->info.container_member].guid,
+								   DDF_GUID_LEN) == 0)
+								is_dedicated = 1;
+						}
+					} else
+						is_global = 1;
+				}
+			} else if (ddf->phys->entries[dl->pdnum].type &
+				   __cpu_to_be16(DDF_Global_Spare)) {
+				is_global = 1;
+			}
+			if ( ! (is_dedicated ||
+				(is_global && global_ok))) {
+				dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
+				       is_dedicated, is_global);
+				continue;
+			}
+
+			/* We are allowed to use this device - is there space?
+			 * We need a->info.component_size sectors */
+			ex = get_extents(ddf, dl);
+			if (!ex) {
+				dprintf("cannot get extents\n");
+				continue;
+			}
+			j = 0; pos = 0;
+			esize = 0;
+
+			/* Walk the gaps between used extents with 'j'; the
+			 * slot number 'i' must stay untouched.  The list
+			 * ends with an entry whose size is 0.
+			 */
+			do {
+				esize = ex[j].start - pos;
+				if (esize >= a->info.component_size)
+					break;
+				pos = ex[j].start + ex[j].size;
+				j++;
+			} while (ex[j-1].size);
+
+			free(ex);
+			if (esize < a->info.component_size) {
+				dprintf("%x:%x has no room: %llu %llu\n", dl->major, dl->minor,
+					esize, a->info.component_size);
+				/* No room */
+				continue;
+			}
+
+			/* Cool, we have a device with some space at pos */
+			di = malloc(sizeof(*di));
+			memset(di, 0, sizeof(*di));
+			di->disk.number = i;
+			di->disk.raid_disk = i;
+			di->disk.major = dl->major;
+			di->disk.minor = dl->minor;
+			di->disk.state = 0;
+			di->data_offset = pos;
+			di->component_size = a->info.component_size;
+			/* remember which phys_disk entry this is; used below
+			 * to look up its refnum */
+			di->container_member = dl->pdnum;
+			di->next = rv;
+			rv = di;
+			dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
+				i, pos);
+
+			break;
+		}
+		if (!dl && ! global_ok) {
+			/* not enough dedicated spares, try global */
+			global_ok = 1;
+			dl = ddf->dlist;
+			goto again;
+		}
+	}
+
+	if (!rv)
+		/* No spares found */
+		return rv;
+	/* Now 'rv' has a list of devices to return.
+	 * Create a metadata_update record to update the
+	 * phys_refnum and lba_offset values
+	 */
+	mu = malloc(sizeof(*mu));
+	mu->buf = malloc(ddf->conf_rec_len * 512);
+	posix_memalign(&mu->space, 512, sizeof(struct vcl));
+	/* length in bytes, as ddf_process_update expects */
+	mu->len = ddf->conf_rec_len * 512;
+	mu->next = *updates;
+	vc = find_vdcr(ddf, a->info.container_member);
+	memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
+
+	vc = (struct vd_config*)mu->buf;
+	lba = (__u64*)&vc->phys_refnum[ddf->mppe];
+	for (di = rv ; di ; di = di->next) {
+		/* di->container_member holds the pdnum of the device
+		 * chosen for this slot */
+		vc->phys_refnum[di->disk.raid_disk] =
+			ddf->phys->entries[di->container_member].refnum;
+		lba[di->disk.raid_disk] = di->data_offset;
+	}
+	*updates = mu;
+	return rv;
+}
+
+/* Method table binding the ddf handlers in this file to the generic
+ * superswitch interface.
+ */
+struct superswitch super_ddf = {
+#ifndef MDASSEMBLE
+	/* only provided when MDASSEMBLE is not defined */
+	.examine_super		= examine_super_ddf,
+	.brief_examine_super	= brief_examine_super_ddf,
+	.detail_super		= detail_super_ddf,
+	.brief_detail_super	= brief_detail_super_ddf,
+	.validate_geometry	= validate_geometry_ddf,
+	.write_init_super	= write_init_super_ddf,
+#endif
+	/* loading, creating and releasing metadata */
+	.load_super		= load_super_ddf,
+	.init_super		= init_super_ddf,
+	.add_to_super		= add_to_super_ddf,
+	.store_super		= store_zero_ddf,
+	.free_super		= free_super_ddf,
+
+	/* identification and queries */
+	.match_home		= match_home_ddf,
+	.match_metadata_desc	= match_metadata_desc_ddf,
+	.uuid_from_super	= uuid_from_super_ddf,
+	.getinfo_super		= getinfo_super_ddf,
+	.update_super		= update_super_ddf,
+	.compare_super		= compare_super_ddf,
+	.avail_size		= avail_size_ddf,
+	.container_content	= container_content_ddf,
+
+	.external		= 1,
+
+	/* for mdmon */
+	.open_new		= ddf_open_new,
+	.set_array_state	= ddf_set_array_state,
+	.set_disk		= ddf_set_disk,
+	.sync_metadata		= ddf_sync_metadata,
+	.process_update		= ddf_process_update,
+	.prepare_update		= ddf_prepare_update,
+	.activate_spare		= ddf_activate_spare,
+};
diff --git a/super-intel.c b/super-intel.c
new file mode 100644
index 00000000..caa3881b
--- /dev/null
+++ b/super-intel.c
@@ -0,0 +1,2552 @@
+/*
+ * mdadm - Intel(R) Matrix Storage Manager Support
+ *
+ * Copyright (C) 2002-2007 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "mdadm.h"
+#include "mdmon.h"
+#include <values.h>
+#include <scsi/sg.h>
+#include <ctype.h>
+
+/* MPB == Metadata Parameter Block */
+/* signature text expected at the start of imsm_super.sig[] */
+#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
+/* length of the signature text, not counting the terminating NUL */
+#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
+/* version strings; they follow the signature inside sig[] */
+#define MPB_VERSION_RAID0 "1.0.00"
+#define MPB_VERSION_RAID1 "1.1.00"
+#define MPB_VERSION_RAID5 "1.2.02"
+/* size in bytes of imsm_super.sig[] */
+#define MAX_SIGNATURE_LENGTH 32
+/* size in bytes of the fixed-width serial[] and volume[] fields */
+#define MAX_RAID_SERIAL_LEN 16
+/* sector counts subtracted from a device's size when computing usable space */
+#define MPB_SECTOR_CNT 418
+#define IMSM_RESERVED_SECTORS 4096
+
+/* Disk configuration info. */
+#define IMSM_MAX_DEVICES 255
+/* One entry of the disk table embedded in struct imsm_super (disk[]).
+ * Multi-byte fields are read with __le32_to_cpu() throughout this file.
+ */
+struct imsm_disk {
+ __u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
+ __u32 total_blocks; /* 0xE8 - 0xEB total blocks */
+ __u32 scsi_id; /* 0xEC - 0xEF scsi ID */
+ __u32 status; /* 0xF0 - 0xF3 */
+#define SPARE_DISK 0x01 /* Spare */
+#define CONFIGURED_DISK 0x02 /* Member of some RaidDev */
+#define FAILED_DISK 0x04 /* Permanent failure */
+#define USABLE_DISK 0x08 /* Fully usable unless FAILED_DISK is set */
+
+#define IMSM_DISK_FILLERS 5
+ __u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
+};
+
+/* RAID map configuration info.
+ * Variable-length record: disk_ord_tbl[] is declared with one entry but
+ * holds num_members entries.
+ */
+struct imsm_map {
+ __u32 pba_of_lba0; /* start address of partition */
+ __u32 blocks_per_member;/* blocks per member */
+ __u32 num_data_stripes; /* number of data stripes */
+ __u16 blocks_per_strip;
+ __u8 map_state; /* Normal, Uninitialized, Degraded, Failed */
+#define IMSM_T_STATE_NORMAL 0
+#define IMSM_T_STATE_UNINITIALIZED 1
+#define IMSM_T_STATE_DEGRADED 2 /* FIXME: is this correct? */
+#define IMSM_T_STATE_FAILED 3 /* FIXME: is this correct? */
+ __u8 raid_level;
+#define IMSM_T_RAID0 0
+#define IMSM_T_RAID1 1
+#define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
+ __u8 num_members; /* number of member disks */
+ __u8 reserved[3];
+ __u32 filler[7]; /* expansion area */
+ __u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members],
+ top byte special */
+} __attribute__ ((packed));
+
+/* Volume state plus its map(s). map[] is declared with one entry; a second
+ * map is present when migr_state is non-zero.
+ */
+struct imsm_vol {
+ __u32 reserved[2];
+ __u8 migr_state; /* Normal or Migrating */
+ __u8 migr_type; /* Initializing, Rebuilding, ... */
+ __u8 dirty;
+ __u8 fill[1];
+ __u32 filler[5];
+ struct imsm_map map[1];
+ /* here comes another one if migr_state */
+} __attribute__ ((packed));
+
+/* One raid device (volume) record. Variable length because of the
+ * embedded imsm_vol; use sizeof_imsm_dev() rather than sizeof().
+ */
+struct imsm_dev {
+ __u8 volume[MAX_RAID_SERIAL_LEN]; /* volume name, fixed-width field */
+ __u32 size_low; /* low 32 bits of the array size */
+ __u32 size_high; /* high 32 bits of the array size */
+ __u32 status; /* Persistent RaidDev status */
+ __u32 reserved_blocks; /* Reserved blocks at beginning of volume */
+#define IMSM_DEV_FILLERS 12
+ __u32 filler[IMSM_DEV_FILLERS];
+ struct imsm_vol vol;
+} __attribute__ ((packed));
+
+/* Anchor of the MPB: fixed header, then the disk table (num_disks entries,
+ * declared with one), then num_raid_devs variable-length imsm_dev records.
+ */
+struct imsm_super {
+ __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
+ __u32 check_sum; /* 0x20 - 0x23 MPB Checksum */
+ __u32 mpb_size; /* 0x24 - 0x27 Size of MPB */
+ __u32 family_num; /* 0x28 - 0x2B Checksum from first time this config was written */
+ __u32 generation_num; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
+ __u32 reserved[2]; /* 0x30 - 0x37 */
+ __u8 num_disks; /* 0x38 Number of configured disks */
+ __u8 num_raid_devs; /* 0x39 Number of configured volumes */
+ __u8 fill[2]; /* 0x3A - 0x3B */
+#define IMSM_FILLERS 39
+ __u32 filler[IMSM_FILLERS]; /* 0x3C - 0xD7 RAID_MPB_FILLERS */
+ struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
+ /* here comes imsm_dev[num_raid_devs] */
+} __attribute__ ((packed));
+
+#ifndef MDASSEMBLE
+/* printable names, indexed by the IMSM_T_STATE_* values (0..3) */
+static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
+#endif
+
+/* Number of 512-byte sectors needed to hold 'bytes', rounding up. */
+static unsigned int sector_count(__u32 bytes)
+{
+	__u32 padded = bytes + (512 - 1);
+
+	return (padded & ~(512 - 1)) / 512;
+}
+
+/* Size of the MPB in 512-byte sectors, from its little-endian mpb_size. */
+static unsigned int mpb_sectors(struct imsm_super *mpb)
+{
+	__u32 bytes = __le32_to_cpu(mpb->mpb_size);
+
+	return sector_count(bytes);
+}
+
+/* internal representation of IMSM metadata */
+struct intel_super {
+ /* 'buf' and 'anchor' alias the same allocation */
+ union {
+ void *buf; /* O_DIRECT buffer for reading/writing metadata */
+ struct imsm_super *anchor; /* immovable parameters */
+ };
+ size_t len; /* size of the 'buf' allocation */
+ int updates_pending; /* count of pending updates for mdmon */
+ int creating_imsm; /* flag to indicate container creation */
+ int current_vol; /* index of raid device undergoing creation */
+ #define IMSM_MAX_DISKS 6
+ /* separately allocated copies of disk entries, indexed by disk index */
+ struct imsm_disk *disk_tbl[IMSM_MAX_DISKS];
+ #define IMSM_MAX_RAID_DEVS 2
+ /* separately allocated copies of raid device records */
+ struct imsm_dev *dev_tbl[IMSM_MAX_RAID_DEVS];
+ /* list of physical devices that have been loaded */
+ struct dl {
+ struct dl *next;
+ int index; /* position in the disk table, -1 when not matched */
+ __u8 serial[MAX_RAID_SERIAL_LEN];
+ int major, minor;
+ char *devname; /* strdup'ed name or NULL */
+ int fd; /* kept-open descriptor, or -1 */
+ } *disks;
+};
+
+/* a used region on a member device, as filled in by get_extents() */
+struct extent {
+ unsigned long long start, size;
+};
+
+/* definition of messages passed to imsm_process_update */
+/* each update structure below starts with one of these values */
+enum imsm_update_type {
+ update_activate_spare,
+ update_create_array,
+};
+
+/* update message carrying a disk index, a slot and an array index;
+ * records can be chained through 'next'
+ */
+struct imsm_update_activate_spare {
+ enum imsm_update_type type;
+ int disk_idx;
+ int slot;
+ int array;
+ struct imsm_update_activate_spare *next;
+};
+
+/* update message carrying a raid device record and its index.
+ * NOTE(review): 'dev' is variable length in the metadata, yet dev_idx is
+ * declared after it - confirm how senders size this structure.
+ */
+struct imsm_update_create_array {
+ enum imsm_update_type type;
+ struct imsm_dev dev;
+ int dev_idx;
+};
+
+/* Returns 1 when the environment variable IMSM_DEVNAME_AS_SERIAL parses
+ * (via atoi) to exactly 1, otherwise 0.
+ */
+static int imsm_env_devname_as_serial(void)
+{
+	const char *setting = getenv("IMSM_DEVNAME_AS_SERIAL");
+
+	return (setting != NULL && atoi(setting) == 1) ? 1 : 0;
+}
+
+
+/* Return a freshly allocated supertype bound to super_imsm when 'arg' is
+ * "imsm" or "default"; NULL for any other name or when allocation fails.
+ */
+static struct supertype *match_metadata_desc_imsm(char *arg)
+{
+	struct supertype *st;
+
+	if (strcmp(arg, "imsm") != 0 &&
+	    strcmp(arg, "default") != 0)
+		return NULL;
+
+	st = malloc(sizeof(*st));
+	if (!st)
+		return NULL;	/* previously memset() was handed NULL here */
+	memset(st, 0, sizeof(*st));
+	st->ss = &super_imsm;
+	st->max_devs = IMSM_MAX_DEVICES;
+	st->minor_version = 0;
+	st->sb = NULL;
+	return st;
+}
+
+/* The version text starts right after the signature text inside sig[]. */
+static __u8 *get_imsm_version(struct imsm_super *mpb)
+{
+	__u8 *sig = mpb->sig;
+
+	return sig + MPB_SIG_LEN;
+}
+
+/* Look a disk entry up straight in the anchor's disk table. Only valid
+ * while the anchor is known to be up-to-date (currently at load time).
+ * Returns NULL when 'index' is not below num_disks.
+ */
+static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
+{
+	return index < mpb->num_disks ? &mpb->disk[index] : NULL;
+}
+
+/* Fetch the cached copy of disk 'index' from super->disk_tbl[], or NULL
+ * when 'index' is not below the anchor's num_disks.
+ */
+static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
+{
+	return index < super->anchor->num_disks ? super->disk_tbl[index] : NULL;
+}
+
+/* generate a checksum directly from the anchor when the anchor is known to be
+ * up-to-date, currently only at load or write_super after coalescing
+ *
+ * The result is the 32-bit sum of every little-endian word of the MPB
+ * (mpb_size bytes) with the stored check_sum word taken back out.
+ */
+static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
+{
+	/* mpb_size is stored little-endian like every other field */
+	__u32 end = __le32_to_cpu(mpb->mpb_size) / sizeof(end);
+	__u32 *p = (__u32 *) mpb;
+	__u32 sum = 0;
+
+	while (end--) {
+		/* keep the increment out of the conversion macro's argument */
+		sum += __le32_to_cpu(*p);
+		p++;
+	}
+
+	return sum - __le32_to_cpu(mpb->check_sum);
+}
+
+/* Size in bytes of a raid device record, including the variable-length
+ * disk_ord_tbl[] of its map(s).
+ *
+ * NOTE(review): the second map is addressed as vol.map[1], i.e. at a fixed
+ * sizeof(struct imsm_map) past map[0]. That does not skip the first map's
+ * extra disk_ord_tbl entries when num_members > 1 - confirm. imsm_copy_dev()
+ * addresses the second map the same way, so both must change together.
+ */
+static size_t sizeof_imsm_dev(struct imsm_dev *dev)
+{
+ size_t size = sizeof(*dev);
+
+ /* each map has disk_ord_tbl[num_members - 1] additional space */
+ size += sizeof(__u32) * (dev->vol.map[0].num_members - 1);
+
+ /* migrating means an additional map */
+ if (dev->vol.migr_state) {
+ size += sizeof(struct imsm_map);
+ size += sizeof(__u32) * (dev->vol.map[1].num_members - 1);
+ }
+
+ return size;
+}
+
+/* Locate raid device 'index' inside the anchor buffer. The records follow
+ * the disk table and are variable length, so every preceding record is
+ * stepped over using sizeof_imsm_dev(). Returns NULL when 'index' is not
+ * below num_raid_devs.
+ */
+static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
+{
+	void *base = mpb;
+	int pos;
+	int n;
+
+	if (index >= mpb->num_raid_devs)
+		return NULL;
+
+	/* devices start after all disks */
+	pos = ((void *) &mpb->disk[mpb->num_disks]) - base;
+
+	for (n = 0; n < index; n++)
+		pos += sizeof_imsm_dev(base + pos);
+
+	return base + pos;
+}
+
+/* Fetch the cached copy of raid device 'index' from super->dev_tbl[], or
+ * NULL when 'index' is not below the anchor's num_raid_devs.
+ */
+static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
+{
+	return index < super->anchor->num_raid_devs ? super->dev_tbl[index] : NULL;
+}
+
+/* Return the disk index stored in disk_ord_tbl[slot] of 'map'.
+ * The table is little-endian; the top byte of each entry is 'special' and
+ * is stripped here.
+ */
+static __u32 get_imsm_disk_idx(struct imsm_map *map, int slot)
+{
+	/* convert to host order first so the mask always removes the top
+	 * byte, and use an unsigned constant (0xff << 24 overflows int)
+	 */
+	return __le32_to_cpu(map->disk_ord_tbl[slot]) & 0x00ffffffU;
+}
+
+/* Translate the stored raid_level into an md level: a stored level 1 is
+ * reported as 1 with exactly two members and as 10 otherwise; every other
+ * value is returned unchanged.
+ */
+static int get_imsm_raid_level(struct imsm_map *map)
+{
+	if (map->raid_level != 1)
+		return map->raid_level;
+
+	return map->num_members == 2 ? 1 : 10;
+}
+
+/* qsort() comparator: orders extents by ascending start. */
+static int cmp_extent(const void *av, const void *bv)
+{
+	const struct extent *lhs = av;
+	const struct extent *rhs = bv;
+
+	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
+}
+
+/* Build a malloc'ed array of the extents used on physical device 'dl':
+ * one entry per slot that references the device in the first map of any
+ * raid device, sorted by start, followed by a terminating entry of size 0
+ * whose start is total_blocks - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS).
+ * Returns NULL when the disk is unknown or the allocation fails.
+ */
+static struct extent *get_extents(struct intel_super *super, struct dl *dl)
+{
+	struct imsm_disk *disk = get_imsm_disk(super, dl->index);
+	struct extent *list, *cur;
+	int used = 0;
+	int d, slot;
+
+	if (!disk)
+		return NULL;
+
+	/* first pass: count the slots that refer to this device */
+	for (d = 0; d < super->anchor->num_raid_devs; d++) {
+		struct imsm_map *map = get_imsm_dev(super, d)->vol.map;
+
+		for (slot = 0; slot < map->num_members; slot++)
+			if (get_imsm_disk_idx(map, slot) == dl->index)
+				used++;
+	}
+
+	list = malloc(sizeof(struct extent) * (used + 1));
+	if (!list)
+		return NULL;
+	cur = list;
+
+	/* second pass: record one extent per matching slot */
+	for (d = 0; d < super->anchor->num_raid_devs; d++) {
+		struct imsm_map *map = get_imsm_dev(super, d)->vol.map;
+
+		for (slot = 0; slot < map->num_members; slot++) {
+			if (get_imsm_disk_idx(map, slot) != dl->index)
+				continue;
+			cur->start = __le32_to_cpu(map->pba_of_lba0);
+			cur->size = __le32_to_cpu(map->blocks_per_member);
+			cur++;
+		}
+	}
+	qsort(list, used, sizeof(*list), cmp_extent);
+
+	/* terminator */
+	cur->start = __le32_to_cpu(disk->total_blocks) -
+		(MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
+	cur->size = 0;
+	return list;
+}
+
+#ifndef MDASSEMBLE
+/* Print a human readable description of raid device 'dev' (first map only).
+ * 'index' is the disk index of the device being examined; it is used to
+ * report which slot that disk occupies ("?" when it is not a member).
+ */
+static void print_imsm_dev(struct imsm_dev *dev, int index)
+{
+	__u64 sz;
+	int slot;
+	struct imsm_map *map = dev->vol.map;
+	const char *state = "unknown";
+
+	/* map_state comes from disk: never index the name table blindly */
+	if (map->map_state < sizeof(map_state_str) / sizeof(map_state_str[0]))
+		state = map_state_str[map->map_state];
+
+	printf("\n");
+	/* volume[] is a fixed-width field that may lack a terminating NUL */
+	printf("[%.*s]:\n", MAX_RAID_SERIAL_LEN, dev->volume);
+	printf(" RAID Level : %d\n", get_imsm_raid_level(map));
+	printf(" Members : %d\n", map->num_members);
+	for (slot = 0; slot < map->num_members; slot++)
+		if (index == get_imsm_disk_idx(map, slot))
+			break;
+	if (slot < map->num_members)
+		printf(" This Slot : %d\n", slot);
+	else
+		printf(" This Slot : ?\n");
+	sz = __le32_to_cpu(dev->size_high);
+	sz <<= 32;
+	sz += __le32_to_cpu(dev->size_low);
+	printf(" Array Size : %llu%s\n", (unsigned long long)sz,
+	       human_size(sz * 512));
+	sz = __le32_to_cpu(map->blocks_per_member);
+	printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
+	       human_size(sz * 512));
+	printf(" Sector Offset : %u\n",
+	       __le32_to_cpu(map->pba_of_lba0));
+	printf(" Num Stripes : %u\n",
+	       __le32_to_cpu(map->num_data_stripes));
+	printf(" Chunk Size : %u KiB\n",
+	       __le16_to_cpu(map->blocks_per_strip) / 2);
+	printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
+	printf(" Migrate State : %s\n", dev->vol.migr_state ? "migrating" : "idle");
+	printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
+	printf(" Map State : %s\n", state);
+}
+
+/* Print serial, state flags, id and usable size of disk 'index' of the
+ * anchor's disk table. Does nothing when 'index' is negative or not below
+ * num_disks.
+ */
+static void print_imsm_disk(struct imsm_super *mpb, int index)
+{
+	struct imsm_disk *disk;
+	char str[MAX_RAID_SERIAL_LEN + 1];
+	__u32 s;
+	__u64 sz;
+
+	/* validate before the lookup: it takes a __u8 and may return NULL */
+	if (index < 0 || index >= mpb->num_disks)
+		return;
+	disk = __get_imsm_disk(mpb, index);
+	if (!disk)
+		return;
+
+	printf("\n");
+	/* serial[] is fixed width and may use all 16 bytes without a NUL */
+	snprintf(str, sizeof(str), "%.*s", MAX_RAID_SERIAL_LEN, disk->serial);
+	printf(" Disk%02d Serial : %s\n", index, str);
+	s = __le32_to_cpu(disk->status);
+	printf(" State :%s%s%s%s\n", s&SPARE_DISK ? " spare" : "",
+	       s&CONFIGURED_DISK ? " active" : "",
+	       s&FAILED_DISK ? " failed" : "",
+	       s&USABLE_DISK ? " usable" : "");
+	printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
+	sz = __le32_to_cpu(disk->total_blocks) -
+	     (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS * mpb->num_raid_devs);
+	printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
+	       human_size(sz * 512));
+}
+
+/* Print the loaded MPB: header fields, the examined disk first, then every
+ * raid device, then the remaining disks. 'homehost' is not used.
+ */
+static void examine_super_imsm(struct supertype *st, char *homehost)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb = super->anchor;
+	/* -1 when no device has been loaded; the print helpers cope with it */
+	int self = super->disks ? super->disks->index : -1;
+	int i;
+	__u32 sum;
+
+	/* as before, the last character of the signature text is left out */
+	printf(" Magic : %.*s\n", (int) MPB_SIG_LEN - 1, mpb->sig);
+	/* the version occupies the rest of sig[]; bound the read to it */
+	printf(" Version : %.*s\n", (int) (MAX_SIGNATURE_LENGTH - MPB_SIG_LEN),
+	       get_imsm_version(mpb));
+	printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
+	printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
+	sum = __le32_to_cpu(mpb->check_sum);
+	printf(" Checksum : %08x %s\n", sum,
+	       __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
+	printf(" MPB Sectors : %u\n", mpb_sectors(mpb));
+	printf(" Disks : %d\n", mpb->num_disks);
+	printf(" RAID Devices : %d\n", mpb->num_raid_devs);
+	print_imsm_disk(mpb, self);
+	for (i = 0; i < mpb->num_raid_devs; i++)
+		print_imsm_dev(__get_imsm_dev(mpb, i), self);
+	for (i = 0; i < mpb->num_disks; i++) {
+		if (i == self)
+			continue;
+		print_imsm_disk(mpb, i);
+	}
+}
+
+/* Print a one-line ARRAY description carrying the family number. */
+static void brief_examine_super_imsm(struct supertype *st)
+{
+ struct intel_super *super = st->sb;
+
+ printf("ARRAY /dev/imsm family=%08x metadata=external:imsm\n",
+ __le32_to_cpu(super->anchor->family_num));
+}
+
+/* Placeholder: only prints its own function name. */
+static void detail_super_imsm(struct supertype *st, char *homehost)
+{
+ printf("%s\n", __FUNCTION__);
+}
+
+/* Placeholder: only prints its own function name. */
+static void brief_detail_super_imsm(struct supertype *st)
+{
+ printf("%s\n", __FUNCTION__);
+}
+#endif
+
+/* Placeholder: prints its own function name and always returns 0. */
+static int match_home_imsm(struct supertype *st, char *homehost)
+{
+ printf("%s\n", __FUNCTION__);
+
+ return 0;
+}
+
+/* Placeholder: prints its own function name; uuid[] is not written. */
+static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
+{
+ printf("%s\n", __FUNCTION__);
+}
+
+#if 0
+/* Disabled code: splits the dotted version text that follows the signature
+ * into up to three short strings and returns the second and third as
+ * numbers through *m and *p. The first component is collected but unused.
+ */
+static void
+get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
+{
+ __u8 *v = get_imsm_version(mpb);
+ __u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
+ char major[] = { 0, 0, 0 };
+ char minor[] = { 0 ,0, 0 };
+ char patch[] = { 0, 0, 0 };
+ char *ver_parse[] = { major, minor, patch };
+ int i, j;
+
+ i = j = 0;
+ /* NOTE(review): *v is read before the 'v < end' bound is tested, and 'i'
+ * is never bounded against the three ver_parse[] entries
+ */
+ while (*v != '\0' && v < end) {
+ if (*v != '.' && j < 2)
+ ver_parse[i][j++] = *v;
+ else {
+ i++;
+ j = 0;
+ }
+ v++;
+ }
+
+ *m = strtol(minor, NULL, 0);
+ *p = strtol(patch, NULL, 0);
+}
+#endif
+
+/* Layout value reported for a given md level: 0 for levels 0 and 1,
+ * ALGORITHM_LEFT_SYMMETRIC for 5 and 6, 0x102 for 10, -1 otherwise.
+ */
+static int imsm_level_to_layout(int level)
+{
+	if (level == 0 || level == 1)
+		return 0;
+	if (level == 5 || level == 6)
+		return ALGORITHM_LEFT_SYMMETRIC;
+	if (level == 10)
+		return 0x102; //FIXME is this correct?
+
+	return -1;
+}
+
+/* Fill 'info' for the volume selected by super->current_vol, using the
+ * first map of that raid device. text_version becomes
+ * "/<container devname>/<volume index>".
+ */
+static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
+	struct imsm_map *map = &dev->vol.map[0];
+
+	info->container_member = super->current_vol;
+	info->array.raid_disks = map->num_members;
+	info->array.level = get_imsm_raid_level(map);
+	info->array.layout = imsm_level_to_layout(info->array.level);
+	info->array.md_minor = -1;
+	info->array.ctime = 0;
+	info->array.utime = 0;
+	/* convert the 16-bit field first, then scale sectors to bytes; the
+	 * old code multiplied inside the 16-bit conversion
+	 */
+	info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9;
+
+	info->data_offset = __le32_to_cpu(map->pba_of_lba0);
+	info->component_size = __le32_to_cpu(map->blocks_per_member);
+
+	info->disk.major = 0;
+	info->disk.minor = 0;
+
+	sprintf(info->text_version, "/%s/%d",
+		devnum2devname(st->container_dev),
+		info->container_member);
+}
+
+
+/* Fill 'info' from the loaded metadata. When a volume is selected
+ * (current_vol >= 0) the volume variant is used; otherwise the container
+ * is described and, if a device has been loaded, that device's disk fields.
+ */
+static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_disk *disk;
+	__u32 status;
+	int state;
+
+	if (super->current_vol >= 0) {
+		getinfo_super_imsm_volume(st, info);
+		return;
+	}
+
+	/* container level description */
+	info->array.raid_disks = super->anchor->num_disks;
+	info->array.level = LEVEL_CONTAINER;
+	info->array.layout = 0;
+	info->array.md_minor = -1;
+	info->array.ctime = 0; /* N/A for imsm */
+	info->array.utime = 0;
+	info->array.chunk_size = 0;
+
+	info->disk.major = 0;
+	info->disk.minor = 0;
+	info->disk.raid_disk = -1;
+	info->reshape_active = 0;
+	strcpy(info->text_version, "imsm");
+	info->disk.number = -1;
+	info->disk.state = 0;
+
+	if (!super->disks)
+		return;
+
+	disk = get_imsm_disk(super, super->disks->index);
+	if (!disk) {
+		info->disk.number = -1;
+		info->disk.raid_disk = -1;
+		return;
+	}
+
+	info->disk.number = super->disks->index;
+	info->disk.raid_disk = super->disks->index;
+	info->data_offset = __le32_to_cpu(disk->total_blocks) -
+		(MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
+	info->component_size = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
+
+	/* map the imsm status flags onto md disk state bits */
+	status = __le32_to_cpu(disk->status);
+	state = 0;
+	if (status & CONFIGURED_DISK)
+		state |= 1 << MD_DISK_ACTIVE;
+	if (status & FAILED_DISK)
+		state |= 1 << MD_DISK_FAULTY;
+	if (status & USABLE_DISK)
+		state |= 1 << MD_DISK_SYNC;
+	info->disk.state = state;
+}
+
+/* Stub: no update is implemented yet. The "grow" and "resync" branches are
+ * empty and the function always returns 0.
+ */
+static int update_super_imsm(struct supertype *st, struct mdinfo *info,
+ char *update, char *devname, int verbose,
+ int uuid_set, char *homehost)
+{
+ /* FIXME */
+
+ /* For 'assemble' and 'force' we need to return non-zero if any
+ * change was made. For others, the return value is ignored.
+ * Update options are:
+ * force-one : This device looks a bit old but needs to be included,
+ * update age info appropriately.
+ * assemble: clear any 'faulty' flag to allow this device to
+ * be assembled.
+ * force-array: Array is degraded but being forced, mark it clean
+ * if that will be needed to assemble it.
+ *
+ * newdev: not used ????
+ * grow: Array has gained a new device - this is currently for
+ * linear only
+ * resync: mark as dirty so a resync will happen.
+ * name: update the name - preserving the homehost
+ *
+ * Following are not relevant for imsm:
+ * sparc2.2 : update from old dodgy metadata
+ * super-minor: change the preferred_minor number
+ * summaries: update redundant counters.
+ * uuid: Change the uuid of the array to match what is given
+ * homehost: update the recorded homehost
+ * _reshape_progress: record new reshape_progress position.
+ */
+ int rv = 0;
+ //struct intel_super *super = st->sb;
+ //struct imsm_super *mpb = super->mpb;
+
+ if (strcmp(update, "grow") == 0) {
+ }
+ if (strcmp(update, "resync") == 0) {
+ /* dev->vol.dirty = 1; */
+ }
+
+ /* IMSM has no concept of UUID or homehost */
+
+ return rv;
+}
+
+/* Bytes needed for an MPB describing 'disks' disks with room for two raid
+ * devices, each carrying two maps with a full disk_ord_tbl.
+ */
+static size_t disks_to_mpb_size(int disks)
+{
+	/* anchor, which already contains one disk entry */
+	size_t total = sizeof(struct imsm_super);
+
+	total += (disks - 1) * sizeof(struct imsm_disk);
+	/* two raid devices, each of which already contains one map */
+	total += 2 * sizeof(struct imsm_dev);
+	/* up to 2 maps per raid device (-2 for the imsm_maps in imsm_dev) */
+	total += (4 - 2) * sizeof(struct imsm_map);
+	/* 4 possible disk_ord_tbl's */
+	total += 4 * (disks - 1) * sizeof(__u32);
+
+	return total;
+}
+
+/* Sectors left on a device of 'devsize' sectors once the metadata and
+ * reserved areas are taken off; 0 when the device is smaller than that.
+ */
+static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
+{
+	const __u64 overhead = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
+
+	return devsize < overhead ? 0 : devsize - overhead;
+}
+
+/* Compare the metadata held by 'st' and 'tst'.
+ * return:
+ *  0 same, or first was empty, and second was moved into it
+ *  3 signature, family number, mpb size or checksum differ
+ */
+static int compare_super_imsm(struct supertype *st, struct supertype *tst)
+{
+	struct intel_super *first = st->sb;
+	struct intel_super *sec = tst->sb;
+	struct imsm_super *a, *b;
+
+	if (!first) {
+		/* adopt the second superblock */
+		st->sb = tst->sb;
+		tst->sb = NULL;
+		return 0;
+	}
+
+	a = first->anchor;
+	b = sec->anchor;
+	if (memcmp(a->sig, b->sig, MAX_SIGNATURE_LENGTH) != 0 ||
+	    a->family_num != b->family_num ||
+	    a->mpb_size != b->mpb_size ||
+	    a->check_sum != b->check_sum)
+		return 3;
+
+	return 0;
+}
+
+/* Write "/dev/<name>" for the block device open on 'fd' into 'name', where
+ * <name> is the last path component of the /sys/dev/block/<maj>:<min>
+ * symlink target. 'name' must hold MAX_RAID_SERIAL_LEN bytes; the result is
+ * truncated to fit and is the empty string on any failure.
+ */
+static void fd2devname(int fd, char *name)
+{
+	struct stat st;
+	char path[256];
+	char dname[100];
+	char *nm;
+	int rv;
+
+	name[0] = '\0';
+	if (fstat(fd, &st) != 0)
+		return;
+	snprintf(path, sizeof(path), "/sys/dev/block/%d:%d",
+		 major(st.st_rdev), minor(st.st_rdev));
+
+	/* readlink() does not terminate the result and may fill the whole
+	 * buffer: leave room for the NUL added below
+	 */
+	rv = readlink(path, dname, sizeof(dname) - 1);
+	if (rv <= 0)
+		return;
+
+	dname[rv] = '\0';
+	nm = strrchr(dname, '/');
+	/* a target without any '/' is used as is */
+	nm = nm ? nm + 1 : dname;
+	snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
+}
+
+
+extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
+
+/* Obtain the serial used to identify the device open on 'fd'.
+ * With IMSM_DEVNAME_AS_SERIAL=1 in the environment the device name is used.
+ * Otherwise scsi_get_serial() is queried and the non-blank characters of
+ * its reply are copied. 'serial' always ends up NUL terminated in its last
+ * byte. Returns 0 on success or the scsi_get_serial() error; 'devname' is
+ * only used for the error message and may be NULL.
+ */
+static int imsm_read_serial(int fd, char *devname,
+			    __u8 serial[MAX_RAID_SERIAL_LEN])
+{
+	unsigned char scsi_serial[255];
+	int rv;
+	int rsp_len;
+	int i, cnt;
+
+	memset(scsi_serial, 0, sizeof(scsi_serial));
+
+	if (imsm_env_devname_as_serial()) {
+		char name[MAX_RAID_SERIAL_LEN];
+
+		fd2devname(fd, name);
+		strcpy((char *) serial, name);
+		return 0;
+	}
+
+	rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
+
+	if (rv != 0) {
+		if (devname)
+			fprintf(stderr,
+				Name ": Failed to retrieve serial for %s\n",
+				devname);
+		return rv;
+	}
+
+	/* byte 3 is a device supplied length: never trust it beyond what
+	 * the reply buffer can hold after its 4 byte header
+	 */
+	rsp_len = scsi_serial[3];
+	if (rsp_len > (int) sizeof(scsi_serial) - 4)
+		rsp_len = sizeof(scsi_serial) - 4;
+
+	for (i = 0, cnt = 0; i < rsp_len; i++) {
+		if (!isspace(scsi_serial[4 + i]))
+			serial[cnt++] = scsi_serial[4 + i];
+		if (cnt == MAX_RAID_SERIAL_LEN)
+			break;
+	}
+
+	serial[MAX_RAID_SERIAL_LEN - 1] = '\0';
+
+	return 0;
+}
+
+/* Register the device open on 'fd' in super->disks and, when its serial
+ * matches an entry of the anchor's disk table, store a private copy of that
+ * entry in super->disk_tbl[] and record the index in the list element.
+ *
+ * keep_fd: non-zero to remember 'fd' in the list element (it is then closed
+ *          by free_imsm_disks()), zero to leave ownership with the caller.
+ * Returns 0 on success, including the case where no serial matched (the
+ * element then keeps index -1), and 2 on failure.
+ */
+static int
+load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
+{
+	struct dl *dl;
+	struct stat stb;
+	struct imsm_disk *disk;
+	int rv;
+	int i;
+
+	dl = malloc(sizeof(*dl));
+	disk = malloc(sizeof(*disk));
+	if (!dl || !disk) {
+		if (devname)
+			fprintf(stderr,
+				Name ": failed to allocate disk buffer for %s\n",
+				devname);
+		free(disk);
+		free(dl);
+		return 2;
+	}
+	memset(dl, 0, sizeof(*dl));
+	memset(disk, 0, sizeof(*disk));
+
+	/* major/minor stay 0 if the device cannot be stat'ed */
+	if (fstat(fd, &stb) == 0) {
+		dl->major = major(stb.st_rdev);
+		dl->minor = minor(stb.st_rdev);
+	}
+	dl->next = super->disks;
+	dl->fd = keep_fd ? fd : -1;
+	dl->devname = devname ? strdup(devname) : NULL;
+	dl->index = -1;
+	super->disks = dl;
+	rv = imsm_read_serial(fd, devname, dl->serial);
+
+	if (rv != 0) {
+		/* 'dl' is owned by the list now, 'disk' is not referenced */
+		free(disk);
+		return 2;
+	}
+
+	/* look up this disk's index */
+	for (i = 0; i < super->anchor->num_disks; i++) {
+		struct imsm_disk *disk_iter;
+
+		disk_iter = __get_imsm_disk(super->anchor, i);
+
+		if (memcmp(disk_iter->serial, dl->serial,
+			   MAX_RAID_SERIAL_LEN) != 0)
+			continue;
+
+		if (i >= IMSM_MAX_DISKS) {
+			/* disk_tbl[] cannot hold this index */
+			if (devname)
+				fprintf(stderr,
+					Name ": disk index %d of %s exceeds the"
+					" %d supported disks\n",
+					i, devname, IMSM_MAX_DISKS);
+			free(disk);
+			return 2;
+		}
+		*disk = *disk_iter;
+		super->disk_tbl[i] = disk;
+		dl->index = i;
+		break;
+	}
+
+	if (i == super->anchor->num_disks) {
+		if (devname)
+			fprintf(stderr,
+				Name ": failed to match serial \'%s\' for %s\n",
+				dl->serial, devname);
+		free(disk);
+		return 0;
+	}
+
+	return 0;
+}
+
+/* Copy the variable-length raid device record 'src' to 'dest'.
+ * 'dest' must provide at least sizeof_imsm_dev(src) bytes.
+ *
+ * The record is contiguous, so it is copied in one step. The earlier
+ * field-by-field copy indexed past the declared bounds of map[1] and
+ * disk_ord_tbl[1].
+ */
+static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
+{
+	memcpy(dest, src, sizeof_imsm_dev(src));
+}
+
+/* Copy every raid device record of the anchor into its own allocation and
+ * store it in super->dev_tbl[].
+ * Returns 0 on success, 1 when an allocation fails and 2 when the anchor
+ * announces more raid devices than dev_tbl[] can hold.
+ */
+static int parse_raid_devices(struct intel_super *super)
+{
+	int i;
+	struct imsm_dev *dev_new;
+	size_t len;
+
+	/* num_raid_devs is read from disk; dev_tbl[] has a fixed size */
+	if (super->anchor->num_raid_devs > IMSM_MAX_RAID_DEVS)
+		return 2;
+
+	for (i = 0; i < super->anchor->num_raid_devs; i++) {
+		struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
+
+		len = sizeof_imsm_dev(dev_iter);
+		dev_new = malloc(len);
+		if (!dev_new)
+			return 1;
+		imsm_copy_dev(dev_new, dev_iter);
+		super->dev_tbl[i] = dev_new;
+	}
+
+	return 0;
+}
+
+static void __free_imsm(struct intel_super *super);
+
+/* load_imsm_mpb - read matrix metadata
+ * allocates super->buf to be freed by free_super
+ *
+ * The anchor is the second to last sector of the device; when the MPB is
+ * larger than one sector the remainder is stored in the sectors just below
+ * it. Anything previously attached to 'super' is released once a valid
+ * signature and size have been seen.
+ *
+ * Returns 0 on success, 1 on seek/read/allocation problems with the anchor
+ * and 2 when no valid metadata could be loaded. 'devname' is only used for
+ * messages and may be NULL to stay silent.
+ */
+static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
+{
+	unsigned long long dsize = 0;	/* stays 0 if the size query fails */
+	unsigned long long sectors;
+	struct imsm_super *anchor;
+	__u32 mpb_size;
+	__u32 check_sum;
+	int rc;
+
+	get_dev_size(fd, NULL, &dsize);
+
+	if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
+		if (devname)
+			fprintf(stderr,
+				Name ": Cannot seek to anchor block on %s: %s\n",
+				devname, strerror(errno));
+		return 1;
+	}
+
+	if (posix_memalign((void**)&anchor, 512, 512) != 0) {
+		if (devname)
+			fprintf(stderr,
+				Name ": Failed to allocate imsm anchor buffer"
+				" on %s\n", devname);
+		return 1;
+	}
+	if (read(fd, anchor, 512) != 512) {
+		if (devname)
+			fprintf(stderr,
+				Name ": Cannot read anchor block on %s: %s\n",
+				devname, strerror(errno));
+		free(anchor);
+		return 1;
+	}
+
+	if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
+		if (devname)
+			fprintf(stderr,
+				Name ": no IMSM anchor on %s\n", devname);
+		free(anchor);
+		return 2;
+	}
+
+	/* the size is read from disk: it must at least cover the fixed
+	 * header, otherwise the buffer below would be too small for the
+	 * anchor sector copied into it
+	 */
+	mpb_size = __le32_to_cpu(anchor->mpb_size);
+	if (mpb_size < sizeof(*anchor) - sizeof(anchor->disk)) {
+		if (devname)
+			fprintf(stderr,
+				Name ": invalid IMSM mpb size %u on %s\n",
+				mpb_size, devname);
+		free(anchor);
+		return 2;
+	}
+
+	__free_imsm(super);
+	/* round the host-order size, not the raw little-endian field */
+	super->len = ROUND_UP(mpb_size, 512);
+	if (posix_memalign(&super->buf, 512, super->len) != 0) {
+		if (devname)
+			fprintf(stderr,
+				Name ": unable to allocate %zu byte mpb buffer\n",
+				super->len);
+		free(anchor);
+		return 2;
+	}
+	memcpy(super->buf, anchor, 512);
+
+	sectors = mpb_sectors(anchor) - 1;
+	free(anchor);
+
+	if (sectors) {
+		size_t want = super->len - 512;
+		ssize_t got;
+
+		/* read the extended mpb */
+		if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
+			if (devname)
+				fprintf(stderr,
+					Name ": Cannot seek to extended mpb on %s: %s\n",
+					devname, strerror(errno));
+			return 1;
+		}
+
+		got = read(fd, super->buf + 512, want);
+		if (got < 0 || (size_t) got != want) {
+			if (devname)
+				fprintf(stderr,
+					Name ": Cannot read extended mpb on %s: %s\n",
+					devname, strerror(errno));
+			return 2;
+		}
+	}
+
+	/* verified for single sector MPBs too, which used to skip this */
+	check_sum = __gen_imsm_checksum(super->anchor);
+	if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
+		if (devname)
+			fprintf(stderr,
+				Name ": IMSM checksum %x != %x on %s\n",
+				check_sum, __le32_to_cpu(super->anchor->check_sum),
+				devname);
+		return 2;
+	}
+
+	rc = load_imsm_disk(fd, super, devname, 0);
+	if (rc == 0)
+		rc = parse_raid_devices(super);
+	return rc;
+}
+
+/* Release the device list (closing any kept descriptor) and every cached
+ * disk entry in disk_tbl[]; both are left empty.
+ */
+static void free_imsm_disks(struct intel_super *super)
+{
+	struct dl *d, *next;
+	int slot;
+
+	for (d = super->disks; d; d = next) {
+		next = d->next;
+		if (d->fd >= 0)
+			close(d->fd);
+		if (d->devname)
+			free(d->devname);
+		free(d);
+	}
+	super->disks = NULL;
+
+	for (slot = 0; slot < IMSM_MAX_DISKS; slot++) {
+		if (!super->disk_tbl[slot])
+			continue;
+		free(super->disk_tbl[slot]);
+		super->disk_tbl[slot] = NULL;
+	}
+}
+
+/* Release everything attached to 'super' (metadata buffer, disk list and
+ * tables, raid device table) but not 'super' itself.
+ */
+static void __free_imsm(struct intel_super *super)
+{
+	int slot;
+
+	if (super->buf) {
+		free(super->buf);
+		super->buf = NULL;
+	}
+
+	free_imsm_disks(super);
+
+	for (slot = 0; slot < IMSM_MAX_RAID_DEVS; slot++) {
+		if (!super->dev_tbl[slot])
+			continue;
+		free(super->dev_tbl[slot]);
+		super->dev_tbl[slot] = NULL;
+	}
+}
+
+/* Release 'super' together with everything attached to it. */
+static void free_imsm(struct intel_super *super)
+{
+	struct intel_super *victim = super;
+
+	__free_imsm(victim);
+	free(victim);
+}
+
+/* Release the metadata attached to 'st', if any, and clear st->sb. */
+static void free_super_imsm(struct supertype *st)
+{
+	if (st->sb) {
+		free_imsm(st->sb);
+		st->sb = NULL;
+	}
+}
+
+/* Allocate a zeroed intel_super with no volume selected (current_vol -1)
+ * and the given creation flag. Returns NULL when out of memory.
+ */
+static struct intel_super *alloc_super(int creating_imsm)
+{
+	struct intel_super *super = malloc(sizeof(*super));
+
+	if (!super)
+		return NULL;
+
+	memset(super, 0, sizeof(*super));
+	super->creating_imsm = creating_imsm;
+	super->current_vol = -1;
+	return super;
+}
+
+#ifndef MDASSEMBLE
+/* Load the metadata of a whole imsm container.
+ * 'fd' refers to the container array; its member devices are obtained via
+ * sysfs_read(). The member with the highest generation number supplies the
+ * metadata, then every member is registered in the disk list.
+ *
+ * keep_fd: non-zero to open the members read-write and keep their
+ *          descriptors in the disk list.
+ * On success *sbp receives the new intel_super and 0 is returned; 1 or 2 is
+ * returned on failure. 'devname' is not used.
+ *
+ * NOTE(review): 'sra' is never released here; no release function is
+ * visible from this code.
+ */
+static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
+			       char *devname, int keep_fd)
+{
+	struct mdinfo *sra;
+	struct intel_super *super;
+	struct mdinfo *sd, *best = NULL;
+	__u32 bestgen = 0;
+	__u32 gen;
+	char nm[32];	/* room for any "%d:%d" */
+	int dfd;
+	int rv;
+
+	/* check if this disk is a member of an active array */
+	sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
+	if (!sra)
+		return 1;
+
+	if (sra->array.major_version != -1 ||
+	    sra->array.minor_version != -2 ||
+	    strcmp(sra->text_version, "imsm") != 0)
+		return 1;
+
+	super = alloc_super(0);
+	if (!super)
+		return 1;
+
+	/* find the most up to date disk in this array */
+	for (sd = sra->devs; sd; sd = sd->next) {
+		snprintf(nm, sizeof(nm), "%d:%d", sd->disk.major, sd->disk.minor);
+		dfd = dev_open(nm, keep_fd ? O_RDWR : O_RDONLY);
+		/* dev_open() is used like open(2): 0 is a valid descriptor.
+		 * NOTE(review): assumes it reports failure with a negative
+		 * value - confirm against its definition
+		 */
+		if (dfd < 0) {
+			free_imsm(super);
+			return 2;
+		}
+		rv = load_imsm_mpb(dfd, super, NULL);
+		/* load_imsm_mpb() never retains the descriptor, so it is
+		 * closed here whatever keep_fd says
+		 */
+		close(dfd);
+		if (rv != 0) {
+			free_imsm(super);
+			return 2;
+		}
+		gen = __le32_to_cpu(super->anchor->generation_num);
+		if (!best || gen > bestgen) {
+			bestgen = gen;
+			best = sd;
+		}
+	}
+
+	if (!best) {
+		free_imsm(super);
+		return 1;
+	}
+
+	/* load the most up to date anchor */
+	snprintf(nm, sizeof(nm), "%d:%d", best->disk.major, best->disk.minor);
+	dfd = dev_open(nm, O_RDONLY);
+	if (dfd < 0) {
+		free_imsm(super);
+		return 1;
+	}
+	rv = load_imsm_mpb(dfd, super, NULL);
+	close(dfd);
+	if (rv != 0) {
+		free_imsm(super);
+		return 2;
+	}
+
+	/* reset the disk list */
+	free_imsm_disks(super);
+
+	/* populate disk list */
+	for (sd = sra->devs ; sd ; sd = sd->next) {
+		snprintf(nm, sizeof(nm), "%d:%d", sd->disk.major, sd->disk.minor);
+		dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
+		if (dfd < 0) {
+			free_imsm(super);
+			return 2;
+		}
+		load_imsm_disk(dfd, super, NULL, keep_fd);
+		if (!keep_fd)
+			close(dfd);
+	}
+
+	if (st->subarray[0]) {
+		/* NOTE(review): '<=' also accepts an index equal to
+		 * num_raid_devs, for which get_imsm_dev() returns NULL -
+		 * confirm whether '<' is intended
+		 */
+		if (atoi(st->subarray) <= super->anchor->num_raid_devs)
+			super->current_vol = atoi(st->subarray);
+		else {
+			/* do not leak the metadata loaded so far */
+			free_imsm(super);
+			return 1;
+		}
+	}
+
+	*sbp = super;
+	if (st->ss == NULL) {
+		st->ss = &super_imsm;
+		st->minor_version = 0;
+		st->max_devs = IMSM_MAX_DEVICES;
+		st->container_dev = fd2devnum(fd);
+	}
+
+	return 0;
+}
+#endif
+
+/* Load imsm metadata for 'st' from 'fd'.
+ * Unless built with MDASSEMBLE, 'fd' is first tried as a container via
+ * load_super_imsm_all(). Otherwise (and only when no subarray is requested)
+ * the metadata is read from 'fd' as a single member device.
+ * Returns 0 on success and a non-zero code on failure.
+ */
+static int load_super_imsm(struct supertype *st, int fd, char *devname)
+{
+	struct intel_super *super;
+	int rv;
+
+#ifndef MDASSEMBLE
+	if (load_super_imsm_all(st, fd, &st->sb, devname, 1) == 0)
+		return 0;
+#endif
+	if (st->subarray[0])
+		return 1; /* FIXME */
+
+	super = alloc_super(0);
+	if (!super) {
+		fprintf(stderr,
+			Name ": malloc of %zu failed.\n",
+			sizeof(*super));
+		return 1;
+	}
+
+	rv = load_imsm_mpb(fd, super, devname);
+	if (rv == 0) {
+		st->sb = super;
+		if (st->ss == NULL) {
+			st->ss = &super_imsm;
+			st->minor_version = 0;
+			st->max_devs = IMSM_MAX_DEVICES;
+		}
+		return 0;
+	}
+
+	/* loading failed */
+	if (devname)
+		fprintf(stderr,
+			Name ": Failed to load all information "
+			"sections on %s\n", devname);
+	free_imsm(super);
+	return rv;
+}
+
+/* Strip size in 512-byte blocks: fixed at 128 for level 1, otherwise the
+ * chunk size divided by 512.
+ */
+static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
+{
+	return info->level == 1 ? 128 : info->chunk_size >> 9;
+}
+
+/* Number of data stripes: twice info->size divided by the strip size,
+ * halved again for level 1.
+ */
+static __u32 info_to_num_data_stripes(mdu_array_info_t *info)
+{
+	__u32 stripes = (info->size * 2) / info_to_blocks_per_strip(info);
+
+	return info->level == 1 ? stripes / 2 : stripes;
+}
+
+/* Twice info->size with the bits below the strip size masked off. */
+static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
+{
+	int strip_mask = info_to_blocks_per_strip(info) - 1;
+
+	return (info->size * 2) & ~strip_mask;
+}
+
+/* Describe a new volume inside an already loaded container (st->sb set).
+ * The anchor buffer is grown when needed, a new raid device record is
+ * built from 'info' and 'name' and stored in dev_tbl[], and the volume
+ * becomes the current one (also recorded in st->subarray).
+ * 'size', 'homehost' and 'uuid' are not used.
+ * Returns 1 on success and 0 on failure; nothing is modified on failure
+ * except that the anchor buffer may already have been enlarged.
+ */
+static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
+				  unsigned long long size, char *name,
+				  char *homehost, int *uuid)
+{
+	/* We are creating a volume inside a pre-existing container.
+	 * so st->sb is already set.
+	 */
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb = super->anchor;
+	struct imsm_dev *dev;
+	struct imsm_vol *vol;
+	struct imsm_map *map;
+	int idx = mpb->num_raid_devs;
+	int i;
+	unsigned long long array_blocks;
+	__u32 offset = 0;
+	size_t size_old, size_new;
+	size_t dev_len;
+
+	/* validate everything before any state is touched */
+	if (mpb->num_raid_devs >= 2) {
+		fprintf(stderr, Name": This imsm-container already has the "
+			"maximum of 2 volumes\n");
+		return 0;
+	}
+	if (info->level == 1 && info->raid_disks > 2) {
+		fprintf(stderr, Name": imsm does not support more than 2 disks "
+			"in a raid1 volume\n");
+		return 0;
+	}
+
+	/* ensure the mpb is large enough for the new data */
+	size_old = __le32_to_cpu(mpb->mpb_size);
+	size_new = disks_to_mpb_size(info->nr_disks);
+	if (size_new > size_old) {
+		void *mpb_new;
+		size_t size_round = ROUND_UP(size_new, 512);
+
+		if (posix_memalign(&mpb_new, 512, size_round) != 0) {
+			fprintf(stderr, Name": could not allocate new mpb\n");
+			return 0;
+		}
+		memcpy(mpb_new, mpb, size_old);
+		free(mpb);
+		mpb = mpb_new;
+		super->anchor = mpb_new;
+		/* 'len' tracks the size of the buffer behind anchor/buf */
+		super->len = size_round;
+		mpb->mpb_size = __cpu_to_le32(size_new);
+		memset(mpb_new + size_old, 0, size_round - size_old);
+	}
+
+	dev_len = sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1);
+	dev = malloc(dev_len);
+	if (!dev) {
+		fprintf(stderr, Name": could not allocate raid device\n");
+		return 0;
+	}
+	/* reserved and filler fields end up in the metadata: start from a
+	 * zeroed record instead of uninitialised heap contents
+	 */
+	memset(dev, 0, dev_len);
+
+	super->current_vol = idx;
+	sprintf(st->subarray, "%d", idx);
+
+	/* volume[] is a fixed-width field; strncpy pads it with NULs */
+	if (name)
+		strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
+	array_blocks = calc_array_size(info->level, info->raid_disks,
+				       info->layout, info->chunk_size,
+				       info->size*2);
+	dev->size_low = __cpu_to_le32((__u32) array_blocks);
+	dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
+	dev->status = __cpu_to_le32(0);
+	dev->reserved_blocks = __cpu_to_le32(0);
+	vol = &dev->vol;
+	vol->migr_state = 0;
+	vol->migr_type = 0;
+	vol->dirty = 0;
+	/* the new volume starts behind the existing ones */
+	for (i = 0; i < idx; i++) {
+		struct imsm_dev *prev = get_imsm_dev(super, i);
+		struct imsm_map *pmap = &prev->vol.map[0];
+
+		offset += __le32_to_cpu(pmap->blocks_per_member);
+		offset += IMSM_RESERVED_SECTORS;
+	}
+	map = &vol->map[0];
+	map->pba_of_lba0 = __cpu_to_le32(offset);
+	map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
+	map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
+	map->num_data_stripes = __cpu_to_le32(info_to_num_data_stripes(info));
+	map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
+				       IMSM_T_STATE_NORMAL;
+
+	/* level 10 is stored as raid level 1 */
+	if (info->level == 10)
+		map->raid_level = 1;
+	else
+		map->raid_level = info->level;
+
+	map->num_members = info->raid_disks;
+	for (i = 0; i < map->num_members; i++) {
+		/* initialized in add_to_super */
+		map->disk_ord_tbl[i] = __cpu_to_le32(0);
+	}
+	mpb->num_raid_devs++;
+	super->dev_tbl[super->current_vol] = dev;
+
+	return 1;
+}
+
+/* init_super_imsm - start building imsm metadata for Create
+ *
+ * With @info == NULL, st->sb is cleared and 0 is returned.  When st->sb is
+ * already set the request is forwarded to init_super_imsm_volume().
+ * Otherwise a fresh, zeroed mpb sized for info->nr_disks is allocated,
+ * stamped with the signature and version strings, and stored in st->sb.
+ *
+ * Returns 1 on success, 0 otherwise.
+ */
+static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
+			   unsigned long long size, char *name,
+			   char *homehost, int *uuid)
+{
+	/* This is primarily called by Create when creating a new array.
+	 * We will then get add_to_super called for each component, and then
+	 * write_init_super called to write it out to each device.
+	 * For IMSM, Create can create on fresh devices or on a pre-existing
+	 * array.
+	 * To create on a pre-existing array a different method will be called.
+	 * This one is just for fresh drives.
+	 */
+	struct intel_super *super;
+	struct imsm_super *mpb;
+	size_t mpb_size;
+
+	if (!info) {
+		st->sb = NULL;
+		return 0;
+	}
+	if (st->sb)
+		return init_super_imsm_volume(st, info, size, name, homehost,
+					      uuid);
+
+	super = alloc_super(1);
+	if (!super)
+		return 0;
+	mpb_size = disks_to_mpb_size(info->nr_disks);
+	if (posix_memalign(&super->buf, 512, mpb_size) != 0) {
+		free(super);
+		return 0;
+	}
+	mpb = super->buf;
+	memset(mpb, 0, mpb_size);
+
+	memcpy(mpb->sig, MPB_SIGNATURE, strlen(MPB_SIGNATURE));
+	memcpy(mpb->sig + strlen(MPB_SIGNATURE), MPB_VERSION_RAID5,
+	       strlen(MPB_VERSION_RAID5));
+	/* mpb_size is kept little-endian; every reader in this file goes
+	 * through __le32_to_cpu()
+	 */
+	mpb->mpb_size = __cpu_to_le32(mpb_size);
+
+	st->sb = super;
+	return 1;
+}
+
+/* Record disk @dk as a member of the volume currently being created
+ * (super->current_vol).
+ *
+ * The disk is looked up by major/minor in the container's disk list.  If it
+ * is not found, or @dk does not have MD_DISK_SYNC set, nothing happens.
+ * Otherwise its container index is stored in the map's disk_ord_tbl at
+ * dk->number and the disk is flagged CONFIGURED | USABLE.
+ * @fd and @devname are not used here.
+ */
+static void add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
+				     int fd, char *devname)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
+	struct imsm_map *map = &dev->vol.map[0];
+	struct dl *member = super->disks;
+	struct imsm_disk *disk;
+	__u32 flags;
+
+	while (member &&
+	       (member->major != dk->major || member->minor != dk->minor))
+		member = member->next;
+
+	if (!member)
+		return;
+	if (!(dk->state & (1<<MD_DISK_SYNC)))
+		return;
+
+	map->disk_ord_tbl[dk->number] = __cpu_to_le32(member->index);
+
+	disk = get_imsm_disk(super, member->index);
+	flags = CONFIGURED_DISK | USABLE_DISK;
+	disk->status = __cpu_to_le32(flags);
+}
+
+/* add_to_super_imsm - add a device to the metadata being created
+ * @st: supertype holding the intel_super in ->sb
+ * @dk: disk description; dk->number is used as the container disk index
+ * @fd: open descriptor for the device (kept in the disk list entry)
+ * @devname: device name, may be NULL
+ *
+ * When a volume is being created (current_vol >= 0) this defers to
+ * add_to_super_imsm_volume().  Otherwise a new 'struct dl' and imsm_disk
+ * are built for the device and linked into the container.
+ *
+ * The function cannot report errors to its caller; like the pre-existing
+ * allocation and serial-number failures, any failure here aborts.
+ */
+static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
+			      int fd, char *devname)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb = super->anchor;
+	struct imsm_disk *disk;
+	struct dl *dd;
+	unsigned long long size;
+	__u32 status, id;
+	int rv;
+	struct stat stb;
+
+	if (super->current_vol >= 0) {
+		add_to_super_imsm_volume(st, dk, fd, devname);
+		return;
+	}
+
+	/* major/minor below come from stb, so it must be valid */
+	if (fstat(fd, &stb) != 0) {
+		fprintf(stderr, Name ": fstat failed %s:%d: %s\n",
+			__func__, __LINE__, strerror(errno));
+		abort();
+	}
+	dd = malloc(sizeof(*dd));
+	disk = malloc(sizeof(*disk));
+	if (!dd || !disk) {
+		fprintf(stderr,
+			Name ": malloc failed %s:%d.\n", __func__, __LINE__);
+		/* free(NULL) is a no-op, release whichever one succeeded */
+		free(dd);
+		free(disk);
+		abort();
+	}
+	memset(dd, 0, sizeof(*dd));
+	memset(disk, 0, sizeof(*disk));
+	dd->major = major(stb.st_rdev);
+	dd->minor = minor(stb.st_rdev);
+	dd->index = dk->number;
+	dd->devname = devname ? strdup(devname) : NULL;
+	dd->next = super->disks;
+	dd->fd = fd;
+	rv = imsm_read_serial(fd, devname, dd->serial);
+	if (rv) {
+		fprintf(stderr,
+			Name ": failed to retrieve scsi serial, aborting\n");
+		free(dd->devname);
+		free(dd);
+		free(disk);
+		abort();
+	}
+
+	/* get_dev_size() returns 0 on failure and then leaves 'size' unset */
+	if (!get_dev_size(fd, NULL, &size)) {
+		fprintf(stderr,
+			Name ": failed to determine device size, aborting\n");
+		free(dd->devname);
+		free(dd);
+		free(disk);
+		abort();
+	}
+
+	if (mpb->num_disks <= dk->number)
+		mpb->num_disks = dk->number + 1;
+
+	size /= 512;
+	status = USABLE_DISK | SPARE_DISK;
+	/* NOTE(review): strcpy relies on dd->serial being NUL terminated
+	 * within the destination field - confirm against imsm_read_serial()
+	 */
+	strcpy((char *) disk->serial, (char *) dd->serial);
+	/* only the low 32 bits of the sector count are stored */
+	disk->total_blocks = __cpu_to_le32(size);
+	disk->status = __cpu_to_le32(status);
+	if (sysfs_disk_to_scsi_id(fd, &id) == 0)
+		disk->scsi_id = __cpu_to_le32(id);
+	else
+		disk->scsi_id = __cpu_to_le32(0);
+	super->disk_tbl[dd->index] = disk;
+
+	/* update the family number if we are creating a container */
+	if (super->creating_imsm) {
+		disk = __get_imsm_disk(mpb, dd->index);
+		*disk = *super->disk_tbl[dd->index]; /* copy in new disk */
+		mpb->family_num = __cpu_to_le32(__gen_imsm_checksum(mpb));
+	}
+
+	super->disks = dd;
+}
+
+static int store_imsm_mpb(int fd, struct intel_super *super);
+
+/* Flush the in-memory metadata to every disk of the container.
+ *
+ * Increments the generation number, copies the per-disk and per-volume
+ * tables into the mpb, refreshes the checksum and stores the mpb on each
+ * device in super->disks.  With @doclose set, each descriptor is closed
+ * (and set to -1) after a successful store.
+ *
+ * Returns 1 when all devices were written, 0 as soon as one store fails.
+ */
+static int write_super_imsm(struct intel_super *super, int doclose)
+{
+	struct imsm_super *mpb = super->anchor;
+	struct dl *member;
+	__u32 gen;
+	int n;
+
+	/* 'generation' is incremented everytime the metadata is written */
+	gen = __le32_to_cpu(mpb->generation_num) + 1;
+	mpb->generation_num = __cpu_to_le32(gen);
+
+	for (n = 0; n < mpb->num_disks; n++)
+		mpb->disk[n] = *super->disk_tbl[n];
+
+	for (n = 0; n < mpb->num_raid_devs; n++) {
+		struct imsm_dev *slot = __get_imsm_dev(mpb, n);
+
+		imsm_copy_dev(slot, super->dev_tbl[n]);
+	}
+
+	/* the checksum has to cover the tables copied above */
+	mpb->check_sum = __cpu_to_le32(__gen_imsm_checksum(mpb));
+
+	for (member = super->disks; member; member = member->next) {
+		if (store_imsm_mpb(member->fd, super)) {
+			fprintf(stderr, "%s: failed for device %d:%d %s\n",
+				__func__, member->major, member->minor,
+				strerror(errno));
+			return 0;
+		}
+		if (!doclose)
+			continue;
+		close(member->fd);
+		member->fd = -1;
+	}
+
+	return 1;
+}
+
+/* write_init_super_imsm - commit freshly created metadata
+ *
+ * If st->update_tail is set, the new volume is not written directly:
+ * it is packaged as an update_create_array record and appended to the
+ * update queue, and the disk descriptors are closed.  Otherwise the
+ * metadata is written to all disks via write_super_imsm().
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int write_init_super_imsm(struct supertype *st)
+{
+	if (st->update_tail) {
+		/* queue the recently created array as a metadata update */
+		size_t len;
+		struct imsm_update_create_array *u;
+		struct intel_super *super = st->sb;
+		struct imsm_dev *dev;
+		struct dl *d;
+
+		if (super->current_vol < 0 ||
+		    !(dev = get_imsm_dev(super, super->current_vol))) {
+			fprintf(stderr, "%s: could not determine sub-array\n",
+				__func__);
+			return 1;
+		}
+
+		/* the update embeds an imsm_dev; size it for the real
+		 * (variable length) device record
+		 */
+		len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev);
+		u = malloc(len);
+		if (!u) {
+			fprintf(stderr, "%s: failed to allocate update buffer\n",
+				__func__);
+			return 1;
+		}
+
+		u->type = update_create_array;
+		u->dev_idx = super->current_vol;
+		imsm_copy_dev(&u->dev, dev);
+		append_metadata_update(st, u, len);
+
+		for (d = super->disks; d ; d = d->next) {
+			close(d->fd);
+			d->fd = -1;
+		}
+
+		return 0;
+	}
+
+	/* write_super_imsm() reports 1 on success and 0 on failure; map
+	 * that onto the 0-on-success convention used by the branch above
+	 */
+	return write_super_imsm(st->sb, 1) ? 0 : 1;
+}
+
+/* store_zero_imsm - overwrite the metadata anchor sector with zeros
+ * @st: unused
+ * @fd: open descriptor of the device
+ *
+ * Returns 0 on success, 1 on any failure.
+ */
+static int store_zero_imsm(struct supertype *st, int fd)
+{
+	unsigned long long dsize;
+	void *buf;
+	int rv;
+
+	/* without a valid size the seek target below would be garbage */
+	if (!get_dev_size(fd, NULL, &dsize))
+		return 1;
+
+	/* first block is stored on second to last sector of the disk */
+	if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
+		return 1;
+
+	if (posix_memalign(&buf, 512, 512) != 0)
+		return 1;
+
+	memset(buf, 0, 512);
+	rv = (write(fd, buf, 512) != 512) ? 1 : 0;
+	/* the bounce buffer is only needed for this one write */
+	free(buf);
+
+	return rv;
+}
+
+/* Check whether @dev can become part of a new imsm container.
+ *
+ * Returns 0 when @level is not LEVEL_CONTAINER, when @dev cannot be opened
+ * exclusively or when its size cannot be read.  Returns 1 when @dev is NULL
+ * (nothing to check), or after storing the usable size, as computed by
+ * avail_size_imsm() from the device size in 512-byte units, in *@freesize.
+ */
+static int validate_geometry_imsm_container(struct supertype *st, int level,
+					    int layout, int raiddisks, int chunk,
+					    unsigned long long size, char *dev,
+					    unsigned long long *freesize,
+					    int verbose)
+{
+	unsigned long long ldsize;
+	int have_size;
+	int fd;
+
+	if (level != LEVEL_CONTAINER)
+		return 0;
+	if (dev == NULL)
+		return 1;
+
+	fd = open(dev, O_RDONLY|O_EXCL, 0);
+	if (fd < 0) {
+		if (verbose)
+			fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
+				dev, strerror(errno));
+		return 0;
+	}
+
+	/* the descriptor is only needed to query the size */
+	have_size = get_dev_size(fd, dev, &ldsize) ? 1 : 0;
+	close(fd);
+	if (!have_size)
+		return 0;
+
+	*freesize = avail_size_imsm(st, ldsize >> 9);
+
+	return 1;
+}
+
+/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
+ * FIX ME add ahci details
+ *
+ * Check whether a volume with the requested geometry fits in the container
+ * loaded in st->sb.
+ *
+ * With @dev == NULL: count the container disks that have a free gap of at
+ * least @size * 2 blocks at a common start offset and require at least
+ * @raiddisks of them.
+ * With @dev set: @dev must be a block device belonging to the container;
+ * the largest free gap found on it is returned through *@freesize.
+ *
+ * Returns 1 if the geometry is acceptable, 0 otherwise.
+ */
+static int validate_geometry_imsm_volume(struct supertype *st, int level,
+					 int layout, int raiddisks, int chunk,
+					 unsigned long long size, char *dev,
+					 unsigned long long *freesize,
+					 int verbose)
+{
+	struct stat stb;
+	struct intel_super *super = st->sb;
+	struct dl *dl;
+	unsigned long long pos = 0;
+	unsigned long long maxsize;
+	struct extent *e;
+	int i;
+
+	if (level == LEVEL_CONTAINER)
+		return 0;
+
+	if (level == 1 && raiddisks > 2) {
+		if (verbose)
+			fprintf(stderr, Name ": imsm does not support more "
+				"than 2 in a raid1 configuration\n");
+		return 0;
+	}
+
+	/* We must have the container info already read in. */
+	if (!super)
+		return 0;
+
+	if (!dev) {
+		/* General test:  make sure there is space for
+		 * 'raiddisks' device extents of size 'size' at a given
+		 * offset
+		 */
+		unsigned long long minsize = size*2 /* convert to blocks */;
+		unsigned long long start_offset = ~0ULL;
+		int dcnt = 0;
+		if (minsize == 0)
+			minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
+		for (dl = super->disks; dl ; dl = dl->next) {
+			int found = 0;
+
+			pos = 0;
+			i = 0;
+			e = get_extents(super, dl);
+			if (!e) continue;
+			/* walk the gaps between used extents; the list is
+			 * terminated by an entry with size 0
+			 */
+			do {
+				unsigned long long esize;
+				esize = e[i].start - pos;
+				if (esize >= minsize)
+					found = 1;
+				if (found && start_offset == ~0ULL) {
+					/* first fitting gap fixes the offset
+					 * all other disks must match
+					 */
+					start_offset = pos;
+					break;
+				} else if (found && pos != start_offset) {
+					found = 0;
+					break;
+				}
+				pos = e[i].start + e[i].size;
+				i++;
+			} while (e[i-1].size);
+			if (found)
+				dcnt++;
+			free(e);
+		}
+		if (dcnt < raiddisks) {
+			if (verbose)
+				fprintf(stderr, Name ": imsm: Not enough "
+					"devices with space for this array "
+					"(%d < %d)\n",
+					dcnt, raiddisks);
+			return 0;
+		}
+		return 1;
+	}
+	/* This device must be a member of the set */
+	if (stat(dev, &stb) < 0)
+		return 0;
+	if ((S_IFMT & stb.st_mode) != S_IFBLK)
+		return 0;
+	for (dl = super->disks ; dl ; dl = dl->next) {
+		if (dl->major == major(stb.st_rdev) &&
+		    dl->minor == minor(stb.st_rdev))
+			break;
+	}
+	if (!dl) {
+		if (verbose)
+			fprintf(stderr, Name ": %s is not in the "
+				"same imsm set\n", dev);
+		return 0;
+	}
+	e = get_extents(super, dl);
+	maxsize = 0;
+	i = 0;
+	if (e) do {
+		unsigned long long esize;
+		esize = e[i].start - pos;
+		if (esize >= maxsize)
+			maxsize = esize;
+		pos = e[i].start + e[i].size;
+		i++;
+	} while (e[i-1].size);
+	/* the extent list is ours to release, as in the general test above */
+	free(e);
+	*freesize = maxsize;
+
+	return 1;
+}
+
+/* validate_geometry_imsm - entry point for geometry validation
+ *
+ * Dispatches to the container check for LEVEL_CONTAINER and to the volume
+ * check when a container is already loaded in st->sb.  Otherwise @dev is
+ * expected to be busy because it is a member of an imsm container; that
+ * container is located, loaded into st->sb and the volume check is run
+ * against it.
+ *
+ * Returns non-zero if the geometry is acceptable, 0 otherwise.
+ */
+static int validate_geometry_imsm(struct supertype *st, int level, int layout,
+				  int raiddisks, int chunk, unsigned long long size,
+				  char *dev, unsigned long long *freesize,
+				  int verbose)
+{
+	int fd, cfd;
+	struct mdinfo *sra;
+
+	/* if given unused devices create a container
+	 * if given devices in a container create a member volume
+	 */
+	if (level == LEVEL_CONTAINER) {
+		/* Must be a fresh device to add to a container */
+		return validate_geometry_imsm_container(st, level, layout,
+							raiddisks, chunk, size,
+							dev, freesize,
+							verbose);
+	}
+
+	if (st->sb) {
+		/* creating in a given container */
+		return validate_geometry_imsm_volume(st, level, layout,
+						     raiddisks, chunk, size,
+						     dev, freesize, verbose);
+	}
+
+	/* limit creation to the following levels */
+	/* NOTE(review): for a supported level with dev == NULL control
+	 * falls through to open(NULL) below - confirm the intended result
+	 */
+	if (!dev)
+		switch (level) {
+		case 0:
+		case 1:
+		case 10:
+		case 5:
+			break;
+		default:
+			return 1;
+		}
+
+	/* This device needs to be a device in an 'imsm' container */
+	fd = open(dev, O_RDONLY|O_EXCL, 0);
+	if (fd >= 0) {
+		/* an exclusive open succeeding means nobody owns the device */
+		if (verbose)
+			fprintf(stderr,
+				Name ": Cannot create this array on device %s\n",
+				dev);
+		close(fd);
+		return 0;
+	}
+	if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
+		if (verbose)
+			fprintf(stderr, Name ": Cannot open %s: %s\n",
+				dev, strerror(errno));
+		return 0;
+	}
+	/* Well, it is in use by someone, maybe an 'imsm' container. */
+	cfd = open_container(fd);
+	if (cfd < 0) {
+		close(fd);
+		if (verbose)
+			fprintf(stderr, Name ": Cannot use %s: It is busy\n",
+				dev);
+		return 0;
+	}
+	/* NOTE(review): 'sra' is never released on any path - confirm which
+	 * helper owns freeing the result of sysfs_read()
+	 */
+	sra = sysfs_read(cfd, 0, GET_VERSION);
+	close(fd);
+	if (sra && sra->array.major_version == -1 &&
+	    strcmp(sra->text_version, "imsm") == 0) {
+		/* This is a member of a imsm container.  Load the container
+		 * and try to create a volume
+		 */
+		struct intel_super *super;
+
+		if (load_super_imsm_all(st, cfd, (void **) &super, NULL, 1) == 0) {
+			st->sb = super;
+			st->container_dev = fd2devnum(cfd);
+			close(cfd);
+			return validate_geometry_imsm_volume(st, level, layout,
+							     raiddisks, chunk,
+							     size, dev,
+							     freesize, verbose);
+		}
+		close(cfd);
+	} else {
+		/* may belong to another container; do not leak the
+		 * container descriptor on the way out
+		 */
+		close(cfd);
+		return 0;
+	}
+
+	return 1;
+}
+
+/* container_content_imsm - describe every volume of a loaded container
+ *
+ * Builds one mdinfo per raid device in the mpb (newest first in the
+ * returned list) and, for each map slot whose disk is present in
+ * super->disks, a device mdinfo hanging off ->devs.
+ *
+ * Returns the head of the list, or NULL if there are no volumes.  If an
+ * allocation fails, the entries built so far are returned.
+ */
+static struct mdinfo *container_content_imsm(struct supertype *st)
+{
+	/* Given a container loaded by load_super_imsm_all,
+	 * extract information about all the arrays into
+	 * an mdinfo tree.
+	 *
+	 * For each imsm_dev create an mdinfo, fill it in,
+	 *  then look for matching devices in super->disks
+	 *  and create appropriate device mdinfo.
+	 */
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb = super->anchor;
+	struct mdinfo *rest = NULL;
+	int i;
+
+	for (i = 0; i < mpb->num_raid_devs; i++) {
+		struct imsm_dev *dev = get_imsm_dev(super, i);
+		struct imsm_vol *vol = &dev->vol;
+		struct imsm_map *map = vol->map;
+		struct mdinfo *this;
+		int slot;
+
+		this = malloc(sizeof(*this));
+		if (!this)
+			break; /* hand back what has been built so far */
+		memset(this, 0, sizeof(*this));
+		this->next = rest;
+		rest = this;
+
+		this->array.level = get_imsm_raid_level(map);
+		this->array.raid_disks = map->num_members;
+		this->array.layout = imsm_level_to_layout(this->array.level);
+		this->array.md_minor = -1;
+		this->array.ctime = 0;
+		this->array.utime = 0;
+		/* blocks_per_strip is in 512-byte blocks, chunk_size in bytes */
+		this->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9;
+		this->array.state = !vol->dirty;
+		this->container_member = i;
+		/* an uninitialized or dirty volume must resync from the start */
+		if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty)
+			this->resync_start = 0;
+		else
+			this->resync_start = ~0ULL;
+
+		strncpy(this->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
+		this->name[MAX_RAID_SERIAL_LEN] = 0;
+
+		/* NOTE(review): check whether devnum2devname() returns an
+		 * allocated string that should be freed here
+		 */
+		sprintf(this->text_version, "/%s/%d",
+			devnum2devname(st->container_dev),
+			this->container_member);
+
+		memset(this->uuid, 0, sizeof(this->uuid));
+
+		this->component_size = __le32_to_cpu(map->blocks_per_member);
+
+		for (slot = 0 ; slot <  map->num_members; slot++) {
+			struct imsm_disk *disk;
+			struct mdinfo *info_d;
+			struct dl *d;
+			int idx;
+			__u32 s;
+
+			/* convert to host order first, then drop the top
+			 * byte of the ordinal
+			 */
+			idx = __le32_to_cpu(map->disk_ord_tbl[slot]) &
+			      ~(0xffU << 24);
+			for (d = super->disks; d ; d = d->next)
+				if (d->index == idx)
+					break;
+
+			if (d == NULL)
+				break; /* shouldn't this be continue ?? */
+
+			info_d = malloc(sizeof(*info_d));
+			if (!info_d)
+				break; /* ditto ?? */
+			memset(info_d, 0, sizeof(*info_d));
+			info_d->next = this->devs;
+			this->devs = info_d;
+
+			disk = get_imsm_disk(super, idx);
+			s = __le32_to_cpu(disk->status);
+
+			info_d->disk.number = d->index;
+			info_d->disk.major = d->major;
+			info_d->disk.minor = d->minor;
+			info_d->disk.raid_disk = slot;
+			info_d->disk.state  = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0;
+			info_d->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0;
+			info_d->disk.state |= s & USABLE_DISK ? (1 << MD_DISK_SYNC) : 0;
+
+			this->array.working_disks++;
+
+			info_d->events = __le32_to_cpu(mpb->generation_num);
+			info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
+			info_d->component_size = __le32_to_cpu(map->blocks_per_member);
+			if (d->devname)
+				strcpy(info_d->name, d->devname);
+		}
+	}
+
+	return rest;
+}
+
+
+/* imsm_open_new - bind a newly noticed array to its volume in the container
+ * @c: container supertype
+ * @a: array being opened; a->info.container_member receives the index
+ * @inst: decimal volume index as a string
+ *
+ * Returns 0 on success, -ENODEV if the index is not a valid volume number.
+ */
+static int imsm_open_new(struct supertype *c, struct active_array *a,
+			 char *inst)
+{
+	struct intel_super *super = c->sb;
+	struct imsm_super *mpb = super->anchor;
+	int idx = atoi(inst);
+
+	/* a negative value must not be accepted as a table index either */
+	if (idx < 0 || idx >= mpb->num_raid_devs) {
+		fprintf(stderr, "%s: subarry index %d, out of range\n",
+			__func__, idx);
+		return -ENODEV;
+	}
+
+	dprintf("imsm: open_new %s\n", inst);
+	a->info.container_member = idx;
+	return 0;
+}
+
+/* Map a failed-disk count onto a volume state for volume @n.
+ *
+ * With no failures the state recorded in the map is returned unchanged.
+ * Level 0 fails on any failure; level 1 is degraded until every member has
+ * failed; level 5 tolerates one failure.  For level 10 @failed is ignored
+ * and the disk status flags are scanned instead: the volume is failed once
+ * both disks of an adjacent pair are marked FAILED_DISK, degraded otherwise.
+ * Any other level returns the recorded map state.
+ */
+static __u8 imsm_check_degraded(struct intel_super *super, int n, int failed)
+{
+	struct imsm_dev *dev = get_imsm_dev(super, n);
+	struct imsm_map *map = dev->vol.map;
+	/* FIXME is a mirror always two disks, and are they always adjacent? */
+	const int device_per_mirror = 2;
+	int mirror_failures = 0;
+	int level;
+	int i;
+
+	if (failed == 0)
+		return map->map_state;
+
+	level = get_imsm_raid_level(map);
+
+	if (level == 0)
+		return IMSM_T_STATE_FAILED;
+	if (level == 1)
+		return failed < map->num_members ? IMSM_T_STATE_DEGRADED
+						 : IMSM_T_STATE_FAILED;
+	if (level == 5)
+		return failed < 2 ? IMSM_T_STATE_DEGRADED
+				  : IMSM_T_STATE_FAILED;
+	if (level != 10)
+		return map->map_state;
+
+	/* level 10: look for a mirror set that lost all of its disks */
+	for (i = 0; i < map->num_members; i++) {
+		int idx = get_imsm_disk_idx(map, i);
+		struct imsm_disk *disk = get_imsm_disk(super, idx);
+
+		if (__le32_to_cpu(disk->status) & FAILED_DISK)
+			mirror_failures++;
+
+		if (mirror_failures >= device_per_mirror)
+			return IMSM_T_STATE_FAILED;
+
+		/* start counting afresh at each mirror set boundary */
+		if ((i + 1) % device_per_mirror == 0)
+			mirror_failures = 0;
+	}
+
+	return IMSM_T_STATE_DEGRADED;
+}
+
+/* Count the members of @map whose disk status has FAILED_DISK set. */
+static int imsm_count_failed(struct intel_super *super, struct imsm_map *map)
+{
+	int slot = 0;
+	int count = 0;
+
+	while (slot < map->num_members) {
+		struct imsm_disk *disk;
+
+		disk = get_imsm_disk(super, get_imsm_disk_idx(map, slot));
+		if (__le32_to_cpu(disk->status) & FAILED_DISK)
+			count++;
+		slot++;
+	}
+
+	return count;
+}
+
+/* Reflect the array's clean/dirty state in the metadata.
+ *
+ * When a->resync_start is ~0ULL the map state is recomputed from the
+ * current failure count (NORMAL when nothing has failed).  The volume's
+ * dirty flag is set to !@consistent.  Each actual change bumps
+ * super->updates_pending.
+ */
+static void imsm_set_array_state(struct active_array *a, int consistent)
+{
+	int inst = a->info.container_member;
+	struct intel_super *super = a->container->sb;
+	struct imsm_dev *dev = get_imsm_dev(super, inst);
+	struct imsm_map *map = &dev->vol.map[0];
+	int want_dirty = !consistent;
+
+	if (a->resync_start == ~0ULL) {
+		int nfailed = imsm_count_failed(super, map);
+		__u8 new_state = imsm_check_degraded(super, inst, nfailed);
+
+		if (nfailed == 0)
+			new_state = IMSM_T_STATE_NORMAL;
+
+		if (new_state != map->map_state) {
+			dprintf("imsm: map_state %d: %d\n",
+				inst, new_state);
+			map->map_state = new_state;
+			super->updates_pending++;
+		}
+	}
+
+	if (dev->vol.dirty == want_dirty)
+		return;
+
+	dprintf("imsm: mark '%s' (%llu)\n",
+		want_dirty?"dirty":"clean", a->resync_start);
+
+	dev->vol.dirty = want_dirty;
+	super->updates_pending++;
+}
+
+/* imsm_set_disk - record a state change of one member of an array
+ * @a: the array
+ * @n: slot of the member in the array's map; negative values are ignored
+ * @state: DS_* flags describing the member
+ *
+ * A newly faulty member is flagged FAILED_DISK and the map state is
+ * recomputed.  Without new failures, a DEGRADED map returns to NORMAL once
+ * every device of the array reports DS_INSYNC.
+ */
+static void imsm_set_disk(struct active_array *a, int n, int state)
+{
+	int inst = a->info.container_member;
+	struct intel_super *super = a->container->sb;
+	struct imsm_dev *dev = get_imsm_dev(super, inst);
+	struct imsm_map *map = dev->vol.map;
+	struct imsm_disk *disk;
+	__u32 status;
+	int failed = 0;
+	int new_failure = 0;
+
+	if (n < 0)
+		return;
+
+	/* valid slots are 0..num_members-1; indexing the ordinal table
+	 * with anything else would read past its end
+	 */
+	if (n >= map->num_members) {
+		fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
+			n, map->num_members - 1);
+		return;
+	}
+
+	dprintf("imsm: set_disk %d:%x\n", n, state);
+
+	disk = get_imsm_disk(super, get_imsm_disk_idx(map, n));
+
+	/* check for new failures */
+	status = __le32_to_cpu(disk->status);
+	if ((state & DS_FAULTY) && !(status & FAILED_DISK)) {
+		status |= FAILED_DISK;
+		disk->status = __cpu_to_le32(status);
+		new_failure = 1;
+		super->updates_pending++;
+	}
+
+	/* the number of failures have changed, count up 'failed' to determine
+	 * degraded / failed status
+	 */
+	if (new_failure && map->map_state != IMSM_T_STATE_FAILED)
+		failed = imsm_count_failed(super, map);
+
+	/* determine map_state based on failed or in_sync count */
+	if (failed)
+		map->map_state = imsm_check_degraded(super, inst, failed);
+	else if (map->map_state == IMSM_T_STATE_DEGRADED) {
+		struct mdinfo *d;
+		int working = 0;
+
+		for (d = a->info.devs ; d ; d = d->next)
+			if (d->curr_state & DS_INSYNC)
+				working++;
+
+		if (working == a->info.array.raid_disks) {
+			map->map_state = IMSM_T_STATE_NORMAL;
+			super->updates_pending++;
+		}
+	}
+}
+
+/* store_imsm_mpb - write the metadata buffer to one device
+ * @fd: open descriptor of the device
+ * @super: metadata whose ->buf is written
+ *
+ * The first 512 bytes (the anchor) go to the second to last sector of the
+ * device; if the mpb is larger, the remainder is written to the sectors
+ * immediately before the anchor.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int store_imsm_mpb(int fd, struct intel_super *super)
+{
+	struct imsm_super *mpb = super->anchor;
+	__u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
+	unsigned long long dsize;
+	unsigned long long sectors;
+
+	/* never seek/write relative to an unknown device size */
+	if (!get_dev_size(fd, NULL, &dsize))
+		return 1;
+
+	if (mpb_size > 512) {
+		/* -1 to account for anchor */
+		sectors = mpb_sectors(mpb) - 1;
+
+		/* write the extended mpb to the sectors preceding the anchor */
+		if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
+			return 1;
+
+		if (write(fd, (char *) super->buf + 512, 512 * sectors) != 512 * sectors)
+			return 1;
+	}
+
+	/* first block is stored on second to last sector of the disk */
+	if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
+		return 1;
+
+	if (write(fd, super->buf, 512) != 512)
+		return 1;
+
+	return 0;
+}
+
+/* Write the container metadata out if any update is pending, keeping the
+ * disk descriptors open, then clear the pending counter.  The result of the
+ * write is not examined.
+ */
+static void imsm_sync_metadata(struct supertype *container)
+{
+	struct intel_super *super = container->sb;
+
+	if (super->updates_pending) {
+		write_super_imsm(super, 0);
+		super->updates_pending = 0;
+	}
+}
+
+/* imsm_activate_spare - pick spares for the vacant slots of a degraded array
+ * @a: the array
+ * @updates: list head; on success one metadata_update describing the chosen
+ *	     spares is pushed onto it
+ *
+ * Returns a list of mdinfo, one per slot for which a spare was found, or
+ * NULL if nothing is to be done (faulty device not yet removed, array not
+ * degraded, no suitable spare, or out of memory).
+ */
+static struct mdinfo *imsm_activate_spare(struct active_array *a,
+					  struct metadata_update **updates)
+{
+	/**
+	 * Take a device that is marked spare in the metadata and use it to
+	 * replace a failed/vacant slot in an array.  There may be a case where
+	 * a device is failed in one array but active in a second.
+	 * imsm_process_update catches this case and does not clear the SPARE_DISK
+	 * flag, allowing the second array to start using the device on failure.
+	 * SPARE_DISK is cleared when all arrays are using a device.
+	 *
+	 * FIXME: is this a valid use of SPARE_DISK?
+	 */
+
+	struct intel_super *super = a->container->sb;
+	int inst = a->info.container_member;
+	struct imsm_dev *dev = get_imsm_dev(super, inst);
+	struct imsm_map *map = dev->vol.map;
+	int failed = a->info.array.raid_disks;
+	struct mdinfo *rv = NULL;
+	struct mdinfo *d;
+	struct mdinfo *di;
+	struct metadata_update *mu;
+	struct dl *dl;
+	struct imsm_update_activate_spare *u;
+	int num_spares = 0;
+	int i;
+
+	for (d = a->info.devs ; d ; d = d->next) {
+		if ((d->curr_state & DS_FAULTY) &&
+			d->state_fd >= 0)
+			/* wait for Removal to happen */
+			return NULL;
+		if (d->state_fd >= 0)
+			failed--;
+	}
+
+	dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
+		inst, failed, a->info.array.raid_disks, a->info.array.level);
+	if (imsm_check_degraded(super, inst, failed) != IMSM_T_STATE_DEGRADED)
+		return NULL;
+
+	/* For each slot, if it is not working, find a spare */
+	dl = super->disks;
+	for (i = 0; i < a->info.array.raid_disks; i++) {
+		for (d = a->info.devs ; d ; d = d->next)
+			if (d->disk.raid_disk == i)
+				break;
+		dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
+		if (d && (d->state_fd >= 0))
+			continue;
+
+		/* OK, this device needs recovery.  Find a spare */
+		for ( ; dl ; dl = dl->next) {
+			unsigned long long pos;
+			struct mdinfo *d2;
+			struct extent *ex;
+			struct imsm_disk *disk;
+			int j;
+			int found;
+			__u32 array_start;
+
+			/* If in this array, skip */
+			for (d2 = a->info.devs ; d2 ; d2 = d2->next)
+				if (d2->disk.major == dl->major &&
+				    d2->disk.minor == dl->minor) {
+					dprintf("%x:%x already in array\n", dl->major, dl->minor);
+					break;
+				}
+			if (d2)
+				continue;
+
+			/* is this unused device marked as a spare? */
+			disk = get_imsm_disk(super, dl->index);
+			if (!(__le32_to_cpu(disk->status) & SPARE_DISK))
+				continue;
+
+			/* We are allowed to use this device - is there space?
+			 * We need a->info.component_size sectors */
+			ex = get_extents(super, dl);
+			if (!ex) {
+				dprintf("cannot get extents\n");
+				continue;
+			}
+			found = 0;
+			j = 0;
+			pos = 0;
+			array_start = __le32_to_cpu(map->pba_of_lba0);
+
+			do {
+				/* check that we can start at pba_of_lba0 with
+				 * a->info.component_size of space
+				 */
+				if (array_start >= pos &&
+				    array_start + a->info.component_size < ex[j].start) {
+					found = 1;
+					break;
+				}
+				pos = ex[j].start + ex[j].size;
+				j++;
+
+			} while (ex[j-1].size);
+
+			free(ex);
+			if (!found) {
+				dprintf("%x:%x does not have %llu at %d\n",
+					dl->major, dl->minor,
+					a->info.component_size,
+					__le32_to_cpu(map->pba_of_lba0));
+				/* No room */
+				continue;
+			}
+
+			/* found a usable disk with enough space */
+			di = malloc(sizeof(*di));
+			if (!di)
+				goto fail;
+			memset(di, 0, sizeof(*di));
+			di->disk.number = dl->index;
+			di->disk.raid_disk = i;
+			di->disk.major = dl->major;
+			di->disk.minor = dl->minor;
+			di->disk.state = 0;
+			di->data_offset = array_start;
+			di->component_size = a->info.component_size;
+			di->container_member = inst;
+			di->next = rv;
+			rv = di;
+			num_spares++;
+			dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
+				i, pos);
+
+			/* step past the disk just taken: it is not yet in
+			 * a->info.devs, so the next vacant slot would
+			 * otherwise select the very same spare again
+			 */
+			dl = dl->next;
+			break;
+		}
+	}
+
+	if (!rv)
+		/* No spares found */
+		return rv;
+	/* Now 'rv' has a list of devices to return.
+	 * Create a metadata_update record to update the
+	 * disk_ord_tbl for the array
+	 */
+	mu = malloc(sizeof(*mu));
+	if (!mu)
+		goto fail;
+	mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
+	if (!mu->buf) {
+		free(mu);
+		goto fail;
+	}
+	mu->space = NULL;
+	mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
+	mu->next = *updates;
+	u = (struct imsm_update_activate_spare *) mu->buf;
+
+	/* the records sit back to back in mu->buf and are chained via ->next */
+	for (di = rv ; di ; di = di->next) {
+		u->type = update_activate_spare;
+		u->disk_idx = di->disk.number;
+		u->slot = di->disk.raid_disk;
+		u->array = inst;
+		u->next = u + 1;
+		u++;
+	}
+	(u-1)->next = NULL;
+	*updates = mu;
+
+	return rv;
+
+fail:
+	/* out of memory: drop the partial result, *updates is untouched */
+	while (rv) {
+		di = rv->next;
+		free(rv);
+		rv = di;
+	}
+	return NULL;
+}
+
+/* Return the number of bits set in @field. */
+static int weight(unsigned int field)
+{
+	int bits = 0;
+
+	/* each pass clears the lowest set bit */
+	while (field) {
+		field &= field - 1;
+		bits++;
+	}
+
+	return bits;
+}
+
+/* Return 1 if any disk index appears in both @m1 and @m2, 0 otherwise. */
+static int disks_overlap(struct imsm_map *m1, struct imsm_map *m2)
+{
+	int a;
+
+	for (a = 0; a < m1->num_members; a++) {
+		int wanted = get_imsm_disk_idx(m1, a);
+		int b = 0;
+
+		while (b < m2->num_members) {
+			if (get_imsm_disk_idx(m2, b) == wanted)
+				return 1;
+			b++;
+		}
+	}
+
+	return 0;
+}
+
+/* imsm_process_update - apply a queued metadata update to the container
+ * @st: container supertype
+ * @update: envelope; the first field of update->buf is the update type
+ *
+ * Handles update_activate_spare and update_create_array.  An update that
+ * cannot be applied is dropped silently or with a debug/error message.
+ */
+static void imsm_process_update(struct supertype *st,
+			        struct metadata_update *update)
+{
+	/**
+	 * crack open the metadata_update envelope to find the update record
+	 * update can be one of:
+	 * 	update_activate_spare - a spare device has replaced a failed
+	 * 	device in an array, update the disk_ord_tbl.  If this disk is
+	 * 	present in all member arrays then also clear the SPARE_DISK
+	 * 	flag
+	 */
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb = super->anchor;
+	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
+
+	switch (type) {
+	case update_activate_spare: {
+		struct imsm_update_activate_spare *u = (void *) update->buf;
+		struct imsm_dev *dev = get_imsm_dev(super, u->array);
+		struct imsm_map *map = &dev->vol.map[0];
+		struct active_array *a;
+		struct imsm_disk *disk;
+		__u32 status;
+		struct dl *dl;
+		struct mdinfo *d;
+		unsigned int members;
+		unsigned int found;
+		int victim;
+		int i;
+
+		for (dl = super->disks; dl; dl = dl->next)
+			if (dl->index == u->disk_idx)
+				break;
+
+		if (!dl) {
+			fprintf(stderr, "error: imsm_activate_spare passed "
+				"an unknown disk_idx: %d\n", u->disk_idx);
+			return;
+		}
+
+		super->updates_pending++;
+
+		/* remember which disk is being displaced from the slot */
+		victim = get_imsm_disk_idx(map, u->slot);
+		map->disk_ord_tbl[u->slot] = __cpu_to_le32(u->disk_idx);
+		disk = get_imsm_disk(super, u->disk_idx);
+		status = __le32_to_cpu(disk->status);
+		status |= CONFIGURED_DISK;
+		disk->status = __cpu_to_le32(status);
+
+		/* map unique/live arrays using the spare */
+		members = 0;
+		found = 0;
+		for (a = st->arrays; a; a = a->next) {
+			int inst = a->info.container_member;
+
+			dev = get_imsm_dev(super, inst);
+			map = &dev->vol.map[0];
+			if (map->raid_level > 0)
+				members |= 1 << inst;
+			for (d = a->info.devs; d; d = d->next)
+				if (d->disk.major == dl->major &&
+				    d->disk.minor == dl->minor)
+					found |= 1 << inst;
+		}
+
+		/* until all arrays that can absorb this disk have absorbed
+		 * this disk it can still be considered a spare
+		 */
+		if (weight(found) >= weight(members)) {
+			status = __le32_to_cpu(disk->status);
+			status &= ~SPARE_DISK;
+			disk->status = __cpu_to_le32(status);
+		}
+
+		/* count arrays using the victim in the metadata */
+		found = 0;
+		for (a = st->arrays; a ; a = a->next) {
+			dev = get_imsm_dev(super, a->info.container_member);
+			map = &dev->vol.map[0];
+			for (i = 0; i < map->num_members; i++)
+				if (victim == get_imsm_disk_idx(map, i))
+					found++;
+		}
+
+		/* clear some flags if the victim is no longer being
+		 * utilized anywhere
+		 */
+		disk = get_imsm_disk(super, victim);
+		if (!found) {
+			status = __le32_to_cpu(disk->status);
+			status &= ~(CONFIGURED_DISK | USABLE_DISK);
+			disk->status = __cpu_to_le32(status);
+		}
+		break;
+	}
+	case update_create_array: {
+		/* someone wants to create a new array, we need to be aware of
+		 * a few races/collisions:
+		 * 1/ 'Create' called by two separate instances of mdadm
+		 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
+		 *     devices that have since been assimilated via
+		 *     activate_spare.
+		 * In the event this update can not be carried out mdadm will
+		 * (FIX ME) notice that its update did not take hold.
+		 */
+		struct imsm_update_create_array *u = (void *) update->buf;
+		struct imsm_dev *dev;
+		struct imsm_map *map, *new_map;
+		unsigned long long start, end;
+		unsigned long long new_start, new_end;
+		int i;
+		int overlap = 0;
+
+		/* handle racing creates: first come first serve */
+		if (u->dev_idx < mpb->num_raid_devs) {
+			dprintf("%s: subarray %d already defined\n",
+				__func__, u->dev_idx);
+			return;
+		}
+
+		/* check update is next in sequence */
+		if (u->dev_idx != mpb->num_raid_devs) {
+			dprintf("%s: can not create arrays out of sequence\n",
+				__func__);
+			return;
+		}
+
+		new_map = &u->dev.vol.map[0];
+		new_start = __le32_to_cpu(new_map->pba_of_lba0);
+		new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
+
+		/* handle activate_spare versus create race:
+		 * check to make sure that overlapping arrays do not include
+		 * overlapping disks
+		 */
+		/* NOTE(review): 'overlap' is never reset, so once one array
+		 * overlaps, later arrays are tested for shared disks as well
+		 */
+		for (i = 0; i < mpb->num_raid_devs; i++) {
+			dev = get_imsm_dev(super, i);
+			map = &dev->vol.map[0];
+			start = __le32_to_cpu(map->pba_of_lba0);
+			end = start + __le32_to_cpu(map->blocks_per_member);
+			if ((new_start >= start && new_start <= end) ||
+			    (start >= new_start && start <= new_end))
+				overlap = 1;
+			if (overlap && disks_overlap(map, new_map)) {
+				dprintf("%s: arrays overlap\n", __func__);
+				return;
+			}
+		}
+		/* check num_members sanity */
+		if (new_map->num_members > mpb->num_disks) {
+			dprintf("%s: num_disks out of range\n", __func__);
+			return;
+		}
+
+		/* check that prepare update was successful */
+		if (!update->space) {
+			dprintf("%s: prepare update failed\n", __func__);
+			return;
+		}
+
+		super->updates_pending++;
+		/* take over the buffer set aside by prepare_update */
+		dev = update->space;
+		update->space = NULL;
+		imsm_copy_dev(dev, &u->dev);
+		super->dev_tbl[u->dev_idx] = dev;
+		mpb->num_raid_devs++;
+
+		/* fix up flags, if arrays overlap then the drives can not be
+		 * spares
+		 */
+		/* walk the members of the array being created: 'map' still
+		 * refers to the last pre-existing array, or is unset when
+		 * this is the first array in the container
+		 */
+		for (i = 0; i < new_map->num_members; i++) {
+			struct imsm_disk *disk;
+			__u32 status;
+
+			disk = get_imsm_disk(super, get_imsm_disk_idx(new_map, i));
+			status = __le32_to_cpu(disk->status);
+			status |= CONFIGURED_DISK;
+			if (overlap)
+				status &= ~SPARE_DISK;
+			disk->status = __cpu_to_le32(status);
+		}
+		break;
+	}
+	}
+}
+
+/* Pre-allocate whatever memory applying @update will need.
+ *
+ * For update_create_array a buffer large enough for the new raid device
+ * record is stored in update->space (NULL if the allocation fails); other
+ * update types need nothing.
+ */
+static void imsm_prepare_update(struct supertype *st,
+				struct metadata_update *update)
+{
+	/**
+	 * Allocate space to hold new disk entries, raid-device entries or a
+	 * new mpb if necessary.  We currently maintain an mpb large enough to
+	 * hold 2 subarrays for the given number of disks.  This may not be
+	 * sufficient when reshaping.
+	 *
+	 * FIX ME handle the reshape case.
+	 *
+	 * The monitor will be able to safely change super->mpb by arranging
+	 * for it to be freed in check_update_queue().  I.e. the monitor thread
+	 * will start using the new pointer and the manager can continue to use
+	 * the old value until check_update_queue() runs.
+	 */
+	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
+
+	if (type == update_create_array) {
+		struct imsm_update_create_array *u = (void *) update->buf;
+
+		update->space = malloc(sizeof_imsm_dev(&u->dev));
+	}
+}
+
+/* Method table exposing the imsm metadata handler.  The examine/detail and
+ * write_init_super entries are left out of MDASSEMBLE builds.
+ */
+struct superswitch super_imsm = {
+#ifndef	MDASSEMBLE
+	.examine_super	= examine_super_imsm,
+	.brief_examine_super = brief_examine_super_imsm,
+	.detail_super	= detail_super_imsm,
+	.brief_detail_super = brief_detail_super_imsm,
+	.write_init_super = write_init_super_imsm,
+#endif
+	.match_home	= match_home_imsm,
+	.uuid_from_super= uuid_from_super_imsm,
+	.getinfo_super  = getinfo_super_imsm,
+	.update_super	= update_super_imsm,
+
+	.avail_size	= avail_size_imsm,
+
+	.compare_super	= compare_super_imsm,
+
+	.load_super	= load_super_imsm,
+	.init_super	= init_super_imsm,
+	.add_to_super	= add_to_super_imsm,
+	.store_super	= store_zero_imsm,
+	.free_super	= free_super_imsm,
+	.match_metadata_desc = match_metadata_desc_imsm,
+	.container_content = container_content_imsm,
+
+	.validate_geometry = validate_geometry_imsm,
+	.external	= 1,
+
+/* for mdmon */
+	/* .load_super is already set above; it is not repeated here */
+	.open_new	= imsm_open_new,
+	.set_array_state= imsm_set_array_state,
+	.set_disk	= imsm_set_disk,
+	.sync_metadata	= imsm_sync_metadata,
+	.activate_spare	= imsm_activate_spare,
+	.process_update	= imsm_process_update,
+	.prepare_update	= imsm_prepare_update,
+};
diff --git a/super0.c b/super0.c
index 8e4c568e..ab636605 100644
--- a/super0.c
+++ b/super0.c
@@ -53,7 +53,7 @@ static unsigned long calc_sb0_csum(mdp_super_t *super)
}
-void super0_swap_endian(struct mdp_superblock_s *sb)
+static void super0_swap_endian(struct mdp_superblock_s *sb)
{
/* as super0 superblocks are host-endian, it is sometimes
* useful to be able to swap the endianness
@@ -369,6 +369,8 @@ static void getinfo_super0(struct supertype *st, struct mdinfo *info)
info->events = md_event(sb);
info->data_offset = 0;
+ sprintf(info->text_version, "0.%d", sb->minor_version);
+
uuid_from_super0(st, info->uuid);
if (sb->minor_version > 90 && (sb->reshape_position+1) != 0) {
@@ -552,12 +554,14 @@ static int init_super0(struct supertype *st, mdu_array_info_t *info,
unsigned long long size, char *ignored_name, char *homehost,
int *uuid)
{
- mdp_super_t *sb = malloc(MD_SB_BYTES + sizeof(bitmap_super_t));
+ mdp_super_t *sb;
int spares;
+
+ posix_memalign((void**)&sb, 512, MD_SB_BYTES + sizeof(bitmap_super_t));
memset(sb, 0, MD_SB_BYTES + sizeof(bitmap_super_t));
st->sb = sb;
- if (info->major_version == -1) {
+ if (info == NULL) {
/* zeroing the superblock */
return 0;
}
@@ -623,17 +627,38 @@ static int init_super0(struct supertype *st, mdu_array_info_t *info,
return 1;
}
+struct devinfo {	/* one device queued by add_to_super0() for write_init_super0() */
+ int fd;	/* descriptor handed to add_to_super0(); write_init_super0() closes it and stores -1 */
+ char *devname;	/* caller's name string; the pointer is stored, not copied */
+ mdu_disk_info_t disk;	/* copy of the disk info passed to add_to_super0() */
+ struct devinfo *next;	/* next queued device; NULL at the tail */
+};
/* Add a device to the superblock being created */
-static void add_to_super0(struct supertype *st, mdu_disk_info_t *dinfo)
+static void add_to_super0(struct supertype *st, mdu_disk_info_t *dinfo,
+ int fd, char *devname)
{
mdp_super_t *sb = st->sb;
mdp_disk_t *dk = &sb->disks[dinfo->number];
+ struct devinfo *di, **dip;
dk->number = dinfo->number;
dk->major = dinfo->major;
dk->minor = dinfo->minor;
dk->raid_disk = dinfo->raid_disk;
dk->state = dinfo->state;
+
+ sb->this_disk = sb->disks[dinfo->number];
+ sb->sb_csum = calc_sb0_csum(sb);
+
+ dip = (struct devinfo **)&st->info;
+ while (*dip)
+ dip = &(*dip)->next;
+ di = malloc(sizeof(struct devinfo));
+ di->fd = fd;
+ di->devname = devname;
+ di->disk = *dinfo;
+ di->next = NULL;
+ *dip = di;
}
static int store_super0(struct supertype *st, int fd)
@@ -661,7 +686,8 @@ static int store_super0(struct supertype *st, int fd)
if (super->state & (1<<MD_SB_BITMAP_PRESENT)) {
struct bitmap_super_s * bm = (struct bitmap_super_s*)(super+1);
if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC)
- if (write(fd, bm, sizeof(*bm)) != sizeof(*bm))
+ if (write(fd, bm, ROUND_UP(sizeof(*bm),512)) !=
+ ROUND_UP(sizeof(*bm),512))
return 5;
}
@@ -669,32 +695,41 @@ static int store_super0(struct supertype *st, int fd)
return 0;
}
-static int write_init_super0(struct supertype *st,
- mdu_disk_info_t *dinfo, char *devname)
+#ifndef MDASSEMBLE
+static int write_init_super0(struct supertype *st)
{
mdp_super_t *sb = st->sb;
- int fd = open(devname, O_RDWR|O_EXCL);
- int rv;
+ int rv = 0;
+ struct devinfo *di;
- if (fd < 0) {
- fprintf(stderr, Name ": Failed to open %s to write superblock\n", devname);
- return -1;
- }
+ for (di = st->info ; di && ! rv ; di = di->next) {
- sb->disks[dinfo->number].state &= ~(1<<MD_DISK_FAULTY);
+ if (di->disk.state == 1)
+ continue;
+ if (di->fd == -1)
+ continue;
+ Kill(di->devname, 0, 1, 1);
+ Kill(di->devname, 0, 1, 1);
- sb->this_disk = sb->disks[dinfo->number];
- sb->sb_csum = calc_sb0_csum(sb);
- rv = store_super0(st, fd);
+ sb->disks[di->disk.number].state &= ~(1<<MD_DISK_FAULTY);
- if (rv == 0 && (sb->state & (1<<MD_SB_BITMAP_PRESENT)))
- rv = st->ss->write_bitmap(st, fd);
+ sb->this_disk = sb->disks[di->disk.number];
+ sb->sb_csum = calc_sb0_csum(sb);
+ rv = store_super0(st, di->fd);
- close(fd);
- if (rv)
- fprintf(stderr, Name ": failed to write superblock to %s\n", devname);
+ if (rv == 0 && (sb->state & (1<<MD_SB_BITMAP_PRESENT)))
+ rv = st->ss->write_bitmap(st, di->fd);
+
+ if (rv)
+ fprintf(stderr,
+ Name ": failed to write superblock to %s\n",
+ di->devname);
+ close(di->fd);
+ di->fd = -1;
+ }
return rv;
}
+#endif
static int compare_super0(struct supertype *st, struct supertype *tst)
{
@@ -712,7 +747,8 @@ static int compare_super0(struct supertype *st, struct supertype *tst)
if (second->md_magic != MD_SB_MAGIC)
return 1;
if (!first) {
- first = malloc(MD_SB_BYTES + sizeof(struct bitmap_super_s));
+ posix_memalign((void**)&first, 512,
+ MD_SB_BYTES + sizeof(struct bitmap_super_s));
memcpy(first, second, MD_SB_BYTES + sizeof(struct bitmap_super_s));
st->sb = first;
return 0;
@@ -754,6 +790,9 @@ static int load_super0(struct supertype *st, int fd, char *devname)
free_super0(st);
+ if (st->subarray[0])
+ return 1;
+
if (!get_dev_size(fd, devname, &dsize))
return 1;
@@ -778,7 +817,7 @@ static int load_super0(struct supertype *st, int fd, char *devname)
return 1;
}
- super = malloc(MD_SB_BYTES + sizeof(bitmap_super_t));
+ posix_memalign((void**)&super, 512, MD_SB_BYTES + sizeof(bitmap_super_t)+512);
if (read(fd, super, sizeof(*super)) != MD_SB_BYTES) {
if (devname)
@@ -812,6 +851,7 @@ static int load_super0(struct supertype *st, int fd, char *devname)
st->ss = &super0;
st->minor_version = super->minor_version;
st->max_devs = MD_SB_DISKS;
+ st->info = NULL;
}
/* Now check on the bitmap superblock */
@@ -821,8 +861,8 @@ static int load_super0(struct supertype *st, int fd, char *devname)
* valid. If it doesn't clear the bit. An --assemble --force
* should get that written out.
*/
- if (read(fd, super+1, sizeof(struct bitmap_super_s))
- != sizeof(struct bitmap_super_s))
+ if (read(fd, super+1, ROUND_UP(sizeof(struct bitmap_super_s),512))
+ != ROUND_UP(sizeof(struct bitmap_super_s),512))
goto no_bitmap;
uuid_from_super0(st, uuid);
@@ -843,7 +883,9 @@ static struct supertype *match_metadata_desc0(char *arg)
struct supertype *st = malloc(sizeof(*st));
if (!st) return st;
+ memset(st, 0, sizeof(*st));
st->ss = &super0;
+ st->info = NULL;
st->minor_version = 90;
st->max_devs = MD_SB_DISKS;
st->sb = NULL;
@@ -919,7 +961,7 @@ static int add_internal_bitmap0(struct supertype *st, int *chunkp,
}
-void locate_bitmap0(struct supertype *st, int fd)
+static void locate_bitmap0(struct supertype *st, int fd)
{
unsigned long long dsize;
unsigned long long offset;
@@ -939,7 +981,7 @@ void locate_bitmap0(struct supertype *st, int fd)
lseek64(fd, offset, 0);
}
-int write_bitmap0(struct supertype *st, int fd)
+static int write_bitmap0(struct supertype *st, int fd)
{
unsigned long long dsize;
unsigned long long offset;
@@ -948,7 +990,8 @@ int write_bitmap0(struct supertype *st, int fd)
int rv = 0;
int towrite, n;
- char buf[4096];
+ char abuf[4096+512];
+ char *buf = (char*)(((long)(abuf+512))&~511UL);
if (!get_dev_size(fd, NULL, &dsize))
return 1;
@@ -964,21 +1007,19 @@ int write_bitmap0(struct supertype *st, int fd)
if (lseek64(fd, offset + 4096, 0)< 0LL)
return 3;
-
- if (write(fd, ((char*)sb)+MD_SB_BYTES, sizeof(bitmap_super_t)) !=
- sizeof(bitmap_super_t))
- return -2;
- towrite = 64*1024 - MD_SB_BYTES - sizeof(bitmap_super_t);
- memset(buf, 0xff, sizeof(buf));
+ memset(buf, 0xff, 4096);
+ memcpy(buf, ((char*)sb)+MD_SB_BYTES, sizeof(bitmap_super_t));
+ towrite = 64*1024;
while (towrite > 0) {
n = towrite;
- if (n > sizeof(buf))
- n = sizeof(buf);
+ if (n > 4096)
+ n = 4096;
n = write(fd, buf, n);
if (n > 0)
towrite -= n;
else
break;
+ memset(buf, 0xff, 4096);
}
fsync(fd);
if (towrite)
@@ -994,6 +1035,46 @@ static void free_super0(struct supertype *st)
st->sb = NULL;
}
+/*
+ * Decide whether a 0.90 array of the requested shape is acceptable and,
+ * when 'subdev' is given, whether that device can be used.
+ *
+ * Returns 0 (rejected) when level is LEVEL_CONTAINER, when raiddisks
+ * exceeds MD_SB_DISKS, or when size exceeds 0x7fffffff << 10.
+ * With subdev == NULL only those checks are made and 1 is returned.
+ * Otherwise subdev is opened read-only with O_EXCL; failure to open it,
+ * to obtain its size, or a size below MD_RESERVED_SECTORS * 512 returns 0.
+ * On success *freesize is set to MD_NEW_SIZE_SECTORS(ldsize >> 9) and 1
+ * is returned.  st, layout and chunk are not examined.
+ */
+static int validate_geometry0(struct supertype *st, int level,
+			      int layout, int raiddisks,
+			      int chunk, unsigned long long size,
+			      char *subdev, unsigned long long *freesize,
+			      int verbose)
+{
+	unsigned long long ldsize;
+	int fd;
+
+	if (level == LEVEL_CONTAINER)
+		return 0;
+	if (raiddisks > MD_SB_DISKS)
+		return 0;
+	if (size > (0x7fffffffULL<<10))
+		return 0;
+	if (!subdev)
+		return 1;
+
+	fd = open(subdev, O_RDONLY|O_EXCL, 0);
+	if (fd < 0) {
+		if (verbose)
+			fprintf(stderr, Name ": super0.90 cannot open %s: %s\n",
+				subdev, strerror(errno));
+		return 0;
+	}
+
+	if (!get_dev_size(fd, subdev, &ldsize)) {
+		close(fd);
+		return 0;
+	}
+	close(fd);
+
+	if (ldsize < MD_RESERVED_SECTORS * 512)
+		return 0;
+	/* the size limit was already enforced above, so it is not re-tested */
+	*freesize = MD_NEW_SIZE_SECTORS(ldsize >> 9);
+	return 1;
+}
+
struct superswitch super0 = {
#ifndef MDASSEMBLE
.examine_super = examine_super0,
@@ -1002,6 +1083,7 @@ struct superswitch super0 = {
.detail_super = detail_super0,
.brief_detail_super = brief_detail_super0,
.export_detail_super = export_detail_super0,
+ .write_init_super = write_init_super0,
#endif
.match_home = match_home0,
.uuid_from_super = uuid_from_super0,
@@ -1010,7 +1092,6 @@ struct superswitch super0 = {
.init_super = init_super0,
.add_to_super = add_to_super0,
.store_super = store_super0,
- .write_init_super = write_init_super0,
.compare_super = compare_super0,
.load_super = load_super0,
.match_metadata_desc = match_metadata_desc0,
@@ -1019,6 +1100,5 @@ struct superswitch super0 = {
.locate_bitmap = locate_bitmap0,
.write_bitmap = write_bitmap0,
.free_super = free_super0,
- .major = 0,
- .swapuuid = 0,
+ .validate_geometry = validate_geometry0,
};
diff --git a/super1.c b/super1.c
index fe915f8d..06d0a187 100644
--- a/super1.c
+++ b/super1.c
@@ -493,7 +493,7 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info)
int role;
info->array.major_version = 1;
- info->array.minor_version = __le32_to_cpu(sb->feature_map);
+ info->array.minor_version = st->minor_version;
info->array.patch_version = 0;
info->array.raid_disks = __le32_to_cpu(sb->raid_disks);
info->array.level = __le32_to_cpu(sb->level);
@@ -531,6 +531,7 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info)
info->disk.raid_disk = role;
}
info->events = __le64_to_cpu(sb->events);
+ sprintf(info->text_version, "1.%d", st->minor_version);
memcpy(info->uuid, sb->set_uuid, 16);
@@ -670,7 +671,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
__le64_to_cpu(sb->data_offset)) {
/* set data_size to device size less data_offset */
struct misc_dev_info *misc = (struct misc_dev_info*)
- (st->sb + 1024 + sizeof(struct bitmap_super_s));
+ (st->sb + 1024 + 512);
printf("Size was %llu\n", (unsigned long long)
__le64_to_cpu(sb->data_size));
sb->data_size = __cpu_to_le64(
@@ -688,15 +689,17 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
static int init_super1(struct supertype *st, mdu_array_info_t *info,
unsigned long long size, char *name, char *homehost, int *uuid)
{
- struct mdp_superblock_1 *sb = malloc(1024 + sizeof(bitmap_super_t) +
- sizeof(struct misc_dev_info));
+ struct mdp_superblock_1 *sb;
int spares;
int rfd;
char defname[10];
+
+ posix_memalign((void**)&sb, 512, (1024 + 512 +
+ sizeof(struct misc_dev_info)));
memset(sb, 0, 1024);
st->sb = sb;
- if (info->major_version == -1) {
+ if (info == NULL) {
/* zeroing superblock */
return 0;
}
@@ -767,17 +770,39 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
return 1;
}
+struct devinfo {	/* one device queued by add_to_super1() for write_init_super1() */
+ int fd;	/* descriptor handed to add_to_super1(); write_init_super1() closes it and stores -1 */
+ char *devname;	/* caller's name string; the pointer is stored, not copied */
+ mdu_disk_info_t disk;	/* copy of the disk info passed to add_to_super1() */
+ struct devinfo *next;	/* next queued device; NULL at the tail */
+};
/* Add a device to the superblock being created */
-static void add_to_super1(struct supertype *st, mdu_disk_info_t *dk)
+static void add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
+ int fd, char *devname)
{
struct mdp_superblock_1 *sb = st->sb;
__u16 *rp = sb->dev_roles + dk->number;
+ struct devinfo *di, **dip;
+
if ((dk->state & 6) == 6) /* active, sync */
*rp = __cpu_to_le16(dk->raid_disk);
else if ((dk->state & ~2) == 0) /* active or idle -> spare */
*rp = 0xffff;
else
*rp = 0xfffe;
+
+ sb->dev_number = __cpu_to_le32(dk->number);
+ sb->sb_csum = calc_sb_1_csum(sb);
+
+ dip = (struct devinfo **)&st->info;
+ while (*dip)
+ dip = &(*dip)->next;
+ di = malloc(sizeof(struct devinfo));
+ di->fd = fd;
+ di->devname = devname;
+ di->disk = *dk;
+ di->next = NULL;
+ *dip = di;
}
static void locate_bitmap1(struct supertype *st, int fd);
@@ -834,6 +859,7 @@ static int store_super1(struct supertype *st, int fd)
return 3;
sbsize = sizeof(*sb) + 2 * __le32_to_cpu(sb->max_dev);
+ sbsize = (sbsize+511)&(~511UL);
if (write(fd, sb, sbsize) != sbsize)
return 4;
@@ -843,7 +869,8 @@ static int store_super1(struct supertype *st, int fd)
(((char*)sb)+1024);
if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC) {
locate_bitmap1(st, fd);
- if (write(fd, bm, sizeof(*bm)) != sizeof(*bm))
+ if (write(fd, bm, ROUND_UP(sizeof(*bm),512)) !=
+ ROUND_UP(sizeof(*bm),512))
return 5;
}
}
@@ -866,123 +893,133 @@ static unsigned long choose_bm_space(unsigned long devsize)
return 4*2;
}
-static int write_init_super1(struct supertype *st,
- mdu_disk_info_t *dinfo, char *devname)
+#ifndef MDASSEMBLE
+static int write_init_super1(struct supertype *st)
{
struct mdp_superblock_1 *sb = st->sb;
struct supertype refst;
- int fd = open(devname, O_RDWR | O_EXCL);
int rfd;
- int rv;
+ int rv = 0;
int bm_space;
-
+ struct devinfo *di;
unsigned long long dsize, array_size;
long long sb_offset;
+ for (di = st->info; di && ! rv ; di = di->next) {
+ if (di->disk.state == 1)
+ continue;
+ if (di->fd < 0)
+ continue;
- if (fd < 0) {
- fprintf(stderr, Name ": Failed to open %s to write superblock\n",
- devname);
- return -1;
- }
+ Kill(di->devname, 0, 1, 1);
+ Kill(di->devname, 0, 1, 1);
- sb->dev_number = __cpu_to_le32(dinfo->number);
- if (dinfo->state & (1<<MD_DISK_WRITEMOSTLY))
- sb->devflags |= __cpu_to_le32(WriteMostly1);
+ sb->dev_number = __cpu_to_le32(di->disk.number);
+ if (di->disk.state & (1<<MD_DISK_WRITEMOSTLY))
+ sb->devflags |= __cpu_to_le32(WriteMostly1);
- if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
- read(rfd, sb->device_uuid, 16) != 16) {
- *(__u32*)(sb->device_uuid) = random();
- *(__u32*)(sb->device_uuid+4) = random();
- *(__u32*)(sb->device_uuid+8) = random();
- *(__u32*)(sb->device_uuid+12) = random();
- }
- if (rfd >= 0) close(rfd);
- sb->events = 0;
-
- refst =*st;
- refst.sb = NULL;
- if (load_super1(&refst, fd, NULL)==0) {
- struct mdp_superblock_1 *refsb = refst.sb;
-
- memcpy(sb->device_uuid, refsb->device_uuid, 16);
- if (memcmp(sb->set_uuid, refsb->set_uuid, 16)==0) {
- /* same array, so preserve events and dev_number */
- sb->events = refsb->events;
- /* bugs in 2.6.17 and earlier mean the dev_number
- * chosen in Manage must be preserved
- */
- if (get_linux_version() >= 2006018)
- sb->dev_number = refsb->dev_number;
+ if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
+ read(rfd, sb->device_uuid, 16) != 16) {
+ *(__u32*)(sb->device_uuid) = random();
+ *(__u32*)(sb->device_uuid+4) = random();
+ *(__u32*)(sb->device_uuid+8) = random();
+ *(__u32*)(sb->device_uuid+12) = random();
+ }
+ if (rfd >= 0) close(rfd);
+ sb->events = 0;
+
+ refst =*st;
+ refst.sb = NULL;
+ if (load_super1(&refst, di->fd, NULL)==0) {
+ struct mdp_superblock_1 *refsb = refst.sb;
+
+ memcpy(sb->device_uuid, refsb->device_uuid, 16);
+ if (memcmp(sb->set_uuid, refsb->set_uuid, 16)==0) {
+ /* same array, so preserve events and
+ * dev_number */
+ sb->events = refsb->events;
+ /* bugs in 2.6.17 and earlier mean the
+ * dev_number chosen in Manage must be preserved
+ */
+ if (get_linux_version() >= 2006018)
+ sb->dev_number = refsb->dev_number;
+ }
+ free(refsb);
}
- free(refsb);
- }
-
- if (!get_dev_size(fd, NULL, &dsize))
- return 1;
- dsize >>= 9;
- if (dsize < 24) {
- close(fd);
- return 2;
- }
+ if (!get_dev_size(di->fd, NULL, &dsize))
+ return 1;
+ dsize >>= 9;
+ if (dsize < 24) {
+ close(di->fd);
+ return 2;
+ }
- /*
- * Calculate the position of the superblock.
- * It is always aligned to a 4K boundary and
- * depending on minor_version, it can be:
- * 0: At least 8K, but less than 12K, from end of device
- * 1: At start of device
- * 2: 4K from start of device.
- * Depending on the array size, we might leave extra space
- * for a bitmap.
- */
- array_size = __le64_to_cpu(sb->size);
- /* work out how much space we left for a bitmap */
- bm_space = choose_bm_space(array_size);
- switch(st->minor_version) {
- case 0:
- sb_offset = dsize;
- sb_offset -= 8*2;
- sb_offset &= ~(4*2-1);
- sb->super_offset = __cpu_to_le64(sb_offset);
- sb->data_offset = __cpu_to_le64(0);
+ /*
+ * Calculate the position of the superblock.
+ * It is always aligned to a 4K boundary and
+ * depending on minor_version, it can be:
+ * 0: At least 8K, but less than 12K, from end of device
+ * 1: At start of device
+ * 2: 4K from start of device.
+ * Depending on the array size, we might leave extra space
+ * for a bitmap.
+ */
+ array_size = __le64_to_cpu(sb->size);
+ /* work out how much space we left for a bitmap */
+ bm_space = choose_bm_space(array_size);
+
+ switch(st->minor_version) {
+ case 0:
+ sb_offset = dsize;
+ sb_offset -= 8*2;
+ sb_offset &= ~(4*2-1);
+ sb->super_offset = __cpu_to_le64(sb_offset);
+ sb->data_offset = __cpu_to_le64(0);
if (sb_offset - bm_space < array_size)
bm_space = sb_offset - array_size;
- sb->data_size = __cpu_to_le64(sb_offset - bm_space);
- break;
- case 1:
- sb->super_offset = __cpu_to_le64(0);
- if (4*2 + bm_space + __le64_to_cpu(sb->size) > dsize)
- bm_space = dsize - __le64_to_cpu(sb->size) - 4*2;
- sb->data_offset = __cpu_to_le64(bm_space + 4*2);
- sb->data_size = __cpu_to_le64(dsize - bm_space - 4*2);
- break;
- case 2:
- sb_offset = 4*2;
- sb->super_offset = __cpu_to_le64(4*2);
- if (4*2 + 4*2 + bm_space + __le64_to_cpu(sb->size) > dsize)
- bm_space = dsize - __le64_to_cpu(sb->size) - 4*2 - 4*2;
- sb->data_offset = __cpu_to_le64(4*2 + 4*2 + bm_space);
- sb->data_size = __cpu_to_le64(dsize - 4*2 - 4*2 - bm_space );
- break;
- default:
- return -EINVAL;
- }
+ sb->data_size = __cpu_to_le64(sb_offset - bm_space);
+ break;
+ case 1:
+ sb->super_offset = __cpu_to_le64(0);
+ if (4*2 + bm_space + __le64_to_cpu(sb->size) > dsize)
+ bm_space = dsize - __le64_to_cpu(sb->size) -4*2;
+ sb->data_offset = __cpu_to_le64(bm_space + 4*2);
+ sb->data_size = __cpu_to_le64(dsize - bm_space - 4*2);
+ break;
+ case 2:
+ sb_offset = 4*2;
+ sb->super_offset = __cpu_to_le64(4*2);
+ if (4*2 + 4*2 + bm_space + __le64_to_cpu(sb->size)
+ > dsize)
+ bm_space = dsize - __le64_to_cpu(sb->size)
+ - 4*2 - 4*2;
+ sb->data_offset = __cpu_to_le64(4*2 + 4*2 + bm_space);
+ sb->data_size = __cpu_to_le64(dsize - 4*2 - 4*2
+ - bm_space );
+ break;
+ default:
+ return -EINVAL;
+ }
- sb->sb_csum = calc_sb_1_csum(sb);
- rv = store_super1(st, fd);
- if (rv)
- fprintf(stderr, Name ": failed to write superblock to %s\n", devname);
+ sb->sb_csum = calc_sb_1_csum(sb);
+ rv = store_super1(st, di->fd);
+ if (rv)
+ fprintf(stderr,
+ Name ": failed to write superblock to %s\n",
+ di->devname);
- if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
- rv = st->ss->write_bitmap(st, fd);
- close(fd);
+ if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
+ rv = st->ss->write_bitmap(st, di->fd);
+ close(di->fd);
+ di->fd = -1;
+ }
return rv;
}
+#endif
static int compare_super1(struct supertype *st, struct supertype *tst)
{
@@ -1002,9 +1039,10 @@ static int compare_super1(struct supertype *st, struct supertype *tst)
return 1;
if (!first) {
- first = malloc(1024+sizeof(bitmap_super_t) +
+ posix_memalign((void**)&first, 512,
+ 1024 + 512 +
sizeof(struct misc_dev_info));
- memcpy(first, second, 1024+sizeof(bitmap_super_t) +
+ memcpy(first, second, 1024 + 512 +
sizeof(struct misc_dev_info));
st->sb = first;
return 0;
@@ -1035,13 +1073,16 @@ static int load_super1(struct supertype *st, int fd, char *devname)
free_super1(st);
+ if (st->subarray[0])
+ return 1;
+
if (st->ss == NULL || st->minor_version == -1) {
int bestvers = -1;
struct supertype tst;
__u64 bestctime = 0;
/* guess... choose latest ctime */
+ memset(&tst, 0, sizeof(tst));
tst.ss = &super1;
- tst.sb = NULL;
for (tst.minor_version = 0; tst.minor_version <= 2 ; tst.minor_version++) {
switch(load_super1(&tst, fd, devname)) {
case 0: super = tst.sb;
@@ -1114,7 +1155,8 @@ static int load_super1(struct supertype *st, int fd, char *devname)
return 1;
}
- super = malloc(1024 + sizeof(bitmap_super_t) +
+ posix_memalign((void**)&super, 512,
+ 1024 + 512 +
sizeof(struct misc_dev_info));
if (read(fd, super, 1024) != 1024) {
@@ -1151,7 +1193,7 @@ static int load_super1(struct supertype *st, int fd, char *devname)
bsb = (struct bitmap_super_s *)(((char*)super)+1024);
- misc = (struct misc_dev_info*) (bsb+1);
+ misc = (struct misc_dev_info*) (((char*)super)+1024+512);
misc->device_size = dsize;
/* Now check on the bitmap superblock */
@@ -1162,8 +1204,8 @@ static int load_super1(struct supertype *st, int fd, char *devname)
* should get that written out.
*/
locate_bitmap1(st, fd);
- if (read(fd, ((char*)super)+1024, sizeof(struct bitmap_super_s))
- != sizeof(struct bitmap_super_s))
+ if (read(fd, ((char*)super)+1024, 512)
+ != 512)
goto no_bitmap;
uuid_from_super1(st, uuid);
@@ -1183,6 +1225,7 @@ static struct supertype *match_metadata_desc1(char *arg)
struct supertype *st = malloc(sizeof(*st));
if (!st) return st;
+ memset(st, 0, sizeof(*st));
st->ss = &super1;
st->max_devs = 384;
st->sb = NULL;
@@ -1199,7 +1242,7 @@ static struct supertype *match_metadata_desc1(char *arg)
return st;
}
if (strcmp(arg, "1") == 0 ||
- strcmp(arg, "default/large") == 0) {
+ strcmp(arg, "default") == 0) {
st->minor_version = -1;
return st;
}
@@ -1382,25 +1425,28 @@ static int write_bitmap1(struct supertype *st, int fd)
int rv = 0;
int towrite, n;
- char buf[4096];
+ char abuf[4096+512];
+ char *buf = (char*)(((long)(abuf+512))&~511UL);
locate_bitmap1(st, fd);
- if (write(fd, ((char*)sb)+1024, sizeof(bitmap_super_t)) !=
- sizeof(bitmap_super_t))
- return -2;
+ memset(buf, 0xff, 4096);
+ memcpy(buf, ((char*)sb)+1024, sizeof(bitmap_super_t));
+
towrite = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
towrite = (towrite+7) >> 3; /* bits to bytes */
- memset(buf, 0xff, sizeof(buf));
+ towrite += sizeof(bitmap_super_t);
+ towrite = ROUND_UP(towrite, 512);
while (towrite > 0) {
n = towrite;
- if (n > sizeof(buf))
- n = sizeof(buf);
+ if (n > 4096)
+ n = 4096;
n = write(fd, buf, n);
if (n > 0)
towrite -= n;
else
break;
+ memset(buf, 0xff, 4096);
}
fsync(fd);
if (towrite)
@@ -1416,6 +1462,38 @@ static void free_super1(struct supertype *st)
st->sb = NULL;
}
+/*
+ * Decide whether a 1.x array may be created and, when 'subdev' is given,
+ * how much of that device is available.
+ *
+ * LEVEL_CONTAINER is rejected (0).  With no subdev the answer is 1.
+ * Otherwise subdev is opened read-only with O_EXCL; if it cannot be
+ * opened or sized the answer is 0, else *freesize receives
+ * avail_size1(st, <device bytes> >> 9) and the answer is 1.
+ * layout, raiddisks, chunk and size are not examined.
+ */
+static int validate_geometry1(struct supertype *st, int level,
+			      int layout, int raiddisks,
+			      int chunk, unsigned long long size,
+			      char *subdev, unsigned long long *freesize,
+			      int verbose)
+{
+	unsigned long long dev_bytes;
+	int devfd;
+	int sized;
+
+	if (level == LEVEL_CONTAINER)
+		return 0;
+	if (subdev == NULL)
+		return 1;
+
+	devfd = open(subdev, O_RDONLY|O_EXCL, 0);
+	if (devfd < 0) {
+		if (verbose)
+			fprintf(stderr, Name ": super1.x cannot open %s: %s\n",
+				subdev, strerror(errno));
+		return 0;
+	}
+
+	/* the descriptor is only needed for the size query */
+	sized = get_dev_size(devfd, subdev, &dev_bytes);
+	close(devfd);
+	if (!sized)
+		return 0;
+
+	*freesize = avail_size1(st, dev_bytes >> 9);
+	return 1;
+}
+
struct superswitch super1 = {
#ifndef MDASSEMBLE
.examine_super = examine_super1,
@@ -1424,6 +1502,7 @@ struct superswitch super1 = {
.detail_super = detail_super1,
.brief_detail_super = brief_detail_super1,
.export_detail_super = export_detail_super1,
+ .write_init_super = write_init_super1,
#endif
.match_home = match_home1,
.uuid_from_super = uuid_from_super1,
@@ -1432,7 +1511,6 @@ struct superswitch super1 = {
.init_super = init_super1,
.add_to_super = add_to_super1,
.store_super = store_super1,
- .write_init_super = write_init_super1,
.compare_super = compare_super1,
.load_super = load_super1,
.match_metadata_desc = match_metadata_desc1,
@@ -1441,7 +1519,7 @@ struct superswitch super1 = {
.locate_bitmap = locate_bitmap1,
.write_bitmap = write_bitmap1,
.free_super = free_super1,
- .major = 1,
+ .validate_geometry = validate_geometry1,
#if __BYTE_ORDER == BIG_ENDIAN
.swapuuid = 0,
#else
diff --git a/sysfs.c b/sysfs.c
index 0255f882..0ea17eb9 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -34,10 +34,10 @@ int load_sys(char *path, char *buf)
return -1;
n = read(fd, buf, 1024);
close(fd);
- if (n <=0 || n >= 1024)
+ if (n <0 || n >= 1024)
return -1;
buf[n] = 0;
- if (buf[n-1] == '\n')
+ if (n && buf[n-1] == '\n')
buf[n-1] = 0;
return 0;
}
@@ -56,6 +56,23 @@ void sysfs_free(struct mdinfo *sra)
}
}
+/*
+ * Open a sysfs attribute of an md array.
+ *
+ * The path is /sys/block/<name>/md/<attr>, or
+ * /sys/block/<name>/md/<devname>/<attr> when devname is not NULL, where
+ * <name> is devnum2devname(devnum).  The file is opened O_RDWR, falling
+ * back to O_RDONLY when that fails with EACCES.
+ *
+ * Returns the descriptor, or -1 on failure.  A path that does not fit the
+ * local buffer fails with errno set to ENAMETOOLONG instead of being
+ * written past the end of the buffer.
+ */
+int sysfs_open(int devnum, char *devname, char *attr)
+{
+	char fname[256];
+	int len;
+	int fd;
+
+	/* NOTE(review): if devnum2devname() returns allocated memory it is
+	 * never freed here - confirm against its definition.
+	 */
+	len = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s%s%s",
+		       devnum2devname(devnum),
+		       devname ? devname : "",
+		       devname ? "/" : "",
+		       attr);
+	if (len < 0 || len >= (int)sizeof(fname)) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+
+	fd = open(fname, O_RDWR);
+	if (fd < 0 && errno == EACCES)
+		fd = open(fname, O_RDONLY);
+	return fd;
+}
+
struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
{
/* Longest possible name in sysfs, mounted at /sys, is
@@ -69,7 +86,7 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
char *dbase;
struct mdinfo *sra;
struct mdinfo *dev;
- DIR *dir;
+ DIR *dir = NULL;
struct dirent *de;
sra = malloc(sizeof(*sra));
@@ -111,10 +128,12 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
sra->array.major_version = -1;
sra->array.minor_version = -2;
strcpy(sra->text_version, buf+9);
- } else
+ } else {
sscanf(buf, "%d.%d",
&sra->array.major_version,
&sra->array.minor_version);
+ strcpy(sra->text_version, buf);
+ }
}
if (options & GET_LEVEL) {
strcpy(base, "level");
@@ -128,6 +147,12 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
goto abort;
sra->array.layout = strtoul(buf, NULL, 0);
}
+ if (options & GET_DISKS) {
+ strcpy(base, "raid_disks");
+ if (load_sys(fname, buf))
+ goto abort;
+ sra->array.raid_disks = strtoul(buf, NULL, 0);
+ }
if (options & GET_COMPONENT) {
strcpy(base, "component_size");
if (load_sys(fname, buf))
@@ -203,7 +228,7 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
strcpy(dbase, "size");
if (load_sys(fname, buf))
goto abort;
- dev->component_size = strtoull(buf, NULL, 0);
+ dev->component_size = strtoull(buf, NULL, 0) * 2;
}
if (options & GET_STATE) {
dev->disk.state = 0;
@@ -224,9 +249,12 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options)
dev->errors = strtoul(buf, NULL, 0);
}
}
+ closedir(dir);
return sra;
abort:
+ if (dir)
+ closedir(dir);
sysfs_free(sra);
return NULL;
}
@@ -267,6 +295,7 @@ int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
char fname[50];
int n;
int fd;
+
sprintf(fname, "/sys/block/%s/md/%s/%s",
sra->sys_name, dev?dev->sys_name:"", name);
fd = open(fname, O_WRONLY);
@@ -310,3 +339,240 @@ int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
return -1;
return 0;
}
+
+int sysfs_set_array(struct mdinfo *sra,
+ struct mdinfo *info)
+{	/* Configure the array behind 'sra' from 'info' by writing its sysfs
+	 * attributes level, raid_disks, chunk_size, layout, component_size
+	 * and resync_start, in that order.  info->array is also copied into
+	 * sra->array.  Returns the bitwise OR of the sysfs_set_*() return
+	 * values, or 0 without writing anything when the level is negative.
+	 */
+ int rv = 0;
+ sra->array = info->array;
+
+ if (info->array.level < 0)
+ return 0; /* FIXME: negative levels are not written to sysfs, yet 0 is returned */
+ rv |= sysfs_set_str(sra, NULL, "level",
+ map_num(pers, info->array.level));
+ rv |= sysfs_set_num(sra, NULL, "raid_disks", info->array.raid_disks);
+ rv |= sysfs_set_num(sra, NULL, "chunk_size", info->array.chunk_size);
+ rv |= sysfs_set_num(sra, NULL, "layout", info->array.layout);
+ rv |= sysfs_set_num(sra, NULL, "component_size", info->component_size/2);	/* halved before writing; presumably a unit conversion - TODO confirm */
+ rv |= sysfs_set_num(sra, NULL, "resync_start", info->resync_start);
+ sra->array = info->array;	/* NOTE(review): repeats the assignment made at the top of the function */
+ return rv;
+}
+
+/*
+ * Add the device described by 'sd' to the array behind 'sra' via sysfs.
+ *
+ * "major:minor" is written to the array's new_dev attribute, then the
+ * device's sysfs name is taken from the last component of the
+ * /sys/dev/block/<major>:<minor> link and stored in sd->sys_name as
+ * "dev-<name>".  The per-device offset and size are then written, plus
+ * slot unless the array level is LEVEL_CONTAINER.  When every write
+ * succeeds a copy of *sd is pushed onto the front of sra->devs.
+ *
+ * Returns 0 on success.  Returns the sysfs_set_str() result if new_dev
+ * fails, -1 if the link cannot be read or does not fit the buffer or if
+ * memory runs out, otherwise the OR of the attribute write results.
+ */
+int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd)
+{
+	char dv[100];
+	char nm[1024];
+	struct mdinfo *sd2;
+	char *dname;
+	int rv;
+
+	sprintf(dv, "%d:%d", sd->disk.major, sd->disk.minor);
+	rv = sysfs_set_str(sra, NULL, "new_dev", dv);
+	if (rv)
+		return rv;
+
+	memset(nm, 0, sizeof(nm));
+	sprintf(dv, "/sys/dev/block/%d:%d", sd->disk.major, sd->disk.minor);
+	/* readlink() does not terminate the result, so keep one byte back */
+	rv = readlink(dv, nm, sizeof(nm) - 1);
+	if (rv <= 0)
+		return -1;
+	/* a completely filled buffer means the target may be truncated */
+	if (rv >= (int)sizeof(nm) - 1)
+		return -1;
+	nm[rv] = '\0';
+	dname = strrchr(nm, '/');
+	/* a target without any '/' is itself the device name */
+	dname = dname ? dname + 1 : nm;
+	/* NOTE(review): the capacity of sd->sys_name is not visible here;
+	 * confirm that "dev-" plus the device name always fits.
+	 */
+	strcpy(sd->sys_name, "dev-");
+	strcpy(sd->sys_name+4, dname);
+
+	rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
+	rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
+	if (sra->array.level != LEVEL_CONTAINER) {
+		rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk);
+//		rv |= sysfs_set_str(sra, sd, "state", "in_sync");
+	}
+	if (! rv) {
+		sd2 = malloc(sizeof(*sd2));
+		if (sd2 == NULL)
+			return -1;
+		*sd2 = *sd;
+		sd2->next = sra->devs;
+		sra->devs = sd2;
+	}
+	return rv;
+}
+
+#if 0	/* everything up to the matching #endif is excluded from compilation */
+int sysfs_disk_to_sg(int fd)
+{
+ /* From an open block device, try to find and open its corresponding
+ * scsi_generic interface.
+ * Returns the new descriptor, or a negative value on failure.
+ */
+ struct stat st;
+ char path[256];
+ char sg_path[256];
+ char sg_major_minor[8];
+ char *c;
+ DIR *dir;
+ struct dirent *de;
+ int major, minor, rv;
+
+ if (fstat(fd, &st))
+ return -1;
+
+ snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device",
+ major(st.st_rdev), minor(st.st_rdev));
+
+ dir = opendir(path);
+ if (!dir)
+ return -1;
+
+ de = readdir(dir);
+ while (de) {
+ if (strncmp("scsi_generic:", de->d_name,
+ strlen("scsi_generic:")) == 0)
+ break;
+ de = readdir(dir);
+ }
+ closedir(dir);
+
+ if (!de)
+ return -1;
+
+ snprintf(sg_path, sizeof(sg_path), "%s/%s/dev", path, de->d_name);	/* NOTE(review): de is used after closedir(dir) */
+ fd = open(sg_path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ rv = read(fd, sg_major_minor, sizeof(sg_major_minor));
+ close(fd);
+ if (rv < 0)
+ return -1;
+ else
+ sg_major_minor[rv - 1] = '\0';	/* overwrites the last byte read; NOTE(review): rv == 0 would index -1 */
+
+ c = strchr(sg_major_minor, ':');
+ *c = '\0';	/* NOTE(review): c is not checked for NULL */
+ c++;
+ major = strtol(sg_major_minor, NULL, 10);
+ minor = strtol(c, NULL, 10);
+ snprintf(path, sizeof(path), "/dev/.tmp.md.%d:%d:%d",
+ (int) getpid(), major, minor);
+ if (mknod(path, S_IFCHR|0600, makedev(major, minor))==0) {	/* temporary char node, unlinked right after it is opened */
+ fd = open(path, O_RDONLY);
+ unlink(path);
+ return fd;
+ }
+
+ return -1;
+}
+#endif
+
+/*
+ * From an open block device, try to retrieve its scsi_id.
+ *
+ * Looks in /sys/dev/block/<major>:<minor>/device for an entry named
+ * "scsi_disk:<a>:<b>:<c>:<d>" and packs the four decimal fields into *id
+ * as a<<24 | b<<16 | c<<8 | d.
+ *
+ * Returns 0 on success.  Returns 1, leaving *id untouched, when the
+ * device cannot be examined, no such entry exists, or the entry does not
+ * have four ':'-separated fields.
+ */
+int sysfs_disk_to_scsi_id(int fd, __u32 *id)
+{
+	struct stat st;
+	char path[256];
+	char name[256];
+	char *field, *end;
+	DIR *dir;
+	struct dirent *de;
+	__u32 val = 0;
+	int shift;
+
+	if (fstat(fd, &st))
+		return 1;
+
+	snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device",
+		 major(st.st_rdev), minor(st.st_rdev));
+
+	dir = opendir(path);
+	if (!dir)
+		return 1;
+
+	name[0] = '\0';
+	while ((de = readdir(dir)) != NULL) {
+		if (strncmp("scsi_disk:", de->d_name,
+			    strlen("scsi_disk:")) == 0) {
+			/* copy the name: the dirent is not valid once the
+			 * directory stream has been closed
+			 */
+			snprintf(name, sizeof(name), "%s", de->d_name);
+			break;
+		}
+	}
+	closedir(dir);
+
+	if (name[0] == '\0')
+		return 1;
+
+	/* cannot fail: name starts with "scsi_disk:" */
+	field = strchr(name, ':') + 1;
+
+	/* the first three fields, labelled host, channel and lun by the
+	 * original code, go into the upper three bytes
+	 */
+	for (shift = 24; shift > 0; shift -= 8) {
+		end = strchr(field, ':');
+		if (!end)
+			return 1;
+		*end = '\0';
+		val |= (__u32)strtol(field, NULL, 10) << shift;
+		field = end + 1;
+	}
+	val |= (__u32)strtol(field, NULL, 10); /* id */
+
+	*id = val;
+	return 0;
+}
+
+
+/*
+ * Check that devnum is a holder of rdev, and is the only holder.
+ * We should be locked against races by an O_EXCL on devnum.
+ *
+ * Every entry of /sys/dev/block/<major>:<minor>/holders is examined: its
+ * "dev" file must contain "<maj>:<min>\n" identifying devnum (for a major
+ * other than MD_MAJOR the minor is mapped to -1-(min>>6) first).
+ *
+ * Returns 1 when at least one holder exists and all of them are devnum,
+ * otherwise 0.  errno is set to ENOENT when the holders directory or a
+ * holder's dev file cannot be read or parsed, and to EEXIST when some
+ * other device is a holder.
+ */
+int sysfs_unique_holder(int devnum, long rdev)
+{
+	DIR *dir;
+	struct dirent *de;
+	char dirname[100];
+	char path[512];
+	int found = 0;
+	int failed = 0;
+
+	sprintf(dirname, "/sys/dev/block/%d:%d/holders",
+		major(rdev), minor(rdev));
+	dir = opendir(dirname);
+	errno = ENOENT;
+	if (!dir)
+		return 0;
+
+	while ((de = readdir(dir)) != NULL) {
+		/* large enough for a whole "maj:min\n" line, so that the
+		 * trailing newline is not cut off
+		 */
+		char buf[32];
+		int n;
+		int mj, mn;
+		char c;
+		int fd;
+
+		if (de->d_ino == 0)
+			continue;
+		if (de->d_name[0] == '.')
+			continue;
+
+		n = snprintf(path, sizeof(path), "%s/%s/dev",
+			     dirname, de->d_name);
+		if (n < 0 || n >= (int)sizeof(path)) {
+			errno = ENOENT;
+			failed = 1;
+			break;
+		}
+		fd = open(path, O_RDONLY);
+		if (fd < 0) {
+			errno = ENOENT;
+			failed = 1;
+			break;
+		}
+		n = read(fd, buf, sizeof(buf)-1);
+		close(fd);
+		if (n < 0) {
+			/* a failed read must not be used as an index */
+			errno = ENOENT;
+			failed = 1;
+			break;
+		}
+		buf[n] = 0;
+		if (sscanf(buf, "%d:%d%c", &mj, &mn, &c) != 3 ||
+		    c != '\n') {
+			errno = ENOENT;
+			failed = 1;
+			break;
+		}
+		if (mj != MD_MAJOR)
+			mn = -1-(mn>>6);
+
+		if (devnum != mn) {
+			errno = EEXIST;
+			failed = 1;
+			break;
+		}
+		found = 1;
+	}
+	closedir(dir);
+
+	return failed ? 0 : found;
+}
diff --git a/test b/test
index 1a79bab4..bd8d2793 100644
--- a/test
+++ b/test
@@ -174,6 +174,8 @@ do
if [ -f "$script" ]
then
rm -f $targetdir/stderr
+ # stop all arrays, just in case some script left an array active.
+ mdadm -Ssq
# source script in a subshell, so it has access to our
# namespace, but cannot change it.
if ( set -ex ; . $script ) 2> $targetdir/log
diff --git a/util.c b/util.c
index 75f37064..3bf4cbe3 100644
--- a/util.c
+++ b/util.c
@@ -29,8 +29,13 @@
#include "mdadm.h"
#include "md_p.h"
+#include <sys/socket.h>
#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <sys/un.h>
#include <ctype.h>
+#include <dirent.h>
+#include <signal.h>
/*
* following taken from linux/blkpg.h because they aren't
@@ -389,6 +394,9 @@ int is_standard(char *dev, int *nump)
/* tests if dev is a "standard" md dev name.
* i.e if the last component is "/dNN" or "/mdNN",
* where NN is a string of digits
+ * Returns 1 if a partitionable standard,
+ * -1 if non-partitionable,
+ * 0 if not a standard name.
*/
char *d = strrchr(dev, '/');
int type=0;
@@ -608,6 +616,23 @@ char *human_size_brief(long long bytes)
}
#endif
+unsigned long long calc_array_size(int level, int raid_disks, int layout,
+				   int chunksize, unsigned long long devsize)
+{
+	/* Compute the usable size of an array from its geometry.
+	 *
+	 * level/raid_disks/layout select how many member devices
+	 * carry data; devsize is rounded down to a whole number of
+	 * chunks and multiplied by that count.  chunksize is shifted
+	 * right by 9 before use, so it is presumably in bytes with
+	 * devsize in 512-byte sectors - confirm against callers.
+	 *
+	 * Returns 0 for an unrecognised level, for a RAID10 layout
+	 * with a zero copy count, or when fewer devices are given
+	 * than the level needs.
+	 */
+	int data_disks = 0;
+	int near_copies;
+	int far_copies;
+	int chunk_sectors;
+
+	switch (level) {
+	case 0: data_disks = raid_disks; break;
+	case 1: data_disks = 1; break;
+	case 4:
+	case 5: data_disks = raid_disks - 1; break;
+	case 6: data_disks = raid_disks - 2; break;
+	case 10:
+		near_copies = layout & 255;
+		far_copies = (layout>>8) & 255;
+		/* a zero copy count would divide by zero */
+		if (near_copies > 0 && far_copies > 0)
+			data_disks = raid_disks / near_copies / far_copies;
+		break;
+	default:
+		break;
+	}
+	/* a negative count would wrap to a huge unsigned size */
+	if (data_disks < 0)
+		data_disks = 0;
+
+	/* Only round when there is at least one whole sector per chunk:
+	 * with chunksize < 512 (e.g. 0) the mask would be
+	 * ~(unsigned long long)-1 == 0 and wipe devsize completely.
+	 */
+	chunk_sectors = chunksize >> 9;
+	if (chunk_sectors > 0)
+		devsize &= ~(unsigned long long)(chunk_sectors - 1);
+	return data_disks * devsize;
+}
+
#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
int get_mdp_major(void)
{
@@ -693,21 +718,6 @@ void put_md_name(char *name)
unlink(name);
}
-static int dev2major(int d)
-{
- if (d >= 0)
- return MD_MAJOR;
- else
- return get_mdp_major();
-}
-
-static int dev2minor(int d)
-{
- if (d >= 0)
- return d;
- return (-1-d) << MdpMinorShift;
-}
-
int find_free_devnum(int use_partitions)
{
int devnum;
@@ -749,19 +759,38 @@ int dev_open(char *dev, int flags)
if (e > dev && *e == ':' && e[1] &&
(minor = strtoul(e+1, &e, 0)) >= 0 &&
*e == 0) {
- snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d", major, minor);
+ snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d",
+ (int)getpid(), major, minor);
if (mknod(devname, S_IFBLK|0600, makedev(major, minor))==0) {
- fd = open(devname, flags);
+ fd = open(devname, flags|O_DIRECT);
unlink(devname);
}
} else
- fd = open(dev, flags);
+ fd = open(dev, flags|O_DIRECT);
return fd;
}
-struct superswitch *superlist[] = { &super0, &super1, NULL };
+int open_dev_excl(int devnum)
+{
+	/* Open the device numbered devnum for read/write with O_EXCL,
+	 * addressing it by its "major:minor" string through dev_open().
+	 * While the open fails with EBUSY it is retried, up to 25
+	 * attempts with a 200000 microsecond pause after each.
+	 * Returns the open fd, the failing result of dev_open() for
+	 * any other error, or -1 once the attempts are used up.
+	 */
+	char devspec[20];
+	int attempt = 0;
+
+	sprintf(devspec, "%d:%d", dev2major(devnum), dev2minor(devnum));
+	while (attempt < 25) {
+		int fd = dev_open(devspec, O_RDWR|O_EXCL);
+
+		/* success, or a failure that waiting will not cure */
+		if (fd >= 0 || errno != EBUSY)
+			return fd;
+		usleep(200000);
+		attempt++;
+	}
+	return -1;
+}
+
+struct superswitch *superlist[] = { &super0, &super1, &super_ddf, &super_imsm, NULL };
#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
+
struct supertype *super_by_fd(int fd)
{
mdu_array_info_t array;
@@ -772,6 +801,7 @@ struct supertype *super_by_fd(int fd)
char *verstr;
char version[20];
int i;
+ char *subarray = NULL;
sra = sysfs_read(fd, 0, GET_VERSION);
@@ -791,40 +821,56 @@ struct supertype *super_by_fd(int fd)
sprintf(version, "%d.%d", vers, minor);
verstr = version;
}
+	if (minor == -2 && verstr[0] == '/') {
+		/* verstr has the form "/<container>[/<subarray>]":
+		 * split it, then take the metadata version from the
+		 * container device instead.
+		 */
+		char *dev = verstr+1;
+		int devnum;
+
+		subarray = strchr(dev, '/');
+		if (subarray) {
+			*subarray++ = '\0';
+			/* copy before sra is released below, in case
+			 * verstr points into it.  With no second '/'
+			 * subarray stays NULL rather than being handed
+			 * to strdup().
+			 */
+			subarray = strdup(subarray);
+		}
+		devnum = devname2devnum(dev);
+		if (sra)
+			sysfs_free(sra);
+		sra = sysfs_read(-1, devnum, GET_VERSION);
+		/* sysfs_read() can fail; do not dereference NULL */
+		verstr = (sra && sra->text_version) ? sra->text_version
+						    : "-no-metadata-";
+	}
+
for (i = 0; st == NULL && superlist[i] ; i++)
st = superlist[i]->match_metadata_desc(verstr);
if (sra)
sysfs_free(sra);
- if (st)
+ if (st) {
st->sb = NULL;
+ if (subarray) {
+ strncpy(st->subarray, subarray, 32);
+ st->subarray[31] = 0;
+ free(subarray);
+ } else
+ st->subarray[0] = 0;
+ }
return st;
}
#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
-struct supertype *dup_super(struct supertype *st)
+struct supertype *dup_super(struct supertype *orig) /* make a fresh supertype carrying orig's ss, max_devs, minor_version and subarray */
{
- struct supertype *stnew = NULL;
- char *verstr = NULL;
- char version[20];
- int i;
+ struct supertype *st; /* the newly allocated copy that is returned */
+ if (!orig)
+ return orig; /* nothing to duplicate: NULL in, NULL out */
+ st = malloc(sizeof(*st));
if (!st)
return st;
-
- if (st->minor_version == -1)
- sprintf(version, "%d", st->ss->major);
- else
- sprintf(version, "%d.%d", st->ss->major, st->minor_version);
- verstr = version;
-
- for (i = 0; stnew == NULL && superlist[i] ; i++)
- stnew = superlist[i]->match_metadata_desc(verstr);
-
- if (stnew)
- stnew->sb = NULL;
- return stnew;
+ memset(st, 0, sizeof(*st)); /* every field not assigned below starts out zero */
+ st->ss = orig->ss; /* pointer copy: the new supertype shares orig's ss */
+ st->max_devs = orig->max_devs;
+ st->minor_version = orig->minor_version;
+ strcpy(st->subarray, orig->subarray);
+ st->sb = NULL; /* sb and info are not carried over from orig */
+ st->info = NULL;
+ return st;
}
struct supertype *guess_super(int fd)
@@ -839,11 +885,10 @@ struct supertype *guess_super(int fd)
int i;
st = malloc(sizeof(*st));
- memset(st, 0, sizeof(*st));
for (i=0 ; superlist[i]; i++) {
int rv;
ss = superlist[i];
- st->ss = NULL;
+ memset(st, 0, sizeof(*st));
rv = ss->load_super(st, fd, NULL);
if (rv == 0) {
struct mdinfo info;
@@ -858,7 +903,7 @@ struct supertype *guess_super(int fd)
}
if (bestsuper != -1) {
int rv;
- st->ss = NULL;
+ memset(st, 0, sizeof(*st));
rv = superlist[bestsuper]->load_super(st, fd, NULL);
if (rv == 0) {
superlist[bestsuper]->free_super(st);
@@ -906,6 +951,236 @@ void get_one_disk(int mdfd, mdu_array_info_t *ainf, mdu_disk_info_t *disk)
return;
}
+int open_container(int fd)
+{
+	/* 'fd' is a block device.  Find out if it is in use
+	 * by a container, and return an open fd on that container.
+	 *
+	 * The holders of fd's device are listed from
+	 * /sys/dev/block/<major>:<minor>/holders; the first holder
+	 * whose "dev" file can be read and which dev_open() can open
+	 * read-only is returned.  Returns -1 if fd cannot be stat'ed,
+	 * the holders directory cannot be opened, or no holder could
+	 * be opened.
+	 * NOTE(review): nothing here checks that the holder really is
+	 * a container - presumably callers verify that.
+	 */
+	char path[256];
+	char *e;
+	size_t room;
+	DIR *dir;
+	struct dirent *de;
+	int dfd, n;
+	char buf[200];
+	int mj, mn;
+	struct stat st;
+
+	if (fstat(fd, &st) != 0)
+		return -1;
+	sprintf(path, "/sys/dev/block/%d:%d/holders",
+		(int)major(st.st_rdev), (int)minor(st.st_rdev));
+	e = path + strlen(path);
+	room = sizeof(path) - (e - path);
+
+	dir = opendir(path);
+	if (!dir)
+		return -1;
+	while ((de = readdir(dir))) {
+		if (de->d_ino == 0)
+			continue;
+		if (de->d_name[0] == '.')
+			continue;
+		/* bounded append: skip names that do not fit in path */
+		n = snprintf(e, room, "/%s/dev", de->d_name);
+		if (n < 0 || (size_t)n >= room)
+			continue;
+		dfd = open(path, O_RDONLY);
+		if (dfd < 0)
+			continue;
+		n = read(dfd, buf, sizeof(buf));
+		close(dfd);
+		/* need at least one byte and room for the terminator */
+		if (n <= 0 || (size_t)n >= sizeof(buf))
+			continue;
+		buf[n] = 0;
+		if (sscanf(buf, "%d:%d", &mj, &mn) != 2)
+			continue;
+		sprintf(buf, "%d:%d", mj, mn);
+		dfd = dev_open(buf, O_RDONLY);
+		if (dfd >= 0) {
+			closedir(dir);
+			return dfd;
+		}
+	}
+	closedir(dir);
+	return -1;
+}
+
+char *devnum2devname(int num)
+{
+	/* Convert a device number to its md device name.
+	 * Numbers >= 0 give "md<num>"; negative numbers give
+	 * "md_d<-1-num>", the inverse of devname2devnum().
+	 * 0 must take the first form: it is what devname2devnum()
+	 * returns for "md0".
+	 * The result is allocated with strdup(); the caller is
+	 * responsible for freeing it.  NULL if the allocation fails.
+	 */
+	char name[100];
+	if (num >= 0)
+		sprintf(name, "md%d", num);
+	else
+		sprintf(name, "md_d%d", -1-num);
+	return strdup(name);
+}
+
+int devname2devnum(char *name)
+{
+	/* Convert an md device name to its device number.
+	 * "md_d<N>" gives -1-N; anything else is read as a decimal
+	 * number starting at the third character (the first two are
+	 * skipped without being checked).  No validation of the
+	 * digits is done.
+	 */
+	if (strncmp(name, "md_d", 4) != 0)
+		return strtoul(name+2, NULL, 10);
+	return -1-strtoul(name+4, NULL, 10);
+}
+
+int fd2devnum(int fd)
+{
+	/* Map an open block-device fd to a device number.
+	 * A device on MD_MAJOR yields its minor number; any other
+	 * block device yields -1-(minor>>6).
+	 * Returns -1 if fstat() fails or fd is not a block device.
+	 * Note that -1 is also the value computed for a device not on
+	 * MD_MAJOR whose minor is below 64, so the two cases cannot
+	 * be told apart from the return value alone.
+	 */
+	struct stat sb;
+
+	if (fstat(fd, &sb) != 0)
+		return -1;
+	if ((sb.st_mode & S_IFMT) != S_IFBLK)
+		return -1;
+	if (major(sb.st_rdev) != MD_MAJOR)
+		return -1- (minor(sb.st_rdev)>>6);
+	return minor(sb.st_rdev);
+}
+
+static int read_mdmon_pid(int devnum)
+{
+	/* Read the pid stored in /var/run/mdadm/<devname>.pid.
+	 * Returns the pid, or -1 if the name cannot be built, the
+	 * file cannot be read, or it does not hold a positive number.
+	 */
+	char path[100];
+	char pid[10];
+	char *devname;
+	int fd;
+	int n;
+
+	devname = devnum2devname(devnum);
+	if (!devname)
+		return -1;
+	snprintf(path, sizeof(path), "/var/run/mdadm/%s.pid", devname);
+	/* devnum2devname() returns strdup()ed memory */
+	free(devname);
+	fd = open(path, O_RDONLY, 0);
+
+	if (fd < 0)
+		return -1;
+	n = read(fd, pid, sizeof(pid)-1);
+	close(fd);
+	if (n <= 0)
+		return -1;
+	/* read() does not terminate the buffer; atoi() needs it */
+	pid[n] = 0;
+	n = atoi(pid);
+	/* kill() treats pid 0 and negative pids as process groups,
+	 * so never hand those back to the callers.
+	 */
+	if (n <= 0)
+		return -1;
+	return n;
+}
+
+int mdmon_running(int devnum)
+{
+	/* Returns 1 if the pid recorded for devnum's mdmon can be
+	 * signalled (kill with signal 0 succeeds), otherwise 0.
+	 */
+	int pid = read_mdmon_pid(devnum);
+
+	if (pid < 0)
+		return 0;
+	if (kill(pid, 0) == 0)
+		return 1;
+	return 0;
+}
+
+int signal_mdmon(int devnum)
+{
+	/* Send SIGUSR1 to the pid recorded for devnum's mdmon.
+	 * Returns 1 if the signal was delivered, otherwise 0.
+	 */
+	int pid = read_mdmon_pid(devnum);
+
+	if (pid < 0)
+		return 0;
+	if (kill(pid, SIGUSR1) == 0)
+		return 1;
+	return 0;
+}
+
+int start_mdmon(int devnum)
+{
+	/* Fork and exec mdmon for the device numbered devnum.
+	 *
+	 * The candidates tried, in order, are: "mdmon" in the
+	 * directory of the running executable (from /proc/self/exe),
+	 * "/sbin/mdmon", and "mdmon".
+	 * Returns 0 if env_no_mdmon() says mdmon is disabled or the
+	 * child finished with a zero wait status, -1 if fork() or
+	 * waitpid() failed or the wait status is non-zero.
+	 */
+	int i;
+	int len;
+	pid_t pid;
+	int status;
+	char pathbuf[1024];
+	char *paths[4] = {
+		pathbuf,
+		"/sbin/mdmon",
+		"mdmon",
+		NULL
+	};
+
+	if (env_no_mdmon())
+		return 0;
+
+	/* readlink() does not terminate the result: leave room for it */
+	len = readlink("/proc/self/exe", pathbuf, sizeof(pathbuf)-1);
+	if (len > 0) {
+		char *sl;
+		pathbuf[len] = 0;
+		sl = strrchr(pathbuf, '/');
+		if (sl)
+			sl++;
+		else
+			sl = pathbuf;
+		/* replace the last component only if "mdmon" fits */
+		if ((size_t)(sl - pathbuf) + sizeof("mdmon") <= sizeof(pathbuf))
+			strcpy(sl, "mdmon");
+		else
+			pathbuf[0] = '\0';
+	} else
+		pathbuf[0] = '\0';
+
+	pid = fork();
+	switch(pid) {
+	case 0:
+		/* FIXME yuk. CLOSE_EXEC?? */
+		for (i=3; i < 100; i++)
+			close(i);
+		/* execl() only returns on failure, so fall through
+		 * to the next candidate path.
+		 */
+		for (i=0; paths[i]; i++)
+			if (paths[i][0])
+				execl(paths[i], "mdmon",
+				      map_dev(dev2major(devnum),
+					      dev2minor(devnum),
+					      1), (char *)NULL);
+		exit(1);
+	case -1: fprintf(stderr, Name ": cannot run mdmon. "
+			 "Array remains readonly\n");
+		return -1;
+	default: /* parent - good */
+		/* wait for this child only, not whichever exits first */
+		if (waitpid(pid, &status, 0) < 0 || status != 0)
+			return -1;
+	}
+	return 0;
+}
+
+int env_no_mdmon(void)
+{
+	/* Returns 1 when the environment variable MDADM_NO_MDMON is
+	 * set and atoi() of its value is exactly 1, otherwise 0.
+	 */
+	const char *setting = getenv("MDADM_NO_MDMON");
+
+	return (setting != NULL && atoi(setting) == 1) ? 1 : 0;
+}
+
+
+int flush_metadata_updates(struct supertype *st)
+{
+	/* Send every update queued on st->updates to the monitor
+	 * reached through connect_monitor(), waiting for a reply
+	 * after each one, then send an ack and wait once more.
+	 * Each update and its buffer are freed after being sent.
+	 *
+	 * Returns -1 if nothing is queued (update_tail is then set to
+	 * NULL) or the monitor cannot be reached (st is left
+	 * untouched); returns 0 otherwise, with the queue empty and
+	 * update_tail reset to NULL.
+	 * NOTE(review): the string from devnum2devname() is not freed
+	 * here; whether connect_monitor() keeps it is not visible
+	 * from this function.
+	 */
+	struct metadata_update *mu;
+	int sfd;
+
+	if (st->updates == NULL) {
+		st->update_tail = NULL;
+		return -1;
+	}
+
+	sfd = connect_monitor(devnum2devname(st->container_dev));
+	if (sfd < 0)
+		return -1;
+
+	for (mu = st->updates; mu != NULL; mu = st->updates) {
+		/* unlink first so the list stays valid while sending */
+		st->updates = mu->next;
+
+		send_message(sfd, mu, 0);
+		wait_reply(sfd, 0);
+		free(mu->buf);
+		free(mu);
+	}
+	ack(sfd, 0);
+	wait_reply(sfd, 0);
+	close(sfd);
+	st->update_tail = NULL;
+	return 0;
+}
+
+void append_metadata_update(struct supertype *st, void *buf, int len)
+{
+	/* Queue buf (len bytes) as a new metadata_update at the end
+	 * of st's update list.  buf itself is stored, not copied.
+	 * st->update_tail must point at the slot that receives the
+	 * new entry; it is advanced to the new entry's 'next' field.
+	 * The result of malloc() is not checked.
+	 */
+	struct metadata_update *node = malloc(sizeof(*node));
+
+	node->next = NULL;
+	node->space = NULL;
+	node->len = len;
+	node->buf = buf;
+
+	*st->update_tail = node;
+	st->update_tail = &node->next;
+}
+
+
#ifdef __TINYC__
/* tinyc doesn't optimize this check in ioctl.h out ... */
unsigned int __invalid_size_argument_for_IOC = 0;