summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ANNOUNCE-4.0 22
-rw-r--r-- Assemble.c 25
-rw-r--r-- Build.c 2
-rw-r--r-- Create.c 17
-rw-r--r-- Detail.c 10
-rwxr-xr-x Grow.c 216
-rw-r--r-- Incremental.c 18
-rw-r--r-- Makefile 2
-rw-r--r-- Manage.c 144
-rw-r--r-- Monitor.c 6
-rw-r--r-- ReadMe.c 9
-rw-r--r-- bitmap.c 87
-rw-r--r-- config.c 228
-rw-r--r-- debian/changelog 6
-rwxr-xr-x inventory 2
-rw-r--r-- lib.c 109
-rw-r--r-- managemon.c 17
-rw-r--r-- mapfile.c 2
-rw-r--r-- md.4 54
-rw-r--r-- md_p.h 1
-rw-r--r-- mdadm.8.in 57
-rw-r--r-- mdadm.c 123
-rw-r--r--[-rwxr-xr-x] mdadm.h 120
-rw-r--r-- mdadm.spec 2
-rw-r--r-- mdassemble.8 2
-rw-r--r-- mdassemble.c 2
-rw-r--r-- mdmon.8 2
-rw-r--r-- mdmon.h 2
-rw-r--r-- mdopen.c 14
-rw-r--r-- mdstat.c 6
-rw-r--r-- monitor.c 207
-rw-r--r-- msg.c 2
-rw-r--r-- part.h 2
-rw-r--r-- platform-intel.c 50
-rw-r--r-- platform-intel.h 8
-rw-r--r-- raid6check.c 3
-rw-r--r-- restripe.c 62
-rw-r--r-- sg_io.c 23
-rw-r--r-- super-ddf.c 4
-rw-r--r-- super-gpt.c 10
-rw-r--r-- super-intel.c 1535
-rw-r--r-- super-mbr.c 10
-rw-r--r-- super0.c 147
-rw-r--r-- super1.c 221
-rw-r--r-- sysfs.c 115
-rw-r--r-- tests/21raid5cache 87
-rw-r--r-- util.c 125
47 files changed, 2792 insertions, 1126 deletions
diff --git a/ANNOUNCE-4.0 b/ANNOUNCE-4.0
new file mode 100644
index 00000000..f79c5408
--- /dev/null
+++ b/ANNOUNCE-4.0
@@ -0,0 +1,22 @@
+Subject: ANNOUNCE: mdadm 4.0 - A tool for managing md Soft RAID under Linux
+
+I am pleased to announce the availability of
+ mdadm version 4.0
+
+It is available at the usual places:
+ http://www.kernel.org/pub/linux/utils/raid/mdadm/
+and via git at
+ git://git.kernel.org/pub/scm/utils/mdadm/mdadm.git
+ http://git.kernel.org/cgit/utils/mdadm/
+
+The update in major version number primarily indicates this is a
+release by its new maintainer. In addition it contains a large number
+of fixes in particular for IMSM RAID and clustered RAID support. In
+addition this release includes support for IMSM 4k sector drives,
+failfast and better documentation for journaled RAID.
+
+This is my first release of mdadm. Please thank Neil Brown for his
+previous work as maintainer and blame me for all the bugs I caused
+since taking over.
+
+Jes Sorensen, 2017-01-09
diff --git a/Assemble.c b/Assemble.c
index d199afc9..3da09033 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -599,18 +599,9 @@ static int load_devices(struct devs *devices, char *devmap,
int err;
fstat(mdfd, &stb2);
- if (strcmp(c->update, "uuid")==0 &&
- !ident->uuid_set) {
- int rfd;
- if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
- read(rfd, ident->uuid, 16) != 16) {
- *(__u32*)(ident->uuid) = random();
- *(__u32*)(ident->uuid+1) = random();
- *(__u32*)(ident->uuid+2) = random();
- *(__u32*)(ident->uuid+3) = random();
- }
- if (rfd >= 0) close(rfd);
- }
+ if (strcmp(c->update, "uuid") == 0 && !ident->uuid_set)
+ random_uuid((__u8 *)ident->uuid);
+
dfd = dev_open(devname,
tmpdev->disposition == 'I'
? O_RDWR : (O_RDWR|O_EXCL));
@@ -1880,10 +1871,9 @@ int assemble_container_content(struct supertype *st, int mdfd,
struct mdinfo *dev, *sra, *dev2;
int working = 0, preexist = 0;
int expansion = 0;
- struct map_ent *map = NULL;
int old_raid_disks;
int start_reshape;
- char *avail = NULL;
+ char *avail;
int err;
sysfs_init(content, mdfd, NULL);
@@ -1896,8 +1886,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
content->text_version[0] == '/')
content->text_version[0] = '-';
if (sysfs_set_array(content, md_get_version(mdfd)) != 0) {
- if (sra)
- sysfs_free(sra);
+ sysfs_free(sra);
return 1;
}
}
@@ -1950,11 +1939,9 @@ int assemble_container_content(struct supertype *st, int mdfd,
free(avail);
return 1;/* Nothing new, don't try to start */
}
- map_update(&map, fd2devnm(mdfd),
- content->text_version,
+ map_update(NULL, fd2devnm(mdfd), content->text_version,
content->uuid, chosen_name);
-
if (enough(content->array.level, content->array.raid_disks,
content->array.layout, content->array.state & 1, avail) == 0) {
if (c->export && result)
diff --git a/Build.c b/Build.c
index 8603c710..74a440e7 100644
--- a/Build.c
+++ b/Build.c
@@ -192,7 +192,7 @@ int Build(char *mddev, struct mddev_dev *devlist,
disk.number = i;
disk.raid_disk = i;
disk.state = (1<<MD_DISK_SYNC) | (1<<MD_DISK_ACTIVE);
- if (dv->writemostly == 1)
+ if (dv->writemostly == FlagSet)
disk.state |= 1<<MD_DISK_WRITEMOSTLY;
disk.major = major(stb.st_rdev);
disk.minor = minor(stb.st_rdev);
diff --git a/Create.c b/Create.c
index 1e4a6ee0..2721884e 100644
--- a/Create.c
+++ b/Create.c
@@ -114,8 +114,13 @@ int Create(struct supertype *st, char *mddev,
unsigned long long newsize;
int major_num = BITMAP_MAJOR_HI;
- if (s->bitmap_file && strcmp(s->bitmap_file, "clustered") == 0)
+ if (s->bitmap_file && strcmp(s->bitmap_file, "clustered") == 0) {
major_num = BITMAP_MAJOR_CLUSTERED;
+ if (c->nodes <= 1) {
+ pr_err("At least 2 nodes are needed for cluster-md\n");
+ return 1;
+ }
+ }
memset(&info, 0, sizeof(info));
if (s->level == UnSet && st && st->ss->default_geometry)
@@ -769,9 +774,9 @@ int Create(struct supertype *st, char *mddev,
st->ss->name);
goto abort_locked;
}
- if (!st->ss->add_internal_bitmap(st, &s->bitmap_chunk,
- c->delay, s->write_behind,
- bitmapsize, 1, major_num)) {
+ if (st->ss->add_internal_bitmap(st, &s->bitmap_chunk,
+ c->delay, s->write_behind,
+ bitmapsize, 1, major_num)) {
pr_err("Given bitmap chunk size not supported.\n");
goto abort_locked;
}
@@ -883,8 +888,10 @@ int Create(struct supertype *st, char *mddev,
else
inf->disk.state = 0;
- if (dv->writemostly == 1)
+ if (dv->writemostly == FlagSet)
inf->disk.state |= (1<<MD_DISK_WRITEMOSTLY);
+ if (dv->failfast == FlagSet)
+ inf->disk.state |= (1<<MD_DISK_FAILFAST);
if (have_container)
fd = -1;
diff --git a/Detail.c b/Detail.c
index 0cfccadb..509b0d41 100644
--- a/Detail.c
+++ b/Detail.c
@@ -130,7 +130,7 @@ int Detail(char *dev, struct context *c)
/* This is a subarray of some container.
* We want the name of the container, and the member
*/
- int devid = devnm2devid(st->container_devnm);
+ dev_t devid = devnm2devid(st->container_devnm);
int cfd, err;
member = subarray;
@@ -323,7 +323,8 @@ int Detail(char *dev, struct context *c)
if (disk.major == 0 && disk.minor == 0)
continue;
if (disk.raid_disk >= 0 && disk.raid_disk < array.raid_disks
- && disks[disk.raid_disk*2].state == (1<<MD_DISK_REMOVED))
+ && disks[disk.raid_disk*2].state == (1<<MD_DISK_REMOVED)
+ && ((disk.state & (1<<MD_DISK_JOURNAL)) == 0))
disks[disk.raid_disk*2] = disk;
else if (disk.raid_disk >= 0 && disk.raid_disk < array.raid_disks
&& disks[disk.raid_disk*2+1].state == (1<<MD_DISK_REMOVED)
@@ -577,12 +578,12 @@ This is pretty boring
char path[200];
char vbuf[1024];
int nlen = strlen(sra->sys_name);
- int devid;
+ dev_t devid;
if (de->d_name[0] == '.')
continue;
sprintf(path, "/sys/block/%s/md/metadata_version",
de->d_name);
- if (load_sys(path, vbuf) < 0)
+ if (load_sys(path, vbuf, sizeof(vbuf)) < 0)
continue;
if (strncmp(vbuf, "external:", 9) != 0 ||
!is_subarray(vbuf+9) ||
@@ -657,6 +658,7 @@ This is pretty boring
}
if (disk.state & (1<<MD_DISK_REMOVED)) printf(" removed");
if (disk.state & (1<<MD_DISK_WRITEMOSTLY)) printf(" writemostly");
+ if (disk.state & (1<<MD_DISK_FAILFAST)) printf(" failfast");
if (disk.state & (1<<MD_DISK_JOURNAL)) printf(" journal");
if ((disk.state &
((1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC)
diff --git a/Grow.c b/Grow.c
index bbdd46c0..455c5f90 100755
--- a/Grow.c
+++ b/Grow.c
@@ -297,7 +297,14 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
" between different architectures. Consider upgrading the Linux kernel.\n");
}
- if (s->bitmap_file && strcmp(s->bitmap_file, "clustered") == 0)
+ /*
+ * We only ever get called if s->bitmap_file is != NULL, so this check
+ * is just here to quiet down static code checkers.
+ */
+ if (!s->bitmap_file)
+ return 1;
+
+ if (strcmp(s->bitmap_file, "clustered") == 0)
major = BITMAP_MAJOR_CLUSTERED;
if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
@@ -308,8 +315,8 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
return 1;
}
if (bmf.pathname[0]) {
- if (strcmp(s->bitmap_file,"none")==0) {
- if (ioctl(fd, SET_BITMAP_FILE, -1)!= 0) {
+ if (strcmp(s->bitmap_file,"none") == 0) {
+ if (ioctl(fd, SET_BITMAP_FILE, -1) != 0) {
pr_err("failed to remove bitmap %s\n",
bmf.pathname);
return 1;
@@ -324,11 +331,11 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
pr_err("cannot get array status for %s\n", devname);
return 1;
}
- if (array.state & (1<<MD_SB_BITMAP_PRESENT)) {
+ if (array.state & (1 << MD_SB_BITMAP_PRESENT)) {
if (strcmp(s->bitmap_file, "none")==0) {
- array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
- if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
- if (array.state & (1<<MD_SB_CLUSTERED))
+ array.state &= ~(1 << MD_SB_BITMAP_PRESENT);
+ if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
+ if (array.state & (1 << MD_SB_CLUSTERED))
pr_err("failed to remove clustered bitmap.\n");
else
pr_err("failed to remove internal bitmap.\n");
@@ -352,7 +359,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
bitmapsize = array.size;
bitmapsize <<= 1;
if (get_dev_size(fd, NULL, &array_size) &&
- array_size > (0x7fffffffULL<<9)) {
+ array_size > (0x7fffffffULL << 9)) {
/* Array is big enough that we cannot trust array.size
* try other approaches
*/
@@ -364,7 +371,9 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
}
if (array.level == 10) {
- int ncopies = (array.layout&255)*((array.layout>>8)&255);
+ int ncopies;
+
+ ncopies = (array.layout & 255) * ((array.layout >> 8) & 255);
bitmapsize = bitmapsize * array.raid_disks / ncopies;
}
@@ -395,38 +404,42 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
mdi = sysfs_read(fd, NULL, GET_BITMAP_LOCATION);
if (mdi)
offset_setable = 1;
- for (d=0; d< st->max_devs; d++) {
+ for (d = 0; d < st->max_devs; d++) {
mdu_disk_info_t disk;
char *dv;
+ int fd2;
+
disk.number = d;
if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
continue;
- if (disk.major == 0 &&
- disk.minor == 0)
+ if (disk.major == 0 && disk.minor == 0)
continue;
- if ((disk.state & (1<<MD_DISK_SYNC))==0)
+ if ((disk.state & (1 << MD_DISK_SYNC)) == 0)
continue;
dv = map_dev(disk.major, disk.minor, 1);
- if (dv) {
- int fd2 = dev_open(dv, O_RDWR);
- if (fd2 < 0)
- continue;
- if (st->ss->load_super(st, fd2, NULL)==0) {
- if (st->ss->add_internal_bitmap(
- st,
- &s->bitmap_chunk, c->delay, s->write_behind,
- bitmapsize, offset_setable,
- major)
- )
- st->ss->write_bitmap(st, fd2, NoUpdate);
- else {
- pr_err("failed to create internal bitmap - chunksize problem.\n");
- close(fd2);
- return 1;
- }
+ if (!dv)
+ continue;
+ fd2 = dev_open(dv, O_RDWR);
+ if (fd2 < 0)
+ continue;
+ rv = st->ss->load_super(st, fd2, NULL);
+ if (!rv) {
+ rv = st->ss->add_internal_bitmap(
+ st, &s->bitmap_chunk, c->delay,
+ s->write_behind, bitmapsize,
+ offset_setable, major);
+ if (!rv) {
+ st->ss->write_bitmap(st, fd2,
+ NodeNumUpdate);
+ } else {
+ pr_err("failed to create internal bitmap - chunksize problem.\n");
}
- close(fd2);
+ } else {
+ pr_err("failed to load super-block.\n");
}
+ close(fd2);
+ if (rv)
+ return 1;
}
if (offset_setable) {
st->ss->getinfo_super(st, mdi, NULL);
@@ -435,8 +448,8 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
mdi->bitmap_offset);
} else {
if (strcmp(s->bitmap_file, "clustered") == 0)
- array.state |= (1<<MD_SB_CLUSTERED);
- array.state |= (1<<MD_SB_BITMAP_PRESENT);
+ array.state |= (1 << MD_SB_CLUSTERED);
+ array.state |= (1 << MD_SB_BITMAP_PRESENT);
rv = ioctl(fd, SET_ARRAY_INFO, &array);
}
if (rv < 0) {
@@ -459,8 +472,8 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
disk.number = d;
if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
continue;
- if ((disk.major==0 && disk.minor==0) ||
- (disk.state & (1<<MD_DISK_REMOVED)))
+ if ((disk.major==0 && disk.minor == 0) ||
+ (disk.state & (1 << MD_DISK_REMOVED)))
continue;
dv = map_dev(disk.major, disk.minor, 1);
if (!dv)
@@ -479,14 +492,14 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
pr_err("cannot find UUID for array!\n");
return 1;
}
- if (CreateBitmap(s->bitmap_file, c->force, (char*)uuid, s->bitmap_chunk,
- c->delay, s->write_behind, bitmapsize, major)) {
+ if (CreateBitmap(s->bitmap_file, c->force, (char*)uuid,
+ s->bitmap_chunk, c->delay, s->write_behind,
+ bitmapsize, major)) {
return 1;
}
bitmap_fd = open(s->bitmap_file, O_RDWR);
if (bitmap_fd < 0) {
- pr_err("weird: %s cannot be opened\n",
- s->bitmap_file);
+ pr_err("weird: %s cannot be opened\n", s->bitmap_file);
return 1;
}
if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) {
@@ -740,6 +753,14 @@ int start_reshape(struct mdinfo *sra, int already_running,
void abort_reshape(struct mdinfo *sra)
{
sysfs_set_str(sra, NULL, "sync_action", "idle");
+ /*
+ * Prior to kernel commit: 23ddff3792f6 ("md: allow suspend_lo and
+ * suspend_hi to decrease as well as increase.")
+ * you could only increase suspend_{lo,hi} unless the region they
+ * covered was empty. So to reset to 0, you need to push suspend_lo
+ * up past suspend_hi first. So to maximize the chance of mdadm
+ * working on all kernels, we want to keep doing that.
+ */
sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
sysfs_set_num(sra, NULL, "suspend_hi", 0);
sysfs_set_num(sra, NULL, "suspend_lo", 0);
@@ -756,6 +777,26 @@ int remove_disks_for_takeover(struct supertype *st,
struct mdinfo *remaining;
int slot;
+ if (st->ss->external) {
+ int rv = 0;
+ struct mdinfo *arrays = st->ss->container_content(st, NULL);
+ /*
+ * container_content returns list of arrays in container
+ * If arrays->next is not NULL it means that there are
+ * 2 arrays in container and operation should be blocked
+ */
+ if (arrays) {
+ if (arrays->next)
+ rv = 1;
+ sysfs_free(arrays);
+ if (rv) {
+ pr_err("Error. Cannot perform operation on /dev/%s\n", st->devnm);
+ pr_err("For this operation it MUST be single array in container\n");
+ return rv;
+ }
+ }
+ }
+
if (sra->array.level == 10)
nr_of_copies = layout & 0xff;
else if (sra->array.level == 1)
@@ -1062,6 +1103,9 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
re->level = 1;
return NULL;
}
+ if (info->array.raid_disks != 2 &&
+ info->new_level == 5)
+ return "Can only convert a 2-device array to RAID5";
if (info->array.raid_disks == 2 &&
info->new_level == 5) {
@@ -2055,6 +2099,10 @@ size_change_error:
* number of devices (On-Line Capacity Expansion) must be
* performed at the level of the container
*/
+ if (fd > 0) {
+ close(fd);
+ fd = -1;
+ }
rv = reshape_container(container, devname, -1, st, &info,
c->force, c->backup_file, c->verbose, 0, 0, 0);
frozen = 0;
@@ -2987,9 +3035,11 @@ static int reshape_array(char *container, int fd, char *devname,
* array. Now that the array has been changed to the right
* level and frozen, we can safely add them.
*/
- if (devlist)
- Manage_subdevs(devname, fd, devlist, verbose,
- 0,NULL, 0);
+ if (devlist) {
+ if (Manage_subdevs(devname, fd, devlist, verbose,
+ 0, NULL, 0))
+ goto release;
+ }
if (reshape.backup_blocks == 0 && data_offset != INVALID_SECTORS)
reshape.backup_blocks = reshape.before.data_disks * info->array.chunk_size/512;
@@ -3503,7 +3553,7 @@ int reshape_container(char *container, char *devname,
int fd;
struct mdstat_ent *mdstat;
char *adev;
- int devid;
+ dev_t devid;
sysfs_free(cc);
@@ -3998,8 +4048,10 @@ static int grow_backup(struct mdinfo *sra,
if (sd->disk.state & (1<<MD_DISK_FAULTY))
continue;
if (sd->disk.state & (1<<MD_DISK_SYNC)) {
- char sbuf[20];
- if (sysfs_get_str(sra, sd, "state", sbuf, 20) < 0 ||
+ char sbuf[100];
+
+ if (sysfs_get_str(sra, sd, "state",
+ sbuf, sizeof(sbuf)) < 0 ||
strstr(sbuf, "faulty") ||
strstr(sbuf, "in_sync") == NULL) {
/* this device is dead */
@@ -4744,7 +4796,7 @@ int Grow_continue_command(char *devname, int fd,
struct mdinfo *cc = NULL;
struct mdstat_ent *mdstat = NULL;
int cfd = -1;
- int fd2 = -1;
+ int fd2;
dprintf("Grow continue from command line called for %s\n",
devname);
@@ -4758,6 +4810,7 @@ int Grow_continue_command(char *devname, int fd,
dprintf("Grow continue is run for ");
if (st->ss->external == 0) {
int d;
+ int cnt = 5;
dprintf_cont("native array (%s)\n", devname);
if (ioctl(fd, GET_ARRAY_INFO, &array.array) < 0) {
pr_err("%s is not an active md array - aborting\n", devname);
@@ -4769,38 +4822,42 @@ int Grow_continue_command(char *devname, int fd,
* FIXME we should really get what we need from
* sysfs
*/
- for (d = 0; d < MAX_DISKS; d++) {
- mdu_disk_info_t disk;
- char *dv;
- int err;
- disk.number = d;
- if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
- continue;
- if (disk.major == 0 && disk.minor == 0)
- continue;
- if ((disk.state & (1 << MD_DISK_ACTIVE)) == 0)
- continue;
- dv = map_dev(disk.major, disk.minor, 1);
- if (!dv)
- continue;
- fd2 = dev_open(dv, O_RDONLY);
- if (fd2 < 0)
- continue;
- err = st->ss->load_super(st, fd2, NULL);
- close(fd2);
- /* invalidate fd2 to avoid possible double close() */
- fd2 = -1;
- if (err)
- continue;
- break;
- }
- if (d == MAX_DISKS) {
- pr_err("Unable to load metadata for %s\n",
- devname);
- ret_val = 1;
- goto Grow_continue_command_exit;
- }
- st->ss->getinfo_super(st, content, NULL);
+ do {
+ for (d = 0; d < MAX_DISKS; d++) {
+ mdu_disk_info_t disk;
+ char *dv;
+ int err;
+ disk.number = d;
+ if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
+ continue;
+ if (disk.major == 0 && disk.minor == 0)
+ continue;
+ if ((disk.state & (1 << MD_DISK_ACTIVE)) == 0)
+ continue;
+ dv = map_dev(disk.major, disk.minor, 1);
+ if (!dv)
+ continue;
+ fd2 = dev_open(dv, O_RDONLY);
+ if (fd2 < 0)
+ continue;
+ err = st->ss->load_super(st, fd2, NULL);
+ close(fd2);
+ if (err)
+ continue;
+ break;
+ }
+ if (d == MAX_DISKS) {
+ pr_err("Unable to load metadata for %s\n",
+ devname);
+ ret_val = 1;
+ goto Grow_continue_command_exit;
+ }
+ st->ss->getinfo_super(st, content, NULL);
+ if (!content->reshape_active)
+ sleep(3);
+ else
+ break;
+ } while (cnt-- > 0);
} else {
char *container;
@@ -4886,7 +4943,6 @@ int Grow_continue_command(char *devname, int fd,
sysfs_init(content, fd2, mdstat->devnm);
close(fd2);
- fd2 = -1;
/* start mdmon in case it is not running
*/
@@ -4916,8 +4972,6 @@ int Grow_continue_command(char *devname, int fd,
ret_val = Grow_continue(fd, st, content, backup_file, 1, 0);
Grow_continue_command_exit:
- if (fd2 > -1)
- close(fd2);
if (cfd > -1)
close(cfd);
st->ss->free_super(st);
diff --git a/Incremental.c b/Incremental.c
index 24fd8276..0f507bb3 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -644,8 +644,7 @@ out:
close(mdfd);
if (policy)
dev_policy_free(policy);
- if (sra)
- sysfs_free(sra);
+ sysfs_free(sra);
return rv;
out_unlock:
map_unlock(&map);
@@ -989,8 +988,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
sizeof(target->uuid)) == 0 &&
sra->array.failed_disks > 0) {
/* This is our target!! */
- if (chosen)
- sysfs_free(chosen);
+ sysfs_free(chosen);
chosen = sra;
sra = NULL;
/* skip to end so we don't check any more */
@@ -1022,8 +1020,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
sra = NULL;
}
next:
- if (sra)
- sysfs_free(sra);
+ sysfs_free(sra);
if (st != st2)
free(st2);
if (dl)
@@ -1037,7 +1034,8 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
char chosen_devname[24]; // 2*11 for int (including signs) + colon + null
devlist.next = NULL;
devlist.used = 0;
- devlist.writemostly = 0;
+ devlist.writemostly = FlagDefault;
+ devlist.failfast = FlagDefault;
devlist.devname = chosen_devname;
sprintf(chosen_devname, "%d:%d", major(stb.st_rdev),
minor(stb.st_rdev));
@@ -1350,8 +1348,12 @@ restart:
if (devnm && strcmp(devnm, me->devnm) != 0)
continue;
- if (devnm && me->metadata[0] == '/') {
+ if (me->metadata[0] == '/') {
char *sl;
+
+ if (!devnm)
+ continue;
+
/* member array, need to work on container */
strncpy(container, me->metadata+1, 32);
container[31] = 0;
diff --git a/Makefile b/Makefile
index 664c79ff..8c8b6589 100644
--- a/Makefile
+++ b/Makefile
@@ -41,7 +41,7 @@ KLIBC=/home/src/klibc/klibc-0.77
KLIBC_GCC = gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32
-CC = $(CROSS_COMPILE)gcc
+CC ?= $(CROSS_COMPILE)gcc
CXFLAGS ?= -ggdb
CWFLAGS = -Wall -Wstrict-prototypes -Wextra -Wno-unused-parameter
ifdef WARN_UNUSED
diff --git a/Manage.c b/Manage.c
index 7e1b94be..5c3d2b9b 100644
--- a/Manage.c
+++ b/Manage.c
@@ -119,8 +119,7 @@ int Manage_ro(char *devname, int fd, int readonly)
}
out:
#ifndef MDASSEMBLE
- if (mdi)
- sysfs_free(mdi);
+ sysfs_free(mdi);
#endif
return rv;
}
@@ -493,14 +492,17 @@ done:
rv = 1;
goto out;
}
- /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
- * was stopped, so We'll do it here just to be sure. Drop any
- * partitions as well...
- */
- if (fd >= 0)
- ioctl(fd, BLKRRPART, 0);
- if (mdi)
- sysfs_uevent(mdi, "change");
+
+ if (get_linux_version() < 2006028) {
+ /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
+ * was stopped, so we'll do it here just to be sure. Drop any
+ * partitions as well...
+ */
+ if (fd >= 0)
+ ioctl(fd, BLKRRPART, 0);
+ if (mdi)
+ sysfs_uevent(mdi, "change");
+ }
if (devnm[0] && use_udev()) {
struct map_ent *mp = map_by_devnm(&map, devnm);
@@ -513,8 +515,7 @@ done:
map_remove(&map, devnm);
map_unlock(&map);
out:
- if (mdi)
- sysfs_free(mdi);
+ sysfs_free(mdi);
return rv;
}
@@ -678,12 +679,17 @@ int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
else
disc.state |= (1 << MD_DISK_CLUSTER_ADD);
}
- if (dv->writemostly == 1)
+ if (dv->writemostly == FlagSet)
disc.state |= 1 << MD_DISK_WRITEMOSTLY;
- if (dv->writemostly == 2)
+ if (dv->writemostly == FlagClear)
disc.state &= ~(1 << MD_DISK_WRITEMOSTLY);
+ if (dv->failfast == FlagSet)
+ disc.state |= 1 << MD_DISK_FAILFAST;
+ if (dv->failfast == FlagClear)
+ disc.state &= ~(1 << MD_DISK_FAILFAST);
remove_partitions(tfd);
- if (update || dv->writemostly > 0) {
+ if (update || dv->writemostly != FlagDefault
+ || dv->failfast != FlagDefault) {
int rv = -1;
tfd = dev_open(dv->devname, O_RDWR);
if (tfd < 0) {
@@ -691,14 +697,22 @@ int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
return -1;
}
- if (dv->writemostly == 1)
+ if (dv->writemostly == FlagSet)
rv = dev_st->ss->update_super(
dev_st, NULL, "writemostly",
devname, verbose, 0, NULL);
- if (dv->writemostly == 2)
+ if (dv->writemostly == FlagClear)
rv = dev_st->ss->update_super(
dev_st, NULL, "readwrite",
devname, verbose, 0, NULL);
+ if (dv->failfast == FlagSet)
+ rv = dev_st->ss->update_super(
+ dev_st, NULL, "failfast",
+ devname, verbose, 0, NULL);
+ if (dv->failfast == FlagClear)
+ rv = dev_st->ss->update_super(
+ dev_st, NULL, "nofailfast",
+ devname, verbose, 0, NULL);
if (update)
rv = dev_st->ss->update_super(
dev_st, NULL, update,
@@ -737,7 +751,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
int raid_slot)
{
unsigned long long ldsize;
- struct supertype *dev_st = NULL;
+ struct supertype *dev_st;
int j;
mdu_disk_info_t disc;
@@ -842,20 +856,19 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
* simply re-add it.
*/
- if (array->not_persistent==0) {
+ if (array->not_persistent == 0) {
dev_st = dup_super(tst);
dev_st->ss->load_super(dev_st, tfd, NULL);
- }
- if (dev_st && dev_st->sb && dv->disposition != 'S') {
- int rv = attempt_re_add(fd, tfd, dv,
- dev_st, tst,
- rdev,
- update, devname,
- verbose,
- array);
- dev_st->ss->free_super(dev_st);
- if (rv)
- return rv;
+ if (dev_st->sb && dv->disposition != 'S') {
+ int rv;
+
+ rv = attempt_re_add(fd, tfd, dv, dev_st, tst,
+ rdev, update, devname,
+ verbose, array);
+ dev_st->ss->free_super(dev_st);
+ if (rv)
+ return rv;
+ }
}
if (dv->disposition == 'M') {
if (verbose > 0)
@@ -879,10 +892,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
continue;
if (disc.major == 0 && disc.minor == 0)
continue;
- found++;
if (!(disc.state & (1<<MD_DISK_SYNC)))
continue;
avail[disc.raid_disk] = 1;
+ found++;
}
array_failed = !enough(array->level, array->raid_disks,
array->layout, 1, avail);
@@ -937,12 +950,19 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
struct mdinfo *mdp;
mdp = sysfs_read(fd, NULL, GET_ARRAY_STATE);
+ if (!mdp) {
+ pr_err("%s unable to read array state.\n", devname);
+ return -1;
+ }
if (strncmp(mdp->sysfs_array_state, "readonly", 8) != 0) {
+ sysfs_free(mdp);
pr_err("%s is not readonly, cannot add journal.\n", devname);
return -1;
}
+ sysfs_free(mdp);
+
tst->ss->getinfo_super(tst, &mdi, NULL);
if (mdi.journal_device_required == 0) {
pr_err("%s does not support journal device.\n", devname);
@@ -955,8 +975,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
int dfd;
if (dv->disposition == 'j')
disc.state |= (1 << MD_DISK_JOURNAL) | (1 << MD_DISK_SYNC);
- if (dv->writemostly == 1)
+ if (dv->writemostly == FlagSet)
disc.state |= 1 << MD_DISK_WRITEMOSTLY;
+ if (dv->failfast == FlagSet)
+ disc.state |= 1 << MD_DISK_FAILFAST;
dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
if (tst->ss->add_to_super(tst, &disc, dfd,
dv->devname, INVALID_SECTORS))
@@ -1000,8 +1022,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
disc.state |= (1 << MD_DISK_CLUSTER_ADD);
}
- if (dv->writemostly == 1)
+ if (dv->writemostly == FlagSet)
disc.state |= (1 << MD_DISK_WRITEMOSTLY);
+ if (dv->failfast == FlagSet)
+ disc.state |= (1 << MD_DISK_FAILFAST);
if (tst->ss->external) {
/* add a disk
* to an external metadata container */
@@ -1118,19 +1142,34 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
*/
if (rdev == 0)
ret = -1;
- else
- ret = sysfs_unique_holder(devnm, rdev);
- if (ret == 0) {
- pr_err("%s is not a member, cannot remove.\n",
- dv->devname);
- close(lfd);
- return -1;
- }
- if (ret >= 2) {
- pr_err("%s is still in use, cannot remove.\n",
- dv->devname);
- close(lfd);
- return -1;
+ else {
+ /*
+ * The drive has already been set to 'faulty', however
+ * monitor might not have had time to process it and the
+ * drive might still have an entry in the 'holders'
+ * directory. Try a few times to avoid a false error
+ */
+ int count = 20;
+
+ do {
+ ret = sysfs_unique_holder(devnm, rdev);
+ if (ret < 2)
+ break;
+ usleep(100 * 1000); /* 100ms */
+ } while (--count > 0);
+
+ if (ret == 0) {
+ pr_err("%s is not a member, cannot remove.\n",
+ dv->devname);
+ close(lfd);
+ return -1;
+ }
+ if (ret >= 2) {
+ pr_err("%s is still in use, cannot remove.\n",
+ dv->devname);
+ close(lfd);
+ return -1;
+ }
}
}
/* FIXME check that it is a current member */
@@ -1161,8 +1200,7 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
"state", "remove");
else
err = -1;
- if (sra)
- sysfs_free(sra);
+ sysfs_free(sra);
}
}
if (err) {
@@ -1410,7 +1448,7 @@ int Manage_subdevs(char *devname, int fd,
}
if (strcmp(dv->devname, "missing") == 0) {
- struct mddev_dev *add_devlist = NULL;
+ struct mddev_dev *add_devlist;
struct mddev_dev **dp;
if (dv->disposition == 'c') {
rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
@@ -1504,9 +1542,10 @@ int Manage_subdevs(char *devname, int fd,
} else {
struct stat stb;
tfd = dev_open(dv->devname, O_RDONLY);
- if (tfd >= 0)
+ if (tfd >= 0) {
fstat(tfd, &stb);
- else {
+ close(tfd);
+ } else {
int open_err = errno;
if (stat(dv->devname, &stb) != 0) {
pr_err("Cannot find %s: %s\n",
@@ -1762,7 +1801,8 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
devlist.next = NULL;
devlist.used = 0;
- devlist.writemostly = 0;
+ devlist.writemostly = FlagDefault;
+ devlist.failfast = FlagDefault;
devlist.devname = devname;
sprintf(devname, "%d:%d", major(devid), minor(devid));
diff --git a/Monitor.c b/Monitor.c
index f19c2e58..802a9d98 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -33,7 +33,7 @@
struct state {
char *devname;
char devnm[32]; /* to sync with mdstat info */
- long utime;
+ unsigned int utime;
int err;
char *spare_group;
int active, working, failed, spare, raid;
@@ -213,6 +213,8 @@ int Monitor(struct mddev_dev *devlist,
if (mdstat)
free_mdstat(mdstat);
mdstat = mdstat_read(oneshot?0:1, 0);
+ if (!mdstat)
+ mdstat_close();
for (st=statelist; st; st=st->next)
if (check_array(st, mdstat, c->test, &info,
@@ -597,7 +599,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
} else
alert("RebuildFinished", dev, NULL, ainfo);
if (sra)
- free(sra);
+ sysfs_free(sra);
}
st->percent = mse->percent;
diff --git a/ReadMe.c b/ReadMe.c
index d40310a9..f5c156f4 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -2,6 +2,7 @@
* mdadm - manage Linux "md" devices aka RAID arrays.
*
* Copyright (C) 2001-2016 Neil Brown <neilb@suse.com>
+ * Copyright (C) 2016-2017 Jes Sorensen <Jes.Sorensen@gmail.com>
*
*
* This program is free software; you can redistribute it and/or modify
@@ -20,15 +21,17 @@
*
* Author: Neil Brown
* Email: <neilb@suse.de>
+ * Maintainer: Jes Sorensen
+ * Email: <Jes.Sorensen@gmail.com>
*/
#include "mdadm.h"
#ifndef VERSION
-#define VERSION "3.4"
+#define VERSION "4.0"
#endif
#ifndef VERS_DATE
-#define VERS_DATE "28th January 2016"
+#define VERS_DATE "2017-01-09"
#endif
char Version[] = "mdadm - v" VERSION " - " VERS_DATE "\n";
@@ -136,6 +139,8 @@ struct option long_options[] = {
{"bitmap-chunk", 1, 0, BitmapChunk},
{"write-behind", 2, 0, WriteBehind},
{"write-mostly",0, 0, WriteMostly},
+ {"failfast", 0, 0, FailFast},
+ {"nofailfast",0, 0, NoFailFast},
{"re-add", 0, 0, ReAdd},
{"homehost", 1, 0, HomeHost},
{"symlinks", 1, 0, Symlinks},
diff --git a/bitmap.c b/bitmap.c
index dab674b4..ccedfd34 100644
--- a/bitmap.c
+++ b/bitmap.c
@@ -47,13 +47,13 @@ mapping_t bitmap_states[] = {
{ NULL, -1 }
};
-const char *bitmap_state(int state_num)
+static const char *bitmap_state(int state_num)
{
char *state = map_num(bitmap_states, state_num);
return state ? state : "Unknown";
}
-const char *human_chunksize(unsigned long bytes)
+static const char *human_chunksize(unsigned long bytes)
{
static char buf[16];
char *suffixes[] = { "B", "KB", "MB", "GB", "TB", NULL };
@@ -95,7 +95,7 @@ static inline int count_dirty_bits_byte(char byte, int num_bits)
return num;
}
-int count_dirty_bits(char *buf, int num_bits)
+static int count_dirty_bits(char *buf, int num_bits)
{
int i, num = 0;
@@ -108,22 +108,7 @@ int count_dirty_bits(char *buf, int num_bits)
return num;
}
-/* calculate the size of the bitmap given the array size and bitmap chunksize */
-unsigned long long bitmap_bits(unsigned long long array_size,
- unsigned long chunksize)
-{
- return (array_size * 512 + chunksize - 1) / chunksize;
-}
-
-unsigned long bitmap_sectors(struct bitmap_super_s *bsb)
-{
- unsigned long long bits = bitmap_bits(__le64_to_cpu(bsb->sync_size),
- __le32_to_cpu(bsb->chunksize));
- int bits_per_sector = 8*512;
- return (bits + bits_per_sector - 1) / bits_per_sector;
-}
-
-bitmap_info_t *bitmap_fd_read(int fd, int brief)
+static bitmap_info_t *bitmap_fd_read(int fd, int brief)
{
/* Note: fd might be open O_DIRECT, so we must be
* careful to align reads properly
@@ -194,54 +179,54 @@ out:
return info;
}
-int bitmap_file_open(char *filename, struct supertype **stp)
+static int
+bitmap_file_open(char *filename, struct supertype **stp, int node_num)
{
int fd;
struct stat stb;
struct supertype *st = *stp;
- if (stat(filename, &stb) < 0) {
- pr_err("failed to find file %s: %s\n",
- filename, strerror(errno));
+ fd = open(filename, O_RDONLY|O_DIRECT);
+ if (fd < 0) {
+ pr_err("failed to open bitmap file %s: %s\n",
+ filename, strerror(errno));
return -1;
}
- if ((S_IFMT & stb.st_mode) == S_IFBLK) {
- fd = open(filename, O_RDONLY|O_DIRECT);
- if (fd < 0) {
- pr_err("failed to open bitmap file %s: %s\n",
- filename, strerror(errno));
- return -1;
- }
+
+ if (fstat(fd, &stb) < 0) {
+ pr_err("failed to determine bitmap file/device type: %s\n",
+ strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ if ((stb.st_mode & S_IFMT) == S_IFBLK) {
/* block device, so we are probably after an internal bitmap */
- if (!st) st = guess_super(fd);
+ if (!st)
+ st = guess_super(fd);
if (!st) {
/* just look at device... */
lseek(fd, 0, 0);
} else if (!st->ss->locate_bitmap) {
pr_err("No bitmap possible with %s metadata\n",
st->ss->name);
+ close(fd);
return -1;
} else {
- if (st->ss->locate_bitmap(st, fd)) {
+ if (st->ss->locate_bitmap(st, fd, node_num)) {
pr_err("%s doesn't have bitmap\n", filename);
+ close(fd);
fd = -1;
}
}
*stp = st;
- } else {
- fd = open(filename, O_RDONLY|O_DIRECT);
- if (fd < 0) {
- pr_err("failed to open bitmap file %s: %s\n",
- filename, strerror(errno));
- return -1;
- }
}
return fd;
}
-__u32 swapl(__u32 l)
+static __u32 swapl(__u32 l)
{
char *c = (char*)&l;
char t= c[0];
@@ -267,7 +252,7 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
int fd, i;
__u32 uuid32[4];
- fd = bitmap_file_open(filename, &st);
+ fd = bitmap_file_open(filename, &st, 0);
if (fd < 0)
return rv;
@@ -348,11 +333,21 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
printf(" Cluster nodes : %d\n", sb->nodes);
printf(" Cluster name : %-64s\n", sb->cluster_name);
for (i = 0; i < (int)sb->nodes; i++) {
- if (i) {
- free(info);
- info = bitmap_fd_read(fd, brief);
- sb = &info->sb;
+ st = NULL;
+ free(info);
+ fd = bitmap_file_open(filename, &st, i);
+ if (fd < 0) {
+ printf(" Unable to open bitmap file on node: %i\n", i);
+
+ continue;
}
+ info = bitmap_fd_read(fd, brief);
+ if (!info) {
+ close(fd);
+ printf(" Unable to read bitmap on node: %i\n", i);
+ continue;
+ }
+ sb = &info->sb;
if (sb->magic != BITMAP_MAGIC)
pr_err("invalid bitmap magic 0x%x, the bitmap file appears to be corrupted\n", sb->magic);
@@ -367,7 +362,7 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n",
info->total_bits, info->dirty_bits,
100.0 * info->dirty_bits / (info->total_bits?:1));
-
+ close(fd);
}
}
diff --git a/config.c b/config.c
index b308b6cc..9b008e34 100644
--- a/config.c
+++ b/config.c
@@ -106,11 +106,13 @@ int match_keyword(char *word)
int len = strlen(word);
int n;
- if (len < 3) return -1;
- for (n=0; keywords[n]; n++) {
- if (strncasecmp(word, keywords[n], len)==0)
+ if (len < 3)
+ return -1;
+ for (n = 0; keywords[n]; n++) {
+ if (strncasecmp(word, keywords[n], len) == 0)
return n;
}
+
return -1;
}
@@ -124,6 +126,7 @@ struct mddev_dev *load_partitions(void)
FILE *f = fopen("/proc/partitions", "r");
char buf[1024];
struct mddev_dev *rv = NULL;
+
if (f == NULL) {
pr_err("cannot open /proc/partitions\n");
return NULL;
@@ -144,8 +147,7 @@ struct mddev_dev *load_partitions(void)
name = map_dev(major, minor, 1);
if (!name)
continue;
- d = xmalloc(sizeof(*d));
- memset(d, 0, sizeof(*d));
+ d = xcalloc(1, sizeof(*d));
d->devname = xstrdup(name);
d->next = rv;
rv = d;
@@ -169,8 +171,7 @@ struct mddev_dev *load_containers(void)
if (ent->metadata_version &&
strncmp(ent->metadata_version, "external:", 9) == 0 &&
!is_subarray(&ent->metadata_version[9])) {
- d = xmalloc(sizeof(*d));
- memset(d, 0, sizeof(*d));
+ d = xcalloc(1, sizeof(*d));
me = map_by_devnm(&map, ent->devnm);
if (me)
d->devname = xstrdup(me->path);
@@ -205,12 +206,12 @@ int parse_auto(char *str, char *msg, int config)
int autof;
if (str == NULL || *str == 0)
autof = 2;
- else if (strcasecmp(str,"no")==0)
+ else if (strcasecmp(str, "no") == 0)
autof = 1;
- else if (strcasecmp(str,"yes")==0)
+ else if (strcasecmp(str, "yes") == 0)
autof = 2;
- else if (strcasecmp(str,"md")==0)
- autof = config?5:3;
+ else if (strcasecmp(str, "md") == 0)
+ autof = config ? 5:3;
else {
/* There might be digits, and maybe a hypen, at the end */
char *e = str + strlen(str);
@@ -220,19 +221,20 @@ int parse_auto(char *str, char *msg, int config)
e--;
if (*e) {
num = atoi(e);
- if (num <= 0) num = 1;
+ if (num <= 0)
+ num = 1;
}
if (e > str && e[-1] == '-')
e--;
len = e - str;
- if ((len == 2 && strncasecmp(str,"md",2)==0)) {
+ if ((len == 2 && strncasecmp(str, "md", 2) == 0)) {
autof = config ? 5 : 3;
- } else if ((len == 3 && strncasecmp(str,"yes",3)==0)) {
+ } else if ((len == 3 && strncasecmp(str, "yes", 3) == 0)) {
autof = 2;
- } else if ((len == 3 && strncasecmp(str,"mdp",3)==0)) {
+ } else if ((len == 3 && strncasecmp(str, "mdp", 3) == 0)) {
autof = config ? 6 : 4;
- } else if ((len == 1 && strncasecmp(str,"p",1)==0) ||
- (len >= 4 && strncasecmp(str,"part",4)==0)) {
+ } else if ((len == 1 && strncasecmp(str, "p", 1) == 0) ||
+ (len >= 4 && strncasecmp(str, "part", 4) == 0)) {
autof = 6;
} else {
pr_err("%s arg of \"%s\" unrecognised: use no,yes,md,mdp,part\n"
@@ -250,56 +252,57 @@ static void createline(char *line)
char *w;
char *ep;
- for (w=dl_next(line); w!=line; w=dl_next(w)) {
+ for (w = dl_next(line); w != line; w = dl_next(w)) {
if (strncasecmp(w, "auto=", 5) == 0)
- createinfo.autof = parse_auto(w+5, "auto=", 1);
+ createinfo.autof = parse_auto(w + 5, "auto=", 1);
else if (strncasecmp(w, "owner=", 6) == 0) {
if (w[6] == 0) {
pr_err("missing owner name\n");
continue;
}
- createinfo.uid = strtoul(w+6, &ep, 10);
+ createinfo.uid = strtoul(w + 6, &ep, 10);
if (*ep != 0) {
struct passwd *pw;
/* must be a name */
- pw = getpwnam(w+6);
+ pw = getpwnam(w + 6);
if (pw)
createinfo.uid = pw->pw_uid;
else
- pr_err("CREATE user %s not found\n", w+6);
+ pr_err("CREATE user %s not found\n",
+ w + 6);
}
} else if (strncasecmp(w, "group=", 6) == 0) {
if (w[6] == 0) {
pr_err("missing group name\n");
continue;
}
- createinfo.gid = strtoul(w+6, &ep, 10);
+ createinfo.gid = strtoul(w + 6, &ep, 10);
if (*ep != 0) {
struct group *gr;
/* must be a name */
- gr = getgrnam(w+6);
+ gr = getgrnam(w + 6);
if (gr)
createinfo.gid = gr->gr_gid;
else
- pr_err("CREATE group %s not found\n", w+6);
+ pr_err("CREATE group %s not found\n",
+ w + 6);
}
} else if (strncasecmp(w, "mode=", 5) == 0) {
if (w[5] == 0) {
pr_err("missing CREATE mode\n");
continue;
}
- createinfo.mode = strtoul(w+5, &ep, 8);
+ createinfo.mode = strtoul(w + 5, &ep, 8);
if (*ep != 0) {
createinfo.mode = 0600;
pr_err("unrecognised CREATE mode %s\n",
- w+5);
+ w + 5);
}
} else if (strncasecmp(w, "metadata=", 9) == 0) {
/* style of metadata to use by default */
int i;
- for (i=0; superlist[i] && !createinfo.supertype; i++)
- createinfo.supertype =
- superlist[i]->match_metadata_desc(w+9);
+ for (i = 0; superlist[i] && !createinfo.supertype; i++)
+ createinfo.supertype = superlist[i]->match_metadata_desc(w + 9);
if (!createinfo.supertype)
pr_err("metadata format %s unknown, ignoring\n",
w+9);
@@ -327,7 +330,7 @@ void devline(char *line)
char *w;
struct conf_dev *cd;
- for (w=dl_next(line); w != line; w=dl_next(w)) {
+ for (w = dl_next(line); w != line; w = dl_next(w)) {
if (w[0] == '/' || strcasecmp(w, "partitions") == 0 ||
strcasecmp(w, "containers") == 0) {
cd = xmalloc(sizeof(*cd));
@@ -335,8 +338,7 @@ void devline(char *line)
cd->next = cdevlist;
cdevlist = cd;
} else {
- pr_err("unreconised word on DEVICE line: %s\n",
- w);
+ pr_err("unreconised word on DEVICE line: %s\n", w);
}
}
}
@@ -379,7 +381,7 @@ void arrayline(char *line)
mis.container = NULL;
mis.member = NULL;
- for (w=dl_next(line); w!=line; w=dl_next(w)) {
+ for (w = dl_next(line); w != line; w = dl_next(w)) {
if (w[0] == '/' || strchr(w, '=') == NULL) {
/* This names the device, or is '<ignore>'.
* The rules match those in create_mddev.
@@ -394,10 +396,9 @@ void arrayline(char *line)
strncmp(w, "/dev/md/", 8) == 0 ||
(w[0] != '/' && w[0] != '<') ||
(strncmp(w, "/dev/md", 7) == 0 &&
- is_number(w+7)) ||
+ is_number(w + 7)) ||
(strncmp(w, "/dev/md_d", 9) == 0 &&
- is_number(w+9))
- ) {
+ is_number(w + 9))) {
/* This is acceptable */;
if (mis.devname)
pr_err("only give one device per ARRAY line: %s and %s\n",
@@ -407,89 +408,91 @@ void arrayline(char *line)
}else {
pr_err("%s is an invalid name for an md device - ignored.\n", w);
}
- } else if (strncasecmp(w, "uuid=", 5)==0 ) {
+ } else if (strncasecmp(w, "uuid=", 5) == 0) {
if (mis.uuid_set)
pr_err("only specify uuid once, %s ignored.\n",
- w);
+ w);
else {
- if (parse_uuid(w+5, mis.uuid))
+ if (parse_uuid(w + 5, mis.uuid))
mis.uuid_set = 1;
else
pr_err("bad uuid: %s\n", w);
}
- } else if (strncasecmp(w, "super-minor=", 12)==0 ) {
+ } else if (strncasecmp(w, "super-minor=", 12) == 0) {
if (mis.super_minor != UnSet)
pr_err("only specify super-minor once, %s ignored.\n",
w);
else {
char *endptr;
- int minor = strtol(w+12, &endptr, 10);
+ int minor = strtol(w + 12, &endptr, 10);
- if (w[12]==0 || endptr[0]!=0 || minor < 0)
+ if (w[12] == 0 || endptr[0] != 0 || minor < 0)
pr_err("invalid super-minor number: %s\n",
- w);
+ w);
else
mis.super_minor = minor;
}
- } else if (strncasecmp(w, "name=", 5)==0) {
+ } else if (strncasecmp(w, "name=", 5) == 0) {
if (mis.name[0])
pr_err("only specify name once, %s ignored.\n",
w);
- else if (strlen(w+5) > 32)
+ else if (strlen(w + 5) > 32)
pr_err("name too long, ignoring %s\n", w);
else
- strcpy(mis.name, w+5);
+ strcpy(mis.name, w + 5);
} else if (strncasecmp(w, "bitmap=", 7) == 0) {
if (mis.bitmap_file)
pr_err("only specify bitmap file once. %s ignored\n",
w);
else
- mis.bitmap_file = xstrdup(w+7);
+ mis.bitmap_file = xstrdup(w + 7);
- } else if (strncasecmp(w, "devices=", 8 ) == 0 ) {
+ } else if (strncasecmp(w, "devices=", 8 ) == 0) {
if (mis.devices)
pr_err("only specify devices once (use a comma separated list). %s ignored\n",
w);
else
- mis.devices = xstrdup(w+8);
- } else if (strncasecmp(w, "spare-group=", 12) == 0 ) {
+ mis.devices = xstrdup(w + 8);
+ } else if (strncasecmp(w, "spare-group=", 12) == 0) {
if (mis.spare_group)
pr_err("only specify one spare group per array. %s ignored.\n",
w);
else
- mis.spare_group = xstrdup(w+12);
+ mis.spare_group = xstrdup(w + 12);
} else if (strncasecmp(w, "level=", 6) == 0 ) {
/* this is mainly for compatability with --brief output */
- mis.level = map_name(pers, w+6);
- } else if (strncasecmp(w, "disks=", 6) == 0 ) {
+ mis.level = map_name(pers, w + 6);
+ } else if (strncasecmp(w, "disks=", 6) == 0) {
/* again, for compat */
- mis.raid_disks = atoi(w+6);
- } else if (strncasecmp(w, "num-devices=", 12) == 0 ) {
+ mis.raid_disks = atoi(w + 6);
+ } else if (strncasecmp(w, "num-devices=", 12) == 0) {
/* again, for compat */
- mis.raid_disks = atoi(w+12);
- } else if (strncasecmp(w, "spares=", 7) == 0 ) {
+ mis.raid_disks = atoi(w + 12);
+ } else if (strncasecmp(w, "spares=", 7) == 0) {
/* for warning if not all spares present */
- mis.spare_disks = atoi(w+7);
+ mis.spare_disks = atoi(w + 7);
} else if (strncasecmp(w, "metadata=", 9) == 0) {
/* style of metadata on the devices. */
int i;
for(i=0; superlist[i] && !mis.st; i++)
- mis.st = superlist[i]->match_metadata_desc(w+9);
+ mis.st = superlist[i]->
+ match_metadata_desc(w + 9);
if (!mis.st)
- pr_err("metadata format %s unknown, ignored.\n", w+9);
+ pr_err("metadata format %s unknown, ignored.\n",
+ w + 9);
} else if (strncasecmp(w, "auto=", 5) == 0 ) {
/* whether to create device special files as needed */
- mis.autof = parse_auto(w+5, "auto type", 0);
+ mis.autof = parse_auto(w + 5, "auto type", 0);
} else if (strncasecmp(w, "member=", 7) == 0) {
/* subarray within a container */
- mis.member = xstrdup(w+7);
+ mis.member = xstrdup(w + 7);
} else if (strncasecmp(w, "container=", 10) == 0) {
- /* the container holding this subarray. Either a device name
- * or a uuid */
- mis.container = xstrdup(w+10);
+ /* The container holding this subarray.
+ * Either a device name or a uuid */
+ mis.container = xstrdup(w + 10);
} else {
pr_err("unrecognised word on ARRAY line: %s\n",
w);
@@ -498,7 +501,8 @@ void arrayline(char *line)
if (mis.uuid_set == 0 && mis.devices == NULL &&
mis.super_minor == UnSet && mis.name[0] == 0 &&
(mis.container == NULL || mis.member == NULL))
- pr_err("ARRAY line %s has no identity information.\n", mis.devname);
+ pr_err("ARRAY line %s has no identity information.\n",
+ mis.devname);
else {
mi = xmalloc(sizeof(*mi));
*mi = mis;
@@ -514,7 +518,7 @@ void mailline(char *line)
{
char *w;
- for (w=dl_next(line); w != line ; w=dl_next(w))
+ for (w = dl_next(line); w != line; w = dl_next(w))
if (alert_email == NULL)
alert_email = xstrdup(w);
}
@@ -524,7 +528,7 @@ void mailfromline(char *line)
{
char *w;
- for (w=dl_next(line); w != line ; w=dl_next(w)) {
+ for (w = dl_next(line); w != line; w = dl_next(w)) {
if (alert_mail_from == NULL)
alert_mail_from = xstrdup(w);
else {
@@ -543,7 +547,7 @@ void programline(char *line)
{
char *w;
- for (w=dl_next(line); w != line ; w=dl_next(w))
+ for (w = dl_next(line); w != line; w = dl_next(w))
if (alert_program == NULL)
alert_program = xstrdup(w);
}
@@ -554,11 +558,11 @@ void homehostline(char *line)
{
char *w;
- for (w=dl_next(line); w != line ; w=dl_next(w)) {
- if (strcasecmp(w, "<ignore>")==0)
+ for (w = dl_next(line); w != line; w = dl_next(w)) {
+ if (strcasecmp(w, "<ignore>") == 0)
require_homehost = 0;
else if (home_host == NULL) {
- if (strcasecmp(w, "<none>")==0)
+ if (strcasecmp(w, "<none>") == 0)
home_host = xstrdup("");
else
home_host = xstrdup(w);
@@ -571,9 +575,9 @@ void homeclusterline(char *line)
{
char *w;
- for (w=dl_next(line); w != line ; w=dl_next(w)) {
+ for (w = dl_next(line); w != line; w = dl_next(w)) {
if (home_cluster == NULL) {
- if (strcasecmp(w, "<none>")==0)
+ if (strcasecmp(w, "<none>") == 0)
home_cluster = xstrdup("");
else
home_cluster = xstrdup(w);
@@ -599,7 +603,9 @@ void autoline(char *line)
return;
auto_seen = 1;
- /* Parse the 'auto' line creating policy statements for the 'auto' policy.
+ /*
+ * Parse the 'auto' line creating policy statements for the 'auto'
+ * policy.
*
* The default is 'yes' but the 'auto' line might over-ride that.
* Words in the line are processed in order with the first
@@ -625,7 +631,8 @@ void autoline(char *line)
* been seen gets an appropriate auto= entry.
*/
- /* If environment variable MDADM_CONF_AUTO is defined, then
+ /*
+ * If environment variable MDADM_CONF_AUTO is defined, then
* it is prepended to the auto line. This allow a script
* to easily disable some metadata types.
*/
@@ -647,7 +654,7 @@ void autoline(char *line)
;
seen = xcalloc(super_cnt, 1);
- for (w = dl_next(line); w != line ; w = dl_next(w)) {
+ for (w = dl_next(line); w != line; w = dl_next(w)) {
char *val;
if (strcasecmp(w, "yes") == 0) {
@@ -675,22 +682,21 @@ void autoline(char *line)
} else
continue;
- if (strcasecmp(w+1, "all") == 0) {
+ if (strcasecmp(w + 1, "all") == 0) {
dflt = val;
break;
}
for (i = 0; superlist[i]; i++) {
const char *version = superlist[i]->name;
- if (strcasecmp(w+1, version) == 0)
+ if (strcasecmp(w + 1, version) == 0)
break;
/* 1 matches 1.x, 0 matches 0.90 */
- if (version[1] == '.' &&
- strlen(w+1) == 1 &&
+ if (version[1] == '.' && strlen(w + 1) == 1 &&
w[1] == version[0])
break;
/* 1.anything matches 1.x */
if (strcmp(version, "1.x") == 0 &&
- strncmp(w+1, "1.", 2) == 0)
+ strncmp(w + 1, "1.", 2) == 0)
break;
}
if (superlist[i] == NULL)
@@ -699,12 +705,14 @@ void autoline(char *line)
if (seen[i])
/* already know about this metadata */
continue;
- policy_add(rule_policy, pol_auto, val, pol_metadata, superlist[i]->name, NULL);
+ policy_add(rule_policy, pol_auto, val, pol_metadata,
+ superlist[i]->name, NULL);
seen[i] = 1;
}
for (i = 0; i < super_cnt; i++)
if (!seen[i])
- policy_add(rule_policy, pol_auto, dflt, pol_metadata, superlist[i]->name, NULL);
+ policy_add(rule_policy, pol_auto, dflt, pol_metadata,
+ superlist[i]->name, NULL);
free(seen);
}
@@ -720,7 +728,7 @@ void set_conffile(char *file)
void conf_file(FILE *f)
{
char *line;
- while ((line=conf_line(f))) {
+ while ((line = conf_line(f))) {
switch(match_keyword(line)) {
case Devices:
devline(line);
@@ -791,9 +799,9 @@ void conf_file_or_dir(FILE *f)
if (dp->d_name[0] == '.')
continue;
l = strlen(dp->d_name);
- if (l < 6 || strcmp(dp->d_name+l-5, ".conf") != 0)
+ if (l < 6 || strcmp(dp->d_name + l - 5, ".conf") != 0)
continue;
- fn = xmalloc(sizeof(*fn)+l+1);
+ fn = xmalloc(sizeof(*fn) + l + 1);
strcpy(fn->name, dp->d_name);
for (p = &list;
*p && strcmp((*p)->name, fn->name) < 0;
@@ -836,7 +844,7 @@ void load_conffile(void)
confdir = DefaultConfDir;
}
- if (strcmp(conffile, "partitions")==0) {
+ if (strcmp(conffile, "partitions") == 0) {
char *list = dl_strdup("DEV");
dl_init(list);
dl_add(list, dl_strdup("partitions"));
@@ -849,8 +857,7 @@ void load_conffile(void)
* have a working mdadm, we read /etc/mdadm/mdadm.conf
* if /etc/mdadm.conf doesn't exist
*/
- if (f == NULL &&
- conffile == DefaultConfFile) {
+ if (f == NULL && conffile == DefaultConfFile) {
f = fopen(DefaultAltConfFile, "r");
if (f) {
conffile = DefaultAltConfFile;
@@ -923,8 +930,8 @@ struct mddev_ident *conf_get_ident(char *dev)
struct mddev_ident *rv;
load_conffile();
rv = mddevlist;
- while (dev && rv && (rv->devname == NULL
- || !devname_matches(dev, rv->devname)))
+ while (dev && rv && (rv->devname == NULL ||
+ !devname_matches(dev, rv->devname)))
rv = rv->next;
return rv;
}
@@ -959,10 +966,10 @@ struct mddev_dev *conf_get_devs()
append_dlist(&dlist, load_containers());
}
- for (cd=cdevlist; cd; cd=cd->next) {
- if (strcasecmp(cd->name, "partitions")==0)
+ for (cd = cdevlist; cd; cd = cd->next) {
+ if (strcasecmp(cd->name, "partitions") == 0)
append_dlist(&dlist, load_partitions());
- else if (strcasecmp(cd->name, "containers")==0)
+ else if (strcasecmp(cd->name, "containers") == 0)
append_dlist(&dlist, load_containers());
else {
glob(cd->name, flags, NULL, &globbuf);
@@ -970,9 +977,9 @@ struct mddev_dev *conf_get_devs()
}
}
if (flags & GLOB_APPEND) {
- for (i=0; i<globbuf.gl_pathc; i++) {
- struct mddev_dev *t = xmalloc(sizeof(*t));
- memset(t, 0, sizeof(*t));
+ for (i = 0; i < globbuf.gl_pathc; i++) {
+ struct mddev_dev *t;
+ t = xcalloc(1, sizeof(*t));
t->devname = xstrdup(globbuf.gl_pathv[i]);
t->next = dlist;
dlist = t;
@@ -990,7 +997,7 @@ int conf_test_dev(char *devname)
if (cdevlist == NULL)
/* allow anything by default */
return 1;
- for (cd = cdevlist ; cd ; cd = cd->next) {
+ for (cd = cdevlist; cd; cd = cd->next) {
if (strcasecmp(cd->name, "partitions") == 0)
return 1;
if (fnmatch(cd->name, devname, FNM_PATHNAME) == 0)
@@ -1007,7 +1014,7 @@ int conf_test_metadata(const char *version, struct dev_policy *pol, int is_homeh
* else 'yes'.
*/
struct dev_policy *p;
- int no=0, found_homehost=0;
+ int no = 0, found_homehost = 0;
load_conffile();
pol = pol_find(pol, pol_auto);
@@ -1039,9 +1046,9 @@ int match_oneof(char *devices, char *devname)
if (!devices)
devices = p + strlen(p);
if (devices-p < 1024) {
- strncpy(patn, p, devices-p);
+ strncpy(patn, p, devices - p);
patn[devices-p] = 0;
- if (fnmatch(patn, devname, FNM_PATHNAME)==0)
+ if (fnmatch(patn, devname, FNM_PATHNAME) == 0)
return 1;
}
if (*devices == ',')
@@ -1070,11 +1077,9 @@ int devname_matches(char *name, char *match)
else if (strncmp(match, "/dev/", 5) == 0)
match += 5;
- if (strncmp(name, "md", 2) == 0 &&
- isdigit(name[2]))
+ if (strncmp(name, "md", 2) == 0 && isdigit(name[2]))
name += 2;
- if (strncmp(match, "md", 2) == 0 &&
- isdigit(match[2]))
+ if (strncmp(match, "md", 2) == 0 && isdigit(match[2]))
match += 2;
return (strcmp(name, match) == 0);
@@ -1097,8 +1102,7 @@ int conf_name_is_free(char *name)
if (dev->name[0] && devname_matches(name, dev->name))
return 0;
sprintf(nbuf, "%d", dev->super_minor);
- if (dev->super_minor != UnSet &&
- devname_matches(name, nbuf))
+ if (dev->super_minor != UnSet && devname_matches(name, nbuf))
return 0;
}
return 1;
@@ -1142,10 +1146,8 @@ struct mddev_ident *conf_match(struct supertype *st,
array_list->devname);
continue;
}
- if (!array_list->uuid_set &&
- !array_list->name[0] &&
- !array_list->devices &&
- array_list->super_minor == UnSet) {
+ if (!array_list->uuid_set && !array_list->name[0] &&
+ !array_list->devices && array_list->super_minor == UnSet) {
if (verbose >= 2 && array_list->devname)
pr_err("%s doesn't have any identifying information.\n",
array_list->devname);
diff --git a/debian/changelog b/debian/changelog
index 557e7414..ae37eff7 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+mdadm (4.0-1) UNRELEASED; urgency=medium
+
+ * New upstream release.
+
+ -- Dimitri John Ledkov <xnox@ubuntu.com> Fri, 12 May 2017 12:31:46 +0100
+
mdadm (3.4-4) unstable; urgency=high
* Bring in changes from Ubuntu, to make the two packages in sync:
diff --git a/inventory b/inventory
index ace5df04..f0277d2e 100755
--- a/inventory
+++ b/inventory
@@ -23,6 +23,7 @@ ANNOUNCE-3.3.2
ANNOUNCE-3.3.3
ANNOUNCE-3.3.4
ANNOUNCE-3.4
+ANNOUNCE-4.0
Assemble.c
Build.c
COPYING
@@ -242,6 +243,7 @@ tests/19raid6check
tests/19raid6repair
tests/19repair-does-not-destroy
tests/20raid5journal
+tests/21raid5cache
tests/ToTest
tests/check
tests/env-ddf-template
diff --git a/lib.c b/lib.c
index 6808f62d..b640634e 100644
--- a/lib.c
+++ b/lib.c
@@ -32,7 +32,7 @@
int get_mdp_major(void)
{
-static int mdp_major = -1;
+ static int mdp_major = -1;
FILE *fl;
char *w;
int have_block = 0;
@@ -41,27 +41,30 @@ static int mdp_major = -1;
if (mdp_major != -1)
return mdp_major;
+
fl = fopen("/proc/devices", "r");
if (!fl)
return -1;
+
while ((w = conf_word(fl, 1))) {
- if (have_block && strcmp(w, "devices:")==0)
+ if (have_block && strcmp(w, "devices:") == 0)
have_devices = 1;
- have_block = (strcmp(w, "Block")==0);
+ have_block = (strcmp(w, "Block") == 0);
if (isdigit(w[0]))
last_num = atoi(w);
- if (have_devices && strcmp(w, "mdp")==0)
+ if (have_devices && strcmp(w, "mdp") == 0)
mdp_major = last_num;
free(w);
}
fclose(fl);
+
return mdp_major;
}
char *devid2kname(int devid)
{
char path[30];
- char link[200];
+ char link[PATH_MAX];
static char devnm[32];
char *cp;
int n;
@@ -72,19 +75,37 @@ char *devid2kname(int devid)
*/
sprintf(path, "/sys/dev/block/%d:%d", major(devid),
minor(devid));
- n = readlink(path, link, sizeof(link)-1);
+ n = readlink(path, link, sizeof(link) - 1);
if (n > 0) {
link[n] = 0;
cp = strrchr(link, '/');
if (cp) {
- strcpy(devnm, cp+1);
+ strcpy(devnm, cp + 1);
return devnm;
}
}
return NULL;
}
-char *devid2devnm(int devid)
+char *stat2kname(struct stat *st)
+{
+ if ((S_IFMT & st->st_mode) != S_IFBLK)
+ return NULL;
+
+ return devid2kname(st->st_rdev);
+}
+
+char *fd2kname(int fd)
+{
+ struct stat stb;
+
+ if (fstat(fd, &stb) == 0)
+ return stat2kname(&stb);
+
+ return NULL;
+}
+
+char *devid2devnm(dev_t devid)
{
char path[30];
char link[200];
@@ -99,9 +120,8 @@ char *devid2devnm(int devid)
* or
* ...../block/md_FOO
*/
- sprintf(path, "/sys/dev/block/%d:%d", major(devid),
- minor(devid));
- n = readlink(path, link, sizeof(link)-1);
+ sprintf(path, "/sys/dev/block/%d:%d", major(devid), minor(devid));
+ n = readlink(path, link, sizeof(link) - 1);
if (n > 0) {
link[n] = 0;
cp = strstr(link, "/block/");
@@ -121,6 +141,7 @@ char *devid2devnm(int devid)
(minor(devid)>>MdpMinorShift));
else
return NULL;
+
return devnm;
}
@@ -128,14 +149,17 @@ char *stat2devnm(struct stat *st)
{
if ((S_IFMT & st->st_mode) != S_IFBLK)
return NULL;
+
return devid2devnm(st->st_rdev);
}
char *fd2devnm(int fd)
{
struct stat stb;
+
if (fstat(fd, &stb) == 0)
return stat2devnm(&stb);
+
return NULL;
}
@@ -164,8 +188,8 @@ int add_dev(const char *name, const struct stat *stb, int flag, struct FTW *s)
if ((stb->st_mode&S_IFMT)== S_IFBLK) {
char *n = xstrdup(name);
struct devmap *dm = xmalloc(sizeof(*dm));
- if (strncmp(n, "/dev/./", 7)==0)
- strcpy(n+4, name+6);
+ if (strncmp(n, "/dev/./", 7) == 0)
+ strcpy(n + 4, name + 6);
if (dm) {
dm->major = major(stb->st_rdev);
dm->minor = minor(stb->st_rdev);
@@ -174,6 +198,7 @@ int add_dev(const char *name, const struct stat *stb, int flag, struct FTW *s)
devlist = dm;
}
}
+
return 0;
}
@@ -183,12 +208,16 @@ int add_dev_1(const char *name, const struct stat *stb, int flag)
{
return add_dev(name, stb, flag, NULL);
}
-int nftw(const char *path, int (*han)(const char *name, const struct stat *stb, int flag, struct FTW *s), int nopenfd, int flags)
+int nftw(const char *path,
+ int (*han)(const char *name, const struct stat *stb,
+ int flag, struct FTW *s), int nopenfd, int flags)
{
return ftw(path, add_dev_1, nopenfd);
}
#else
-int nftw(const char *path, int (*han)(const char *name, const struct stat *stb, int flag, struct FTW *s), int nopenfd, int flags)
+int nftw(const char *path,
+ int (*han)(const char *name, const struct stat *stb,
+ int flag, struct FTW *s), int nopenfd, int flags)
{
return 0;
}
@@ -211,7 +240,7 @@ char *map_dev_preferred(int major, int minor, int create,
int did_check = 0;
if (major == 0 && minor == 0)
- return NULL;
+ return NULL;
retry:
if (!devlist_ready) {
@@ -223,19 +252,17 @@ char *map_dev_preferred(int major, int minor, int create,
free(d->name);
free(d);
}
- if (lstat(dev, &stb)==0 &&
- S_ISLNK(stb.st_mode))
+ if (lstat(dev, &stb) == 0 && S_ISLNK(stb.st_mode))
dev = "/dev/.";
nftw(dev, add_dev, 10, FTW_PHYS);
devlist_ready=1;
did_check = 1;
}
- for (p=devlist; p; p=p->next)
- if (p->major == major &&
- p->minor == minor) {
- if (strncmp(p->name, "/dev/md/",8) == 0
- || (prefer && strstr(p->name, prefer))) {
+ for (p = devlist; p; p = p->next)
+ if (p->major == major && p->minor == minor) {
+ if (strncmp(p->name, "/dev/md/",8) == 0 ||
+ (prefer && strstr(p->name, prefer))) {
if (preferred == NULL ||
strlen(p->name) < strlen(preferred))
preferred = p->name;
@@ -274,14 +301,16 @@ char *conf_word(FILE *file, int allow_key)
int wordfound = 0;
char *word = xmalloc(wsize);
- while (wordfound==0) {
+ while (wordfound == 0) {
/* at the end of a word.. */
c = getc(file);
if (c == '#')
while (c != EOF && c != '\n')
c = getc(file);
- if (c == EOF) break;
- if (c == '\n') continue;
+ if (c == EOF)
+ break;
+ if (c == '\n')
+ continue;
if (c != ' ' && c != '\t' && ! allow_key) {
ungetc(c, file);
@@ -294,9 +323,11 @@ char *conf_word(FILE *file, int allow_key)
c = getc(file);
if (c != EOF && c != '\n' && c != '#') {
/* we really have a character of a word, so start saving it */
- while (c != EOF && c != '\n' && (quote || (c!=' ' && c != '\t'))) {
+ while (c != EOF && c != '\n' &&
+ (quote || (c != ' ' && c != '\t'))) {
wordfound = 1;
- if (quote && c == quote) quote = 0;
+ if (quote && c == quote)
+ quote = 0;
else if (quote == 0 && (c == '\'' || c == '"'))
quote = c;
else {
@@ -312,12 +343,13 @@ char *conf_word(FILE *file, int allow_key)
* in /proc/mdstat instead of
* "active (auto-read-only)"
*/
- if (c == '(' && len >= 6
- && strncmp(word+len-6, "active", 6) == 0)
+ if (c == '(' && len >= 6 &&
+ strncmp(word + len - 6, "active", 6) == 0)
c = ' ';
}
}
- if (c != EOF) ungetc(c, file);
+ if (c != EOF)
+ ungetc(c, file);
}
word[len] = 0;
@@ -386,7 +418,7 @@ void print_escape(char *str)
/* print str, but change space and tab to '_'
* as is suitable for device names
*/
- for (; *str ; str++) {
+ for (; *str; str++) {
switch (*str) {
case ' ':
case '\t':
@@ -417,9 +449,9 @@ int use_udev(void)
struct stat stb;
if (use < 0) {
- use = ((stat("/dev/.udev", &stb) == 0
- || stat("/run/udev", &stb) == 0)
- && check_env("MDADM_NO_UDEV") == 0);
+ use = ((stat("/dev/.udev", &stb) == 0 ||
+ stat("/run/udev", &stb) == 0) &&
+ check_env("MDADM_NO_UDEV") == 0);
}
return use;
}
@@ -449,13 +481,14 @@ char *conf_line(FILE *file)
char *list;
w = conf_word(file, 1);
- if (w == NULL) return NULL;
+ if (w == NULL)
+ return NULL;
list = dl_strdup(w);
free(w);
dl_init(list);
- while ((w = conf_word(file,0))){
+ while ((w = conf_word(file, 0))){
char *w2 = dl_strdup(w);
free(w);
dl_add(list, w2);
@@ -467,7 +500,7 @@ char *conf_line(FILE *file)
void free_line(char *line)
{
char *w;
- for (w=dl_next(line); w != line; w=dl_next(line)) {
+ for (w = dl_next(line); w != line; w = dl_next(line)) {
dl_del(w);
dl_free(w);
}
diff --git a/managemon.c b/managemon.c
index 6d1b3d85..3c1d4cb0 100644
--- a/managemon.c
+++ b/managemon.c
@@ -115,6 +115,8 @@ static void close_aa(struct active_array *aa)
for (d = aa->info.devs; d; d = d->next) {
close(d->recovery_fd);
close(d->state_fd);
+ close(d->bb_fd);
+ close(d->ubb_fd);
}
if (aa->action_fd >= 0)
@@ -433,6 +435,21 @@ static int disk_init_and_add(struct mdinfo *disk, struct mdinfo *clone,
close(disk->recovery_fd);
return -1;
}
+ disk->bb_fd = sysfs_open2(aa->info.sys_name, disk->sys_name,
+ "bad_blocks");
+ if (disk->bb_fd < 0) {
+ close(disk->recovery_fd);
+ close(disk->state_fd);
+ return -1;
+ }
+ disk->ubb_fd = sysfs_open2(aa->info.sys_name, disk->sys_name,
+ "unacknowledged_bad_blocks");
+ if (disk->ubb_fd < 0) {
+ close(disk->recovery_fd);
+ close(disk->state_fd);
+ close(disk->bb_fd);
+ return -1;
+ }
disk->prev_state = read_dev_state(disk->state_fd);
disk->curr_state = disk->prev_state;
disk->next = aa->info.devs;
diff --git a/mapfile.c b/mapfile.c
index 243ded18..c89d403f 100644
--- a/mapfile.c
+++ b/mapfile.c
@@ -374,7 +374,7 @@ void RebuildMap(void)
char dn[30];
int dfd;
int ok;
- int devid;
+ dev_t devid;
struct supertype *st;
char *subarray = NULL;
char *path;
diff --git a/md.4 b/md.4
index f1b88ee6..5bdf7a7b 100644
--- a/md.4
+++ b/md.4
@@ -916,6 +916,60 @@ slow). The extra latency of the remote link will not slow down normal
operations, but the remote system will still have a reasonably
up-to-date copy of all data.
+.SS FAILFAST
+
+From Linux 4.10,
+.I
+md
+supports FAILFAST for RAID1 and RAID10 arrays. This is a flag that
+can be set on individual drives, though it is usually set on all
+drives, or no drives.
+
+When
+.I md
+sends an I/O request to a drive that is marked as FAILFAST, and when
+the array could survive the loss of that drive without losing data,
+.I md
+will request that the underlying device does not perform any retries.
+This means that a failure will be reported to
+.I md
+promptly, and it can mark the device as faulty and continue using the
+other device(s).
+.I md
+cannot control the timeout that the underlying devices use to
+determine failure. Any changes desired to that timeout must be set
+explicitly on the underlying device, separately from using
+.IR mdadm .
+
+If a FAILFAST request does fail, and if it is still safe to mark the
+device as faulty without data loss, that will be done and the array
+will continue functioning on a reduced number of devices. If it is not
+possible to safely mark the device as faulty,
+.I md
+will retry the request without disabling retries in the underlying
+device. In any case,
+.I md
+will not attempt to repair read errors on a device marked as FAILFAST
+by writing out the correct data. It will just mark the device as faulty.
+
+FAILFAST is appropriate for storage arrays that have a low probability
+of true failure, but will sometimes introduce unacceptable delays to
+I/O requests while performing internal maintenance. The value of
+setting FAILFAST involves a trade-off. The gain is that the chance of
+unacceptable delays is substantially reduced. The cost is that the
+unlikely event of data-loss on one device is slightly more likely to
+result in data-loss for the array.
+
+When a device in an array using FAILFAST is marked as faulty, it will
+usually become usable again in a short while.
+.I mdadm
+makes no attempt to detect that possibility. Some separate
+mechanism, tuned to the specific details of the expected failure modes,
+needs to be created to monitor devices to see when they return to full
+functionality, and to then re-add them to the array. In order for
+this "re-add" functionality to be effective, an array using FAILFAST
+should always have a write-intent bitmap.
+
.SS RESTRIPING
.IR Restriping ,
diff --git a/md_p.h b/md_p.h
index 0d691fbc..dc9fec16 100644
--- a/md_p.h
+++ b/md_p.h
@@ -89,6 +89,7 @@
* read requests will only be sent here in
* dire need
*/
+#define MD_DISK_FAILFAST 10 /* Fewer retries, more failures */
#define MD_DISK_REPLACEMENT 17
#define MD_DISK_JOURNAL 18 /* disk is used as the write journal in RAID-5/6 */
diff --git a/mdadm.8.in b/mdadm.8.in
index 7bae49d8..f789d434 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -5,7 +5,7 @@
.\" the Free Software Foundation; either version 2 of the License, or
.\" (at your option) any later version.
.\" See file COPYING in distribution for details.
-.TH MDADM 8 "" v3.4
+.TH MDADM 8 "" v4.0
.SH NAME
mdadm \- manage MD devices
.I aka
@@ -459,7 +459,7 @@ number of spare devices.
.TP
.BR \-z ", " \-\-size=
-Amount (in Kibibytes) of space to use from each drive in RAID levels 1/4/5/6.
+Amount (in Kilobytes) of space to use from each drive in RAID levels 1/4/5/6.
This must be a multiple of the chunk size, and must leave about 128Kb
of space at the end of the drive for the RAID superblock.
If this is not specified
@@ -467,7 +467,7 @@ If this is not specified
size, though if there is a variance among the drives of greater than 1%, a warning is
issued.
-A suffix of 'M' or 'G' can be given to indicate Megabytes or
+A suffix of 'K', 'M' or 'G' can be given to indicate Kilobytes, Megabytes or
Gigabytes respectively.
Sometimes a replacement drive can be a little smaller than the
@@ -534,7 +534,7 @@ problems the array can be made bigger again with no loss with another
.B "\-\-grow \-\-array\-size="
command.
-A suffix of 'M' or 'G' can be given to indicate Megabytes or
+A suffix of 'K', 'M' or 'G' can be given to indicate Kilobytes, Megabytes or
Gigabytes respectively.
A value of
.B max
@@ -543,7 +543,7 @@ amount of available space is.
.TP
.BR \-c ", " \-\-chunk=
-Specify chunk size of kibibytes. The default when creating an
+Specify chunk size in kilobytes. The default when creating an
array is 512KB. To ensure compatibility with earlier versions, the
default when building an array with no persistent metadata is 64KB.
This is only meaningful for RAID0, RAID4, RAID5, RAID6, and RAID10.
@@ -551,7 +551,7 @@ This is only meaningful for RAID0, RAID4, RAID5, RAID6, and RAID10.
RAID4, RAID5, RAID6, and RAID10 require the chunk size to be a power
of 2. In any case it must be a multiple of 4KB.
-A suffix of 'M' or 'G' can be given to indicate Megabytes or
+A suffix of 'K', 'M' or 'G' can be given to indicate Kilobytes, Megabytes or
Gigabytes respectively.
.TP
@@ -737,7 +737,7 @@ When using an
bitmap, the chunksize defaults to 64Meg, or larger if necessary to
fit the bitmap into the available space.
-A suffix of 'M' or 'G' can be given to indicate Megabytes or
+A suffix of 'K', 'M' or 'G' can be given to indicate Kilobytes, Megabytes or
Gigabytes respectively.
.TP
@@ -747,7 +747,7 @@ subsequent devices listed in a
.BR \-\-create ,
or
.B \-\-add
-command will be flagged as 'write-mostly'. This is valid for RAID1
+command will be flagged as 'write\-mostly'. This is valid for RAID1
only and means that the 'md' driver will avoid reading from these
devices if at all possible. This can be useful if mirroring over a
slow link.
@@ -762,6 +762,25 @@ mode, and write-behind is only attempted on drives marked as
.IR write-mostly .
.TP
+.BR \-\-failfast
+subsequent devices listed in a
+.B \-\-create
+or
+.B \-\-add
+command will be flagged as 'failfast'. This is valid for RAID1 and
+RAID10 only. IO requests to these devices will be encouraged to fail
+quickly rather than cause long delays due to error handling. Also no
+attempt is made to repair a read error on these devices.
+
+If an array becomes degraded so that the 'failfast' device is the only
+usable device, the 'failfast' flag will then be ignored and extended
+delays will be preferred to complete failure.
+
+The 'failfast' flag is appropriate for storage arrays which have a
+low probability of true failure, but which may sometimes
+cause unacceptable delays due to internal maintenance functions.
+
+.TP
.BR \-\-assume\-clean
Tell
.I mdadm
@@ -808,7 +827,8 @@ an array which was originally created using a different version of
which computed a different offset.
Setting the offset explicitly over-rides the default. The value given
-is in Kilobytes unless an 'M' or 'G' suffix is given.
+is in Kilobytes unless a suffix of 'K', 'M' or 'G' is used to explicitly
+indicate Kilobytes, Megabytes or Gigabytes respectively.
Since Linux 3.4,
.B \-\-data\-offset
@@ -1443,6 +1463,25 @@ number. The receiving node must acknowledge this message
with \-\-cluster\-confirm. Valid arguments are <slot>:<devicename> in case
the device is found or <slot>:missing in case the device is not found.
+.TP
+.BR \-\-add-journal
+Recreate journal for RAID-4/5/6 array that lost a journal device. In the
+current implementation, this command cannot add a journal to an array
+that had a failed journal. To avoid interrupting on-going write operations,
+.B \-\-add-journal
+only works for an array in Read-Only state.
+
+.TP
+.BR \-\-failfast
+Subsequent devices that are added or re\-added will have
+the 'failfast' flag set. This is only valid for RAID1 and RAID10 and
+means that the 'md' driver will avoid long timeouts on error handling
+where possible.
+.TP
+.BR \-\-nofailfast
+Subsequent devices that are re\-added will be re\-added without
+the 'failfast' flag set.
+
.P
Each of these options requires that the first device listed is the array
to be acted upon, and the remainder are component devices to be added,
diff --git a/mdadm.c b/mdadm.c
index 51e16f3f..c3a265b8 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -89,7 +89,8 @@ int main(int argc, char *argv[])
int oneshot = 0;
int spare_sharing = 1;
struct supertype *ss = NULL;
- int writemostly = 0;
+ enum flag_mode writemostly = FlagDefault;
+ enum flag_mode failfast = FlagDefault;
char *shortopt = short_options;
int dosyslog = 0;
int rebuild_map = 0;
@@ -143,9 +144,9 @@ int main(int argc, char *argv[])
continue;
case 'b':
- if (mode == ASSEMBLE || mode == BUILD || mode == CREATE
- || mode == GROW || mode == INCREMENTAL
- || mode == MANAGE)
+ if (mode == ASSEMBLE || mode == BUILD ||
+ mode == CREATE || mode == GROW ||
+ mode == INCREMENTAL || mode == MANAGE)
break; /* b means bitmap */
case Brief:
c.brief = 1;
@@ -282,8 +283,8 @@ int main(int argc, char *argv[])
} else {
/* special case of -c --help */
if ((opt == 'c' || opt == ConfigFile) &&
- ( strncmp(optarg, "--h", 3)==0 ||
- strncmp(optarg, "-h", 2)==0)) {
+ (strncmp(optarg, "--h", 3) == 0 ||
+ strncmp(optarg, "-h", 2) == 0)) {
fputs(Help_config, stdout);
exit(0);
}
@@ -295,6 +296,7 @@ int main(int argc, char *argv[])
dv->devname = optarg;
dv->disposition = devmode;
dv->writemostly = writemostly;
+ dv->failfast = failfast;
dv->used = 0;
dv->next = NULL;
*devlistend = dv;
@@ -351,6 +353,7 @@ int main(int argc, char *argv[])
dv->devname = optarg;
dv->disposition = devmode;
dv->writemostly = writemostly;
+ dv->failfast = failfast;
dv->used = 0;
dv->next = NULL;
*devlistend = dv;
@@ -409,12 +412,20 @@ int main(int argc, char *argv[])
case O(CREATE,'W'):
case O(CREATE,WriteMostly):
/* set write-mostly for following devices */
- writemostly = 1;
+ writemostly = FlagSet;
continue;
case O(MANAGE,'w'):
/* clear write-mostly for following devices */
- writemostly = 2;
+ writemostly = FlagClear;
+ continue;
+
+ case O(MANAGE,FailFast):
+ case O(CREATE,FailFast):
+ failfast = FlagSet;
+ continue;
+ case O(MANAGE,NoFailFast):
+ failfast = FlagClear;
continue;
case O(GROW,'z'):
@@ -424,14 +435,12 @@ int main(int argc, char *argv[])
pr_err("size may only be specified once. Second value is %s.\n", optarg);
exit(2);
}
- if (strcmp(optarg, "max")==0)
+ if (strcmp(optarg, "max") == 0)
s.size = MAX_SIZE;
else {
s.size = parse_size(optarg);
- if (s.size == INVALID_SECTORS ||
- s.size < 8) {
- pr_err("invalid size: %s\n",
- optarg);
+ if (s.size == INVALID_SECTORS || s.size < 8) {
+ pr_err("invalid size: %s\n", optarg);
exit(2);
}
/* convert sectors to K */
@@ -463,8 +472,7 @@ int main(int argc, char *argv[])
pr_err("data-offset may only be specified one. Second value is %s.\n", optarg);
exit(2);
}
- if (mode == CREATE &&
- strcmp(optarg, "variable") == 0)
+ if (mode == CREATE && strcmp(optarg, "variable") == 0)
data_offset = VARIABLE_OFFSET;
else
data_offset = parse_size(optarg);
@@ -488,9 +496,9 @@ int main(int argc, char *argv[])
optarg);
exit(2);
}
- if (s.level != 0 && s.level != LEVEL_LINEAR && s.level != 1 &&
- s.level != LEVEL_MULTIPATH && s.level != LEVEL_FAULTY &&
- s.level != 10 &&
+ if (s.level != 0 && s.level != LEVEL_LINEAR &&
+ s.level != 1 && s.level != LEVEL_MULTIPATH &&
+ s.level != LEVEL_FAULTY && s.level != 10 &&
mode == BUILD) {
pr_err("Raid level %s not permitted with --build.\n",
optarg);
@@ -592,6 +600,7 @@ int main(int argc, char *argv[])
ident.raid_disks = s.raiddisks;
continue;
case O(ASSEMBLE, Nodes):
+ case O(GROW, Nodes):
case O(CREATE, Nodes):
c.nodes = parse_num(optarg);
if (c.nodes <= 0) {
@@ -702,7 +711,7 @@ int main(int argc, char *argv[])
pr_err("super-minor cannot be set twice. Second value: %s.\n", optarg);
exit(2);
}
- if (strcmp(optarg, "dev")==0)
+ if (strcmp(optarg, "dev") == 0)
ident.super_minor = -2;
else {
ident.super_minor = parse_num(optarg);
@@ -731,27 +740,27 @@ int main(int argc, char *argv[])
exit(2);
}
c.update = optarg;
- if (strcmp(c.update, "sparc2.2")==0)
+ if (strcmp(c.update, "sparc2.2") == 0)
continue;
if (strcmp(c.update, "super-minor") == 0)
continue;
- if (strcmp(c.update, "summaries")==0)
+ if (strcmp(c.update, "summaries") == 0)
continue;
- if (strcmp(c.update, "resync")==0)
+ if (strcmp(c.update, "resync") == 0)
continue;
- if (strcmp(c.update, "uuid")==0)
+ if (strcmp(c.update, "uuid") == 0)
continue;
- if (strcmp(c.update, "name")==0)
+ if (strcmp(c.update, "name") == 0)
continue;
- if (strcmp(c.update, "homehost")==0)
+ if (strcmp(c.update, "homehost") == 0)
continue;
- if (strcmp(c.update, "home-cluster")==0)
+ if (strcmp(c.update, "home-cluster") == 0)
continue;
- if (strcmp(c.update, "nodes")==0)
+ if (strcmp(c.update, "nodes") == 0)
continue;
- if (strcmp(c.update, "devicesize")==0)
+ if (strcmp(c.update, "devicesize") == 0)
continue;
- if (strcmp(c.update, "no-bitmap")==0)
+ if (strcmp(c.update, "no-bitmap") == 0)
continue;
if (strcmp(c.update, "bbl") == 0)
continue;
@@ -1031,7 +1040,8 @@ int main(int argc, char *argv[])
}
}
if (devmode && devmode != opt &&
- (devmode == 'E' || (opt == 'E' && devmode != 'Q'))) {
+ (devmode == 'E' ||
+ (opt == 'E' && devmode != 'Q'))) {
pr_err("--examine/-E cannot be given with ");
if (devmode == 'E') {
if (option_index >= 0)
@@ -1085,7 +1095,7 @@ int main(int argc, char *argv[])
pr_err("bitmap file needed with -b in --assemble mode\n");
exit(2);
}
- if (strcmp(optarg, "internal")==0) {
+ if (strcmp(optarg, "internal") == 0) {
pr_err("there is no need to specify --bitmap when assembling arrays with internal bitmaps\n");
continue;
}
@@ -1127,13 +1137,13 @@ int main(int argc, char *argv[])
case O(CREATE,Bitmap): /* here we create the bitmap */
case O(GROW,'b'):
case O(GROW,Bitmap):
- if (strcmp(optarg, "internal")== 0 ||
- strcmp(optarg, "none")== 0 ||
+ if (strcmp(optarg, "internal") == 0 ||
+ strcmp(optarg, "none") == 0 ||
strchr(optarg, '/') != NULL) {
s.bitmap_file = optarg;
continue;
}
- if (strcmp(optarg, "clustered")== 0) {
+ if (strcmp(optarg, "clustered") == 0) {
s.bitmap_file = optarg;
/* Set the default number of cluster nodes
* to 4 if not already set by user
@@ -1143,7 +1153,7 @@ int main(int argc, char *argv[])
continue;
}
/* probable typo */
- pr_err("bitmap file must contain a '/', or be 'internal', or 'none'\n"
+ pr_err("bitmap file must contain a '/', or be 'internal', or be 'clustered', or 'none'\n"
" not '%s'\n", optarg);
exit(2);
@@ -1264,9 +1274,8 @@ int main(int argc, char *argv[])
* an md device. We check that here and open it.
*/
- if (mode == MANAGE || mode == BUILD || mode == CREATE
- || mode == GROW
- || (mode == ASSEMBLE && ! c.scan)) {
+ if (mode == MANAGE || mode == BUILD || mode == CREATE ||
+ mode == GROW || (mode == ASSEMBLE && ! c.scan)) {
if (devs_found < 1) {
pr_err("an md device must be given in this mode\n");
exit(2);
@@ -1316,13 +1325,14 @@ int main(int argc, char *argv[])
if (c.homehost == NULL && c.require_homehost)
c.homehost = conf_get_homehost(&c.require_homehost);
- if (c.homehost == NULL || strcasecmp(c.homehost, "<system>")==0) {
+ if (c.homehost == NULL || strcasecmp(c.homehost, "<system>") == 0) {
if (gethostname(sys_hostname, sizeof(sys_hostname)) == 0) {
sys_hostname[sizeof(sys_hostname)-1] = 0;
c.homehost = sys_hostname;
}
}
- if (c.homehost && (!c.homehost[0] || strcasecmp(c.homehost, "<none>") == 0)) {
+ if (c.homehost &&
+ (!c.homehost[0] || strcasecmp(c.homehost, "<none>") == 0)) {
c.homehost = NULL;
c.require_homehost = 0;
}
@@ -1346,8 +1356,8 @@ int main(int argc, char *argv[])
exit(2);
}
- if ((mode == MISC && devmode == 'E')
- || (mode == MONITOR && spare_sharing == 0))
+ if ((mode == MISC && devmode == 'E') ||
+ (mode == MONITOR && spare_sharing == 0))
/* Anyone may try this */;
else if (geteuid() != 0) {
pr_err("must be super-user to perform this action\n");
@@ -1378,7 +1388,8 @@ int main(int argc, char *argv[])
break;
case ASSEMBLE:
if (devs_found == 1 && ident.uuid_set == 0 &&
- ident.super_minor == UnSet && ident.name[0] == 0 && !c.scan ) {
+ ident.super_minor == UnSet && ident.name[0] == 0 &&
+ !c.scan ) {
/* Only a device has been given, so get details from config file */
struct mddev_ident *array_ident = conf_get_ident(devlist->devname);
if (array_ident == NULL) {
@@ -1446,7 +1457,7 @@ int main(int argc, char *argv[])
}
if (s.bitmap_file) {
- if (strcmp(s.bitmap_file, "internal")==0 ||
+ if (strcmp(s.bitmap_file, "internal") == 0 ||
strcmp(s.bitmap_file, "clustered") == 0) {
pr_err("'internal' and 'clustered' bitmaps not supported with --build\n");
rv |= 1;
@@ -1460,7 +1471,8 @@ int main(int argc, char *argv[])
c.delay = DEFAULT_BITMAP_DELAY;
if (c.nodes) {
- if (!s.bitmap_file || strcmp(s.bitmap_file, "clustered") != 0) {
+ if (!s.bitmap_file ||
+ strcmp(s.bitmap_file, "clustered") != 0) {
pr_err("--nodes argument only compatible with --bitmap=clustered\n");
rv = 1;
break;
@@ -1509,7 +1521,8 @@ int main(int argc, char *argv[])
} else if (devlist == NULL) {
if (devmode == 'S' && c.scan)
rv = stop_scan(c.verbose);
- else if ((devmode == 'D' || devmode == Waitclean) && c.scan)
+ else if ((devmode == 'D' || devmode == Waitclean) &&
+ c.scan)
rv = misc_scan(devmode, &c);
else if (devmode == UdevRules)
rv = Write_rules(udev_filename);
@@ -1575,7 +1588,8 @@ int main(int argc, char *argv[])
}
if (devs_found > 1 && s.raiddisks == 0 && s.level == UnSet) {
/* must be '-a'. */
- if (s.size > 0 || s.chunk || s.layout_str != NULL || s.bitmap_file) {
+ if (s.size > 0 || s.chunk ||
+ s.layout_str || s.bitmap_file) {
pr_err("--add cannot be used with other geometry changes in --grow mode\n");
rv = 1;
break;
@@ -1588,7 +1602,7 @@ int main(int argc, char *argv[])
}
} else if (s.bitmap_file) {
if (s.size > 0 || s.raiddisks || s.chunk ||
- s.layout_str != NULL || devs_found > 1) {
+ s.layout_str || devs_found > 1) {
pr_err("--bitmap changes cannot be used with other geometry changes in --grow mode\n");
rv = 1;
break;
@@ -1600,9 +1614,9 @@ int main(int argc, char *argv[])
rv = Grow_continue_command(devlist->devname,
mdfd, c.backup_file,
c.verbose);
- else if (s.size > 0 || s.raiddisks || s.layout_str != NULL
- || s.chunk != 0 || s.level != UnSet
- || data_offset != INVALID_SECTORS) {
+ else if (s.size > 0 || s.raiddisks || s.layout_str ||
+ s.chunk != 0 || s.level != UnSet ||
+ data_offset != INVALID_SECTORS) {
rv = Grow_reshape(devlist->devname, mdfd,
devlist->next,
data_offset, &c, &s);
@@ -1718,11 +1732,11 @@ static int scan_assemble(struct supertype *ss,
rv2 = Assemble(ss, NULL,
ident,
devlist, c);
- if (rv2==0) {
+ if (rv2 == 0) {
cnt++;
acnt++;
}
- } while (rv2!=2);
+ } while (rv2 != 2);
/* Incase there are stacked devices, we need to go around again */
} while (acnt);
if (cnt == 0 && rv == 0) {
@@ -1763,8 +1777,7 @@ static int misc_scan(char devmode, struct context *c)
if (me && me->path
&& strcmp(me->path, "/unknown") != 0)
name = me->path;
- if (name == NULL ||
- stat(name, &stb) != 0)
+ if (name == NULL || stat(name, &stb) != 0)
name = get_md_name(e->devnm);
if (!name) {
diff --git a/mdadm.h b/mdadm.h
index dd02be71..71b8afb9 100755..100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -45,6 +45,10 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#include <errno.h>
#include <string.h>
#include <syslog.h>
+#ifdef __GLIBC__
+/* Newer glibc requires sys/sysmacros.h directly for makedev() */
+#include <sys/sysmacros.h>
+#endif
#ifdef __dietlibc__
#include <strings.h>
/* dietlibc has deprecated random and srandom!! */
@@ -64,7 +68,6 @@ typedef uint64_t cmap_handle_t;
#include <errno.h>
#else
#define LKF_NOQUEUE 0x00000001
-#define LKF_CONVERT 0x00000004
#define LKM_PWMODE 4
#define EUNLOCK 0x10002
@@ -139,20 +142,20 @@ struct dlm_lksb {
* and there is no standard conversion function so... */
/* And dietlibc doesn't think byteswap is ok, so.. */
/* #include <byteswap.h> */
-#define bswap_16(x) (((x) & 0x00ffU) << 8 | \
- ((x) & 0xff00U) >> 8)
-#define bswap_32(x) (((x) & 0x000000ffU) << 24 | \
- ((x) & 0xff000000U) >> 24 | \
- ((x) & 0x0000ff00U) << 8 | \
- ((x) & 0x00ff0000U) >> 8)
-#define bswap_64(x) (((x) & 0x00000000000000ffULL) << 56 | \
- ((x) & 0xff00000000000000ULL) >> 56 | \
- ((x) & 0x000000000000ff00ULL) << 40 | \
- ((x) & 0x00ff000000000000ULL) >> 40 | \
- ((x) & 0x0000000000ff0000ULL) << 24 | \
- ((x) & 0x0000ff0000000000ULL) >> 24 | \
- ((x) & 0x00000000ff000000ULL) << 8 | \
- ((x) & 0x000000ff00000000ULL) >> 8)
+#define __mdadm_bswap_16(x) (((x) & 0x00ffU) << 8 | \
+ ((x) & 0xff00U) >> 8)
+#define __mdadm_bswap_32(x) (((x) & 0x000000ffU) << 24 | \
+ ((x) & 0xff000000U) >> 24 | \
+ ((x) & 0x0000ff00U) << 8 | \
+ ((x) & 0x00ff0000U) >> 8)
+#define __mdadm_bswap_64(x) (((x) & 0x00000000000000ffULL) << 56 | \
+ ((x) & 0xff00000000000000ULL) >> 56 | \
+ ((x) & 0x000000000000ff00ULL) << 40 | \
+ ((x) & 0x00ff000000000000ULL) >> 40 | \
+ ((x) & 0x0000000000ff0000ULL) << 24 | \
+ ((x) & 0x0000ff0000000000ULL) >> 24 | \
+ ((x) & 0x00000000ff000000ULL) << 8 | \
+ ((x) & 0x000000ff00000000ULL) >> 8)
#if !defined(__KLIBC__)
#if BYTE_ORDER == LITTLE_ENDIAN
@@ -163,19 +166,19 @@ struct dlm_lksb {
#define __le32_to_cpu(_x) (unsigned int)(_x)
#define __le64_to_cpu(_x) (unsigned long long)(_x)
-#define __cpu_to_be16(_x) bswap_16(_x)
-#define __cpu_to_be32(_x) bswap_32(_x)
-#define __cpu_to_be64(_x) bswap_64(_x)
-#define __be16_to_cpu(_x) bswap_16(_x)
-#define __be32_to_cpu(_x) bswap_32(_x)
-#define __be64_to_cpu(_x) bswap_64(_x)
+#define __cpu_to_be16(_x) __mdadm_bswap_16(_x)
+#define __cpu_to_be32(_x) __mdadm_bswap_32(_x)
+#define __cpu_to_be64(_x) __mdadm_bswap_64(_x)
+#define __be16_to_cpu(_x) __mdadm_bswap_16(_x)
+#define __be32_to_cpu(_x) __mdadm_bswap_32(_x)
+#define __be64_to_cpu(_x) __mdadm_bswap_64(_x)
#elif BYTE_ORDER == BIG_ENDIAN
-#define __cpu_to_le16(_x) bswap_16(_x)
-#define __cpu_to_le32(_x) bswap_32(_x)
-#define __cpu_to_le64(_x) bswap_64(_x)
-#define __le16_to_cpu(_x) bswap_16(_x)
-#define __le32_to_cpu(_x) bswap_32(_x)
-#define __le64_to_cpu(_x) bswap_64(_x)
+#define __cpu_to_le16(_x) __mdadm_bswap_16(_x)
+#define __cpu_to_le32(_x) __mdadm_bswap_32(_x)
+#define __cpu_to_le64(_x) __mdadm_bswap_64(_x)
+#define __le16_to_cpu(_x) __mdadm_bswap_16(_x)
+#define __le32_to_cpu(_x) __mdadm_bswap_32(_x)
+#define __le64_to_cpu(_x) __mdadm_bswap_64(_x)
#define __cpu_to_be16(_x) (unsigned int)(_x)
#define __cpu_to_be32(_x) (unsigned int)(_x)
@@ -234,6 +237,17 @@ struct dlm_lksb {
extern const char Name[];
+struct md_bb_entry {
+ unsigned long long sector;
+ int length;
+};
+
+struct md_bb {
+ int supported;
+ int count;
+ struct md_bb_entry *entries;
+};
+
/* general information that might be extracted from a superblock */
struct mdinfo {
mdu_array_info_t array;
@@ -290,13 +304,15 @@ struct mdinfo {
int container_enough; /* flag external handlers can set to
* indicate that subarrays have not enough (-1),
* enough to start (0), or all expected disks (1) */
- char sys_name[20];
+ char sys_name[32];
struct mdinfo *devs;
struct mdinfo *next;
/* Device info for mdmon: */
int recovery_fd;
int state_fd;
+ int bb_fd;
+ int ubb_fd;
#define DS_FAULTY 1
#define DS_INSYNC 2
#define DS_WRITE_MOSTLY 4
@@ -308,6 +324,8 @@ struct mdinfo {
/* info read from sysfs */
char sysfs_array_state[20];
+
+ struct md_bb bb;
};
struct createinfo {
@@ -380,6 +398,8 @@ enum special_options {
ConfigFile,
ChunkSize,
WriteMostly,
+ FailFast,
+ NoFailFast,
Layout,
Auto,
Force,
@@ -419,6 +439,10 @@ enum bitmap_update {
NodeNumUpdate,
};
+enum flag_mode {
+ FlagDefault, FlagSet, FlagClear,
+};
+
/* structures read from config file */
/* List of mddevice names and identifiers
* Identifiers can be:
@@ -512,7 +536,8 @@ struct mddev_dev {
* 'A' for re_add.
* Not set for names read from .config
*/
- char writemostly; /* 1 for 'set writemostly', 2 for 'clear writemostly' */
+ enum flag_mode writemostly;
+ enum flag_mode failfast;
int used; /* set when used */
long long data_offset;
struct mddev_dev *next;
@@ -632,7 +657,7 @@ extern int sysfs_disk_to_scsi_id(int fd, __u32 *id);
extern int sysfs_unique_holder(char *devnm, long rdev);
extern int sysfs_freeze_array(struct mdinfo *sra);
extern int sysfs_wait(int fd, int *msec);
-extern int load_sys(char *path, char *buf);
+extern int load_sys(char *path, char *buf, int len);
extern int reshape_prepare_fdlist(char *devname,
struct mdinfo *sra,
int raid_disks,
@@ -818,6 +843,8 @@ extern struct superswitch {
* linear-grow-update - now change the size of the array.
* writemostly - set the WriteMostly1 bit in the superblock devflags
* readwrite - clear the WriteMostly1 bit in the superblock devflags
+ * failfast - set the FailFast1 bit in the superblock
+ * nofailfast - clear the FailFast1 bit
* no-bitmap - clear any record that a bitmap is present.
* bbl - add a bad-block-log if possible
* no-bbl - remove any bad-block-log is it is empty.
@@ -897,6 +924,8 @@ extern struct superswitch {
* created, in which case data_size may be updated, or it might
* already exist. Metadata handler can know if init_super
* has been called, but not write_init_super.
+ * 0: Success
+ * -Exxxx: On error
*/
int (*add_internal_bitmap)(struct supertype *st, int *chunkp,
int delay, int write_behind,
@@ -904,7 +933,7 @@ extern struct superswitch {
/* Seek 'fd' to start of write-intent-bitmap. Must be an
* md-native format bitmap
*/
- int (*locate_bitmap)(struct supertype *st, int fd);
+ int (*locate_bitmap)(struct supertype *st, int fd, int node_num);
/* if add_internal_bitmap succeeded for existing array, this
* writes it out.
*/
@@ -1030,6 +1059,17 @@ extern struct superswitch {
/* validate container after assemble */
int (*validate_container)(struct mdinfo *info);
+ /* records new bad block in metadata */
+ int (*record_bad_block)(struct active_array *a, int n,
+ unsigned long long sector, int length);
+
+ /* clears bad block from metadata */
+ int (*clear_bad_block)(struct active_array *a, int n,
+ unsigned long long sector, int length);
+
+ /* get list of bad blocks from metadata */
+ struct md_bb *(*get_bad_blocks)(struct active_array *a, int n);
+
int swapuuid; /* true if uuid is bigending rather than hostendian */
int external;
const char *name; /* canonical metadata name */
@@ -1107,6 +1147,7 @@ static inline struct supertype *guess_super(int fd) {
}
extern struct supertype *dup_super(struct supertype *st);
extern int get_dev_size(int fd, char *dname, unsigned long long *sizep);
+extern int get_dev_sector_size(int fd, char *dname, unsigned int *sectsizep);
extern int must_be_container(int fd);
extern int dev_size_from_id(dev_t id, unsigned long long *size);
void wait_for(char *dev, int fd);
@@ -1326,7 +1367,14 @@ extern int CreateBitmap(char *filename, int force, char uuid[16],
extern int ExamineBitmap(char *filename, int brief, struct supertype *st);
extern int Write_rules(char *rule_name);
extern int bitmap_update_uuid(int fd, int *uuid, int swap);
-extern unsigned long bitmap_sectors(struct bitmap_super_s *bsb);
+
+/* calculate the size of the bitmap given the array size and bitmap chunksize */
+static inline unsigned long long
+bitmap_bits(unsigned long long array_size, unsigned long chunksize)
+{
+ return (array_size * 512 + chunksize - 1) / chunksize;
+}
+
extern int Dump_metadata(char *dev, char *dir, struct context *c,
struct supertype *st);
extern int Restore_metadata(char *dev, char *dir, struct context *c,
@@ -1439,8 +1487,8 @@ extern char *find_free_devnm(int use_partitions);
extern void put_md_name(char *name);
extern char *devid2kname(int devid);
-extern char *devid2devnm(int devid);
-extern int devnm2devid(char *devnm);
+extern char *devid2devnm(dev_t devid);
+extern dev_t devnm2devid(char *devnm);
extern char *get_md_name(char *devnm);
extern char DefaultConfFile[];
@@ -1465,6 +1513,7 @@ extern int mdmon_running(char *devnm);
extern int mdmon_pid(char *devnm);
extern int check_env(char *name);
extern __u32 random32(void);
+extern void random_uuid(__u8 *buf);
extern int start_mdmon(char *devnm);
extern int child_monitor(int afd, struct mdinfo *sra, struct reshape *reshape,
@@ -1475,7 +1524,8 @@ void abort_reshape(struct mdinfo *sra);
void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0);
-extern void fmt_devname(char *name, int num);
+extern char *stat2kname(struct stat *st);
+extern char *fd2kname(int fd);
extern char *stat2devnm(struct stat *st);
extern char *fd2devnm(int fd);
diff --git a/mdadm.spec b/mdadm.spec
index 685a5642..4e97efb4 100644
--- a/mdadm.spec
+++ b/mdadm.spec
@@ -1,6 +1,6 @@
Summary: mdadm is used for controlling Linux md devices (aka RAID arrays)
Name: mdadm
-Version: 3.4
+Version: 4.0
Release: 1
Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.gz
URL: http://neil.brown.name/blog/mdadm
diff --git a/mdassemble.8 b/mdassemble.8
index 6cb005c5..9ac8cd2c 100644
--- a/mdassemble.8
+++ b/mdassemble.8
@@ -1,5 +1,5 @@
.\" -*- nroff -*-
-.TH MDASSEMBLE 8 "" v3.4
+.TH MDASSEMBLE 8 "" v4.0
.SH NAME
mdassemble \- assemble MD devices
.I aka
diff --git a/mdassemble.c b/mdassemble.c
index 78d363a3..471ffeb0 100644
--- a/mdassemble.c
+++ b/mdassemble.c
@@ -32,7 +32,7 @@ char const Name[] = "mdassemble";
/* from mdopen.c */
int open_mddev(char *dev, int report_errors/*unused*/)
{
- int mdfd = open(dev, O_RDWR);
+ int mdfd = open(dev, O_RDONLY);
if (mdfd < 0)
pr_err("error opening %s: %s\n",
dev, strerror(errno));
diff --git a/mdmon.8 b/mdmon.8
index cc6add8f..ac7352fc 100644
--- a/mdmon.8
+++ b/mdmon.8
@@ -1,5 +1,5 @@
.\" See file COPYING in distribution for details.
-.TH MDMON 8 "" v3.4
+.TH MDMON 8 "" v4.0
.SH NAME
mdmon \- monitor MD external metadata arrays
diff --git a/mdmon.h b/mdmon.h
index aa750c68..0b08c3d7 100644
--- a/mdmon.h
+++ b/mdmon.h
@@ -101,7 +101,7 @@ static inline int is_resync_complete(struct mdinfo *array)
break;
case 10:
l = array->array.layout;
- ncopies = (l & 0xff) * ((l >> 8) && 0xff);
+ ncopies = (l & 0xff) * ((l >> 8) & 0xff);
sync_size = array->component_size * array->array.raid_disks;
sync_size /= ncopies;
break;
diff --git a/mdopen.c b/mdopen.c
index 28410f46..685ca328 100644
--- a/mdopen.c
+++ b/mdopen.c
@@ -144,7 +144,7 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
struct createinfo *ci = conf_get_create_info();
int parts;
char *cname;
- char devname[20];
+ char devname[37];
char devnm[32];
char cbuf[400];
if (chosen == NULL)
@@ -318,7 +318,7 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
else if (num < 0) {
/* need to choose a free number. */
char *_devnm = find_free_devnm(use_mdp);
- if (devnm == NULL) {
+ if (_devnm == NULL) {
pr_err("No avail md devices - aborting\n");
return -1;
}
@@ -348,7 +348,7 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
if (lstat(devname, &stb) == 0) {
/* Must be the correct device, else error */
if ((stb.st_mode&S_IFMT) != S_IFBLK ||
- stb.st_rdev != (dev_t)devnm2devid(devnm)) {
+ stb.st_rdev != devnm2devid(devnm)) {
pr_err("%s exists but looks wrong, please fix\n",
devname);
return -1;
@@ -416,9 +416,7 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
*/
int open_mddev(char *dev, int report_errors)
{
- int mdfd = open(dev, O_RDWR);
- if (mdfd < 0 && errno == EACCES)
- mdfd = open(dev, O_RDONLY);
+ int mdfd = open(dev, O_RDONLY);
if (mdfd < 0) {
if (report_errors)
pr_err("error opening %s: %s\n",
@@ -439,7 +437,7 @@ char *find_free_devnm(int use_partitions)
static char devnm[32];
int devnum;
for (devnum = 127; devnum != 128;
- devnum = devnum ? devnum-1 : (1<<20)-1) {
+ devnum = devnum ? devnum-1 : (1<<9)-1) {
if (use_partitions)
sprintf(devnm, "md_d%d", devnum);
@@ -452,7 +450,7 @@ char *find_free_devnm(int use_partitions)
if (!use_udev()) {
/* make sure it is new to /dev too, at least as a
* non-standard */
- int devid = devnm2devid(devnm);
+ dev_t devid = devnm2devid(devnm);
if (devid) {
char *dn = map_dev(major(devid),
minor(devid), 0);
diff --git a/mdstat.c b/mdstat.c
index 2972cdf6..39628967 100644
--- a/mdstat.c
+++ b/mdstat.c
@@ -133,7 +133,11 @@ struct mdstat_ent *mdstat_read(int hold, int start)
int fd;
if (hold && mdstat_fd != -1) {
- lseek(mdstat_fd, 0L, 0);
+ off_t offset = lseek(mdstat_fd, 0L, 0);
+ if (offset == (off_t)-1) {
+ mdstat_close();
+ return NULL;
+ }
fd = dup(mdstat_fd);
if (fd >= 0)
f = fdopen(fd, "r");
diff --git a/monitor.c b/monitor.c
index 870cc1a7..00b7c689 100644
--- a/monitor.c
+++ b/monitor.c
@@ -31,6 +31,11 @@ static char *sync_actions[] = {
"idle", "reshape", "resync", "recover", "check", "repair", NULL
};
+enum bb_action {
+ RECORD_BB = 1,
+ COMPARE_BB,
+};
+
static int write_attr(char *attr, int fd)
{
return write(fd, attr, strlen(attr));
@@ -131,8 +136,8 @@ static enum sync_action read_action( int fd)
int read_dev_state(int fd)
{
- char buf[60];
- int n = read_attr(buf, 60, fd);
+ char buf[100];
+ int n = read_attr(buf, sizeof(buf), fd);
char *cp;
int rv = 0;
@@ -158,6 +163,177 @@ int read_dev_state(int fd)
return rv;
}
+int process_ubb(struct active_array *a, struct mdinfo *mdi, const unsigned long
+ long sector, const int length, const char *buf,
+ const int buf_len)
+{
+ struct superswitch *ss = a->container->ss;
+
+ /*
+ * record bad block in metadata first, then acknowledge it to the driver
+ * via sysfs file
+ */
+ if ((ss->record_bad_block(a, mdi->disk.raid_disk, sector, length)) &&
+ (write(mdi->bb_fd, buf, buf_len) == buf_len))
+ return 1;
+
+ /*
+ * failed to store or acknowledge bad block, switch off bad block support
+ * to get it out of blocked state
+ */
+ sysfs_set_str(&a->info, mdi, "state", "-external_bbl");
+ return -1;
+}
+
+int compare_bb(struct active_array *a, struct mdinfo *mdi, const unsigned long
+ long sector, const unsigned int length, void *arg)
+{
+ struct superswitch *ss = a->container->ss;
+ struct md_bb *bb = (struct md_bb *) arg;
+ int record = 1;
+ int i;
+
+ for (i = 0; i < bb->count; i++) {
+ unsigned long long start = bb->entries[i].sector;
+ unsigned long long len = bb->entries[i].length;
+
+ /*
+ * bad block in metadata exactly matches bad block in kernel
+ * list, just remove it from a list
+ */
+ if ((start == sector) && (len == length)) {
+ if (i < bb->count - 1)
+ bb->entries[i] = bb->entries[bb->count - 1];
+ bb->count -= 1;
+ record = 0;
+ break;
+ }
+ /*
+ * bad block in metadata spans bad block in kernel list,
+ * clear it and record new bad block
+ */
+ if ((sector >= start) && (sector + length <= start + len)) {
+ ss->clear_bad_block(a, mdi->disk.raid_disk, start, len);
+ break;
+ }
+ }
+
+ /* record all bad blocks not in metadata list */
+ if (record && (ss->record_bad_block(a, mdi->disk.raid_disk, sector,
+ length) <= 0)) {
+ sysfs_set_str(&a->info, mdi, "state", "-external_bbl");
+ return -1;
+ }
+
+ return 1;
+}
+
+static int read_bb_file(int fd, struct active_array *a, struct mdinfo *mdi,
+ enum bb_action action, void *arg)
+{
+ char buf[30];
+ int n = 0;
+ int ret = 0;
+ int read_again = 0;
+ int off = 0;
+ int pos = 0;
+ int preserve_pos = (action == RECORD_BB ? 0 : 1);
+
+ if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
+ return -1;
+
+ do {
+ read_again = 0;
+ n = read(fd, buf + pos, sizeof(buf) - 1 - pos);
+ if (n < 0)
+ return -1;
+ n += pos;
+
+ buf[n] = '\0';
+ off = 0;
+
+ while (off < n) {
+ unsigned long long sector;
+ int length;
+ char newline;
+ int consumed;
+ int matched;
+ int rc;
+
+ /* kernel sysfs file format: "sector length\n" */
+ matched = sscanf(buf + off, "%llu %d%c%n", &sector,
+ &length, &newline, &consumed);
+ if ((matched != 3) && (off > 0)) {
+ /* truncated entry, read again */
+ if (preserve_pos) {
+ pos = sizeof(buf) - off - 1;
+ memmove(buf, buf + off, pos);
+ } else {
+ if (lseek(fd, 0, SEEK_SET) ==
+ (off_t) -1)
+ return -1;
+ }
+ read_again = 1;
+ break;
+ }
+ if (matched != 3)
+ return -1;
+ if (newline != '\n')
+ return -1;
+ if (length <= 0)
+ return -1;
+
+ if (action == RECORD_BB)
+ rc = process_ubb(a, mdi, sector, length,
+ buf + off, consumed);
+ else if (action == COMPARE_BB)
+ rc = compare_bb(a, mdi, sector, length, arg);
+ else
+ rc = -1;
+
+ if (rc < 0)
+ return rc;
+ ret += rc;
+ off += consumed;
+ }
+ } while (read_again);
+
+ return ret;
+}
+
+static int process_dev_ubb(struct active_array *a, struct mdinfo *mdi)
+{
+ return read_bb_file(mdi->ubb_fd, a, mdi, RECORD_BB, NULL);
+}
+
+static int check_for_cleared_bb(struct active_array *a, struct mdinfo *mdi)
+{
+ struct superswitch *ss = a->container->ss;
+ struct md_bb *bb;
+ int i;
+
+ /*
+ * Get a list of bad blocks for an array, then read list of
+ * acknowledged bad blocks from kernel and compare it against metadata
+ * list, clear all bad blocks remaining in metadata list
+ */
+ bb = ss->get_bad_blocks(a, mdi->disk.raid_disk);
+ if (!bb)
+ return -1;
+
+ if (read_bb_file(mdi->bb_fd, a, mdi, COMPARE_BB, bb) < 0)
+ return -1;
+
+ for (i = 0; i < bb->count; i++) {
+ unsigned long long sector = bb->entries[i].sector;
+ int length = bb->entries[i].length;
+
+ ss->clear_bad_block(a, mdi->disk.raid_disk, sector, length);
+ }
+
+ return 0;
+}
+
static void signal_manager(void)
{
/* tgkill(getpid(), mon_tid, SIGUSR1); */
@@ -224,7 +400,7 @@ static void signal_manager(void)
#define ARRAY_DIRTY 1
#define ARRAY_BUSY 2
-static int read_and_act(struct active_array *a)
+static int read_and_act(struct active_array *a, fd_set *fds)
{
unsigned long long sync_completed;
int check_degraded = 0;
@@ -256,6 +432,18 @@ static int read_and_act(struct active_array *a)
&mdi->recovery_start);
mdi->curr_state = read_dev_state(mdi->state_fd);
}
+ /*
+ * If array is blocked and metadata handler is able to handle
+ * BB, check if you can acknowledge them to md driver. If
+ * successful, clear faulty state and unblock the array.
+ */
+ if ((mdi->curr_state & DS_BLOCKED) &&
+ a->container->ss->record_bad_block &&
+ (process_dev_ubb(a, mdi) > 0)) {
+ mdi->next_state |= DS_UNBLOCK;
+ }
+ if (FD_ISSET(mdi->bb_fd, fds))
+ check_for_cleared_bb(a, mdi);
}
gettimeofday(&tv, NULL);
@@ -420,6 +608,9 @@ static int read_and_act(struct active_array *a)
if (sync_completed > a->last_checkpoint)
a->last_checkpoint = sync_completed;
+ if (sync_completed >= a->info.component_size)
+ a->last_checkpoint = 0;
+
a->container->ss->sync_metadata(a->container);
dprintf("(%d): state:%s action:%s next(", a->info.container_member,
array_states[a->curr_state], sync_actions[a->curr_action]);
@@ -451,6 +642,8 @@ static int read_and_act(struct active_array *a)
dprintf_cont(" %d:removed", mdi->disk.raid_disk);
close(mdi->state_fd);
close(mdi->recovery_fd);
+ close(mdi->bb_fd);
+ close(mdi->ubb_fd);
mdi->state_fd = -1;
} else
ret |= ARRAY_BUSY;
@@ -580,8 +773,11 @@ static int wait_and_act(struct supertype *container, int nowait)
add_fd(&rfds, &maxfd, a->info.state_fd);
add_fd(&rfds, &maxfd, a->action_fd);
add_fd(&rfds, &maxfd, a->sync_completed_fd);
- for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
+ for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
add_fd(&rfds, &maxfd, mdi->state_fd);
+ add_fd(&rfds, &maxfd, mdi->bb_fd);
+ add_fd(&rfds, &maxfd, mdi->ubb_fd);
+ }
ap = &(*ap)->next;
}
@@ -634,6 +830,7 @@ static int wait_and_act(struct supertype *container, int nowait)
if (rv == -1) {
if (errno == EINTR) {
rv = 0;
+ FD_ZERO(&rfds);
dprintf("monitor: caught signal\n");
} else
dprintf("monitor: error %d in pselect\n",
@@ -675,7 +872,7 @@ static int wait_and_act(struct supertype *container, int nowait)
signal_manager();
}
if (a->container && !a->to_remove) {
- int ret = read_and_act(a);
+ int ret = read_and_act(a, &rfds);
rv |= 1;
dirty_arrays += !!(ret & ARRAY_DIRTY);
/* when terminating stop manipulating the array after it
diff --git a/msg.c b/msg.c
index 45cd4504..c66b0a13 100644
--- a/msg.c
+++ b/msg.c
@@ -171,6 +171,8 @@ int connect_monitor(char *devname)
addr.sun_family = PF_LOCAL;
strcpy(addr.sun_path, path);
if (connect(sfd, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
+ pr_err("Error connecting monitor with %s: %s\n",
+ addr.sun_path, strerror(errno));
close(sfd);
return -1;
}
diff --git a/part.h b/part.h
index 862a14c3..e697fb46 100644
--- a/part.h
+++ b/part.h
@@ -40,7 +40,7 @@ struct MBR_part_record {
__u8 last_cyl;
__u32 first_sect_lba;
__u32 blocks_num;
-};
+} __attribute__((packed));
struct MBR {
__u8 pad[446];
diff --git a/platform-intel.c b/platform-intel.c
index 88818f34..9867697b 100644
--- a/platform-intel.c
+++ b/platform-intel.c
@@ -48,9 +48,9 @@ static void free_sys_dev(struct sys_dev **list)
struct sys_dev *find_driver_devices(const char *bus, const char *driver)
{
/* search sysfs for devices driven by 'driver' */
- char path[292];
- char link[256];
- char *c;
+ char path[PATH_MAX];
+ char link[PATH_MAX];
+ char *c, *p;
DIR *driver_dir;
struct dirent *de;
struct sys_dev *head = NULL;
@@ -123,6 +123,22 @@ struct sys_dev *find_driver_devices(const char *bus, const char *driver)
if (devpath_to_ll(path, "class", &class) != 0)
continue;
+ /*
+ * Each VMD device (domain) adds separate PCI bus, it is better
+ * to store path as a path to that bus (easier further
+ * determination which NVMe dev is connected to this particular
+ * VMD domain).
+ */
+ if (type == SYS_DEV_VMD) {
+ sprintf(path, "/sys/bus/%s/drivers/%s/%s/domain/device",
+ bus, driver, de->d_name);
+ }
+ p = realpath(path, NULL);
+ if (p == NULL) {
+ pr_err("Unable to get real path for '%s'\n", path);
+ continue;
+ }
+
/* start / add list entry */
if (!head) {
head = xmalloc(sizeof(*head));
@@ -140,16 +156,9 @@ struct sys_dev *find_driver_devices(const char *bus, const char *driver)
list->dev_id = (__u16) dev_id;
list->class = (__u32) class;
list->type = type;
- /* Each VMD device (domain) adds separate PCI bus, it is better to
- * store path as a path to that bus (easier further determination which
- * NVMe dev is connected to this particular VMD domain).
- */
- if (type == SYS_DEV_VMD) {
- sprintf(path, "/sys/bus/%s/drivers/%s/%s/domain/device",
- bus, driver, de->d_name);
- }
- list->path = realpath(path, NULL);
list->next = NULL;
+ list->path = p;
+
if ((list->pci_id = strrchr(list->path, '/')) != NULL)
list->pci_id++;
}
@@ -178,6 +187,16 @@ struct sys_dev *device_by_id(__u16 device_id)
return NULL;
}
+/*
+ * Walk intel_devices and return the first entry whose dev_id equals
+ * device_id and whose path contains 'path' as a substring; NULL when
+ * nothing matches.
+ */
+struct sys_dev *device_by_id_and_path(__u16 device_id, const char *path)
+{
+	struct sys_dev *dev = intel_devices;
+
+	while (dev != NULL) {
+		if (dev->dev_id == device_id && strstr(dev->path, path))
+			break;
+		dev = dev->next;
+	}
+	return dev;
+}
+
static int devpath_to_ll(const char *dev_path, const char *entry, unsigned long long *val)
{
char path[strlen(dev_path) + strlen(entry) + 2];
@@ -724,8 +743,10 @@ char *vmd_domain_to_controller(struct sys_dev *hba, char *buf)
return NULL;
dir = opendir("/sys/bus/pci/drivers/vmd");
+ if (!dir)
+ return NULL;
- for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
+ for (ent = readdir(dir); ent; ent = readdir(dir)) {
sprintf(path, "/sys/bus/pci/drivers/vmd/%s/domain/device",
ent->d_name);
@@ -734,8 +755,11 @@ char *vmd_domain_to_controller(struct sys_dev *hba, char *buf)
if (strncmp(buf, hba->path, strlen(buf)) == 0) {
sprintf(path, "/sys/bus/pci/drivers/vmd/%s", ent->d_name);
+ closedir(dir);
return realpath(path, buf);
}
}
+
+ closedir(dir);
return NULL;
}
diff --git a/platform-intel.h b/platform-intel.h
index a8ae85f4..29c85f12 100644
--- a/platform-intel.h
+++ b/platform-intel.h
@@ -99,6 +99,8 @@ struct imsm_orom {
#define IMSM_OROM_CAPABILITIES_Rohi (1 << 5)
#define IMSM_OROM_CAPABILITIES_ReadPatrol (1 << 6)
#define IMSM_OROM_CAPABILITIES_XorHw (1 << 7)
+ #define IMSM_OROM_CAPABILITIES_SKUMode ((1 << 8)|(1 << 9))
+ #define IMSM_OROM_CAPABILITIES_TPV (1 << 10)
} __attribute__((packed));
static inline int imsm_orom_has_raid0(const struct imsm_orom *orom)
@@ -184,6 +186,11 @@ static inline int imsm_orom_is_nvme(const struct imsm_orom *orom)
sizeof(orom->signature)) == 0;
}
+/* Returns 1 when the TPV capability bit is set in driver_features, else 0. */
+static inline int imsm_orom_has_tpv_support(const struct imsm_orom *orom)
+{
+	if (orom->driver_features & IMSM_OROM_CAPABILITIES_TPV)
+		return 1;
+	return 0;
+}
+
enum sys_dev_type {
SYS_DEV_UNKNOWN = 0,
SYS_DEV_SAS,
@@ -244,4 +251,5 @@ const char *get_sys_dev_type(enum sys_dev_type);
const struct orom_entry *get_orom_entry_by_device_id(__u16 dev_id);
const struct imsm_orom *get_orom_by_device_id(__u16 device_id);
struct sys_dev *device_by_id(__u16 device_id);
+struct sys_dev *device_by_id_and_path(__u16 device_id, const char *path);
char *vmd_domain_to_controller(struct sys_dev *hba, char *buf);
diff --git a/raid6check.c b/raid6check.c
index ad7ffe7e..551f8355 100644
--- a/raid6check.c
+++ b/raid6check.c
@@ -266,7 +266,8 @@ int manual_repair(int chunk_size, int syndrome_disks,
failed_data = failed_slot2;
else
failed_data = failed_slot1;
- printf("Repairing D(%d) and P\n", failed_data);
+
+ printf("Repairing D(%d) and P\n", failed_data);
raid6_datap_recov(syndrome_disks+2, chunk_size,
failed_data, (uint8_t**)blocks, 1);
} else {
diff --git a/restripe.c b/restripe.c
index 56dca73e..de85ee46 100644
--- a/restripe.c
+++ b/restripe.c
@@ -58,26 +58,30 @@ int geo_map(int block, unsigned long long stripe, int raid_disks,
return block;
case 500 + ALGORITHM_LEFT_ASYMMETRIC:
pd = (raid_disks-1) - stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
if (block >= pd)
block++;
return block;
case 500 + ALGORITHM_RIGHT_ASYMMETRIC:
pd = stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
if (block >= pd)
block++;
return block;
case 500 + ALGORITHM_LEFT_SYMMETRIC:
pd = (raid_disks - 1) - stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
return (pd + 1 + block) % raid_disks;
case 500 + ALGORITHM_RIGHT_SYMMETRIC:
pd = stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
return (pd + 1 + block) % raid_disks;
case 500 + ALGORITHM_PARITY_0:
@@ -94,7 +98,8 @@ int geo_map(int block, unsigned long long stripe, int raid_disks,
return raid_disks - 1;
raid_disks--;
pd = (raid_disks-1) - stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
if (block >= pd)
block++;
return block;
@@ -104,7 +109,8 @@ int geo_map(int block, unsigned long long stripe, int raid_disks,
return raid_disks - 1;
raid_disks--;
pd = stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
if (block >= pd)
block++;
return block;
@@ -114,7 +120,8 @@ int geo_map(int block, unsigned long long stripe, int raid_disks,
return raid_disks - 1;
raid_disks--;
pd = (raid_disks - 1) - stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
return (pd + 1 + block) % raid_disks;
case 600 + ALGORITHM_RIGHT_SYMMETRIC_6:
@@ -122,7 +129,8 @@ int geo_map(int block, unsigned long long stripe, int raid_disks,
return raid_disks - 1;
raid_disks--;
pd = stripe % raid_disks;
- if (block == -1) return pd;
+ if (block == -1)
+ return pd;
return (pd + 1 + block) % raid_disks;
case 600 + ALGORITHM_PARITY_0_6:
@@ -139,8 +147,10 @@ int geo_map(int block, unsigned long long stripe, int raid_disks,
case 600 + ALGORITHM_LEFT_ASYMMETRIC:
pd = raid_disks - 1 - (stripe % raid_disks);
- if (block == -1) return pd;
- if (block == -2) return (pd+1) % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
if (pd == raid_disks - 1)
return block+1;
if (block >= pd)
@@ -151,8 +161,10 @@ int geo_map(int block, unsigned long long stripe, int raid_disks,
/* Different order for calculating Q, otherwize same as ... */
case 600 + ALGORITHM_RIGHT_ASYMMETRIC:
pd = stripe % raid_disks;
- if (block == -1) return pd;
- if (block == -2) return (pd+1) % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
if (pd == raid_disks - 1)
return block+1;
if (block >= pd)
@@ -161,14 +173,18 @@ int geo_map(int block, unsigned long long stripe, int raid_disks,
case 600 + ALGORITHM_LEFT_SYMMETRIC:
pd = raid_disks - 1 - (stripe % raid_disks);
- if (block == -1) return pd;
- if (block == -2) return (pd+1) % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
return (pd + 2 + block) % raid_disks;
case 600 + ALGORITHM_RIGHT_SYMMETRIC:
pd = stripe % raid_disks;
- if (block == -1) return pd;
- if (block == -2) return (pd+1) % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
return (pd + 2 + block) % raid_disks;
case 600 + ALGORITHM_ROTATING_N_RESTART:
@@ -177,8 +193,10 @@ int geo_map(int block, unsigned long long stripe, int raid_disks,
* Q D D D P
*/
pd = raid_disks - 1 - ((stripe + 1) % raid_disks);
- if (block == -1) return pd;
- if (block == -2) return (pd+1) % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+1) % raid_disks;
if (pd == raid_disks - 1)
return block+1;
if (block >= pd)
@@ -188,8 +206,10 @@ int geo_map(int block, unsigned long long stripe, int raid_disks,
case 600 + ALGORITHM_ROTATING_N_CONTINUE:
/* Same as left_symmetric but Q is before P */
pd = raid_disks - 1 - (stripe % raid_disks);
- if (block == -1) return pd;
- if (block == -2) return (pd+raid_disks-1) % raid_disks;
+ if (block == -1)
+ return pd;
+ if (block == -2)
+ return (pd+raid_disks-1) % raid_disks;
return (pd + 1 + block) % raid_disks;
}
return -1;
@@ -462,7 +482,7 @@ int raid6_check_disks(int data_disks, int start, int chunk_size,
}
if((Px == 0) && (Qx == 0))
- curr_broken_disk = curr_broken_disk;
+ curr_broken_disk = prev_broken_disk;
if(curr_broken_disk >= data_disks + 2)
broken_status = 2;
diff --git a/sg_io.c b/sg_io.c
index 50ad180d..42c91e1e 100644
--- a/sg_io.c
+++ b/sg_io.c
@@ -23,20 +23,35 @@
int scsi_get_serial(int fd, void *buf, size_t buf_len)
{
- unsigned char inq_cmd[] = {INQUIRY, 1, 0x80, 0, buf_len, 0};
+ unsigned char rsp_buf[255];
+ unsigned char inq_cmd[] = {INQUIRY, 1, 0x80, 0, sizeof(rsp_buf), 0};
unsigned char sense[32];
struct sg_io_hdr io_hdr;
+ int rv;
+ unsigned int rsp_len;
memset(&io_hdr, 0, sizeof(io_hdr));
io_hdr.interface_id = 'S';
io_hdr.cmdp = inq_cmd;
io_hdr.cmd_len = sizeof(inq_cmd);
- io_hdr.dxferp = buf;
- io_hdr.dxfer_len = buf_len;
+ io_hdr.dxferp = rsp_buf;
+ io_hdr.dxfer_len = sizeof(rsp_buf);
io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
io_hdr.sbp = sense;
io_hdr.mx_sb_len = sizeof(sense);
io_hdr.timeout = 5000;
- return ioctl(fd, SG_IO, &io_hdr);
+ rv = ioctl(fd, SG_IO, &io_hdr);
+
+ if (rv)
+ return rv;
+
+ rsp_len = rsp_buf[3];
+
+ if (!rsp_len || buf_len < rsp_len)
+ return -1;
+
+ memcpy(buf, &rsp_buf[4], rsp_len);
+
+ return 0;
}
diff --git a/super-ddf.c b/super-ddf.c
index faaf0a7c..1707ad1e 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -2688,10 +2688,10 @@ static int init_super_ddf_bvd(struct supertype *st,
free(vcl);
return 0;
}
- vc->blocks = cpu_to_be64(info->size * 2);
+ vc->blocks = cpu_to_be64(size * 2);
vc->array_blocks = cpu_to_be64(
calc_array_size(info->level, info->raid_disks, info->layout,
- info->chunk_size, info->size*2));
+ info->chunk_size, size * 2));
memset(vc->pad1, 0xff, 8);
vc->spare_refs[0] = cpu_to_be32(0xffffffff);
vc->spare_refs[1] = cpu_to_be32(0xffffffff);
diff --git a/super-gpt.c b/super-gpt.c
index 1a2adce0..8b080a05 100644
--- a/super-gpt.c
+++ b/super-gpt.c
@@ -73,6 +73,7 @@ static int load_gpt(struct supertype *st, int fd, char *devname)
struct MBR *super;
struct GPT *gpt_head;
int to_read;
+ unsigned int sector_size;
free_gpt(st);
@@ -81,6 +82,11 @@ static int load_gpt(struct supertype *st, int fd, char *devname)
return 1;
}
+ if (!get_dev_sector_size(fd, devname, &sector_size)) {
+ free(super);
+ return 1;
+ }
+
lseek(fd, 0, 0);
if (read(fd, super, sizeof(*super)) != sizeof(*super)) {
no_read:
@@ -100,6 +106,8 @@ static int load_gpt(struct supertype *st, int fd, char *devname)
free(super);
return 1;
}
+ /* Set offset to second block (GPT header) */
+ lseek(fd, sector_size, SEEK_SET);
/* Seem to have GPT, load the header */
gpt_head = (struct GPT*)(super+1);
if (read(fd, gpt_head, sizeof(*gpt_head)) != sizeof(*gpt_head))
@@ -111,6 +119,8 @@ static int load_gpt(struct supertype *st, int fd, char *devname)
to_read = __le32_to_cpu(gpt_head->part_cnt) * sizeof(struct GPT_part_entry);
to_read = ((to_read+511)/512) * 512;
+ /* Set offset to third block (GPT entries) */
+ lseek(fd, sector_size*2, SEEK_SET);
if (read(fd, gpt_head+1, to_read) != to_read)
goto no_read;
diff --git a/super-intel.c b/super-intel.c
index 90b7b6de..433bb6d3 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -81,7 +81,8 @@
MPB_ATTRIB_RAID1 | \
MPB_ATTRIB_RAID10 | \
MPB_ATTRIB_RAID5 | \
- MPB_ATTRIB_EXP_STRIPE_SIZE)
+ MPB_ATTRIB_EXP_STRIPE_SIZE | \
+ MPB_ATTRIB_BBM)
/* Define attributes that are unused but not harmful */
#define MPB_ATTRIB_IGNORED (MPB_ATTRIB_NEVER_USE)
@@ -90,6 +91,7 @@
#define IMSM_RESERVED_SECTORS 4096
#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056
#define SECT_PER_MB_SHIFT 11
+#define MAX_SECTOR_SIZE 4096
/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
@@ -217,22 +219,24 @@ struct imsm_super {
} __attribute__ ((packed));
#define BBM_LOG_MAX_ENTRIES 254
+#define BBM_LOG_MAX_LBA_ENTRY_VAL 256 /* Represents 256 LBAs */
+#define BBM_LOG_SIGNATURE 0xabadb10c
+
+struct bbm_log_block_addr { /* block address split into a 16-bit and a 32-bit part */
+ __u16 w1; /* bits 0-15 of the address, see __le48_to_cpu() */
+ __u32 dw1; /* bits 16-47 of the address, see __le48_to_cpu() */
+} __attribute__ ((__packed__));
struct bbm_log_entry {
- __u64 defective_block_start;
-#define UNREADABLE 0xFFFFFFFF
- __u32 spare_block_offset;
- __u16 remapped_marked_count;
- __u16 disk_ordinal;
+ __u8 marked_count; /* Number of blocks marked - 1 */
+ __u8 disk_ordinal; /* Disk entry within the imsm_super */
+ struct bbm_log_block_addr defective_block_start;
} __attribute__ ((__packed__));
struct bbm_log {
__u32 signature; /* 0xABADB10C */
__u32 entry_count;
- __u32 reserved_spare_block_count; /* 0 */
- __u32 reserved; /* 0xFFFF */
- __u64 first_spare_lba;
- struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
+ struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));
#ifndef MDASSEMBLE
@@ -243,9 +247,9 @@ static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed"
#define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */
-#define MIGR_REC_BUF_SIZE 512 /* size of migr_record i/o buffer */
-#define MIGR_REC_POSITION 512 /* migr_record position offset on disk,
- * MIGR_REC_BUF_SIZE <= MIGR_REC_POSITION
+#define MIGR_REC_BUF_SECTORS 1 /* size of migr_record i/o buffer in sectors */
+#define MIGR_REC_SECTOR_POSITION 1 /* migr_record position offset on disk,
+ * MIGR_REC_BUF_SECTORS <= MIGR_REC_SECTOR_POSITION
*/
#define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must
@@ -318,14 +322,15 @@ static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
}
}
-static unsigned int sector_count(__u32 bytes)
+static unsigned int sector_count(__u32 bytes, unsigned int sector_size)
{
- return ROUND_UP(bytes, 512) / 512;
+ return ROUND_UP(bytes, sector_size) / sector_size;
}
-static unsigned int mpb_sectors(struct imsm_super *mpb)
+static unsigned int mpb_sectors(struct imsm_super *mpb,
+ unsigned int sector_size)
{
- return sector_count(__le32_to_cpu(mpb->mpb_size));
+ return sector_count(__le32_to_cpu(mpb->mpb_size), sector_size);
}
struct intel_dev {
@@ -359,6 +364,7 @@ struct intel_super {
array, it indicates that mdmon is allowed to clean migration
record */
size_t len; /* size of the 'buf' allocation */
+ size_t extra_space; /* extra space in 'buf' that is not used yet */
void *next_buf; /* for realloc'ing buf from the manager */
size_t next_len;
int updates_pending; /* count of pending updates for mdmon */
@@ -366,6 +372,7 @@ struct intel_super {
unsigned long long create_offset; /* common start for 'current_vol' */
__u32 random; /* random data for seeding new family numbers */
struct intel_dev *devlist;
+ unsigned int sector_size; /* sector size of used member drives */
struct dl {
struct dl *next;
int index;
@@ -386,6 +393,7 @@ struct intel_super {
struct intel_hba *hba; /* device path of the raid controller for this metadata */
const struct imsm_orom *orom; /* platform firmware support */
struct intel_super *next; /* (temp) list for disambiguating family_num */
+ struct md_bb bb; /* memory for get_bad_blocks call */
};
struct intel_disk {
@@ -418,6 +426,7 @@ enum imsm_update_type {
update_takeover,
update_general_migration_checkpoint,
update_size_change,
+ update_prealloc_badblocks_mem,
};
struct imsm_update_activate_spare {
@@ -506,6 +515,10 @@ struct imsm_update_add_remove_disk {
enum imsm_update_type type;
};
+struct imsm_update_prealloc_bb_mem { /* update record whose only member is its type tag */
+ enum imsm_update_type type;
+};
+
static const char *_sys_dev_type[] = {
[SYS_DEV_UNKNOWN] = "Unknown",
[SYS_DEV_SAS] = "SAS",
@@ -537,7 +550,8 @@ static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
{
- struct intel_hba *result=NULL;
+ struct intel_hba *result;
+
for (result = hba; result; result = result->next) {
if (result->type == device->type && strcmp(result->path, device->path) == 0)
break;
@@ -566,10 +580,6 @@ static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device
if (device->type != hba->type)
return 2;
- /* Always forbid spanning between VMD domains (seen as different controllers by mdadm) */
- if (device->type == SYS_DEV_VMD && !path_attached_to_hba(device->path, hba->path))
- return 2;
-
/* Multiple same type HBAs can be used if they share the same OROM */
const struct imsm_orom *device_orom = get_orom_by_device_id(device->dev_id);
@@ -788,6 +798,244 @@ static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
return NULL;
}
+/*
+ * Assemble a 48-bit value from a bbm_log_block_addr: dw1 supplies bits
+ * 16-47 and w1 bits 0-15, each converted from little-endian first.
+ */
+static inline unsigned long long __le48_to_cpu(const struct bbm_log_block_addr
+						 *addr)
+{
+	__u64 high = (__u64)__le32_to_cpu(addr->dw1);
+	__u64 low = __le16_to_cpu(addr->w1);
+
+	return (high << 16) | low;
+}
+
+/*
+ * Split 'sec' into a bbm_log_block_addr: the low 16 bits go to w1, the
+ * next 32 bits to dw1, both stored little-endian.
+ */
+static inline struct bbm_log_block_addr __cpu_to_le48(unsigned long long sec)
+{
+	struct bbm_log_block_addr out;
+	__u16 low = (__u16)(sec & 0xffff);
+	__u32 high = (__u32)(sec >> 16) & 0xffffffff;
+
+	out.dw1 = __cpu_to_le32(high);
+	out.w1 = __cpu_to_le16(low);
+	return out;
+}
+
+#ifndef MDASSEMBLE
+/*
+ * Number of bytes occupied by the used part of a bbm log: the signature
+ * and entry_count fields plus entry_count entries. A NULL log or a log
+ * without entries yields 0.
+ */
+static __u32 get_imsm_bbm_log_size(struct bbm_log *log)
+{
+	__u32 size;
+
+	if (log == NULL)
+		return 0;
+	if (log->entry_count == 0)
+		return 0;
+
+	size = sizeof(log->signature) + sizeof(log->entry_count);
+	size += log->entry_count * sizeof(struct bbm_log_entry);
+	return size;
+}
+
+/*
+ * Search the log, starting at index *pos, for an entry of disk 'idx' that
+ * lies completely inside [sector, sector + length). On a hit the entry's
+ * index is stored in *pos and 1 is returned; otherwise 0 is returned and
+ * *pos is left untouched.
+ */
+static int is_stored_in_bbm(struct bbm_log *log, const __u8 idx, const unsigned
+			    long long sector, const int length, __u32 *pos)
+{
+	const unsigned long long range_end = sector + length;
+	__u32 n = *pos;
+
+	for (; n < log->entry_count; n++) {
+		struct bbm_log_entry *e = &log->marked_block_entries[n];
+		unsigned long long first;
+		unsigned long long past_last;
+
+		first = __le48_to_cpu(&e->defective_block_start);
+		past_last = first + (e->marked_count + 1);
+
+		if (e->disk_ordinal != idx)
+			continue;
+		if (first < sector || past_last > range_end)
+			continue;
+
+		*pos = n;
+		return 1;
+	}
+	return 0;
+}
+
+/* record new bad block in bbm log
+ *
+ * Stores the range starting at 'sector' and 'length' sectors long for disk
+ * 'idx'. Entries hold at most BBM_LOG_MAX_LBA_ENTRY_VAL sectors, so longer
+ * ranges are split across several entries.
+ *
+ * Returns 1 when the remaining range fits into one reused entry, 0 when
+ * appending would exceed BBM_LOG_MAX_ENTRIES, otherwise new_bb, the number
+ * of entries appended at the tail of the log.
+ */
+static int record_new_badblock(struct bbm_log *log, const __u8 idx, unsigned
+ long long sector, int length)
+{
+ int new_bb = 0;
+ __u32 pos = 0;
+ struct bbm_log_entry *entry = NULL; /* existing entry picked for reuse, if any */
+
+ while (is_stored_in_bbm(log, idx, sector, length, &pos)) { /* pos is updated to the matching entry */
+ struct bbm_log_entry *e = &log->marked_block_entries[pos];
+
+ if ((e->marked_count + 1 == BBM_LOG_MAX_LBA_ENTRY_VAL) &&
+ (__le48_to_cpu(&e->defective_block_start) == sector)) { /* full-sized entry at the range start */
+ sector += BBM_LOG_MAX_LBA_ENTRY_VAL; /* that part is already recorded: skip it */
+ length -= BBM_LOG_MAX_LBA_ENTRY_VAL;
+ pos = pos + 1; /* continue the search after this entry */
+ continue;
+ }
+ entry = e; /* any other contained entry gets overwritten below */
+ break;
+ }
+
+ if (entry) {
+ int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length :
+ BBM_LOG_MAX_LBA_ENTRY_VAL;
+ entry->defective_block_start = __cpu_to_le48(sector);
+ entry->marked_count = cnt - 1; /* stored as count - 1 */
+ if (cnt == length) /* whole remaining range fit into the reused entry */
+ return 1;
+ sector += cnt;
+ length -= cnt;
+ }
+
+ new_bb = ROUND_UP(length, BBM_LOG_MAX_LBA_ENTRY_VAL) /
+ BBM_LOG_MAX_LBA_ENTRY_VAL; /* entries needed for what is left */
+ if (log->entry_count + new_bb > BBM_LOG_MAX_ENTRIES)
+ return 0; /* no room: nothing is appended */
+
+ while (length > 0) {
+ int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length :
+ BBM_LOG_MAX_LBA_ENTRY_VAL;
+ struct bbm_log_entry *entry = /* shadows the outer 'entry' */
+ &log->marked_block_entries[log->entry_count];
+
+ entry->defective_block_start = __cpu_to_le48(sector);
+ entry->marked_count = cnt - 1;
+ entry->disk_ordinal = idx;
+
+ sector += cnt;
+ length -= cnt;
+
+ log->entry_count++;
+ }
+
+ return new_bb;
+}
+
+/*
+ * Drop every log entry that belongs to disk 'idx'. A removed entry is
+ * replaced by the current last entry, so the order of the remaining
+ * entries is not preserved.
+ */
+static void clear_disk_badblocks(struct bbm_log *log, const __u8 idx)
+{
+	struct bbm_log_entry *entries = log->marked_block_entries;
+	__u32 n = 0;
+
+	while (n < log->entry_count) {
+		__u32 last;
+
+		if (entries[n].disk_ordinal != idx) {
+			n++;
+			continue;
+		}
+
+		/* move the tail entry into slot n and examine slot n again */
+		last = log->entry_count - 1;
+		if (n < last)
+			entries[n] = entries[last];
+		log->entry_count = last;
+	}
+}
+
+/*
+ * Remove the first entry that matches disk 'idx', start 'sector' and
+ * 'length' exactly, filling its slot with the last entry of the log.
+ * Always returns 1, whether or not an entry was found.
+ */
+static int clear_badblock(struct bbm_log *log, const __u8 idx, const unsigned
+			  long long sector, const int length)
+{
+	struct bbm_log_entry *entries = log->marked_block_entries;
+	__u32 n;
+
+	for (n = 0; n < log->entry_count; n++) {
+		struct bbm_log_entry *e = &entries[n];
+
+		if (e->disk_ordinal != idx)
+			continue;
+		if (__le48_to_cpu(&e->defective_block_start) != sector)
+			continue;
+		if (e->marked_count + 1 != length)
+			continue;
+
+		if (n < log->entry_count - 1)
+			*e = entries[log->entry_count - 1];
+		log->entry_count--;
+		break;
+	}
+
+	return 1;
+}
+#endif /* MDASSEMBLE */
+
+/*
+ * allocate and load BBM log from metadata
+ *
+ * Allocates super->bbm_log and, when the anchor records a non-zero
+ * bbm_log_size, copies the log found at the end of the mpb into it after
+ * validating it. With no log on disk an empty log carrying only the
+ * signature is set up.
+ *
+ * Returns:
+ *   0 - success
+ *   1 - allocation failed
+ *   2 - recorded log size is inconsistent with the mpb (smaller than the
+ *       log header, or larger than the mpb itself)
+ *   3 - bad signature or too many entries
+ *   4 - recorded log size does not match the entry count
+ */
+static int load_bbm_log(struct intel_super *super)
+{
+	struct imsm_super *mpb = super->anchor;
+	__u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
+	__u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);
+
+	super->bbm_log = xcalloc(1, sizeof(struct bbm_log));
+	if (!super->bbm_log)
+		return 1;
+
+	if (bbm_log_size) {
+		struct bbm_log *log;
+		__u32 entry_count;
+
+		if (bbm_log_size < sizeof(log->signature) +
+		    sizeof(log->entry_count))
+			return 2;
+
+		/*
+		 * The log sits in the last bbm_log_size bytes of the mpb; a
+		 * size larger than the mpb would place 'log' in front of the
+		 * anchor buffer.
+		 */
+		if (bbm_log_size > mpb_size)
+			return 2;
+
+		log = (void *)mpb + mpb_size - bbm_log_size;
+
+		entry_count = __le32_to_cpu(log->entry_count);
+		if ((__le32_to_cpu(log->signature) != BBM_LOG_SIGNATURE) ||
+		    (entry_count > BBM_LOG_MAX_ENTRIES))
+			return 3;
+
+		if (bbm_log_size !=
+		    sizeof(log->signature) + sizeof(log->entry_count) +
+		    entry_count * sizeof(struct bbm_log_entry))
+			return 4;
+
+		/* entry_count is bounded above, so this fits in *bbm_log */
+		memcpy(super->bbm_log, log, bbm_log_size);
+	} else {
+		super->bbm_log->signature = __cpu_to_le32(BBM_LOG_SIGNATURE);
+		super->bbm_log->entry_count = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * checks if bad block is within volume boundaries
+ *
+ * The entry covers sectors [bb_start, bb_end), where bb_end is one past the
+ * last marked sector; the volume covers
+ * [start_sector, start_sector + size). Returns 1 when the two ranges share
+ * at least one sector, 0 otherwise. A block that ends exactly where the
+ * volume starts does not belong to the volume, and a block that spans the
+ * whole volume does.
+ */
+static int is_bad_block_in_volume(const struct bbm_log_entry *entry,
+				  const unsigned long long start_sector,
+				  const unsigned long long size)
+{
+	unsigned long long bb_start;
+	unsigned long long bb_end;
+
+	bb_start = __le48_to_cpu(&entry->defective_block_start);
+	bb_end = bb_start + (entry->marked_count + 1);
+
+	/* half-open interval overlap test */
+	if ((bb_start < start_sector + size) && (bb_end > start_sector))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Collect into 'bbs' the log entries of disk 'idx' that
+ * is_bad_block_in_volume() accepts for the given volume range.
+ * bbs->entries is allocated on first use, sized for BBM_LOG_MAX_ENTRIES
+ * entries, when it is still NULL; bbs->count receives the number of
+ * entries written.
+ */
+static void get_volume_badblocks(const struct bbm_log *log, const __u8 idx,
+				 const unsigned long long start_sector,
+				 const unsigned long long size,
+				 struct md_bb *bbs)
+{
+	__u32 found = 0;
+	__u32 n;
+
+	for (n = 0; n < log->entry_count; n++) {
+		const struct bbm_log_entry *e = &log->marked_block_entries[n];
+		struct md_bb_entry *out;
+
+		if (e->disk_ordinal != idx)
+			continue;
+		if (!is_bad_block_in_volume(e, start_sector, size))
+			continue;
+
+		if (bbs->entries == NULL) {
+			bbs->entries = xmalloc(BBM_LOG_MAX_ENTRIES *
+					       sizeof(*out));
+			if (bbs->entries == NULL)
+				break;
+		}
+
+		out = &bbs->entries[found];
+		found++;
+		out->sector = __le48_to_cpu(&e->defective_block_start);
+		out->length = e->marked_count + 1;
+	}
+	bbs->count = found;
+}
+
/*
* for second_map:
* == MAP_0 get first map
@@ -910,7 +1158,6 @@ static unsigned long long blocks_per_member(struct imsm_map *map)
return join_u32(map->blocks_per_member_lo, map->blocks_per_member_hi);
}
-#ifndef MDASSEMBLE
static unsigned long long num_data_stripes(struct imsm_map *map)
{
if (map == NULL)
@@ -922,7 +1169,6 @@ static void set_total_blocks(struct imsm_disk *disk, unsigned long long n)
{
split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi);
}
-#endif
static void set_pba_of_lba0(struct imsm_map *map, unsigned long long n)
{
@@ -1124,6 +1370,8 @@ static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
static int is_gen_migration(struct imsm_dev *dev);
+#define IMSM_4K_DIV 8
+
#ifndef MDASSEMBLE
static __u64 blocks_per_migr_unit(struct intel_super *super,
struct imsm_dev *dev);
@@ -1222,7 +1470,7 @@ static void print_imsm_dev(struct intel_super *super,
printf(" <-- %s", map_state_str[map->map_state]);
printf("\n Checkpoint : %u ",
__le32_to_cpu(dev->vol.curr_migr_unit));
- if ((is_gen_migration(dev)) && ((slot > 1) || (slot < 0)))
+ if (is_gen_migration(dev) && (slot > 1 || slot < 0))
printf("(N/A)");
else
printf("(%llu)", (unsigned long long)
@@ -1232,8 +1480,10 @@ static void print_imsm_dev(struct intel_super *super,
printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
}
-static void print_imsm_disk(struct imsm_disk *disk, int index, __u32 reserved)
-{
+static void print_imsm_disk(struct imsm_disk *disk,
+ int index,
+ __u32 reserved,
+ unsigned int sector_size) {
char str[MAX_RAID_SERIAL_LEN + 1];
__u64 sz;
@@ -1251,10 +1501,85 @@ static void print_imsm_disk(struct imsm_disk *disk, int index, __u32 reserved)
is_failed(disk) ? " failed" : "");
printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
sz = total_blocks(disk) - reserved;
- printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
+ printf(" Usable Size : %llu%s\n",
+ (unsigned long long)sz * 512 / sector_size,
human_size(sz * 512));
}
+/*
+ * Divide the block-based fields of super->migr_rec by IMSM_4K_DIV,
+ * including the 64-bit post-migration volume capacity kept as a lo/hi pair.
+ */
+void convert_to_4k_imsm_migr_rec(struct intel_super *super)
+{
+	struct migr_record *rec = super->migr_rec;
+
+	rec->blocks_per_unit = rec->blocks_per_unit / IMSM_4K_DIV;
+	rec->ckpt_area_pba = rec->ckpt_area_pba / IMSM_4K_DIV;
+	rec->dest_1st_member_lba = rec->dest_1st_member_lba / IMSM_4K_DIV;
+	rec->dest_depth_per_unit = rec->dest_depth_per_unit / IMSM_4K_DIV;
+
+	split_ull(join_u32(rec->post_migr_vol_cap,
+			   rec->post_migr_vol_cap_hi) / IMSM_4K_DIV,
+		  &rec->post_migr_vol_cap,
+		  &rec->post_migr_vol_cap_hi);
+}
+
+/* Divide the disk's total block count by IMSM_4K_DIV. */
+void convert_to_4k_imsm_disk(struct imsm_disk *disk)
+{
+	set_total_blocks(disk, total_blocks(disk) / IMSM_4K_DIV);
+}
+
+/*
+ * Divide the sector-based fields of the anchor by IMSM_4K_DIV: the total
+ * block count of every disk; the size, current migration unit and map
+ * geometry (MAP_0, plus MAP_1 while migr_state is set) of every raid
+ * device; and the start and length of every bad block log entry stored at
+ * the end of the mpb. The mpb checksum is regenerated afterwards.
+ */
+void convert_to_4k(struct intel_super *super)
+{
+	struct imsm_super *mpb = super->anchor;
+	struct imsm_disk *disk;
+	int i;
+	__u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);
+
+	for (i = 0; i < mpb->num_disks ; i++) {
+		disk = __get_imsm_disk(mpb, i);
+		/* disk */
+		convert_to_4k_imsm_disk(disk);
+	}
+	for (i = 0; i < mpb->num_raid_devs; i++) {
+		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
+		struct imsm_map *map = get_imsm_map(dev, MAP_0);
+		/* dev */
+		split_ull((join_u32(dev->size_low, dev->size_high)/IMSM_4K_DIV),
+			  &dev->size_low, &dev->size_high);
+		dev->vol.curr_migr_unit /= IMSM_4K_DIV;
+
+		/* map0 */
+		set_blocks_per_member(map, blocks_per_member(map)/IMSM_4K_DIV);
+		map->blocks_per_strip /= IMSM_4K_DIV;
+		set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV);
+
+		if (dev->vol.migr_state) {
+			/* map1 */
+			map = get_imsm_map(dev, MAP_1);
+			set_blocks_per_member(map,
+				blocks_per_member(map)/IMSM_4K_DIV);
+			map->blocks_per_strip /= IMSM_4K_DIV;
+			set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV);
+		}
+	}
+	if (bbm_log_size) {
+		struct bbm_log *log = (void *)mpb +
+			__le32_to_cpu(mpb->mpb_size) - bbm_log_size;
+		__u32 n;
+
+		for (n = 0; n < log->entry_count; n++) {
+			struct bbm_log_entry *entry =
+				&log->marked_block_entries[n];
+
+			/*
+			 * marked_count holds "blocks - 1", so the real count
+			 * ranges from 1 to 256. Keep it in an int: a __u8
+			 * would wrap 256 to 0 and shrink a full entry to a
+			 * single block.
+			 */
+			int count = entry->marked_count + 1;
+			unsigned long long sector =
+				__le48_to_cpu(&entry->defective_block_start);
+
+			entry->defective_block_start =
+				__cpu_to_le48(sector/IMSM_4K_DIV);
+			/* never let an entry drop below one block */
+			entry->marked_count = max(count/IMSM_4K_DIV, 1) - 1;
+		}
+	}
+
+	mpb->check_sum = __gen_imsm_checksum(mpb);
+}
+
void examine_migr_rec_imsm(struct intel_super *super)
{
struct migr_record *migr_rec = super->migr_rec;
@@ -1275,7 +1600,7 @@ void examine_migr_rec_imsm(struct intel_super *super)
map = get_imsm_map(dev, MAP_0);
if (map)
slot = get_imsm_disk_slot(map, super->disks->index);
- if ((map == NULL) || (slot > 1) || (slot < 0)) {
+ if (map == NULL || slot > 1 || slot < 0) {
printf(" Empty\n ");
printf("Examine one of first two disks in array\n");
break;
@@ -1312,6 +1637,78 @@ void examine_migr_rec_imsm(struct intel_super *super)
}
}
#endif /* MDASSEMBLE */
+
+/*
+ * Scale the migration record held in super->migr_rec up by IMSM_4K_DIV.
+ *
+ * Every block/LBA-valued field of the record is multiplied in place.
+ * The post-migration volume capacity is stored split across
+ * post_migr_vol_cap / post_migr_vol_cap_hi, so it is joined, scaled and
+ * split back again.
+ *
+ * NOTE(review): presumably this turns 4096-byte-sector units into
+ * 512-byte-sector units; confirm against the definition of IMSM_4K_DIV.
+ */
+void convert_from_4k_imsm_migr_rec(struct intel_super *super)
+{
+	struct migr_record *rec = super->migr_rec;
+
+	/* capacity first: it is the only field kept as a low/high pair */
+	split_ull((join_u32(rec->post_migr_vol_cap,
+			    rec->post_migr_vol_cap_hi) * IMSM_4K_DIV),
+		  &rec->post_migr_vol_cap,
+		  &rec->post_migr_vol_cap_hi);
+
+	/* plain fields are rescaled in place */
+	rec->dest_depth_per_unit *= IMSM_4K_DIV;
+	rec->dest_1st_member_lba *= IMSM_4K_DIV;
+	rec->ckpt_area_pba *= IMSM_4K_DIV;
+	rec->blocks_per_unit *= IMSM_4K_DIV;
+}
+
+
+/*
+ * Multiply the sector-denominated fields of the anchor (super->anchor)
+ * by IMSM_4K_DIV, in place:
+ *   - total block count of every disk entry,
+ *   - size and current migration unit of every raid device,
+ *   - blocks_per_member, blocks_per_strip and pba_of_lba0 of map 0, and
+ *     of map 1 as well while the device is migrating,
+ *   - start sector and block count of every bad block log entry.
+ * The checksum is regenerated at the end because the buffer changed.
+ *
+ * load_and_parse_mpb() calls this right after load_imsm_mpb() when
+ * super->sector_size is 4096.
+ */
+void convert_from_4k(struct intel_super *super)
+{
+	struct imsm_super *mpb = super->anchor;
+	struct imsm_disk *disk;
+	int i;
+	__u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);
+
+	for (i = 0; i < mpb->num_disks ; i++) {
+		disk = __get_imsm_disk(mpb, i);
+		/* disk */
+		set_total_blocks(disk, (total_blocks(disk)*IMSM_4K_DIV));
+	}
+
+	for (i = 0; i < mpb->num_raid_devs; i++) {
+		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
+		struct imsm_map *map = get_imsm_map(dev, MAP_0);
+		/* dev */
+		split_ull((join_u32(dev->size_low, dev->size_high)*IMSM_4K_DIV),
+			  &dev->size_low, &dev->size_high);
+		/*
+		 * NOTE(review): curr_migr_unit and blocks_per_strip are
+		 * scaled without any byte-order conversion - confirm
+		 * whether that matters for these fields.
+		 */
+		dev->vol.curr_migr_unit *= IMSM_4K_DIV;
+
+		/* map0 */
+		set_blocks_per_member(map, blocks_per_member(map)*IMSM_4K_DIV);
+		map->blocks_per_strip *= IMSM_4K_DIV;
+		set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV);
+
+		if (dev->vol.migr_state) {
+			/* map1 */
+			map = get_imsm_map(dev, MAP_1);
+			set_blocks_per_member(map,
+				blocks_per_member(map)*IMSM_4K_DIV);
+			map->blocks_per_strip *= IMSM_4K_DIV;
+			set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV);
+		}
+	}
+	if (bbm_log_size) {
+		/* the log sits in the last bbm_log_size bytes of the mpb */
+		struct bbm_log *log = (void *)mpb +
+			__le32_to_cpu(mpb->mpb_size) - bbm_log_size;
+		/*
+		 * Convert the count from its stored byte order, the same
+		 * way examine_super_imsm() does before printing it.
+		 */
+		__u32 entry_count = __le32_to_cpu(log->entry_count);
+		__u32 e;
+
+		for (e = 0; e < entry_count; e++) {
+			struct bbm_log_entry *entry =
+				&log->marked_block_entries[e];
+
+			/* marked_count holds "number of blocks - 1" */
+			__u8 count = entry->marked_count + 1;
+			unsigned long long sector =
+				__le48_to_cpu(&entry->defective_block_start);
+
+			entry->defective_block_start =
+				__cpu_to_le48(sector*IMSM_4K_DIV);
+			/*
+			 * NOTE(review): count is a __u8; if marked_count is
+			 * equally narrow, a large count times IMSM_4K_DIV
+			 * is truncated on this store - confirm the range.
+			 */
+			entry->marked_count = count*IMSM_4K_DIV - 1;
+		}
+	}
+
+	/*
+	 * __gen_imsm_checksum() yields a host-order value; store it with
+	 * __cpu_to_le32() as write_super_imsm() does, since readers apply
+	 * __le32_to_cpu() to check_sum.
+	 */
+	mpb->check_sum = __cpu_to_le32(__gen_imsm_checksum(mpb));
+}
+
+
/*******************************************************************************
* function: imsm_check_attributes
* Description: Function checks if features represented by attributes flags
@@ -1432,11 +1829,12 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
sum = __le32_to_cpu(mpb->check_sum);
printf(" Checksum : %08x %s\n", sum,
__gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
- printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
+ printf(" MPB Sectors : %d\n", mpb_sectors(mpb, super->sector_size));
printf(" Disks : %d\n", mpb->num_disks);
printf(" RAID Devices : %d\n", mpb->num_raid_devs);
- print_imsm_disk(__get_imsm_disk(mpb, super->disks->index), super->disks->index, reserved);
- if (super->bbm_log) {
+ print_imsm_disk(__get_imsm_disk(mpb, super->disks->index),
+ super->disks->index, reserved, super->sector_size);
+ if (get_imsm_bbm_log_size(super->bbm_log)) {
struct bbm_log *log = super->bbm_log;
printf("\n");
@@ -1444,9 +1842,6 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
printf(" Signature : %x\n", __le32_to_cpu(log->signature));
printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
- printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count));
- printf(" First Spare : %llx\n",
- (unsigned long long) __le64_to_cpu(log->first_spare_lba));
}
for (i = 0; i < mpb->num_raid_devs; i++) {
struct mdinfo info;
@@ -1460,12 +1855,14 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
for (i = 0; i < mpb->num_disks; i++) {
if (i == super->disks->index)
continue;
- print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved);
+ print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved,
+ super->sector_size);
}
for (dl = super->disks; dl; dl = dl->next)
if (dl->index == -1)
- print_imsm_disk(&dl->disk, -1, reserved);
+ print_imsm_disk(&dl->disk, -1, reserved,
+ super->sector_size);
examine_migr_rec_imsm(super);
}
@@ -1529,7 +1926,7 @@ static void export_examine_super_imsm(struct supertype *st)
static int copy_metadata_imsm(struct supertype *st, int from, int to)
{
- /* The second last 512byte sector of the device contains
+ /* The second last sector of the device contains
* the "struct imsm_super" metadata.
* This contains mpb_size which is the size in bytes of the
* extended metadata. This is located immediately before
@@ -1542,29 +1939,31 @@ static int copy_metadata_imsm(struct supertype *st, int from, int to)
unsigned long long dsize, offset;
int sectors;
struct imsm_super *sb;
- int written = 0;
+ struct intel_super *super = st->sb;
+ unsigned int sector_size = super->sector_size;
+ unsigned int written = 0;
- if (posix_memalign(&buf, 4096, 4096) != 0)
+ if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE) != 0)
return 1;
if (!get_dev_size(from, NULL, &dsize))
goto err;
- if (lseek64(from, dsize-1024, 0) < 0)
+ if (lseek64(from, dsize-(2*sector_size), 0) < 0)
goto err;
- if (read(from, buf, 512) != 512)
+ if ((unsigned int)read(from, buf, sector_size) != sector_size)
goto err;
sb = buf;
if (strncmp((char*)sb->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0)
goto err;
- sectors = mpb_sectors(sb) + 2;
- offset = dsize - sectors * 512;
+ sectors = mpb_sectors(sb, sector_size) + 2;
+ offset = dsize - sectors * sector_size;
if (lseek64(from, offset, 0) < 0 ||
lseek64(to, offset, 0) < 0)
goto err;
- while (written < sectors * 512) {
- int n = sectors*512 - written;
+ while (written < sectors * sector_size) {
+ int n = sectors*sector_size - written;
if (n > 4096)
n = 4096;
if (read(from, buf, n) != n)
@@ -1624,7 +2023,10 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
* this hba
*/
dir = opendir("/sys/dev/block");
- for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
+ if (!dir)
+ return 1;
+
+ for (ent = readdir(dir); ent; ent = readdir(dir)) {
int fd;
char model[64];
char vendor[64];
@@ -1654,7 +2056,7 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
break;
}
sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
- if (load_sys(device, buf) != 0) {
+ if (load_sys(device, buf, sizeof(buf)) != 0) {
if (verbose > 0)
pr_err("failed to read device type for %s\n",
path);
@@ -1669,7 +2071,7 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
vendor[0] = '\0';
model[0] = '\0';
sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
- if (load_sys(device, buf) == 0) {
+ if (load_sys(device, buf, sizeof(buf)) == 0) {
strncpy(vendor, buf, sizeof(vendor));
vendor[sizeof(vendor) - 1] = '\0';
c = (char *) &vendor[sizeof(vendor) - 1];
@@ -1678,7 +2080,7 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
}
sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
- if (load_sys(device, buf) == 0) {
+ if (load_sys(device, buf, sizeof(buf)) == 0) {
strncpy(model, buf, sizeof(model));
model[sizeof(model) - 1] = '\0';
c = (char *) &model[sizeof(model) - 1];
@@ -1781,7 +2183,10 @@ static int print_vmd_attached_devs(struct sys_dev *hba)
* this hba
*/
dir = opendir("/sys/bus/pci/drivers/nvme");
- for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
+ if (!dir)
+ return 1;
+
+ for (ent = readdir(dir); ent; ent = readdir(dir)) {
int n;
/* is 'ent' a device? check that the 'subsystem' link exists and
@@ -1800,9 +2205,6 @@ static int print_vmd_attached_devs(struct sys_dev *hba)
continue;
sprintf(path, "/sys/bus/pci/drivers/nvme/%s", ent->d_name);
- /* if not a intel NVMe - skip it*/
- if (devpath_to_vendor(path) != 0x8086)
- continue;
rp = realpath(path, NULL);
if (!rp)
@@ -1814,6 +2216,7 @@ static int print_vmd_attached_devs(struct sys_dev *hba)
free(rp);
}
+ closedir(dir);
return 0;
}
@@ -2015,16 +2418,22 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle
for (entry = orom_entries; entry; entry = entry->next) {
if (entry->type == SYS_DEV_VMD) {
+ print_imsm_capability(&entry->orom);
+ printf(" 3rd party NVMe :%s supported\n",
+ imsm_orom_has_tpv_support(&entry->orom)?"":" not");
for (hba = list; hba; hba = hba->next) {
if (hba->type == SYS_DEV_VMD) {
char buf[PATH_MAX];
- print_imsm_capability(&entry->orom);
printf(" I/O Controller : %s (%s)\n",
vmd_domain_to_controller(hba, buf), get_sys_dev_type(hba->type));
- print_vmd_attached_devs(hba);
- printf("\n");
+ if (print_vmd_attached_devs(hba)) {
+ if (verbose > 0)
+ pr_err("failed to get devices attached to VMD domain.\n");
+ result |= 2;
+ }
}
}
+ printf("\n");
continue;
}
@@ -2403,21 +2812,26 @@ static int imsm_level_to_layout(int level)
static int read_imsm_migr_rec(int fd, struct intel_super *super)
{
int ret_val = -1;
+ unsigned int sector_size = super->sector_size;
unsigned long long dsize;
get_dev_size(fd, NULL, &dsize);
- if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) {
+ if (lseek64(fd, dsize - (sector_size*MIGR_REC_SECTOR_POSITION),
+ SEEK_SET) < 0) {
pr_err("Cannot seek to anchor block: %s\n",
strerror(errno));
goto out;
}
- if (read(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) !=
- MIGR_REC_BUF_SIZE) {
+ if ((unsigned int)read(fd, super->migr_rec_buf,
+ MIGR_REC_BUF_SECTORS*sector_size) !=
+ MIGR_REC_BUF_SECTORS*sector_size) {
pr_err("Cannot read migr record block: %s\n",
strerror(errno));
goto out;
}
ret_val = 0;
+ if (sector_size == 4096)
+ convert_from_4k_imsm_migr_rec(super);
out:
return ret_val;
@@ -2451,12 +2865,12 @@ static struct imsm_dev *imsm_get_device_during_migration(
static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
{
struct mdinfo *sd;
- struct dl *dl = NULL;
+ struct dl *dl;
char nm[30];
int retval = -1;
int fd = -1;
struct imsm_dev *dev;
- struct imsm_map *map = NULL;
+ struct imsm_map *map;
int slot = -1;
/* find map under migration */
@@ -2465,19 +2879,12 @@ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
*/
if (dev == NULL)
return -2;
- map = get_imsm_map(dev, MAP_0);
if (info) {
for (sd = info->devs ; sd ; sd = sd->next) {
- /* skip spare and failed disks
- */
- if (sd->disk.raid_disk < 0)
- continue;
/* read only from one of the first two slots */
- if (map)
- slot = get_imsm_disk_slot(map,
- sd->disk.raid_disk);
- if ((map == NULL) || (slot > 1) || (slot < 0))
+ if ((sd->disk.raid_disk < 0) ||
+ (sd->disk.raid_disk > 1))
continue;
sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
@@ -2487,6 +2894,7 @@ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
}
}
if (fd < 0) {
+ map = get_imsm_map(dev, MAP_0);
for (dl = super->disks; dl; dl = dl->next) {
/* skip spare and failed disks
*/
@@ -2495,7 +2903,7 @@ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
/* read only from one of the first two slots */
if (map)
slot = get_imsm_disk_slot(map, dl->index);
- if ((map == NULL) || (slot > 1) || (slot < 0))
+ if (map == NULL || slot > 1 || slot < 0)
continue;
sprintf(nm, "%d:%d", dl->major, dl->minor);
fd = dev_open(nm, O_RDONLY);
@@ -2569,6 +2977,7 @@ static void imsm_update_metadata_locally(struct supertype *st,
static int write_imsm_migr_rec(struct supertype *st)
{
struct intel_super *super = st->sb;
+ unsigned int sector_size = super->sector_size;
unsigned long long dsize;
char nm[30];
int fd = -1;
@@ -2577,7 +2986,7 @@ static int write_imsm_migr_rec(struct supertype *st)
int len;
struct imsm_update_general_migration_checkpoint *u;
struct imsm_dev *dev;
- struct imsm_map *map = NULL;
+ struct imsm_map *map;
/* find map under migration */
dev = imsm_get_device_during_migration(super);
@@ -2590,6 +2999,8 @@ static int write_imsm_migr_rec(struct supertype *st)
map = get_imsm_map(dev, MAP_0);
+ if (sector_size == 4096)
+ convert_to_4k_imsm_migr_rec(super);
for (sd = super->disks ; sd ; sd = sd->next) {
int slot = -1;
@@ -2599,7 +3010,7 @@ static int write_imsm_migr_rec(struct supertype *st)
/* write to 2 first slots only */
if (map)
slot = get_imsm_disk_slot(map, sd->index);
- if ((map == NULL) || (slot > 1) || (slot < 0))
+ if (map == NULL || slot > 1 || slot < 0)
continue;
sprintf(nm, "%d:%d", sd->major, sd->minor);
@@ -2607,13 +3018,15 @@ static int write_imsm_migr_rec(struct supertype *st)
if (fd < 0)
continue;
get_dev_size(fd, NULL, &dsize);
- if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) {
+ if (lseek64(fd, dsize - (MIGR_REC_SECTOR_POSITION*sector_size),
+ SEEK_SET) < 0) {
pr_err("Cannot seek to anchor block: %s\n",
strerror(errno));
goto out;
}
- if (write(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) !=
- MIGR_REC_BUF_SIZE) {
+ if ((unsigned int)write(fd, super->migr_rec_buf,
+ MIGR_REC_BUF_SECTORS*sector_size) !=
+ MIGR_REC_BUF_SECTORS*sector_size) {
pr_err("Cannot write migr record block: %s\n",
strerror(errno));
goto out;
@@ -2621,9 +3034,10 @@ static int write_imsm_migr_rec(struct supertype *st)
close(fd);
fd = -1;
}
+ if (sector_size == 4096)
+ convert_from_4k_imsm_migr_rec(super);
/* update checkpoint information in metadata */
len = imsm_create_metadata_checkpoint_update(super, &u);
-
if (len <= 0) {
dprintf("imsm: Cannot prepare update\n");
goto out;
@@ -2675,13 +3089,14 @@ int imsm_reshape_blocks_arrays_changes(struct intel_super *super)
}
static unsigned long long imsm_component_size_aligment_check(int level,
int chunk_size,
+ unsigned int sector_size,
unsigned long long component_size)
{
unsigned int component_size_alligment;
/* check component size aligment
*/
- component_size_alligment = component_size % (chunk_size/512);
+ component_size_alligment = component_size % (chunk_size/sector_size);
dprintf("(Level: %i, chunk_size = %i, component_size = %llu), component_size_alligment = %u\n",
level, chunk_size, component_size,
@@ -2787,12 +3202,20 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
}
info->data_offset = pba_of_lba0(map_to_analyse);
- info->component_size = blocks_per_member(map_to_analyse);
+
+ if (info->array.level == 5) {
+ info->component_size = num_data_stripes(map_to_analyse) *
+ map_to_analyse->blocks_per_strip;
+ } else {
+ info->component_size = blocks_per_member(map_to_analyse);
+ }
info->component_size = imsm_component_size_aligment_check(
info->array.level,
info->array.chunk_size,
+ super->sector_size,
info->component_size);
+ info->bb.supported = 1;
memset(info->uuid, 0, sizeof(info->uuid));
info->recovery_start = MaxSector;
@@ -2959,9 +3382,11 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *
info->name[0] = 0;
info->recovery_start = MaxSector;
info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
+ info->bb.supported = 1;
/* do we have the all the insync disks that we expect? */
mpb = super->anchor;
+ info->events = __le32_to_cpu(mpb->generation_num);
for (i = 0; i < mpb->num_raid_devs; i++) {
struct imsm_dev *dev = get_imsm_dev(super, i);
@@ -3049,7 +3474,7 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *
* for each disk in array */
struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
{
- struct mdinfo *mddev = NULL;
+ struct mdinfo *mddev;
struct intel_super *super = st->sb;
struct imsm_disk *disk;
int count = 0;
@@ -3163,6 +3588,8 @@ static size_t disks_to_mpb_size(int disks)
size += (4 - 2) * sizeof(struct imsm_map);
/* 4 possible disk_ord_tbl's */
size += 4 * (disks - 1) * sizeof(__u32);
+ /* maximum bbm log */
+ size += sizeof(struct bbm_log);
return size;
}
@@ -3319,23 +3746,40 @@ static void fd2devname(int fd, char *name)
}
}
+/*
+ * Read the serial number of an NVMe block device from sysfs.
+ *
+ * fd:      open descriptor; its kernel name is looked up with fd2kname()
+ * buf:     destination buffer, handed straight to load_sys()
+ * buf_len: size of buf in bytes
+ *
+ * Returns 1 when the kernel name cannot be resolved, does not start
+ * with "nvme", or the sysfs path does not fit in the local buffer;
+ * otherwise returns whatever load_sys() returns.
+ */
+static int nvme_get_serial(int fd, void *buf, size_t buf_len)
+{
+	char path[60];
+	char *name = fd2kname(fd);
+	int n;
+
+	if (!name)
+		return 1;
+
+	if (strncmp(name, "nvme", 4) != 0)
+		return 1;
+
+	/* snprintf() already leaves room for the terminating NUL */
+	n = snprintf(path, sizeof(path), "/sys/block/%s/device/serial", name);
+	/* a truncated path would name a different file - treat as failure */
+	if (n < 0 || (size_t)n >= sizeof(path))
+		return 1;
+
+	return load_sys(path, buf, buf_len);
+}
+
+
extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
static int imsm_read_serial(int fd, char *devname,
__u8 serial[MAX_RAID_SERIAL_LEN])
{
- unsigned char scsi_serial[255];
+ char buf[50];
int rv;
- int rsp_len;
int len;
char *dest;
char *src;
- char *rsp_buf;
- int i;
+ unsigned int i;
+
+ memset(buf, 0, sizeof(buf));
- memset(scsi_serial, 0, sizeof(scsi_serial));
+ rv = nvme_get_serial(fd, buf, sizeof(buf));
- rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
+ if (rv)
+ rv = scsi_get_serial(fd, buf, sizeof(buf));
if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
memset(serial, 0, MAX_RAID_SERIAL_LEN);
@@ -3350,20 +3794,11 @@ static int imsm_read_serial(int fd, char *devname,
return rv;
}
- rsp_len = scsi_serial[3];
- if (!rsp_len) {
- if (devname)
- pr_err("Failed to retrieve serial for %s\n",
- devname);
- return 2;
- }
- rsp_buf = (char *) &scsi_serial[4];
-
/* trim all whitespace and non-printable characters and convert
* ':' to ';'
*/
- for (i = 0, dest = rsp_buf; i < rsp_len; i++) {
- src = &rsp_buf[i];
+ for (i = 0, dest = buf; i < sizeof(buf) && buf[i]; i++) {
+ src = &buf[i];
if (*src > 0x20) {
/* ':' is reserved for use in placeholder serial
* numbers for missing disks
@@ -3374,8 +3809,8 @@ static int imsm_read_serial(int fd, char *devname,
*dest++ = *src;
}
}
- len = dest - rsp_buf;
- dest = rsp_buf;
+ len = dest - buf;
+ dest = buf;
/* truncate leading characters */
if (len > MAX_RAID_SERIAL_LEN) {
@@ -3507,8 +3942,7 @@ static void migrate(struct imsm_dev *dev, struct intel_super *super,
/* duplicate and then set the target end state in map[0] */
memcpy(dest, src, sizeof_imsm_map(src));
- if ((migr_type == MIGR_REBUILD) ||
- (migr_type == MIGR_GEN_MIGR)) {
+ if (migr_type == MIGR_REBUILD || migr_type == MIGR_GEN_MIGR) {
__u32 ord;
int i;
@@ -3538,8 +3972,8 @@ static void end_migration(struct imsm_dev *dev, struct intel_super *super,
*
* FIXME add support for raid-level-migration
*/
- if ((map_state != map->map_state) && (is_gen_migration(dev) == 0) &&
- (prev->map_state != IMSM_T_STATE_UNINITIALIZED)) {
+ if (map_state != map->map_state && (is_gen_migration(dev) == 0) &&
+ prev->map_state != IMSM_T_STATE_UNINITIALIZED) {
/* when final map state is other than expected
* merge maps (not for migration)
*/
@@ -3605,8 +4039,9 @@ static int parse_raid_devices(struct intel_super *super)
if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
void *buf;
- len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
- if (posix_memalign(&buf, 512, len) != 0)
+ len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed,
+ super->sector_size);
+ if (posix_memalign(&buf, MAX_SECTOR_SIZE, len) != 0)
return 1;
memcpy(buf, super->buf, super->len);
@@ -3616,20 +4051,9 @@ static int parse_raid_devices(struct intel_super *super)
super->len = len;
}
- return 0;
-}
-
-/* retrieve a pointer to the bbm log which starts after all raid devices */
-struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
-{
- void *ptr = NULL;
-
- if (__le32_to_cpu(mpb->bbm_log_size)) {
- ptr = mpb;
- ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
- }
+ super->extra_space += space_needed;
- return ptr;
+ return 0;
}
/*******************************************************************************
@@ -3679,31 +4103,32 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
{
unsigned long long dsize;
unsigned long long sectors;
+ unsigned int sector_size = super->sector_size;
struct stat;
struct imsm_super *anchor;
__u32 check_sum;
get_dev_size(fd, NULL, &dsize);
- if (dsize < 1024) {
+ if (dsize < 2*sector_size) {
if (devname)
pr_err("%s: device to small for imsm\n",
devname);
return 1;
}
- if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
+ if (lseek64(fd, dsize - (sector_size * 2), SEEK_SET) < 0) {
if (devname)
pr_err("Cannot seek to anchor block on %s: %s\n",
devname, strerror(errno));
return 1;
}
- if (posix_memalign((void**)&anchor, 512, 512) != 0) {
+ if (posix_memalign((void **)&anchor, sector_size, sector_size) != 0) {
if (devname)
pr_err("Failed to allocate imsm anchor buffer on %s\n", devname);
return 1;
}
- if (read(fd, anchor, 512) != 512) {
+ if ((unsigned int)read(fd, anchor, sector_size) != sector_size) {
if (devname)
pr_err("Cannot read anchor block on %s: %s\n",
devname, strerror(errno));
@@ -3723,20 +4148,21 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
/* capability and hba must be updated with new super allocation */
find_intel_hba_capability(fd, super, devname);
- super->len = ROUND_UP(anchor->mpb_size, 512);
- if (posix_memalign(&super->buf, 512, super->len) != 0) {
+ super->len = ROUND_UP(anchor->mpb_size, sector_size);
+ if (posix_memalign(&super->buf, MAX_SECTOR_SIZE, super->len) != 0) {
if (devname)
pr_err("unable to allocate %zu byte mpb buffer\n",
super->len);
free(anchor);
return 2;
}
- memcpy(super->buf, anchor, 512);
+ memcpy(super->buf, anchor, sector_size);
- sectors = mpb_sectors(anchor) - 1;
+ sectors = mpb_sectors(anchor, sector_size) - 1;
free(anchor);
- if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) {
+ if (posix_memalign(&super->migr_rec_buf, sector_size,
+ MIGR_REC_BUF_SECTORS*sector_size) != 0) {
pr_err("could not allocate migr_rec buffer\n");
free(super->buf);
return 2;
@@ -3758,14 +4184,15 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
}
/* read the extended mpb */
- if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
+ if (lseek64(fd, dsize - (sector_size * (2 + sectors)), SEEK_SET) < 0) {
if (devname)
pr_err("Cannot seek to extended mpb on %s: %s\n",
devname, strerror(errno));
return 1;
}
- if ((unsigned)read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
+ if ((unsigned int)read(fd, super->buf + sector_size,
+ super->len - sector_size) != super->len - sector_size) {
if (devname)
pr_err("Cannot read extended mpb on %s: %s\n",
devname, strerror(errno));
@@ -3781,12 +4208,6 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
return 3;
}
- /* FIXME the BBM log is disk specific so we cannot use this global
- * buffer for all disks. Ok for now since we only look at the global
- * bbm_log_size parameter to gate assembly
- */
- super->bbm_log = __get_imsm_bbm_log(super->anchor);
-
return 0;
}
@@ -3826,10 +4247,15 @@ load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd
err = load_imsm_mpb(fd, super, devname);
if (err)
return err;
+ if (super->sector_size == 4096)
+ convert_from_4k(super);
err = load_imsm_disk(fd, super, devname, keep_fd);
if (err)
return err;
err = parse_raid_devices(super);
+ if (err)
+ return err;
+ err = load_bbm_log(super);
clear_hi(super);
return err;
}
@@ -3894,12 +4320,15 @@ static void __free_imsm(struct intel_super *super, int free_disks)
free(elem);
elem = next;
}
+ if (super->bbm_log)
+ free(super->bbm_log);
super->hba = NULL;
}
static void free_imsm(struct intel_super *super)
{
__free_imsm(super, 1);
+ free(super->bb.entries);
free(super);
}
@@ -3920,6 +4349,14 @@ static struct intel_super *alloc_super(void)
super->current_vol = -1;
super->create_offset = ~((unsigned long long) 0);
+
+ super->bb.entries = xmalloc(BBM_LOG_MAX_ENTRIES *
+ sizeof(struct md_bb_entry));
+ if (!super->bb.entries) {
+ free(super);
+ return NULL;
+ }
+
return super;
}
@@ -3931,7 +4368,7 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de
struct sys_dev *hba_name;
int rv = 0;
- if ((fd < 0) || check_env("IMSM_NO_PLATFORM")) {
+ if (fd < 0 || check_env("IMSM_NO_PLATFORM")) {
super->orom = NULL;
super->hba = NULL;
return 0;
@@ -4374,7 +4811,7 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
}
/* Check migration compatibility */
- if ((err == 0) && (check_mpb_migr_compatibility(super) != 0)) {
+ if (err == 0 && check_mpb_migr_compatibility(super) != 0) {
pr_err("Unsupported migration detected");
if (devname)
fprintf(stderr, " on %s\n", devname);
@@ -4463,7 +4900,7 @@ get_devlist_super_block(struct md_list *devlist, struct intel_super **super_list
static int get_super_block(struct intel_super **super_list, char *devnm, char *devname,
int major, int minor, int keep_fd)
{
- struct intel_super*s = NULL;
+ struct intel_super *s;
char nm[32];
int dfd = -1;
int err = 0;
@@ -4482,6 +4919,7 @@ static int get_super_block(struct intel_super **super_list, char *devnm, char *d
goto error;
}
+ get_dev_sector_size(dfd, NULL, &s->sector_size);
find_intel_hba_capability(dfd, s, devname);
err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
@@ -4499,11 +4937,11 @@ static int get_super_block(struct intel_super **super_list, char *devnm, char *d
*super_list = s;
} else {
if (s)
- free(s);
+ free_imsm(s);
if (dfd >= 0)
close(dfd);
}
- if ((dfd >= 0) && (!keep_fd))
+ if (dfd >= 0 && !keep_fd)
close(dfd);
return err;
@@ -4561,13 +4999,16 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname)
free_super_imsm(st);
super = alloc_super();
+	if (!super)
+		return 1;
+	/* only touch super->sector_size once the allocation is known good */
+	get_dev_sector_size(fd, NULL, &super->sector_size);
/* Load hba and capabilities if they exist.
* But do not preclude loading metadata in case capabilities or hba are
* non-compliant and ignore_hw_compat is set.
*/
rv = find_intel_hba_capability(fd, super, devname);
/* no orom/efi or non-intel hba of the disk */
- if ((rv != 0) && (st->ignore_hw_compat == 0)) {
+ if (rv != 0 && st->ignore_hw_compat == 0) {
if (devname)
pr_err("No OROM/EFI properties for %s\n", devname);
free_imsm(super);
@@ -4577,7 +5018,11 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname)
/* retry the load if we might have raced against mdmon */
if (rv == 3) {
- struct mdstat_ent *mdstat = mdstat_by_component(fd2devnm(fd));
+ struct mdstat_ent *mdstat = NULL;
+ char *name = fd2kname(fd);
+
+ if (name)
+ mdstat = mdstat_by_component(name);
if (mdstat && mdmon_running(mdstat->devnm) && getpid() != mdmon_pid(mdstat->devnm)) {
for (retry = 0; retry < 3; retry++) {
@@ -4717,6 +5162,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
* so st->sb is already set.
*/
struct intel_super *super = st->sb;
+ unsigned int sector_size = super->sector_size;
struct imsm_super *mpb = super->anchor;
struct intel_dev *dv;
struct imsm_dev *dev;
@@ -4738,14 +5184,14 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
size_new = disks_to_mpb_size(info->nr_disks);
if (size_new > size_old) {
void *mpb_new;
- size_t size_round = ROUND_UP(size_new, 512);
+ size_t size_round = ROUND_UP(size_new, sector_size);
- if (posix_memalign(&mpb_new, 512, size_round) != 0) {
+ if (posix_memalign(&mpb_new, sector_size, size_round) != 0) {
pr_err("could not allocate new mpb\n");
return 0;
}
- if (posix_memalign(&super->migr_rec_buf, 512,
- MIGR_REC_BUF_SIZE) != 0) {
+ if (posix_memalign(&super->migr_rec_buf, sector_size,
+ MIGR_REC_BUF_SECTORS*sector_size) != 0) {
pr_err("could not allocate migr_rec buffer\n");
free(super->buf);
free(super);
@@ -4758,6 +5204,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
super->anchor = mpb_new;
mpb->mpb_size = __cpu_to_le32(size_new);
memset(mpb_new + size_old, 0, size_round - size_old);
+ super->len = size_round;
}
super->current_vol = idx;
@@ -4895,21 +5342,23 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
if (info)
mpb_size = disks_to_mpb_size(info->nr_disks);
else
- mpb_size = 512;
+ mpb_size = MAX_SECTOR_SIZE;
super = alloc_super();
- if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
- free(super);
+ if (super &&
+ posix_memalign(&super->buf, MAX_SECTOR_SIZE, mpb_size) != 0) {
+ free_imsm(super);
super = NULL;
}
if (!super) {
pr_err("could not allocate superblock\n");
return 0;
}
- if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) {
+ if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE,
+ MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE) != 0) {
pr_err("could not allocate migr_rec buffer\n");
free(super->buf);
- free(super);
+ free_imsm(super);
return 0;
}
memset(super->buf, 0, mpb_size);
@@ -5013,8 +5462,7 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
struct imsm_map *map2 = get_imsm_map(dev,
MAP_1);
int slot2 = get_imsm_disk_slot(map2, df->index);
- if ((slot2 < map2->num_members) &&
- (slot2 >= 0)) {
+ if (slot2 < map2->num_members && slot2 >= 0) {
__u32 ord2 = get_imsm_ord_tbl_ent(dev,
slot2,
MAP_1);
@@ -5090,6 +5538,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
struct intel_super *super = st->sb;
struct dl *dd;
unsigned long long size;
+ unsigned int member_sector_size;
__u32 id;
int rv;
struct stat stb;
@@ -5121,16 +5570,82 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
rv = imsm_read_serial(fd, devname, dd->serial);
if (rv) {
pr_err("failed to retrieve scsi serial, aborting\n");
+ if (dd->devname)
+ free(dd->devname);
free(dd);
abort();
}
+ if (super->hba && ((super->hba->type == SYS_DEV_NVME) ||
+ (super->hba->type == SYS_DEV_VMD))) {
+ int i;
+ char *devpath = diskfd_to_devpath(fd);
+ char controller_path[PATH_MAX];
+
+ if (!devpath) {
+ pr_err("failed to get devpath, aborting\n");
+ if (dd->devname)
+ free(dd->devname);
+ free(dd);
+ return 1;
+ }
+
+ snprintf(controller_path, PATH_MAX-1, "%s/device", devpath);
+ free(devpath);
+
+ if (devpath_to_vendor(controller_path) == 0x8086) {
+ /*
+ * If an Intel NVMe drive's serial ends with
+ * "-A", "-B", "-1" or "-2", it is an "x8" device
+ * (two drives on a single PCIe card).
+ * The user should be warned about potential data loss.
+ */
+ for (i = MAX_RAID_SERIAL_LEN-1; i > 0; i--) {
+ /* Skip empty character at the end */
+ if (dd->serial[i] == 0)
+ continue;
+
+ if (((dd->serial[i] == 'A') ||
+ (dd->serial[i] == 'B') ||
+ (dd->serial[i] == '1') ||
+ (dd->serial[i] == '2')) &&
+ (dd->serial[i-1] == '-'))
+ pr_err("\tThe action you are about to take may put your data at risk.\n"
+ "\tPlease note that x8 devices may consist of two separate x4 devices "
+ "located on a single PCIe port.\n"
+ "\tRAID 0 is the only supported configuration for this type of x8 device.\n");
+ break;
+ }
+ } else if (super->hba->type == SYS_DEV_VMD && super->orom &&
+ !imsm_orom_has_tpv_support(super->orom)) {
+ pr_err("\tPlatform configuration does not support non-Intel NVMe drives.\n"
+ "\tPlease refer to Intel(R) RSTe user guide.\n");
+ free(dd->devname);
+ free(dd);
+ return 1;
+ }
+ }
get_dev_size(fd, NULL, &size);
+ get_dev_sector_size(fd, NULL, &member_sector_size);
+
+ if (super->sector_size == 0) {
+ /* this is the first device, so sector_size is not set yet */
+ super->sector_size = member_sector_size;
+ } else if (member_sector_size != super->sector_size) {
+ pr_err("Mixing between different sector size is forbidden, aborting...\n");
+ if (dd->devname)
+ free(dd->devname);
+ free(dd);
+ return 1;
+ }
+
/* clear migr_rec when adding disk to container */
- memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SIZE);
- if (lseek64(fd, size - MIGR_REC_POSITION, SEEK_SET) >= 0) {
- if (write(fd, super->migr_rec_buf,
- MIGR_REC_BUF_SIZE) != MIGR_REC_BUF_SIZE)
+ memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*super->sector_size);
+ if (lseek64(fd, size - MIGR_REC_SECTOR_POSITION*super->sector_size,
+ SEEK_SET) >= 0) {
+ if ((unsigned int)write(fd, super->migr_rec_buf,
+ MIGR_REC_BUF_SECTORS*super->sector_size) !=
+ MIGR_REC_BUF_SECTORS*super->sector_size)
perror("Write migr_rec failed");
}
@@ -5188,9 +5703,9 @@ static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
static int store_imsm_mpb(int fd, struct imsm_super *mpb);
static union {
- char buf[512];
+ char buf[MAX_SECTOR_SIZE];
struct imsm_super anchor;
-} spare_record __attribute__ ((aligned(512)));
+} spare_record __attribute__ ((aligned(MAX_SECTOR_SIZE)));
/* spare records have their own family number and do not have any defined raid
* devices
@@ -5221,6 +5736,9 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose)
if (__le32_to_cpu(d->disk.total_blocks_hi) > 0)
spare->attributes |= MPB_ATTRIB_2TB_DISK;
+ if (super->sector_size == 4096)
+ convert_to_4k_imsm_disk(&spare->disk[0]);
+
sum = __gen_imsm_checksum(spare);
spare->family_num = __cpu_to_le32(sum);
spare->orig_family_num = 0;
@@ -5244,6 +5762,7 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose)
static int write_super_imsm(struct supertype *st, int doclose)
{
struct intel_super *super = st->sb;
+ unsigned int sector_size = super->sector_size;
struct imsm_super *mpb = super->anchor;
struct dl *d;
__u32 generation;
@@ -5253,6 +5772,7 @@ static int write_super_imsm(struct supertype *st, int doclose)
__u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
int num_disks = 0;
int clear_migration_record = 1;
+ __u32 bbm_log_size;
/* 'generation' is incremented everytime the metadata is written */
generation = __le32_to_cpu(mpb->generation_num);
@@ -5290,9 +5810,23 @@ static int write_super_imsm(struct supertype *st, int doclose)
if (is_gen_migration(dev2))
clear_migration_record = 0;
}
- mpb_size += __le32_to_cpu(mpb->bbm_log_size);
+
+ bbm_log_size = get_imsm_bbm_log_size(super->bbm_log);
+
+ if (bbm_log_size) {
+ memcpy((void *)mpb + mpb_size, super->bbm_log, bbm_log_size);
+ mpb->attributes |= MPB_ATTRIB_BBM;
+ } else
+ mpb->attributes &= ~MPB_ATTRIB_BBM;
+
+ super->anchor->bbm_log_size = __cpu_to_le32(bbm_log_size);
+ mpb_size += bbm_log_size;
mpb->mpb_size = __cpu_to_le32(mpb_size);
+#ifdef DEBUG
+ assert(super->len == 0 || mpb_size <= super->len);
+#endif
+
/* recalculate checksum */
sum = __gen_imsm_checksum(mpb);
mpb->check_sum = __cpu_to_le32(sum);
@@ -5302,7 +5836,11 @@ static int write_super_imsm(struct supertype *st, int doclose)
super->clean_migration_record_by_mdmon = 0;
}
if (clear_migration_record)
- memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SIZE);
+ memset(super->migr_rec_buf, 0,
+ MIGR_REC_BUF_SECTORS*sector_size);
+
+ if (sector_size == 4096)
+ convert_to_4k(super);
/* write the mpb for disks that compose raid devices */
for (d = super->disks; d ; d = d->next) {
@@ -5313,9 +5851,12 @@ static int write_super_imsm(struct supertype *st, int doclose)
unsigned long long dsize;
get_dev_size(d->fd, NULL, &dsize);
- if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) {
- if (write(d->fd, super->migr_rec_buf,
- MIGR_REC_BUF_SIZE) != MIGR_REC_BUF_SIZE)
+ if (lseek64(d->fd, dsize - sector_size,
+ SEEK_SET) >= 0) {
+ if ((unsigned int)write(d->fd,
+ super->migr_rec_buf,
+ MIGR_REC_BUF_SECTORS*sector_size) !=
+ MIGR_REC_BUF_SECTORS*sector_size)
perror("Write migr_rec failed");
}
}
@@ -5427,17 +5968,14 @@ static int store_super_imsm(struct supertype *st, int fd)
return 1;
#ifndef MDASSEMBLE
+ if (super->sector_size == 4096)
+ convert_to_4k(super);
return store_imsm_mpb(fd, mpb);
#else
return 1;
#endif
}
-static int imsm_bbm_log_size(struct imsm_super *mpb)
-{
- return __le32_to_cpu(mpb->bbm_log_size);
-}
-
#ifndef MDASSEMBLE
static int validate_geometry_imsm_container(struct supertype *st, int level,
int layout, int raiddisks, int chunk,
@@ -5449,7 +5987,7 @@ static int validate_geometry_imsm_container(struct supertype *st, int level,
{
int fd;
unsigned long long ldsize;
- struct intel_super *super=NULL;
+ struct intel_super *super;
int rv = 0;
if (level != LEVEL_CONTAINER)
@@ -5473,6 +6011,16 @@ static int validate_geometry_imsm_container(struct supertype *st, int level,
* note that there is no fd for the disks in array.
*/
super = alloc_super();
+ if (!super) {
+ close(fd);
+ return 0;
+ }
+ if (!get_dev_sector_size(fd, NULL, &super->sector_size)) {
+ close(fd);
+ free_imsm(super);
+ return 0;
+ }
+
rv = find_intel_hba_capability(fd, super, verbose > 0 ? dev : NULL);
if (rv != 0) {
#if DEBUG
@@ -5642,10 +6190,10 @@ active_arrays_by_format(char *name, char* hba, struct md_list **devlist,
int dpa, int verbose)
{
struct mdstat_ent *mdstat = mdstat_read(0, 0);
- struct mdstat_ent *memb = NULL;
+ struct mdstat_ent *memb;
int count = 0;
int num = 0;
- struct md_list *dv = NULL;
+ struct md_list *dv;
int found;
for (memb = mdstat ; memb ; memb = memb->next) {
@@ -5661,18 +6209,18 @@ active_arrays_by_format(char *name, char* hba, struct md_list **devlist,
num = sprintf(path, "%s%s", "/dev/", dev->name);
if (num > 0)
fd = open(path, O_RDONLY, 0);
- if ((num <= 0) || (fd < 0)) {
- pr_vrb(": Cannot open %s: %s\n",
+ if (num <= 0 || fd < 0) {
+ pr_vrb("Cannot open %s: %s\n",
dev->name, strerror(errno));
}
free(path);
dev = dev->next;
}
found = 0;
- if ((fd >= 0) && disk_attached_to_hba(fd, hba)) {
+ if (fd >= 0 && disk_attached_to_hba(fd, hba)) {
struct mdstat_ent *vol;
for (vol = mdstat ; vol ; vol = vol->next) {
- if ((vol->active > 0) &&
+ if (vol->active > 0 &&
vol->metadata_version &&
is_container_member(vol, memb->devnm)) {
found++;
@@ -5703,7 +6251,7 @@ get_loop_devices(void)
{
int i;
struct md_list *devlist = NULL;
- struct md_list *dv = NULL;
+ struct md_list *dv;
for(i = 0; i < 12; i++) {
dv = xcalloc(1, sizeof(*dv));
@@ -5720,7 +6268,7 @@ static struct md_list*
get_devices(const char *hba_path)
{
struct md_list *devlist = NULL;
- struct md_list *dv = NULL;
+ struct md_list *dv;
struct dirent *ent;
DIR *dir;
int err = 0;
@@ -5783,7 +6331,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
{
struct md_list *tmpdev;
int count = 0;
- struct supertype *st = NULL;
+ struct supertype *st;
/* first walk the list of devices to find a consistent set
* that match the criterea, if that is possible.
@@ -5792,7 +6340,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
*found = 0;
st = match_metadata_desc_imsm("imsm");
if (st == NULL) {
- pr_vrb(": cannot allocate memory for imsm supertype\n");
+ pr_vrb("cannot allocate memory for imsm supertype\n");
return 0;
}
@@ -5805,7 +6353,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
continue;
tst = dup_super(st);
if (tst == NULL) {
- pr_vrb(": cannot allocate memory for imsm supertype\n");
+ pr_vrb("cannot allocate memory for imsm supertype\n");
goto err_1;
}
tmpdev->container = 0;
@@ -5924,7 +6472,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
}
for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
- if ((tmpdev->used == 1) && (tmpdev->found)) {
+ if (tmpdev->used == 1 && tmpdev->found) {
if (count) {
if (count < tmpdev->found)
count = 0;
@@ -5941,20 +6489,20 @@ count_volumes_list(struct md_list *devlist, char *homehost,
return count;
}
-static int
-count_volumes(struct intel_hba *hba, int dpa, int verbose)
+static int __count_volumes(char *hba_path, int dpa, int verbose,
+ int cmp_hba_path)
{
struct sys_dev *idev, *intel_devices = find_intel_devices();
int count = 0;
const struct orom_entry *entry;
struct devid_list *dv, *devid_list;
- if (!hba || !hba->path)
+ if (!hba_path)
return 0;
for (idev = intel_devices; idev; idev = idev->next) {
- if (strstr(idev->path, hba->path))
- break;
+ if (strstr(idev->path, hba_path))
+ break;
}
if (!idev || !idev->dev_id)
@@ -5967,31 +6515,29 @@ count_volumes(struct intel_hba *hba, int dpa, int verbose)
devid_list = entry->devid_list;
for (dv = devid_list; dv; dv = dv->next) {
- struct md_list *devlist = NULL;
- struct sys_dev *device = device_by_id(dv->devid);
- char *hba_path;
+ struct md_list *devlist;
+ struct sys_dev *device = NULL;
+ char *hpath;
int found = 0;
+ if (cmp_hba_path)
+ device = device_by_id_and_path(dv->devid, hba_path);
+ else
+ device = device_by_id(dv->devid);
+
if (device)
- hba_path = device->path;
+ hpath = device->path;
else
return 0;
- /* VMD has one orom entry for all domain, but spanning is not allowed.
- * VMD arrays should be counted per domain (controller), so skip
- * domains that are not the given one.
- */
- if ((hba->type == SYS_DEV_VMD) &&
- (strncmp(device->path, hba->path, strlen(device->path)) != 0))
- continue;
-
- devlist = get_devices(hba_path);
+ devlist = get_devices(hpath);
/* if no intel devices return zero volumes */
if (devlist == NULL)
return 0;
- count += active_arrays_by_format("imsm", hba_path, &devlist, dpa, verbose);
- dprintf("path: %s active arrays: %d\n", hba_path, count);
+ count += active_arrays_by_format("imsm", hpath, &devlist, dpa,
+ verbose);
+ dprintf("path: %s active arrays: %d\n", hpath, count);
if (devlist == NULL)
return 0;
do {
@@ -6003,7 +6549,7 @@ count_volumes(struct intel_hba *hba, int dpa, int verbose)
dprintf("found %d count: %d\n", found, count);
} while (found);
- dprintf("path: %s total number of volumes: %d\n", hba_path, count);
+ dprintf("path: %s total number of volumes: %d\n", hpath, count);
while (devlist) {
struct md_list *dv = devlist;
@@ -6015,6 +6561,24 @@ count_volumes(struct intel_hba *hba, int dpa, int verbose)
return count;
}
+static int count_volumes(struct intel_hba *hba, int dpa, int verbose)
+{
+ if (!hba)
+ return 0;
+ if (hba->type == SYS_DEV_VMD) {
+ struct sys_dev *dev;
+ int count = 0;
+
+ for (dev = find_intel_devices(); dev; dev = dev->next) {
+ if (dev->type == SYS_DEV_VMD)
+ count += __count_volumes(dev->path, dpa,
+ verbose, 1);
+ }
+ return count;
+ }
+ return __count_volumes(hba->path, dpa, verbose, 0);
+}
+
static int imsm_default_chunk(const struct imsm_orom *orom)
{
/* up to 512 if the plaform supports it, otherwise the platform max.
@@ -6031,14 +6595,14 @@ validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
{
/* check/set platform and metadata limits/defaults */
if (super->orom && raiddisks > super->orom->dpa) {
- pr_vrb(": platform supports a maximum of %d disks per array\n",
+ pr_vrb("platform supports a maximum of %d disks per array\n",
super->orom->dpa);
return 0;
}
/* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
if (!is_raid_level_supported(super->orom, level, raiddisks)) {
- pr_vrb(": platform does not support raid%d with %d disk%s\n",
+ pr_vrb("platform does not support raid%d with %d disk%s\n",
level, raiddisks, raiddisks > 1 ? "s" : "");
return 0;
}
@@ -6047,24 +6611,24 @@ validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
*chunk = imsm_default_chunk(super->orom);
if (super->orom && !imsm_orom_has_chunk(super->orom, *chunk)) {
- pr_vrb(": platform does not support a chunk size of: %d\n", *chunk);
+ pr_vrb("platform does not support a chunk size of: %d\n", *chunk);
return 0;
}
if (layout != imsm_level_to_layout(level)) {
if (level == 5)
- pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
+ pr_vrb("imsm raid 5 only supports the left-asymmetric layout\n");
else if (level == 10)
- pr_vrb(": imsm raid 10 only supports the n2 layout\n");
+ pr_vrb("imsm raid 10 only supports the n2 layout\n");
else
- pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
+ pr_vrb("imsm unknown layout %#x for this raid level %d\n",
layout, level);
return 0;
}
if (super->orom && (super->orom->attr & IMSM_OROM_ATTR_2TB) == 0 &&
(calc_array_size(level, raiddisks, layout, *chunk, size) >> 32) > 0) {
- pr_vrb(": platform does not support a volume size over 2TB\n");
+ pr_vrb("platform does not support a volume size over 2TB\n");
return 0;
}
@@ -6235,7 +6799,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
int count = count_volumes(super->hba,
super->orom->dpa, verbose);
if (super->orom->vphba <= count) {
- pr_vrb(": platform does not support more than %d raid volumes.\n",
+ pr_vrb("platform does not support more than %d raid volumes.\n",
super->orom->vphba);
return 0;
}
@@ -6391,7 +6955,7 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
count = count_volumes(super->hba,
super->orom->dpa, verbose);
if (super->orom->vphba <= count) {
- pr_vrb(": platform does not support more than %d raid volumes.\n",
+ pr_vrb("platform does not support more than %d raid volumes.\n",
super->orom->vphba);
return 0;
}
@@ -6712,12 +7276,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
pr_err("Unsupported attributes in IMSM metadata.Arrays activation is blocked.\n");
}
- /* check for bad blocks */
- if (imsm_bbm_log_size(super->anchor)) {
- pr_err("BBM log found in IMSM metadata.Arrays activation is blocked.\n");
- sb_errors = 1;
- }
-
/* count spare devices, not used in maps
*/
for (d = super->disks; d; d = d->next)
@@ -6844,7 +7402,20 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
info_d->events = __le32_to_cpu(mpb->generation_num);
info_d->data_offset = pba_of_lba0(map);
- info_d->component_size = blocks_per_member(map);
+
+ if (map->raid_level == 5) {
+ info_d->component_size =
+ num_data_stripes(map) *
+ map->blocks_per_strip;
+ } else {
+ info_d->component_size = blocks_per_member(map);
+ }
+
+ info_d->bb.supported = 1;
+ get_volume_badblocks(super->bbm_log, ord_to_idx(ord),
+ info_d->data_offset,
+ info_d->component_size,
+ &info_d->bb);
}
/* now that the disk list is up-to-date fixup recovery_start */
update_recovery_start(super, dev, this);
@@ -6958,8 +7529,8 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
/* when MAP_X is passed both maps failures are counted
*/
if (prev &&
- ((look_in_map == MAP_1) || (look_in_map == MAP_X)) &&
- (i < prev->num_members)) {
+ (look_in_map == MAP_1 || look_in_map == MAP_X) &&
+ i < prev->num_members) {
ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
idx_1 = ord_to_idx(ord);
@@ -6967,8 +7538,8 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
failed++;
}
- if (((look_in_map == MAP_0) || (look_in_map == MAP_X)) &&
- (i < map->num_members)) {
+ if ((look_in_map == MAP_0 || look_in_map == MAP_X) &&
+ i < map->num_members) {
ord = __le32_to_cpu(map->disk_ord_tbl[i]);
idx = ord_to_idx(ord);
@@ -6990,6 +7561,7 @@ static int imsm_open_new(struct supertype *c, struct active_array *a,
{
struct intel_super *super = c->sb;
struct imsm_super *mpb = super->anchor;
+ struct imsm_update_prealloc_bb_mem u;
if (atoi(inst) >= mpb->num_raid_devs) {
pr_err("subarry index %d, out of range\n", atoi(inst));
@@ -6998,6 +7570,10 @@ static int imsm_open_new(struct supertype *c, struct active_array *a,
dprintf("imsm: open_new %s\n", inst);
a->info.container_member = atoi(inst);
+
+ u.type = update_prealloc_badblocks_mem;
+ imsm_update_metadata_locally(c, &u, sizeof(u));
+
return 0;
}
@@ -7017,15 +7593,16 @@ static int is_resyncing(struct imsm_dev *dev)
migr_map = get_imsm_map(dev, MAP_1);
- if ((migr_map->map_state == IMSM_T_STATE_NORMAL) &&
- (dev->vol.migr_type != MIGR_GEN_MIGR))
+ if (migr_map->map_state == IMSM_T_STATE_NORMAL &&
+ dev->vol.migr_type != MIGR_GEN_MIGR)
return 1;
else
return 0;
}
/* return true if we recorded new information */
-static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
+static int mark_failure(struct intel_super *super,
+ struct imsm_dev *dev, struct imsm_disk *disk, int idx)
{
__u32 ord;
int slot;
@@ -7061,19 +7638,22 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
int slot2 = get_imsm_disk_slot(map2, idx);
- if ((slot2 < map2->num_members) &&
- (slot2 >= 0))
+ if (slot2 < map2->num_members && slot2 >= 0)
set_imsm_ord_tbl_ent(map2, slot2,
idx | IMSM_ORD_REBUILD);
}
if (map->failed_disk_num == 0xff)
map->failed_disk_num = slot;
+
+ clear_disk_badblocks(super->bbm_log, ord_to_idx(ord));
+
return 1;
}
-static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
+static void mark_missing(struct intel_super *super,
+ struct imsm_dev *dev, struct imsm_disk *disk, int idx)
{
- mark_failure(dev, disk, idx);
+ mark_failure(super, dev, disk, idx);
if (disk->scsi_id == __cpu_to_le32(~(__u32)0))
return;
@@ -7109,7 +7689,7 @@ static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
end_migration(dev, super, map_state);
}
for (dl = super->missing; dl; dl = dl->next)
- mark_missing(dev, &dl->disk, dl->index);
+ mark_missing(super, dev, &dl->disk, dl->index);
super->updates_pending++;
}
@@ -7357,6 +7937,25 @@ skip_mark_checkpoint:
return consistent;
}
+static int imsm_disk_slot_to_ord(struct active_array *a, int slot)
+{
+ int inst = a->info.container_member;
+ struct intel_super *super = a->container->sb;
+ struct imsm_dev *dev = get_imsm_dev(super, inst);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+
+ if (slot > map->num_members) {
+ pr_err("imsm: imsm_disk_slot_to_ord %d out of range 0..%d\n",
+ slot, map->num_members - 1);
+ return -1;
+ }
+
+ if (slot < 0)
+ return -1;
+
+ return get_imsm_ord_tbl_ent(dev, slot, MAP_0);
+}
+
static void imsm_set_disk(struct active_array *a, int n, int state)
{
int inst = a->info.container_member;
@@ -7367,24 +7966,19 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
struct mdinfo *mdi;
int recovery_not_finished = 0;
int failed;
- __u32 ord;
+ int ord;
__u8 map_state;
- if (n > map->num_members)
- pr_err("imsm: set_disk %d out of range 0..%d\n",
- n, map->num_members - 1);
-
- if (n < 0)
+ ord = imsm_disk_slot_to_ord(a, n);
+ if (ord < 0)
return;
dprintf("imsm: set_disk %d:%x\n", n, state);
-
- ord = get_imsm_ord_tbl_ent(dev, n, MAP_0);
disk = get_imsm_disk(super, ord_to_idx(ord));
/* check for new failures */
if (state & DS_FAULTY) {
- if (mark_failure(dev, disk, ord_to_idx(ord)))
+ if (mark_failure(super, dev, disk, ord_to_idx(ord)))
super->updates_pending++;
}
@@ -7442,8 +8036,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
break;
case IMSM_T_STATE_DEGRADED: /* transition to degraded state */
dprintf_cont("degraded: ");
- if ((map->map_state != map_state) &&
- !dev->vol.migr_state) {
+ if (map->map_state != map_state && !dev->vol.migr_state) {
dprintf_cont("mark degraded");
map->map_state = map_state;
super->updates_pending++;
@@ -7505,27 +8098,30 @@ static int store_imsm_mpb(int fd, struct imsm_super *mpb)
__u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
unsigned long long dsize;
unsigned long long sectors;
+ unsigned int sector_size;
+ get_dev_sector_size(fd, NULL, &sector_size);
get_dev_size(fd, NULL, &dsize);
- if (mpb_size > 512) {
+ if (mpb_size > sector_size) {
/* -1 to account for anchor */
- sectors = mpb_sectors(mpb) - 1;
+ sectors = mpb_sectors(mpb, sector_size) - 1;
/* write the extended mpb to the sectors preceeding the anchor */
- if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
+ if (lseek64(fd, dsize - (sector_size * (2 + sectors)),
+ SEEK_SET) < 0)
return 1;
- if ((unsigned long long)write(fd, buf + 512, 512 * sectors)
- != 512 * sectors)
+ if ((unsigned long long)write(fd, buf + sector_size,
+ sector_size * sectors) != sector_size * sectors)
return 1;
}
/* first block is stored on second to last sector of the disk */
- if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
+ if (lseek64(fd, dsize - (sector_size * 2), SEEK_SET) < 0)
return 1;
- if (write(fd, buf, 512) != 512)
+ if ((unsigned int)write(fd, buf, sector_size) != sector_size)
return 1;
return 0;
@@ -7773,6 +8369,11 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
IMSM_T_STATE_DEGRADED)
return NULL;
+ if (get_imsm_map(dev, MAP_0)->map_state == IMSM_T_STATE_UNINITIALIZED) {
+ dprintf("imsm: No spare activation allowed. Volume is not initialized.\n");
+ return NULL;
+ }
+
/*
* If there are any failed disks check state of the other volume.
* Block rebuild if the another one is failed until failed disks
@@ -7840,6 +8441,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
di->data_offset = pba_of_lba0(map);
di->component_size = a->info.component_size;
di->container_member = inst;
+ di->bb.supported = 1;
super->random = random32();
di->next = rv;
rv = di;
@@ -7901,21 +8503,22 @@ static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_
static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
{
- struct dl *dl = NULL;
+ struct dl *dl;
+
for (dl = super->disks; dl; dl = dl->next)
- if ((dl->major == major) && (dl->minor == minor))
+ if (dl->major == major && dl->minor == minor)
return dl;
return NULL;
}
static int remove_disk_super(struct intel_super *super, int major, int minor)
{
- struct dl *prev = NULL;
+ struct dl *prev;
struct dl *dl;
prev = NULL;
for (dl = super->disks; dl; dl = dl->next) {
- if ((dl->major == major) && (dl->minor == minor)) {
+ if (dl->major == major && dl->minor == minor) {
/* remove */
if (prev)
prev->next = dl->next;
@@ -7936,7 +8539,8 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
static int add_remove_disk_update(struct intel_super *super)
{
int check_degraded = 0;
- struct dl *disk = NULL;
+ struct dl *disk;
+
/* add/remove some spares to/from the metadata/contrainer */
while (super->disk_mgmt_list) {
struct dl *disk_cfg;
@@ -7983,12 +8587,11 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration *
int ret_val = 0;
dprintf("(enter)\n");
- if ((u->subdev < 0) ||
- (u->subdev > 1)) {
+ if (u->subdev < 0 || u->subdev > 1) {
dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
return ret_val;
}
- if ((space_list == NULL) || (*space_list == NULL)) {
+ if (space_list == NULL || *space_list == NULL) {
dprintf("imsm: Error: Memory is not allocated\n");
return ret_val;
}
@@ -8043,15 +8646,28 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration *
/* update chunk size
*/
- if (u->new_chunksize > 0)
+ if (u->new_chunksize > 0) {
+ unsigned long long num_data_stripes;
+ int used_disks =
+ imsm_num_data_members(dev, MAP_0);
+
+ if (used_disks == 0)
+ return ret_val;
+
map->blocks_per_strip =
__cpu_to_le16(u->new_chunksize * 2);
+ num_data_stripes =
+ (join_u32(dev->size_low, dev->size_high)
+ / used_disks);
+ num_data_stripes /= map->blocks_per_strip;
+ num_data_stripes /= map->num_domains;
+ set_num_data_stripes(map, num_data_stripes);
+ }
/* add disk
*/
- if ((u->new_level != 5) ||
- (migr_map->raid_level != 0) ||
- (migr_map->raid_level == map->raid_level))
+ if (u->new_level != 5 || migr_map->raid_level != 0 ||
+ migr_map->raid_level == map->raid_level)
goto skip_disk_add;
if (u->new_disks[0] >= 0) {
@@ -8102,8 +8718,7 @@ static int apply_size_change_update(struct imsm_update_size_change *u,
int ret_val = 0;
dprintf("(enter)\n");
- if ((u->subdev < 0) ||
- (u->subdev > 1)) {
+ if (u->subdev < 0 || u->subdev > 1) {
dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
return ret_val;
}
@@ -8114,13 +8729,19 @@ static int apply_size_change_update(struct imsm_update_size_change *u,
struct imsm_map *map = get_imsm_map(dev, MAP_0);
int used_disks = imsm_num_data_members(dev, MAP_0);
unsigned long long blocks_per_member;
+ unsigned long long num_data_stripes;
/* calculate new size
*/
blocks_per_member = u->new_size / used_disks;
- dprintf("(size: %llu, blocks per member: %llu)\n",
- u->new_size, blocks_per_member);
+ num_data_stripes = blocks_per_member /
+ map->blocks_per_strip;
+ num_data_stripes /= map->num_domains;
+ dprintf("(size: %llu, blocks per member: %llu, num_data_stipes: %llu)\n",
+ u->new_size, blocks_per_member,
+ num_data_stripes);
set_blocks_per_member(map, blocks_per_member);
+ set_num_data_stripes(map, num_data_stripes);
imsm_set_array_size(dev, u->new_size);
ret_val = 1;
@@ -8270,9 +8891,9 @@ static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
dprintf("imsm: new disk for reshape is: %i:%i (%p, index = %i)\n",
major(u->new_disks[i]), minor(u->new_disks[i]),
new_disk, new_disk->index);
- if ((new_disk == NULL) ||
- ((new_disk->index >= 0) &&
- (new_disk->index < u->old_raid_disks)))
+ if (new_disk == NULL ||
+ (new_disk->index >= 0 &&
+ new_disk->index < u->old_raid_disks))
goto update_reshape_exit;
new_disk->index = disk_count++;
/* slot to fill in autolayout
@@ -8371,6 +8992,14 @@ static int apply_takeover_update(struct imsm_update_takeover *u,
map = get_imsm_map(dev, MAP_0);
if (u->direction == R10_TO_R0) {
+ unsigned long long num_data_stripes;
+
+ map->num_domains = 1;
+ num_data_stripes = blocks_per_member(map);
+ num_data_stripes /= map->blocks_per_strip;
+ num_data_stripes /= map->num_domains;
+ set_num_data_stripes(map, num_data_stripes);
+
/* Number of failed disks must be half of initial disk number */
if (imsm_count_failed(super, dev, MAP_0) !=
(map->num_members / 2))
@@ -8447,7 +9076,7 @@ static int apply_takeover_update(struct imsm_update_takeover *u,
for (du = super->missing; du; du = du->next)
if (du->index >= 0) {
set_imsm_ord_tbl_ent(map, du->index, du->index);
- mark_missing(dv->dev, &du->disk, du->index);
+ mark_missing(super, dv->dev, &du->disk, du->index);
}
return 1;
@@ -8738,6 +9367,8 @@ static void imsm_process_update(struct supertype *st,
}
break;
}
+ case update_prealloc_badblocks_mem:
+ break;
default:
pr_err("error: unsuported process update type:(type: %d)\n", type);
}
@@ -8757,6 +9388,7 @@ static int imsm_prepare_update(struct supertype *st,
*/
enum imsm_update_type type;
struct intel_super *super = st->sb;
+ unsigned int sector_size = super->sector_size;
struct imsm_super *mpb = super->anchor;
size_t buf_len;
size_t len = 0;
@@ -8894,7 +9526,7 @@ static int imsm_prepare_update(struct supertype *st,
current_level = map->raid_level;
break;
}
- if ((u->new_level == 5) && (u->new_level != current_level)) {
+ if (u->new_level == 5 && u->new_level != current_level) {
struct mdinfo *spares;
spares = get_spares_for_grow(st);
@@ -8978,6 +9610,10 @@ static int imsm_prepare_update(struct supertype *st,
case update_add_remove_disk:
/* no update->len needed */
break;
+ case update_prealloc_badblocks_mem:
+ super->extra_space += sizeof(struct bbm_log) -
+ get_imsm_bbm_log_size(super->bbm_log);
+ break;
default:
return 0;
}
@@ -8988,17 +9624,18 @@ static int imsm_prepare_update(struct supertype *st,
else
buf_len = super->len;
- if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
+ if (__le32_to_cpu(mpb->mpb_size) + super->extra_space + len > buf_len) {
/* ok we need a larger buf than what is currently allocated
* if this allocation fails process_update will notice that
* ->next_len is set and ->next_buf is NULL
*/
- buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
+ buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) +
+ super->extra_space + len, sector_size);
if (super->next_buf)
free(super->next_buf);
super->next_len = buf_len;
- if (posix_memalign(&super->next_buf, 512, buf_len) == 0)
+ if (posix_memalign(&super->next_buf, sector_size, buf_len) == 0)
memset(super->next_buf, 0, buf_len);
else
super->next_buf = NULL;
@@ -9013,8 +9650,9 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
struct dl *iter;
struct imsm_dev *dev;
struct imsm_map *map;
- int i, j, num_members;
+ unsigned int i, j, num_members;
__u32 ord;
+ struct bbm_log *log = super->bbm_log;
dprintf("deleting device[%d] from imsm_super\n", index);
@@ -9047,6 +9685,14 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
}
}
+ for (i = 0; i < log->entry_count; i++) {
+ struct bbm_log_entry *entry = &log->marked_block_entries[i];
+
+ if (entry->disk_ordinal <= index)
+ continue;
+ entry->disk_ordinal--;
+ }
+
mpb->num_disks--;
super->updates_pending++;
if (*dlp) {
@@ -9158,8 +9804,7 @@ int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds,
continue;
}
- if ((sd->disk.raid_disk >= raid_disks) ||
- (sd->disk.raid_disk < 0))
+ if (sd->disk.raid_disk >= raid_disks || sd->disk.raid_disk < 0)
continue;
dn = map_dev(sd->disk.major,
@@ -9174,9 +9819,8 @@ int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds,
/* check if maximum array degradation level is not exceeded
*/
if ((raid_disks - opened) >
- imsm_get_allowed_degradation(info->new_level,
- raid_disks,
- super, dev)) {
+ imsm_get_allowed_degradation(info->new_level, raid_disks,
+ super, dev)) {
pr_err("Not enough disks can be opened.\n");
close_targets(raid_fds, raid_disks);
return -2;
@@ -9246,7 +9890,7 @@ int validate_container_imsm(struct mdinfo *info)
return 1;
}
- if ((orom != orom2) || ((hba->type == SYS_DEV_VMD) && (hba != hba2))) {
+ if (orom != orom2) {
pr_err("WARNING - IMSM container assembled with disks under different HBAs!\n"
" This operation is not supported and can lead to data loss.\n");
return 1;
@@ -9263,6 +9907,150 @@ int validate_container_imsm(struct mdinfo *info)
}
#ifndef MDASSEMBLE
/*******************************************************************************
+* Function: imsm_record_badblock
+* Description: This routine stores new bad block record in BBM log
+*
+* Parameters:
+* a : array containing a bad block
+* slot : disk number containing a bad block
+* sector : bad block sector
+* length : bad block sectors range
+* Returns:
+* 1 : Success
+* 0 : Error
+******************************************************************************/
+static int imsm_record_badblock(struct active_array *a, int slot,
+ unsigned long long sector, int length)
+{
+ struct intel_super *super = a->container->sb;
+ int ord;
+ int ret;
+
+ ord = imsm_disk_slot_to_ord(a, slot);
+ if (ord < 0)
+ return 0;
+
+ ret = record_new_badblock(super->bbm_log, ord_to_idx(ord), sector,
+ length);
+ if (ret)
+ super->updates_pending++;
+
+ return ret;
+}
+/*******************************************************************************
+* Function: imsm_clear_badblock
+* Description: This routine clears bad block record from BBM log
+*
+* Parameters:
+* a : array containing a bad block
+* slot : disk number containing a bad block
+* sector : bad block sector
+* length : bad block sectors range
+* Returns:
+* 1 : Success
+* 0 : Error
+******************************************************************************/
+static int imsm_clear_badblock(struct active_array *a, int slot,
+ unsigned long long sector, int length)
+{
+ struct intel_super *super = a->container->sb;
+ int ord;
+ int ret;
+
+ ord = imsm_disk_slot_to_ord(a, slot);
+ if (ord < 0)
+ return 0;
+
+ ret = clear_badblock(super->bbm_log, ord_to_idx(ord), sector, length);
+ if (ret)
+ super->updates_pending++;
+
+ return ret;
+}
+/*******************************************************************************
+* Function: imsm_get_badblocks
+* Description: This routine get list of bad blocks for an array
+*
+* Parameters:
+* a : array
+* slot : disk number
+* Returns:
+* bb : structure containing bad blocks
+* NULL : error
+******************************************************************************/
+static struct md_bb *imsm_get_badblocks(struct active_array *a, int slot)
+{
+ int inst = a->info.container_member;
+ struct intel_super *super = a->container->sb;
+ struct imsm_dev *dev = get_imsm_dev(super, inst);
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ int ord;
+
+ ord = imsm_disk_slot_to_ord(a, slot);
+ if (ord < 0)
+ return NULL;
+
+ get_volume_badblocks(super->bbm_log, ord_to_idx(ord), pba_of_lba0(map),
+ blocks_per_member(map), &super->bb);
+
+ return &super->bb;
+}
+/*******************************************************************************
+* Function: examine_badblocks_imsm
+* Description: Prints list of bad blocks on a disk to the standard output
+*
+* Parameters:
+* st : metadata handler
+* fd : open file descriptor for device
+* devname : device name
+* Returns:
+* 0 : Success
+* 1 : Error
+******************************************************************************/
+static int examine_badblocks_imsm(struct supertype *st, int fd, char *devname)
+{
+ struct intel_super *super = st->sb;
+ struct bbm_log *log = super->bbm_log;
+ struct dl *d = NULL;
+ int any = 0;
+
+ for (d = super->disks; d ; d = d->next) {
+ if (strcmp(d->devname, devname) == 0)
+ break;
+ }
+
+ if ((d == NULL) || (d->index < 0)) { /* serial mismatch probably */
+ pr_err("%s doesn't appear to be part of a raid array\n",
+ devname);
+ return 1;
+ }
+
+ if (log != NULL) {
+ unsigned int i;
+ struct bbm_log_entry *entry = &log->marked_block_entries[0];
+
+ for (i = 0; i < log->entry_count; i++) {
+ if (entry[i].disk_ordinal == d->index) {
+ unsigned long long sector = __le48_to_cpu(
+ &entry[i].defective_block_start);
+ int cnt = entry[i].marked_count + 1;
+
+ if (!any) {
+ printf("Bad-blocks on %s:\n", devname);
+ any = 1;
+ }
+
+ printf("%20llu for %d sectors\n", sector, cnt);
+ }
+ }
+ }
+
+ if (!any)
+ printf("No bad-blocks list configured on %s\n", devname);
+
+ return 0;
+}
+/*******************************************************************************
* Function: init_migr_record_imsm
* Description: Function inits imsm migration record
* Parameters:
@@ -9358,8 +10146,8 @@ int save_backup_imsm(struct supertype *st,
{
int rv = -1;
struct intel_super *super = st->sb;
- unsigned long long *target_offsets = NULL;
- int *targets = NULL;
+ unsigned long long *target_offsets;
+ int *targets;
int i;
struct imsm_map *map_dest = get_imsm_map(dev, MAP_0);
int new_disks = map_dest->num_members;
@@ -9486,7 +10274,7 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
{
struct intel_super *super = st->sb;
struct migr_record *migr_rec = super->migr_rec;
- struct imsm_map *map_dest = NULL;
+ struct imsm_map *map_dest;
struct intel_dev *id = NULL;
unsigned long long read_offset;
unsigned long long write_offset;
@@ -9495,6 +10283,7 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
int new_disks, i, err;
char *buf = NULL;
int retval = 1;
+ unsigned int sector_size = super->sector_size;
unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit);
unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
char buffer[20];
@@ -9531,7 +10320,7 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
pba_of_lba0(map_dest)) * 512;
unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
- if (posix_memalign((void **)&buf, 512, unit_len) != 0)
+ if (posix_memalign((void **)&buf, sector_size, unit_len) != 0)
goto abort;
targets = xcalloc(new_disks, sizeof(int));
@@ -9609,7 +10398,7 @@ static const char *imsm_get_disk_controller_domain(const char *path)
strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
if (stat(disk_path, &st) == 0) {
struct sys_dev* hba;
- char *path=NULL;
+ char *path;
path = devt_to_devpath(st.st_rdev);
if (path == NULL)
@@ -9688,8 +10477,7 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st,
break;
}
- if ((info->array.level != 0) &&
- (info->array.level != 5)) {
+ if (info->array.level != 0 && info->array.level != 5) {
/* we cannot use this container with other raid level
*/
dprintf("imsm: for container operation wrong raid level (%i) detected\n",
@@ -9774,11 +10562,11 @@ static int imsm_create_metadata_update_for_reshape(
{
struct intel_super *super = st->sb;
struct imsm_super *mpb = super->anchor;
- int update_memory_size = 0;
- struct imsm_update_reshape *u = NULL;
- struct mdinfo *spares = NULL;
+ int update_memory_size;
+ struct imsm_update_reshape *u;
+ struct mdinfo *spares;
int i;
- int delta_disks = 0;
+ int delta_disks;
struct mdinfo *dev;
dprintf("(enter) raid_disks = %i\n", geo->raid_disks);
@@ -9855,8 +10643,8 @@ static int imsm_create_metadata_update_for_size_change(
struct imsm_update_size_change **updatep)
{
struct intel_super *super = st->sb;
- int update_memory_size = 0;
- struct imsm_update_size_change *u = NULL;
+ int update_memory_size;
+ struct imsm_update_size_change *u;
dprintf("(enter) New size = %llu\n", geo->size);
@@ -9885,8 +10673,8 @@ static int imsm_create_metadata_update_for_migration(
struct imsm_update_reshape_migration **updatep)
{
struct intel_super *super = st->sb;
- int update_memory_size = 0;
- struct imsm_update_reshape_migration *u = NULL;
+ int update_memory_size;
+ struct imsm_update_reshape_migration *u;
struct imsm_dev *dev;
int previous_level = -1;
@@ -9921,12 +10709,12 @@ static int imsm_create_metadata_update_for_migration(
previous_level = map->raid_level;
}
}
- if ((geo->level == 5) && (previous_level == 0)) {
+ if (geo->level == 5 && previous_level == 0) {
struct mdinfo *spares = NULL;
u->new_raid_disks++;
spares = get_spares_for_grow(st);
- if ((spares == NULL) || (spares->array.spare_disks < 1)) {
+ if (spares == NULL || spares->array.spare_disks < 1) {
free(u);
sysfs_free(spares);
update_memory_size = 0;
@@ -9990,9 +10778,8 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
int rv;
getinfo_super_imsm_volume(st, &info, NULL);
- if ((geo->level != info.array.level) &&
- (geo->level >= 0) &&
- (geo->level != UnSet)) {
+ if (geo->level != info.array.level && geo->level >= 0 &&
+ geo->level != UnSet) {
switch (info.array.level) {
case 0:
if (geo->level == 5) {
@@ -10030,16 +10817,14 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
} else
geo->level = info.array.level;
- if ((geo->layout != info.array.layout)
- && ((geo->layout != UnSet) && (geo->layout != -1))) {
+ if (geo->layout != info.array.layout &&
+ (geo->layout != UnSet && geo->layout != -1)) {
change = CH_MIGRATION;
- if ((info.array.layout == 0)
- && (info.array.level == 5)
- && (geo->layout == 5)) {
+ if (info.array.layout == 0 && info.array.level == 5 &&
+ geo->layout == 5) {
/* reshape 5 -> 4 */
- } else if ((info.array.layout == 5)
- && (info.array.level == 5)
- && (geo->layout == 0)) {
+ } else if (info.array.layout == 5 && info.array.level == 5 &&
+ geo->layout == 0) {
/* reshape 4 -> 5 */
geo->layout = 0;
geo->level = 5;
@@ -10055,11 +10840,22 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
imsm_layout = info.array.layout;
}
- if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
- && (geo->chunksize != info.array.chunk_size))
+ if (geo->chunksize > 0 && geo->chunksize != UnSet &&
+ geo->chunksize != info.array.chunk_size) {
+ if (info.array.level == 10) {
+ pr_err("Error. Chunk size change for RAID 10 is not supported.\n");
+ change = -1;
+ goto analyse_change_exit;
+ } else if (info.component_size % (geo->chunksize/512)) {
+ pr_err("New chunk size (%dK) does not evenly divide device size (%lluk). Aborting...\n",
+ geo->chunksize/1024, info.component_size/2);
+ change = -1;
+ goto analyse_change_exit;
+ }
change = CH_MIGRATION;
- else
+ } else {
geo->chunksize = info.array.chunk_size;
+ }
chunk = geo->chunksize / 1024;
@@ -10070,12 +10866,12 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
*/
current_size = info.custom_array_size / data_disks;
- if ((geo->size > 0) && (geo->size != MAX_SIZE)) {
+ if (geo->size > 0 && geo->size != MAX_SIZE) {
/* align component size
*/
geo->size = imsm_component_size_aligment_check(
get_imsm_raid_level(dev->vol.map),
- chunk * 1024,
+ chunk * 1024, super->sector_size,
geo->size * 2);
if (geo->size == 0) {
pr_err("Error. Size expansion is supported only (current size is %llu, requested size /rounded/ is 0).\n",
@@ -10084,7 +10880,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
}
}
- if ((current_size != geo->size) && (geo->size > 0)) {
+ if (current_size != geo->size && geo->size > 0) {
if (change != -1) {
pr_err("Error. Size change should be the only one at a time.\n");
change = -1;
@@ -10109,7 +10905,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
*/
max_size = imsm_component_size_aligment_check(
get_imsm_raid_level(dev->vol.map),
- chunk * 1024,
+ chunk * 1024, super->sector_size,
max_size);
}
if (geo->size == MAX_SIZE) {
@@ -10123,7 +10919,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
geo->size = max_size;
}
- if ((direction == ROLLBACK_METADATA_CHANGES)) {
+ if (direction == ROLLBACK_METADATA_CHANGES) {
/* accept size for rollback only
*/
} else {
@@ -10169,8 +10965,8 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
}
analyse_change_exit:
- if ((direction == ROLLBACK_METADATA_CHANGES) &&
- ((change == CH_MIGRATION) || (change == CH_TAKEOVER))) {
+ if (direction == ROLLBACK_METADATA_CHANGES &&
+ (change == CH_MIGRATION || change == CH_TAKEOVER)) {
dprintf("imsm: Metadata changes rollback is not supported for migration and takeover operations.\n");
change = -1;
}
@@ -10345,6 +11141,33 @@ exit_imsm_reshape_super:
return ret_val;
}
+/* Non-negative results of read_completed(); a negative result is a failure. */
+#define COMPLETED_OK		0
+#define COMPLETED_NONE		1
+#define COMPLETED_DELAYED	2
+
+/*
+ * Read the attribute behind fd as text and classify its content.
+ *
+ * fd  : descriptor handed to sysfs_fd_get_str(); wait_for_reshape_imsm()
+ *       passes the descriptor it opened for "sync_completed"
+ * val : receives the leading number when the text is neither "none" nor
+ *       "delayed"; it is left untouched for those two keywords and it is
+ *       also written when the number turns out to be malformed
+ *
+ * Returns:
+ *   the negative result of sysfs_fd_get_str() when the read fails
+ *   COMPLETED_NONE    : text starts with "none"
+ *   COMPLETED_DELAYED : text starts with "delayed"
+ *   COMPLETED_OK      : a number was parsed and is followed by end of
+ *                       string, a newline or a space
+ *   -1                : no digits were found or the number is followed by
+ *                       any other character
+ */
+static int read_completed(int fd, unsigned long long *val)
+{
+	char text[50];
+	char *end;
+	int rc;
+
+	rc = sysfs_fd_get_str(fd, text, sizeof(text));
+	if (rc < 0)
+		return rc;
+
+	if (strncmp(text, "none", 4) == 0)
+		return COMPLETED_NONE;
+	if (strncmp(text, "delayed", 7) == 0)
+		return COMPLETED_DELAYED;
+
+	/* base 0 lets strtoull() pick the radix from the text's prefix */
+	*val = strtoull(text, &end, 0);
+	if (end == text)
+		return -1;
+
+	/* only these three characters may follow the number */
+	switch (*end) {
+	case '\0':
+	case '\n':
+	case ' ':
+		return COMPLETED_OK;
+	default:
+		return -1;
+	}
+}
+
/*******************************************************************************
* Function: wait_for_reshape_imsm
* Description: Function writes new sync_max value and waits until
@@ -10360,6 +11183,7 @@ exit_imsm_reshape_super:
int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
{
int fd = sysfs_get_fd(sra, NULL, "sync_completed");
+ int retry = 3;
unsigned long long completed;
/* to_complete : new sync_max position */
unsigned long long to_complete = sra->reshape_progress;
@@ -10370,11 +11194,17 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
return 1;
}
- if (sysfs_fd_get_ll(fd, &completed) < 0) {
- dprintf("cannot read reshape_position (no reshape in progres)\n");
- close(fd);
- return 1;
- }
+ do {
+ if (sysfs_fd_get_ll(fd, &completed) < 0) {
+ if (!retry) {
+ dprintf("cannot read reshape_position (no reshape in progres)\n");
+ close(fd);
+ return 1;
+ }
+ usleep(30000);
+ } else
+ break;
+ } while (retry--);
if (completed > position_to_set) {
dprintf("wrong next position to set %llu (%llu)\n",
@@ -10392,24 +11222,31 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
}
do {
+ int rc;
char action[20];
int timeout = 3000;
+
sysfs_wait(fd, &timeout);
if (sysfs_get_str(sra, NULL, "sync_action",
action, 20) > 0 &&
strncmp(action, "reshape", 7) != 0) {
+ if (strncmp(action, "idle", 4) == 0)
+ break;
close(fd);
return -1;
}
- if (sysfs_fd_get_ll(fd, &completed) < 0) {
+
+ rc = read_completed(fd, &completed);
+ if (rc < 0) {
dprintf("cannot read reshape_position (in loop)\n");
close(fd);
return 1;
- }
+ } else if (rc == COMPLETED_NONE)
+ break;
} while (completed < position_to_set);
+
close(fd);
return 0;
-
}
/*******************************************************************************
@@ -10430,7 +11267,7 @@ int check_degradation_change(struct mdinfo *info,
int rv;
rv = sysfs_get_ll(info, NULL, "degraded", &new_degraded);
- if ((rv == -1) || (new_degraded != (unsigned long long)degraded)) {
+ if (rv == -1 || (new_degraded != (unsigned long long)degraded)) {
/* check each device to ensure it is still working */
struct mdinfo *sd;
new_degraded = 0;
@@ -10438,9 +11275,10 @@ int check_degradation_change(struct mdinfo *info,
if (sd->disk.state & (1<<MD_DISK_FAULTY))
continue;
if (sd->disk.state & (1<<MD_DISK_SYNC)) {
- char sbuf[20];
+ char sbuf[100];
+
if (sysfs_get_str(info,
- sd, "state", sbuf, 20) < 0 ||
+ sd, "state", sbuf, sizeof(sbuf)) < 0 ||
strstr(sbuf, "faulty") ||
strstr(sbuf, "in_sync") == NULL) {
/* this device is dead */
@@ -10465,7 +11303,7 @@ int check_degradation_change(struct mdinfo *info,
* Function: imsm_manage_reshape
* Description: Function finds array under reshape and it manages reshape
* process. It creates stripes backups (if required) and sets
- * checheckpoits.
+ * checkpoints.
* Parameters:
* afd : Backup handle (nattive) - not used
* sra : general array info
@@ -10489,7 +11327,8 @@ static int imsm_manage_reshape(
{
int ret_val = 0;
struct intel_super *super = st->sb;
- struct intel_dev *dv = NULL;
+ struct intel_dev *dv;
+ unsigned int sector_size = super->sector_size;
struct imsm_dev *dev = NULL;
struct imsm_map *map_src;
int migr_vol_qan = 0;
@@ -10507,7 +11346,10 @@ static int imsm_manage_reshape(
int degraded = 0;
int source_layout = 0;
- if (!fds || !offsets || !sra)
+ if (!sra)
+ return ret_val;
+
+ if (!fds || !offsets)
goto abort;
/* Find volume during the reshape */
@@ -10520,7 +11362,7 @@ static int imsm_manage_reshape(
}
/* Only one volume can migrate at the same time */
if (migr_vol_qan != 1) {
- pr_err(": %s", migr_vol_qan ?
+ pr_err("%s", migr_vol_qan ?
"Number of migrating volumes greater than 1\n" :
"There is no volume during migrationg\n");
goto abort;
@@ -10564,8 +11406,8 @@ static int imsm_manage_reshape(
buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
/* add space for stripe aligment */
buf_size += old_data_stripe_length;
- if (posix_memalign((void **)&buf, 4096, buf_size)) {
- dprintf("imsm: Cannot allocate checpoint buffer\n");
+ if (posix_memalign((void **)&buf, MAX_SECTOR_SIZE, buf_size)) {
+ dprintf("imsm: Cannot allocate checkpoint buffer\n");
goto abort;
}
@@ -10595,7 +11437,7 @@ static int imsm_manage_reshape(
start = current_position * 512;
- /* allign reading start to old geometry */
+ /* align reading start to old geometry */
start_buf_shift = start % old_data_stripe_length;
start_src = start - start_buf_shift;
@@ -10609,7 +11451,7 @@ static int imsm_manage_reshape(
* to backup alligned to source array
* [bytes]
*/
- unsigned long long next_step_filler = 0;
+ unsigned long long next_step_filler;
unsigned long long copy_length = next_step * 512;
/* allign copy area length to stripe in old geometry */
@@ -10681,17 +11523,18 @@ static int imsm_manage_reshape(
/* clear migr_rec on disks after successful migration */
struct dl *d;
- memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SIZE);
+ memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*sector_size);
for (d = super->disks; d; d = d->next) {
if (d->index < 0 || is_failed(&d->disk))
continue;
unsigned long long dsize;
get_dev_size(d->fd, NULL, &dsize);
- if (lseek64(d->fd, dsize - MIGR_REC_POSITION,
+ if (lseek64(d->fd, dsize - MIGR_REC_SECTOR_POSITION*sector_size,
SEEK_SET) >= 0) {
- if (write(d->fd, super->migr_rec_buf,
- MIGR_REC_BUF_SIZE) != MIGR_REC_BUF_SIZE)
+ if ((unsigned int)write(d->fd, super->migr_rec_buf,
+ MIGR_REC_BUF_SECTORS*sector_size) !=
+ MIGR_REC_BUF_SECTORS*sector_size)
perror("Write migr_rec failed");
}
}
@@ -10700,6 +11543,10 @@ static int imsm_manage_reshape(
ret_val = 1;
abort:
free(buf);
+ /* See Grow.c: abort_reshape() for further explanation */
+ sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
+ sysfs_set_num(sra, NULL, "suspend_hi", 0);
+ sysfs_set_num(sra, NULL, "suspend_lo", 0);
return ret_val;
}
@@ -10729,6 +11576,7 @@ struct superswitch super_imsm = {
.manage_reshape = imsm_manage_reshape,
.recover_backup = recover_backup_imsm,
.copy_metadata = copy_metadata_imsm,
+ .examine_badblocks = examine_badblocks_imsm,
#endif
.match_home = match_home_imsm,
.uuid_from_super= uuid_from_super_imsm,
@@ -10761,5 +11609,8 @@ struct superswitch super_imsm = {
.activate_spare = imsm_activate_spare,
.process_update = imsm_process_update,
.prepare_update = imsm_prepare_update,
+ .record_bad_block = imsm_record_badblock,
+ .clear_bad_block = imsm_clear_badblock,
+ .get_bad_blocks = imsm_get_badblocks,
#endif /* MDASSEMBLE */
};
diff --git a/super-mbr.c b/super-mbr.c
index 62b3f031..f5e4ceab 100644
--- a/super-mbr.c
+++ b/super-mbr.c
@@ -57,6 +57,11 @@ static void examine_mbr(struct supertype *st, char *homehost)
printf(" MBR Magic : %04x\n", sb->magic);
for (i = 0; i < MBR_PARTITIONS; i++)
+ /*
+ * Have to make every access through sb rather than using a
+ * pointer to the partition table (or an entry), since the
+ * entries are not properly aligned.
+ */
if (sb->parts[i].blocks_num)
printf("Partition[%d] : %12lu sectors at %12lu (type %02x)\n",
i,
@@ -151,6 +156,11 @@ static void getinfo_mbr(struct supertype *st, struct mdinfo *info, char *map)
info->component_size = 0;
for (i = 0; i < MBR_PARTITIONS ; i++)
+ /*
+ * Have to make every access through sb rather than using a
+ * pointer to the partition table (or an entry), since the
+ * entries are not properly aligned.
+ */
if (sb->parts[i].blocks_num) {
unsigned long last =
(unsigned long)__le32_to_cpu(sb->parts[i].blocks_num)
diff --git a/super0.c b/super0.c
index 59a6a034..938cfd95 100644
--- a/super0.c
+++ b/super0.c
@@ -87,17 +87,17 @@ static void examine_super0(struct supertype *st, char *homehost)
char *c;
printf(" Magic : %08x\n", sb->md_magic);
- printf(" Version : %d.%02d.%02d\n", sb->major_version, sb->minor_version,
- sb->patch_version);
+ printf(" Version : %d.%02d.%02d\n",
+ sb->major_version, sb->minor_version, sb->patch_version);
if (sb->minor_version >= 90) {
- printf(" UUID : %08x:%08x:%08x:%08x", sb->set_uuid0, sb->set_uuid1,
- sb->set_uuid2, sb->set_uuid3);
+ printf(" UUID : %08x:%08x:%08x:%08x", sb->set_uuid0,
+ sb->set_uuid1, sb->set_uuid2, sb->set_uuid3);
if (homehost) {
char buf[20];
- void *hash = sha1_buffer(homehost,
- strlen(homehost),
- buf);
- if (memcmp(&sb->set_uuid2, hash, 8)==0)
+ void *hash;
+
+ hash = sha1_buffer(homehost, strlen(homehost), buf);
+ if (memcmp(&sb->set_uuid2, hash, 8) == 0)
printf(" (local to host %s)", homehost);
}
printf("\n");
@@ -109,19 +109,27 @@ static void examine_super0(struct supertype *st, char *homehost)
atime = sb->ctime;
printf(" Creation Time : %.24s\n", ctime(&atime));
- c=map_num(pers, sb->level);
+ c = map_num(pers, sb->level);
printf(" Raid Level : %s\n", c?c:"-unknown-");
if ((int)sb->level > 0) {
int ddsks = 0, ddsks_denom = 1;
printf(" Used Dev Size : %d%s\n", sb->size,
human_size((long long)sb->size<<10));
switch(sb->level) {
- case 1: ddsks=1;break;
+ case 1:
+ ddsks=1;
+ break;
case 4:
- case 5: ddsks = sb->raid_disks-1; break;
- case 6: ddsks = sb->raid_disks-2; break;
- case 10: ddsks = sb->raid_disks;
- ddsks_denom = (sb->layout&255) * ((sb->layout>>8)&255);
+ case 5:
+ ddsks = sb->raid_disks - 1;
+ break;
+ case 6:
+ ddsks = sb->raid_disks - 2;
+ break;
+ case 10:
+ ddsks = sb->raid_disks;
+ ddsks_denom =
+ (sb->layout & 255) * ((sb->layout >> 8) & 255);
}
if (ddsks) {
long long asize = sb->size;
@@ -134,11 +142,14 @@ static void examine_super0(struct supertype *st, char *homehost)
printf(" Total Devices : %d\n", sb->nr_disks);
printf("Preferred Minor : %d\n", sb->md_minor);
printf("\n");
- if (sb->minor_version > 90 && (sb->reshape_position+1) != 0) {
- printf(" Reshape pos'n : %llu%s\n", (unsigned long long)sb->reshape_position/2, human_size((long long)sb->reshape_position<<9));
+ if (sb->minor_version > 90 && (sb->reshape_position + 1) != 0) {
+ printf(" Reshape pos'n : %llu%s\n",
+ (unsigned long long)sb->reshape_position / 2,
+ human_size((long long)sb->reshape_position << 9));
if (sb->delta_disks) {
printf(" Delta Devices : %d", sb->delta_disks);
- printf(" (%d->%d)\n", sb->raid_disks-sb->delta_disks, sb->raid_disks);
+ printf(" (%d->%d)\n", sb->raid_disks-sb->delta_disks,
+ sb->raid_disks);
if (((int)sb->delta_disks) < 0)
delta_extra = - sb->delta_disks;
}
@@ -149,11 +160,13 @@ static void examine_super0(struct supertype *st, char *homehost)
if (sb->new_layout != sb->layout) {
if (sb->level == 5) {
c = map_num(r5layout, sb->new_layout);
- printf(" New Layout : %s\n", c?c:"-unknown-");
+ printf(" New Layout : %s\n",
+ c?c:"-unknown-");
}
if (sb->level == 6) {
c = map_num(r6layout, sb->new_layout);
- printf(" New Layout : %s\n", c?c:"-unknown-");
+ printf(" New Layout : %s\n",
+ c?c:"-unknown-");
}
if (sb->level == 10) {
printf(" New Layout : near=%d, %s=%d\n",
@@ -169,8 +182,8 @@ static void examine_super0(struct supertype *st, char *homehost)
atime = sb->utime;
printf(" Update Time : %.24s\n", ctime(&atime));
printf(" State : %s\n",
- (sb->state&(1<<MD_SB_CLEAN))?"clean":"active");
- if (sb->state & (1<<MD_SB_BITMAP_PRESENT))
+ (sb->state&(1 << MD_SB_CLEAN)) ? "clean":"active");
+ if (sb->state & (1 << MD_SB_BITMAP_PRESENT))
printf("Internal Bitmap : present\n");
printf(" Active Devices : %d\n", sb->active_disks);
printf("Working Devices : %d\n", sb->working_disks);
@@ -179,10 +192,10 @@ static void examine_super0(struct supertype *st, char *homehost)
if (calc_sb0_csum(sb) == sb->sb_csum)
printf(" Checksum : %x - correct\n", sb->sb_csum);
else
- printf(" Checksum : %x - expected %lx\n", sb->sb_csum, calc_sb0_csum(sb));
+ printf(" Checksum : %x - expected %lx\n",
+ sb->sb_csum, calc_sb0_csum(sb));
printf(" Events : %llu\n",
- ((unsigned long long)sb->events_hi << 32)
- + sb->events_lo);
+ ((unsigned long long)sb->events_hi << 32) + sb->events_lo);
printf("\n");
if (sb->level == 5) {
c = map_num(r5layout, sb->layout);
@@ -203,37 +216,50 @@ static void examine_super0(struct supertype *st, char *homehost)
case 5:
case 6:
case 10:
- printf(" Chunk Size : %dK\n", sb->chunk_size/1024);
+ printf(" Chunk Size : %dK\n", sb->chunk_size / 1024);
break;
case -1:
- printf(" Rounding : %dK\n", sb->chunk_size/1024);
+ printf(" Rounding : %dK\n", sb->chunk_size / 1024);
+ break;
+ default:
break;
- default: break;
}
printf("\n");
printf(" Number Major Minor RaidDevice State\n");
- for (d= -1; d<(signed int)(sb->raid_disks+delta_extra + sb->spare_disks); d++) {
+ for (d = -1;
+ d < (signed int)(sb->raid_disks + delta_extra + sb->spare_disks);
+ d++) {
mdp_disk_t *dp;
char *dv;
char nb[5];
- int wonly;
+ int wonly, failfast;
if (d>=0) dp = &sb->disks[d];
else dp = &sb->this_disk;
snprintf(nb, sizeof(nb), "%4d", d);
- printf("%4s %5d %5d %5d %5d ", d < 0 ? "this" : nb,
+ printf("%4s %5d %5d %5d %5d ", d < 0 ? "this" : nb,
dp->number, dp->major, dp->minor, dp->raid_disk);
- wonly = dp->state & (1<<MD_DISK_WRITEMOSTLY);
- dp->state &= ~(1<<MD_DISK_WRITEMOSTLY);
- if (dp->state & (1<<MD_DISK_FAULTY)) printf(" faulty");
- if (dp->state & (1<<MD_DISK_ACTIVE)) printf(" active");
- if (dp->state & (1<<MD_DISK_SYNC)) printf(" sync");
- if (dp->state & (1<<MD_DISK_REMOVED)) printf(" removed");
- if (wonly) printf(" write-mostly");
- if (dp->state == 0) printf(" spare");
- if ((dv=map_dev(dp->major, dp->minor, 0)))
+ wonly = dp->state & (1 << MD_DISK_WRITEMOSTLY);
+ failfast = dp->state & (1<<MD_DISK_FAILFAST);
+ dp->state &= ~(wonly | failfast);
+ if (dp->state & (1 << MD_DISK_FAULTY))
+ printf(" faulty");
+ if (dp->state & (1 << MD_DISK_ACTIVE))
+ printf(" active");
+ if (dp->state & (1 << MD_DISK_SYNC))
+ printf(" sync");
+ if (dp->state & (1 << MD_DISK_REMOVED))
+ printf(" removed");
+ if (wonly)
+ printf(" write-mostly");
+ if (failfast)
+ printf(" failfast");
+ if (dp->state == 0)
+ printf(" spare");
+ if ((dv = map_dev(dp->major, dp->minor, 0)))
printf(" %s", dv);
printf("\n");
- if (d == -1) printf("\n");
+ if (d == -1)
+ printf("\n");
}
}
@@ -558,7 +584,8 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
} else if (strcmp(update, "assemble")==0) {
int d = info->disk.number;
int wonly = sb->disks[d].state & (1<<MD_DISK_WRITEMOSTLY);
- int mask = (1<<MD_DISK_WRITEMOSTLY);
+ int failfast = sb->disks[d].state & (1<<MD_DISK_FAILFAST);
+ int mask = (1<<MD_DISK_WRITEMOSTLY)|(1<<MD_DISK_FAILFAST);
int add = 0;
if (sb->minor_version >= 91)
/* During reshape we don't insist on everything
@@ -567,7 +594,7 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
add = (1<<MD_DISK_SYNC);
if (((sb->disks[d].state & ~mask) | add)
!= (unsigned)info->disk.state) {
- sb->disks[d].state = info->disk.state | wonly;
+ sb->disks[d].state = info->disk.state | wonly |failfast;
rv = 1;
}
if (info->reshape_active &&
@@ -693,16 +720,14 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
}
/*
- * For verion-0 superblock, the homehost is 'stored' in the
- * uuid. 8 bytes for a hash of the host leaving 8 bytes
- * of random material.
- * We use the first 8 bytes (64bits) of the sha1 of the
- * host name
+ * For version-0 superblock, the homehost is 'stored' in the uuid.
+ * 8 bytes for a hash of the host leaving 8 bytes of random material.
+ * We use the first 8 bytes (64bits) of the sha1 of the host name
*/
-
static int init_super0(struct supertype *st, mdu_array_info_t *info,
- unsigned long long size, char *ignored_name, char *homehost,
- int *uuid, unsigned long long data_offset)
+ unsigned long long size, char *ignored_name,
+ char *homehost, int *uuid,
+ unsigned long long data_offset)
{
mdp_super_t *sb;
int spares;
@@ -752,16 +777,12 @@ static int init_super0(struct supertype *st, mdu_array_info_t *info,
sb->set_uuid2 = uuid[2];
sb->set_uuid3 = uuid[3];
} else {
- int rfd = open("/dev/urandom", O_RDONLY);
- if (rfd < 0 || read(rfd, &sb->set_uuid0, 4) != 4)
- sb->set_uuid0 = random();
- if (rfd < 0 || read(rfd, &sb->set_uuid1, 12) != 12) {
- sb->set_uuid1 = random();
- sb->set_uuid2 = random();
- sb->set_uuid3 = random();
- }
- if (rfd >= 0)
- close(rfd);
+ __u32 r[4];
+ random_uuid((__u8 *)r);
+ sb->set_uuid0 = r[0];
+ sb->set_uuid1 = r[1];
+ sb->set_uuid2 = r[2];
+ sb->set_uuid3 = r[3];
}
if (homehost && !uuid) {
char buf[20];
@@ -1139,7 +1160,7 @@ static int add_internal_bitmap0(struct supertype *st, int *chunkp,
if (chunk < 64*1024*1024)
chunk = 64*1024*1024;
} else if ((unsigned long long)chunk < min_chunk)
- return 0; /* chunk size too small */
+ return -EINVAL; /* chunk size too small */
sb->state |= (1<<MD_SB_BITMAP_PRESENT);
@@ -1153,10 +1174,10 @@ static int add_internal_bitmap0(struct supertype *st, int *chunkp,
bms->sync_size = __cpu_to_le64(size);
bms->write_behind = __cpu_to_le32(write_behind);
*chunkp = chunk;
- return 1;
+ return 0;
}
-static int locate_bitmap0(struct supertype *st, int fd)
+static int locate_bitmap0(struct supertype *st, int fd, int node_num)
{
unsigned long long dsize;
unsigned long long offset;
diff --git a/super1.c b/super1.c
index 8bcaa2fe..87a74cb9 100644
--- a/super1.c
+++ b/super1.c
@@ -77,6 +77,7 @@ struct mdp_superblock_1 {
__u8 device_uuid[16]; /* user-space setable, ignored by kernel */
__u8 devflags; /* per-device flags. Only one defined...*/
#define WriteMostly1 1 /* mask for writemostly flag in above */
+#define FailFast1 2 /* Device should get FailFast requests */
/* bad block log. If there are any bad blocks the feature flag is set.
* if offset and size are non-zero, that space is reserved and available.
*/
@@ -162,7 +163,8 @@ static unsigned int calc_bitmap_size(bitmap_super_t *bms, unsigned int boundary)
{
unsigned long long bits, bytes;
- bits = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
+ bits = bitmap_bits(__le64_to_cpu(bms->sync_size),
+ __le32_to_cpu(bms->chunksize));
bytes = (bits+7) >> 3;
bytes += sizeof(bitmap_super_t);
bytes = ROUND_UP(bytes, boundary);
@@ -212,8 +214,7 @@ struct align_fd {
static void init_afd(struct align_fd *afd, int fd)
{
afd->fd = fd;
-
- if (ioctl(afd->fd, BLKSSZGET, &afd->blk_sz) != 0)
+ if (!get_dev_sector_size(afd->fd, NULL, (unsigned int *)&afd->blk_sz))
afd->blk_sz = 512;
}
@@ -430,6 +431,8 @@ static void examine_super1(struct supertype *st, char *homehost)
printf(" Flags :");
if (sb->devflags & WriteMostly1)
printf(" write-mostly");
+ if (sb->devflags & FailFast1)
+ printf(" failfast");
printf("\n");
}
@@ -730,12 +733,12 @@ static int copy_metadata1(struct supertype *st, int from, int to)
}
if (super.bblog_size != 0 &&
- __le32_to_cpu(super.bblog_size) <= 100 &&
+ __le16_to_cpu(super.bblog_size) <= 100 &&
super.bblog_offset != 0 &&
(super.feature_map & __le32_to_cpu(MD_FEATURE_BAD_BLOCKS))) {
/* There is a bad block log */
unsigned long long bb_offset = sb_offset;
- int bytes = __le32_to_cpu(super.bblog_size) * 512;
+ int bytes = __le16_to_cpu(super.bblog_size) * 512;
int written = 0;
struct align_fd afrom, ato;
@@ -832,7 +835,7 @@ static int examine_badblocks_super1(struct supertype *st, int fd, char *devname)
__u64 *bbl, *bbp;
int i;
- if (!sb->bblog_size || __le32_to_cpu(sb->bblog_size) > 100
+ if (!sb->bblog_size || __le16_to_cpu(sb->bblog_size) > 100
|| !sb->bblog_offset){
printf("No bad-blocks list configured on %s\n", devname);
return 0;
@@ -843,7 +846,7 @@ static int examine_badblocks_super1(struct supertype *st, int fd, char *devname)
return 0;
}
- size = __le32_to_cpu(sb->bblog_size)* 512;
+ size = __le16_to_cpu(sb->bblog_size)* 512;
if (posix_memalign((void**)&bbl, 4096, size) != 0) {
pr_err("could not allocate badblocks list\n");
return 0;
@@ -973,11 +976,7 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
earliest = super_offset + (32+4)*2; /* match kernel */
if (info->bitmap_offset > 0) {
unsigned long long bmend = info->bitmap_offset;
- unsigned long long size = __le64_to_cpu(bsb->sync_size);
- size /= __le32_to_cpu(bsb->chunksize) >> 9;
- size = (size + 7) >> 3;
- size += sizeof(bitmap_super_t);
- size = ROUND_UP(size, 4096);
+ unsigned long long size = calc_bitmap_size(bsb, 4096);
size /= 512;
bmend += size;
if (bmend > earliest)
@@ -986,7 +985,7 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
if (sb->bblog_offset && sb->bblog_size) {
unsigned long long bbend = super_offset;
bbend += (int32_t)__le32_to_cpu(sb->bblog_offset);
- bbend += __le32_to_cpu(sb->bblog_size);
+ bbend += __le16_to_cpu(sb->bblog_size);
if (bbend > earliest)
earliest = bbend;
}
@@ -1024,6 +1023,8 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
}
if (sb->devflags & WriteMostly1)
info->disk.state |= (1 << MD_DISK_WRITEMOSTLY);
+ if (sb->devflags & FailFast1)
+ info->disk.state |= (1 << MD_DISK_FAILFAST);
info->events = __le64_to_cpu(sb->events);
sprintf(info->text_version, "1.%d", st->minor_version);
info->safe_mode_delay = 200;
@@ -1175,7 +1176,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
}
} else if (strcmp(update, "linear-grow-new") == 0) {
unsigned int i;
- int rfd, fd;
+ int fd;
unsigned int max = __le32_to_cpu(sb->max_dev);
for (i=0 ; i < max ; i++)
@@ -1186,13 +1187,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
if (max >= __le32_to_cpu(sb->max_dev))
sb->max_dev = __cpu_to_le32(max+1);
- if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
- read(rfd, sb->device_uuid, 16) != 16) {
- __u32 r[4] = {random(), random(), random(), random()};
- memcpy(sb->device_uuid, r, 16);
- }
- if (rfd >= 0)
- close(rfd);
+ random_uuid(sb->device_uuid);
sb->dev_roles[i] =
__cpu_to_le16(info->disk.raid_disk);
@@ -1225,11 +1220,8 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
} else if (strcmp(update, "uuid") == 0) {
copy_uuid(sb->set_uuid, info->uuid, super1.swapuuid);
- if (__le32_to_cpu(sb->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
- struct bitmap_super_s *bm;
- bm = (struct bitmap_super_s*)(st->sb+MAX_SB_SIZE);
- memcpy(bm->uuid, sb->set_uuid, 16);
- }
+ if (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)
+ memcpy(bms->uuid, sb->set_uuid, 16);
} else if (strcmp(update, "no-bitmap") == 0) {
sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
} else if (strcmp(update, "bbl") == 0) {
@@ -1238,15 +1230,14 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
*/
unsigned long long sb_offset = __le64_to_cpu(sb->super_offset);
unsigned long long data_offset = __le64_to_cpu(sb->data_offset);
- long bitmap_offset = (long)(int32_t)__le32_to_cpu(sb->bitmap_offset);
+ long bitmap_offset = 0;
long bm_sectors = 0;
long space;
#ifndef MDASSEMBLE
if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
- struct bitmap_super_s *bsb;
- bsb = (struct bitmap_super_s *)(((char*)sb)+MAX_SB_SIZE);
- bm_sectors = bitmap_sectors(bsb);
+ bitmap_offset = (long)__le32_to_cpu(sb->bitmap_offset);
+ bm_sectors = calc_bitmap_size(bms, 4096) >> 9;
}
#endif
if (sb_offset < data_offset) {
@@ -1300,7 +1291,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
strcat(sb->set_name, ":");
strcat(sb->set_name, info->name);
} else
- strcpy(sb->set_name, info->name);
+ strncpy(sb->set_name, info->name, sizeof(sb->set_name));
} else if (strcmp(update, "devicesize") == 0 &&
__le64_to_cpu(sb->super_offset) <
__le64_to_cpu(sb->data_offset)) {
@@ -1391,6 +1382,10 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
sb->devflags |= WriteMostly1;
else if (strcmp(update, "readwrite")==0)
sb->devflags &= ~WriteMostly1;
+ else if (strcmp(update, "failfast") == 0)
+ sb->devflags |= FailFast1;
+ else if (strcmp(update, "nofailfast") == 0)
+ sb->devflags &= ~FailFast1;
else
rv = -1;
@@ -1407,7 +1402,6 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
{
struct mdp_superblock_1 *sb;
int spares;
- int rfd;
char defname[10];
int sbsize;
@@ -1437,14 +1431,8 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
if (uuid)
copy_uuid(sb->set_uuid, uuid, super1.swapuuid);
- else {
- if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
- read(rfd, sb->set_uuid, 16) != 16) {
- __u32 r[4] = {random(), random(), random(), random()};
- memcpy(sb->set_uuid, r, 16);
- }
- if (rfd >= 0) close(rfd);
- }
+ else
+ random_uuid(sb->set_uuid);;
if (name == NULL || *name == 0) {
sprintf(defname, "%d", info->md_minor);
@@ -1457,7 +1445,7 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
strcat(sb->set_name, ":");
strcat(sb->set_name, name);
} else
- strcpy(sb->set_name, name);
+ strncpy(sb->set_name, name, sizeof(sb->set_name));
sb->ctime = __cpu_to_le64((unsigned long long)time(0));
sb->level = __cpu_to_le32(info->level);
@@ -1548,7 +1536,7 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
}
#endif
-static int locate_bitmap1(struct supertype *st, int fd);
+static int locate_bitmap1(struct supertype *st, int fd, int node_num);
static int store_super1(struct supertype *st, int fd)
{
@@ -1622,7 +1610,7 @@ static int store_super1(struct supertype *st, int fd)
struct bitmap_super_s *bm = (struct bitmap_super_s*)
(((char*)sb)+MAX_SB_SIZE);
if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC) {
- locate_bitmap1(st, fd);
+ locate_bitmap1(st, fd, 0);
if (awrite(&afd, bm, sizeof(*bm)) != sizeof(*bm))
return 5;
}
@@ -1643,7 +1631,8 @@ static unsigned long choose_bm_space(unsigned long devsize)
* NOTE: result must be multiple of 4K else bad things happen
* on 4K-sector devices.
*/
- if (devsize < 64*2) return 0;
+ if (devsize < 64*2)
+ return 0;
if (devsize - 64*2 >= 200*1024*1024*2)
return 128*2;
if (devsize - 4*2 > 8*1024*1024*2)
@@ -1706,13 +1695,13 @@ static int write_init_super1(struct supertype *st)
{
struct mdp_superblock_1 *sb = st->sb;
struct supertype *refst;
- int rfd;
int rv = 0;
unsigned long long bm_space;
struct devinfo *di;
unsigned long long dsize, array_size;
unsigned long long sb_offset;
unsigned long long data_offset;
+ long bm_offset;
for (di = st->info; di; di = di->next) {
if (di->disk.state & (1 << MD_DISK_JOURNAL))
@@ -1733,14 +1722,12 @@ static int write_init_super1(struct supertype *st)
sb->devflags |= WriteMostly1;
else
sb->devflags &= ~WriteMostly1;
+ if (di->disk.state & (1<<MD_DISK_FAILFAST))
+ sb->devflags |= FailFast1;
+ else
+ sb->devflags &= ~FailFast1;
- if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
- read(rfd, sb->device_uuid, 16) != 16) {
- __u32 r[4] = {random(), random(), random(), random()};
- memcpy(sb->device_uuid, r, 16);
- }
- if (rfd >= 0)
- close(rfd);
+ random_uuid(sb->device_uuid);
if (!(di->disk.state & (1<<MD_DISK_JOURNAL)))
sb->events = 0;
@@ -1786,15 +1773,25 @@ static int write_init_super1(struct supertype *st)
* data_offset has already been set.
*/
array_size = __le64_to_cpu(sb->size);
- /* work out how much space we left for a bitmap,
- * Add 8 sectors for bad block log */
- bm_space = choose_bm_space(array_size) + 8;
+
+ /* work out how much space we left for a bitmap */
+ if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
+ bitmap_super_t *bms = (bitmap_super_t *)
+ (((char *)sb) + MAX_SB_SIZE);
+ bm_space = calc_bitmap_size(bms, 4096) >> 9;
+ bm_offset = (long)__le32_to_cpu(sb->bitmap_offset);
+ } else {
+ bm_space = choose_bm_space(array_size);
+ bm_offset = 8;
+ }
data_offset = di->data_offset;
if (data_offset == INVALID_SECTORS)
data_offset = st->data_offset;
switch(st->minor_version) {
case 0:
+ /* Add 8 sectors for bad block log */
+ bm_space += 8;
if (data_offset == INVALID_SECTORS)
data_offset = 0;
sb_offset = dsize;
@@ -1811,38 +1808,26 @@ static int write_init_super1(struct supertype *st)
}
break;
case 1:
- sb->super_offset = __cpu_to_le64(0);
- if (data_offset == INVALID_SECTORS)
- data_offset = 16;
-
- sb->data_offset = __cpu_to_le64(data_offset);
- sb->data_size = __cpu_to_le64(dsize - data_offset);
- if (data_offset >= 8 + 32*2 + 8) {
- sb->bblog_size = __cpu_to_le16(8);
- sb->bblog_offset = __cpu_to_le32(8 + 32*2);
- } else if (data_offset >= 16) {
- sb->bblog_size = __cpu_to_le16(8);
- sb->bblog_offset = __cpu_to_le32(data_offset-8);
- }
- break;
case 2:
- sb_offset = 4*2;
+ sb_offset = st->minor_version == 2 ? 8 : 0;
sb->super_offset = __cpu_to_le64(sb_offset);
if (data_offset == INVALID_SECTORS)
- data_offset = 24;
+ data_offset = sb_offset + 16;
sb->data_offset = __cpu_to_le64(data_offset);
sb->data_size = __cpu_to_le64(dsize - data_offset);
- if (data_offset >= 16 + 32*2 + 8) {
+ if (data_offset >= sb_offset+bm_offset+bm_space+8) {
sb->bblog_size = __cpu_to_le16(8);
- sb->bblog_offset = __cpu_to_le32(8 + 32*2);
- } else if (data_offset >= 16+16) {
+ sb->bblog_offset = __cpu_to_le32(bm_offset +
+ bm_space);
+ } else if (data_offset >= sb_offset + 16) {
sb->bblog_size = __cpu_to_le16(8);
- /* '8' sectors for the bblog, and another '8'
+ /* '8' sectors for the bblog, and 'sb_offset'
* because we want offset from superblock, not
* start of device.
*/
- sb->bblog_offset = __cpu_to_le32(data_offset-8-8);
+ sb->bblog_offset = __cpu_to_le32(data_offset -
+ 8 - sb_offset);
}
break;
default:
@@ -1867,7 +1852,7 @@ static int write_init_super1(struct supertype *st)
}
if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
- rv = st->ss->write_bitmap(st, di->fd, NoUpdate);
+ rv = st->ss->write_bitmap(st, di->fd, NodeNumUpdate);
close(di->fd);
di->fd = -1;
if (rv)
@@ -2016,6 +2001,8 @@ static int load_super1(struct supertype *st, int fd, char *devname)
return 1;
}
+ memset(super, 0, SUPER1_SIZE);
+
if (aread(&afd, super, MAX_SB_SIZE) != MAX_SB_SIZE) {
if (devname)
pr_err("Cannot read superblock on %s\n",
@@ -2062,7 +2049,7 @@ static int load_super1(struct supertype *st, int fd, char *devname)
* valid. If it doesn't clear the bit. An --assemble --force
* should get that written out.
*/
- locate_bitmap1(st, fd);
+ locate_bitmap1(st, fd, 0);
if (aread(&afd, bsb, 512) != 512)
goto no_bitmap;
@@ -2137,7 +2124,7 @@ static __u64 avail_size1(struct supertype *st, __u64 devsize,
/* hot-add. allow for actual size of bitmap */
struct bitmap_super_s *bsb;
bsb = (struct bitmap_super_s *)(((char*)super)+MAX_SB_SIZE);
- bmspace = bitmap_sectors(bsb);
+ bmspace = calc_bitmap_size(bsb, 4096) >> 9;
}
#endif
/* Allow space for bad block log */
@@ -2201,6 +2188,7 @@ add_internal_bitmap1(struct supertype *st,
unsigned long long chunk = *chunkp;
int room = 0;
int creating = 0;
+ int len;
struct mdp_superblock_1 *sb = st->sb;
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
int uuid[4];
@@ -2267,7 +2255,7 @@ add_internal_bitmap1(struct supertype *st,
}
break;
default:
- return 0;
+ return -ENOSPC;
}
room -= bbl_size;
@@ -2277,7 +2265,7 @@ add_internal_bitmap1(struct supertype *st,
if (room <= 1)
/* No room for a bitmap */
- return 0;
+ return -ENOSPC;
max_bits = (room * 512 - sizeof(bitmap_super_t)) * 8;
@@ -2295,9 +2283,9 @@ add_internal_bitmap1(struct supertype *st,
if (chunk < 64*1024*1024)
chunk = 64*1024*1024;
} else if (chunk < min_chunk)
- return 0; /* chunk size too small */
+ return -EINVAL; /* chunk size too small */
if (chunk == 0) /* rounding problem */
- return 0;
+ return -EINVAL;
if (offset == 0) {
/* start bitmap on a 4K boundary with enough space for
@@ -2326,15 +2314,17 @@ add_internal_bitmap1(struct supertype *st,
if (st->nodes)
sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map)
| MD_FEATURE_BITMAP_VERSIONED);
- if (st->cluster_name)
- strncpy((char *)bms->cluster_name,
- st->cluster_name, strlen(st->cluster_name));
+ if (st->cluster_name) {
+ len = sizeof(bms->cluster_name);
+ strncpy((char *)bms->cluster_name, st->cluster_name, len);
+ bms->cluster_name[len - 1] = '\0';
+ }
*chunkp = chunk;
- return 1;
+ return 0;
}
-static int locate_bitmap1(struct supertype *st, int fd)
+static int locate_bitmap1(struct supertype *st, int fd, int node_num)
{
unsigned long long offset;
struct mdp_superblock_1 *sb;
@@ -2353,7 +2343,7 @@ static int locate_bitmap1(struct supertype *st, int fd)
else
ret = -1;
offset = __le64_to_cpu(sb->super_offset);
- offset += (int32_t) __le32_to_cpu(sb->bitmap_offset);
+ offset += (int32_t) __le32_to_cpu(sb->bitmap_offset) * (node_num + 1);
if (mustfree)
free(sb);
lseek64(fd, offset<<9, 0);
@@ -2366,7 +2356,7 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE);
int rv = 0;
void *buf;
- int towrite, n;
+ int towrite, n, len;
struct align_fd afd;
unsigned int i = 0;
unsigned long long total_bm_space, bm_space_per_node;
@@ -2375,17 +2365,41 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update
case NameUpdate:
/* update cluster name */
if (st->cluster_name) {
- memset((char *)bms->cluster_name, 0, sizeof(bms->cluster_name));
- strncpy((char *)bms->cluster_name, st->cluster_name, 64);
+ len = sizeof(bms->cluster_name);
+ memset((char *)bms->cluster_name, 0, len);
+ strncpy((char *)bms->cluster_name,
+ st->cluster_name, len);
+ bms->cluster_name[len - 1] = '\0';
}
break;
case NodeNumUpdate:
/* cluster md only supports superblock 1.2 now */
- if (st->minor_version != 2) {
+ if (st->minor_version != 2 && bms->version == BITMAP_MAJOR_CLUSTERED) {
pr_err("Warning: cluster md only works with superblock 1.2\n");
return -EINVAL;
}
+ if (bms->version == BITMAP_MAJOR_CLUSTERED) {
+ if (st->nodes == 1) {
+ /* the parameter for nodes is not valid */
+ pr_err("Warning: cluster-md at least needs two nodes\n");
+ return -EINVAL;
+ } else if (st->nodes == 0)
+ /* --nodes is not specified */
+ break;
+ else if (__cpu_to_le32(st->nodes) < bms->nodes) {
+ /* Since the nodes num is not increased, no need to check the space
+ * is enough or not, just update bms->nodes */
+ bms->nodes = __cpu_to_le32(st->nodes);
+ break;
+ }
+ } else {
+ /* no need to change bms->nodes for other bitmap types */
+ if (st->nodes)
+ pr_err("Warning: --nodes option is only suitable for clustered bitmap\n");
+ break;
+ }
+
/* Each node has an independent bitmap, it is necessary to calculate the
* space is enough or not, first get how many bytes for the total bitmap */
bm_space_per_node = calc_bitmap_size(bms, 4096);
@@ -2408,7 +2422,7 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update
init_afd(&afd, fd);
- locate_bitmap1(st, fd);
+ locate_bitmap1(st, fd, 0);
if (posix_memalign(&buf, 4096, 4096))
return -ENOMEM;
@@ -2423,7 +2437,15 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update
memset(buf, 0xff, 4096);
memcpy(buf, (char *)bms, sizeof(bitmap_super_t));
- towrite = calc_bitmap_size(bms, 4096);
+ /*
+ * use 4096 boundary if bitmap_offset is aligned
+ * with 8 sectors, so that it stays compatible with
+ * older mdadm.
+ */
+ if (__le32_to_cpu(sb->bitmap_offset) & 7)
+ towrite = calc_bitmap_size(bms, 512);
+ else
+ towrite = calc_bitmap_size(bms, 4096);
while (towrite > 0) {
n = towrite;
if (n > 4096)
@@ -2567,7 +2589,6 @@ void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0
void *ret;
struct mdp_superblock_1 *sb;
int i;
- int rfd;
unsigned long long offset;
if (posix_memalign(&ret, 4096, 1024) != 0)
@@ -2599,13 +2620,7 @@ void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0
sb->super_offset = __cpu_to_le64(offset);
//*(__u64*)(st->other + 128 + 8 + 8) = __cpu_to_le64(offset);
- if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
- read(rfd, sb->device_uuid, 16) != 16) {
- __u32 r[4] = {random(), random(), random(), random()};
- memcpy(sb->device_uuid, r, 16);
- }
- if (rfd >= 0)
- close(rfd);
+ random_uuid(sb->device_uuid);
for (i = 0; i < MD_SB_DISKS; i++) {
int state = sb0->disks[i].state;
diff --git a/sysfs.c b/sysfs.c
index 26003432..b0657a04 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -27,15 +27,17 @@
#include <dirent.h>
#include <ctype.h>
-int load_sys(char *path, char *buf)
+#define MAX_SYSFS_PATH_LEN 120
+
+int load_sys(char *path, char *buf, int len)
{
int fd = open(path, O_RDONLY);
int n;
if (fd < 0)
return -1;
- n = read(fd, buf, 1024);
+ n = read(fd, buf, len);
close(fd);
- if (n <0 || n >= 1024)
+ if (n <0 || n >= len)
return -1;
buf[n] = 0;
if (n && buf[n-1] == '\n')
@@ -50,8 +52,10 @@ void sysfs_free(struct mdinfo *sra)
while (sra->devs) {
struct mdinfo *d = sra->devs;
sra->devs = d->next;
+ free(d->bb.entries);
free(d);
}
+ free(sra->bb.entries);
free(sra);
sra = sra2;
}
@@ -59,15 +63,15 @@ void sysfs_free(struct mdinfo *sra)
int sysfs_open(char *devnm, char *devname, char *attr)
{
- char fname[50];
+ char fname[MAX_SYSFS_PATH_LEN];
int fd;
- sprintf(fname, "/sys/block/%s/md/", devnm);
+ snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/", devnm);
if (devname) {
- strcat(fname, devname);
- strcat(fname, "/");
+ strncat(fname, devname, MAX_SYSFS_PATH_LEN - strlen(fname));
+ strncat(fname, "/", MAX_SYSFS_PATH_LEN - strlen(fname));
}
- strcat(fname, attr);
+ strncat(fname, attr, MAX_SYSFS_PATH_LEN - strlen(fname));
fd = open(fname, O_RDWR);
if (fd < 0 && errno == EACCES)
fd = open(fname, O_RDONLY);
@@ -118,7 +122,7 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
sra->devs = NULL;
if (options & GET_VERSION) {
strcpy(base, "metadata_version");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
if (strncmp(buf, "none", 4) == 0) {
sra->array.major_version =
@@ -137,31 +141,31 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
}
if (options & GET_LEVEL) {
strcpy(base, "level");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
sra->array.level = map_name(pers, buf);
}
if (options & GET_LAYOUT) {
strcpy(base, "layout");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
sra->array.layout = strtoul(buf, NULL, 0);
}
if (options & GET_DISKS) {
strcpy(base, "raid_disks");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
sra->array.raid_disks = strtoul(buf, NULL, 0);
}
if (options & GET_DEGRADED) {
strcpy(base, "degraded");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
sra->array.failed_disks = strtoul(buf, NULL, 0);
}
if (options & GET_COMPONENT) {
strcpy(base, "component_size");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
sra->component_size = strtoull(buf, NULL, 0);
/* sysfs reports "K", but we want sectors */
@@ -169,13 +173,13 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
}
if (options & GET_CHUNK) {
strcpy(base, "chunk_size");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
sra->array.chunk_size = strtoul(buf, NULL, 0);
}
if (options & GET_CACHE) {
strcpy(base, "stripe_cache_size");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
/* Probably level doesn't support it */
sra->cache_size = 0;
else
@@ -183,7 +187,7 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
}
if (options & GET_MISMATCH) {
strcpy(base, "mismatch_cnt");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
sra->mismatch_cnt = strtoul(buf, NULL, 0);
}
@@ -195,7 +199,7 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
size_t len;
strcpy(base, "safe_mode_delay");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
/* remove a period, and count digits after it */
@@ -218,7 +222,7 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
}
if (options & GET_BITMAP_LOCATION) {
strcpy(base, "bitmap/location");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
if (strncmp(buf, "file", 4) == 0)
sra->bitmap_offset = 1;
@@ -232,7 +236,8 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
if (options & GET_ARRAY_STATE) {
strcpy(base, "array_state");
- if (load_sys(fname, sra->sysfs_array_state))
+ if (load_sys(fname, sra->sysfs_array_state,
+ sizeof(sra->sysfs_array_state)))
goto abort;
} else
sra->sysfs_array_state[0] = 0;
@@ -258,11 +263,11 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
dbase = base + strlen(base);
*dbase++ = '/';
- dev = xmalloc(sizeof(*dev));
+ dev = xcalloc(1, sizeof(*dev));
/* Always get slot, major, minor */
strcpy(dbase, "slot");
- if (load_sys(fname, buf)) {
+ if (load_sys(fname, buf, sizeof(buf))) {
/* hmm... unable to read 'slot' maybe the device
* is going away?
*/
@@ -287,7 +292,7 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
if (*ep) dev->disk.raid_disk = -1;
strcpy(dbase, "block/dev");
- if (load_sys(fname, buf)) {
+ if (load_sys(fname, buf, sizeof(buf))) {
/* assume this is a stale reference to a hot
* removed device
*/
@@ -299,7 +304,7 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
/* special case check for block devices that can go 'offline' */
strcpy(dbase, "block/device/state");
- if (load_sys(fname, buf) == 0 &&
+ if (load_sys(fname, buf, sizeof(buf)) == 0 &&
strncmp(buf, "offline", 7) == 0) {
free(dev);
continue;
@@ -312,25 +317,25 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
if (options & GET_OFFSET) {
strcpy(dbase, "offset");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
dev->data_offset = strtoull(buf, NULL, 0);
strcpy(dbase, "new_offset");
- if (load_sys(fname, buf) == 0)
+ if (load_sys(fname, buf, sizeof(buf)) == 0)
dev->new_data_offset = strtoull(buf, NULL, 0);
else
dev->new_data_offset = dev->data_offset;
}
if (options & GET_SIZE) {
strcpy(dbase, "size");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
dev->component_size = strtoull(buf, NULL, 0) * 2;
}
if (options & GET_STATE) {
dev->disk.state = 0;
strcpy(dbase, "state");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
if (strstr(buf, "in_sync"))
dev->disk.state |= (1<<MD_DISK_SYNC);
@@ -341,7 +346,7 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
}
if (options & GET_ERROR) {
strcpy(buf, "errors");
- if (load_sys(fname, buf))
+ if (load_sys(fname, buf, sizeof(buf)))
goto abort;
dev->errors = strtoul(buf, NULL, 0);
}
@@ -391,15 +396,12 @@ unsigned long long get_component_size(int fd)
* This returns in units of sectors.
*/
struct stat stb;
- char fname[50];
+ char fname[MAX_SYSFS_PATH_LEN];
int n;
- if (fstat(fd, &stb)) return 0;
- if (major(stb.st_rdev) != (unsigned)get_mdp_major())
- sprintf(fname, "/sys/block/md%d/md/component_size",
- (int)minor(stb.st_rdev));
- else
- sprintf(fname, "/sys/block/md_d%d/md/component_size",
- (int)minor(stb.st_rdev)>>MdpMinorShift);
+ if (fstat(fd, &stb))
+ return 0;
+ snprintf(fname, MAX_SYSFS_PATH_LEN,
+ "/sys/block/%s/md/component_size", stat2devnm(&stb));
fd = open(fname, O_RDONLY);
if (fd < 0)
return 0;
@@ -414,11 +416,11 @@ unsigned long long get_component_size(int fd)
int sysfs_set_str(struct mdinfo *sra, struct mdinfo *dev,
char *name, char *val)
{
- char fname[50];
+ char fname[MAX_SYSFS_PATH_LEN];
unsigned int n;
int fd;
- sprintf(fname, "/sys/block/%s/md/%s/%s",
+ snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/%s/%s",
sra->sys_name, dev?dev->sys_name:"", name);
fd = open(fname, O_WRONLY);
if (fd < 0)
@@ -451,11 +453,11 @@ int sysfs_set_num_signed(struct mdinfo *sra, struct mdinfo *dev,
int sysfs_uevent(struct mdinfo *sra, char *event)
{
- char fname[50];
+ char fname[MAX_SYSFS_PATH_LEN];
int n;
int fd;
- sprintf(fname, "/sys/block/%s/uevent",
+ snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/uevent",
sra->sys_name);
fd = open(fname, O_WRONLY);
if (fd < 0)
@@ -472,10 +474,10 @@ int sysfs_uevent(struct mdinfo *sra, char *event)
int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev, char *name)
{
- char fname[50];
+ char fname[MAX_SYSFS_PATH_LEN];
struct stat st;
- sprintf(fname, "/sys/block/%s/md/%s/%s",
+ snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/%s/%s",
sra->sys_name, dev?dev->sys_name:"", name);
return stat(fname, &st) == 0;
@@ -484,10 +486,10 @@ int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev, char *name
int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev,
char *name)
{
- char fname[50];
+ char fname[MAX_SYSFS_PATH_LEN];
int fd;
- sprintf(fname, "/sys/block/%s/md/%s/%s",
+ snprintf(fname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/%s/%s",
sra->sys_name, dev?dev->sys_name:"", name);
fd = open(fname, O_RDWR);
if (fd < 0)
@@ -685,6 +687,7 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
char nm[PATH_MAX];
char *dname;
int rv;
+ int i;
sprintf(dv, "%d:%d", sd->disk.major, sd->disk.minor);
rv = sysfs_set_str(sra, NULL, "new_dev", dv);
@@ -716,6 +719,28 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
if (resume)
sysfs_set_num(sra, sd, "recovery_start", sd->recovery_start);
}
+ if (sd->bb.supported) {
+ if (sysfs_set_str(sra, sd, "state", "external_bbl")) {
+ /*
+ * backward compatibility - if kernel doesn't support
+ * bad blocks for external metadata, let it continue
+ * as long as there are none known so far
+ */
+ if (sd->bb.count) {
+ pr_err("The kernel has no support for bad blocks in external metadata\n");
+ return -1;
+ }
+ }
+
+ for (i = 0; i < sd->bb.count; i++) {
+ char s[30];
+ const struct md_bb_entry *entry = &sd->bb.entries[i];
+
+ snprintf(s, sizeof(s) - 1, "%llu %d\n", entry->sector,
+ entry->length);
+ rv |= sysfs_set_str(sra, sd, "bad_blocks", s);
+ }
+ }
return rv;
}
diff --git a/tests/21raid5cache b/tests/21raid5cache
new file mode 100644
index 00000000..0dd97bf8
--- /dev/null
+++ b/tests/21raid5cache
@@ -0,0 +1,87 @@
+# check data integrity with raid5 write back cache
+
+# create a 4kB random file and 4 files each with a 1kB chunk of the random file:
+# randfile: ABCD randchunk[0-3]: A B C D
+#
+# then create another random 1kB chunk E, and a new random page with A, B, E, D:
+# randchunk4: E newrandfile: ABED (all files are created under /tmp)
+create_random_data() {
+	dd if=/dev/urandom of=/tmp/randfile bs=4k count=1
+	for x in {0..3}
+	do
+		dd if=/tmp/randfile of=/tmp/randchunk$x bs=1k count=1 skip=$x
+	done
+
+	dd if=/dev/urandom of=/tmp/randchunk4 bs=1k count=1
+	# newrandfile is built by appending below, so drop any stale copy (-f: it may not exist yet)
+	rm -f /tmp/newrandfile
+	for x in 0 1 4 3
+	do
+		cat /tmp/randchunk$x >> /tmp/newrandfile
+	done
+}
+
+# create a 10-device array with a journal on $dev10; $1 is the raid level: 5 (raid5) or 6 (raid6)
+create_array() {
+	if [ $1 -lt 5 -o $1 -gt 6 ]
+	then
+		echo wrong array type $1
+		exit 2
+	fi
+
+	mdadm -CR $md0 -c4 -l$1 -n10 $dev0 $dev1 $dev2 $dev3 $dev4 $dev5 $dev6 $dev11 $dev8 $dev9 --write-journal $dev10
+	check wait
+	echo write-back > /sys/block/md0/md/journal_mode
+}
+
+restart_array_write_back() { # stop $md0, re-assemble it from the same members, set journal_mode to write-back again
+ mdadm -S $md0
+ mdadm -A $md0 $dev0 $dev1 $dev2 $dev3 $dev4 $dev5 $dev6 $dev11 $dev8 $dev9 $dev10
+ echo write-back > /sys/block/md0/md/journal_mode
+}
+
+# compare the first page (4096 bytes) of md0 with the file in $1; print "cmp failed" and exit 2 on mismatch
+cmp_first_page() {
+ cmp -n 4096 $1 $md0 || { echo cmp failed ; exit 2 ; }
+}
+
+# write 3 random 4kB pages after the first page of md0 (pages 1..3; page 0 is left untouched)
+write_three_pages() {
+	for x in {1..3}
+	do
+		dd if=/dev/urandom of=$md0 bs=4k count=1 seek=$x
+	done
+}
+
+# run_test <array_type:5/6> <degraded_or_not:yes/no>
+run_test() {
+	create_random_data
+	create_array $1
+
+	if [ "$2" = yes ]
+	then
+		mdadm --fail $md0 $dev0
+	fi
+
+	dd if=/tmp/randfile of=$md0 bs=4k count=1
+	restart_array_write_back
+	cmp_first_page /tmp/randfile
+	restart_array_write_back
+	write_three_pages
+	cmp_first_page /tmp/randfile
+
+	# overwrite the third 1kB chunk (C -> E): the first page must now match newrandfile (ABED)
+	dd if=/tmp/randchunk4 of=$md0 bs=1k count=1 seek=2
+	restart_array_write_back
+	cmp_first_page /tmp/newrandfile
+	restart_array_write_back
+	write_three_pages
+	cmp_first_page /tmp/newrandfile
+
+	mdadm -S $md0
+}
+
+run_test 5 no
+run_test 5 yes
+run_test 6 no
+run_test 6 yes
diff --git a/util.c b/util.c
index 970d4847..f1009723 100644
--- a/util.c
+++ b/util.c
@@ -24,7 +24,6 @@
#include "mdadm.h"
#include "md_p.h"
-#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <sys/wait.h>
@@ -32,6 +31,7 @@
#include <sys/resource.h>
#include <sys/vfs.h>
#include <linux/magic.h>
+#include <poll.h>
#include <ctype.h>
#include <dirent.h>
#include <signal.h>
@@ -147,13 +147,7 @@ int cluster_get_dlmlock(int *lockid)
return -ENOMEM;
}
- /* Conversions need the lockid in the LKSB */
- if (flags & LKF_CONVERT)
- dlm_lock_res->lksb.sb_lkid = *lockid;
-
snprintf(str, 64, "bitmap%s", cluster_name);
- /* if flags with LKF_CONVERT causes below return ENOENT which means
- * "No such file or directory" */
ret = dlm_hooks->ls_lock(dlm_lock_res->ls, LKM_PWMODE, &dlm_lock_res->lksb,
flags, str, strlen(str), 0, dlm_ast,
dlm_lock_res, NULL, NULL);
@@ -177,8 +171,6 @@ int cluster_release_dlmlock(int lockid)
if (!cluster_name)
return -1;
- /* if flags with LKF_CONVERT causes below return EINVAL which means
- * "Invalid argument" */
ret = dlm_hooks->ls_unlock(dlm_lock_res->ls, lockid, 0,
&dlm_lock_res->lksb, dlm_lock_res);
if (ret) {
@@ -718,17 +710,22 @@ int check_raid(int fd, char *name)
if (!st)
return 0;
- st->ss->load_super(st, fd, name);
- /* Looks like a raid array .. */
- pr_err("%s appears to be part of a raid array:\n",
- name);
- st->ss->getinfo_super(st, &info, NULL);
- st->ss->free_super(st);
- crtime = info.array.ctime;
- level = map_num(pers, info.array.level);
- if (!level) level = "-unknown-";
- cont_err("level=%s devices=%d ctime=%s",
- level, info.array.raid_disks, ctime(&crtime));
+ if (st->ss->add_to_super != NULL) {
+ st->ss->load_super(st, fd, name);
+ /* Looks like a raid array .. */
+ pr_err("%s appears to be part of a raid array:\n", name);
+ st->ss->getinfo_super(st, &info, NULL);
+ st->ss->free_super(st);
+ crtime = info.array.ctime;
+ level = map_num(pers, info.array.level);
+ if (!level)
+ level = "-unknown-";
+ cont_err("level=%s devices=%d ctime=%s",
+ level, info.array.raid_disks, ctime(&crtime));
+ } else {
+ /* Looks like GPT or MBR */
+ pr_err("partition table exists on %s\n", name);
+ }
return 1;
}
@@ -936,7 +933,7 @@ int get_data_disks(int level, int layout, int raid_disks)
return data_disks;
}
-int devnm2devid(char *devnm)
+dev_t devnm2devid(char *devnm)
{
/* First look in /sys/block/$DEVNM/dev for %d:%d
* If that fails, try parsing out a number
@@ -1047,7 +1044,8 @@ int dev_open(char *dev, int flags)
int major;
int minor;
- if (!dev) return -1;
+ if (!dev)
+ return -1;
flags |= O_DIRECT;
if (get_maj_min(dev, &major, &minor)) {
@@ -1073,7 +1071,7 @@ int dev_open(char *dev, int flags)
int open_dev_flags(char *devnm, int flags)
{
- int devid;
+ dev_t devid;
char buf[20];
devid = devnm2devid(devnm);
@@ -1091,7 +1089,7 @@ int open_dev_excl(char *devnm)
char buf[20];
int i;
int flags = O_RDWR;
- int devid = devnm2devid(devnm);
+ dev_t devid = devnm2devid(devnm);
long delay = 1000;
sprintf(buf, "%d:%d", major(devid), minor(devid));
@@ -1199,8 +1197,7 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
subarray = xstrdup(subarray);
}
strcpy(container, dev);
- if (sra)
- sysfs_free(sra);
+ sysfs_free(sra);
sra = sysfs_read(-1, container, GET_VERSION);
if (sra && sra->text_version[0])
verstr = sra->text_version;
@@ -1211,8 +1208,7 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
for (i = 0; st == NULL && superlist[i] ; i++)
st = superlist[i]->match_metadata_desc(verstr);
- if (sra)
- sysfs_free(sra);
+ sysfs_free(sra);
if (st) {
st->sb = NULL;
if (subarrayp)
@@ -1267,7 +1263,7 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type)
*/
struct superswitch *ss;
struct supertype *st;
- time_t besttime = 0;
+ unsigned int besttime = 0;
int bestsuper = -1;
int i;
@@ -1328,7 +1324,7 @@ int get_dev_size(int fd, char *dname, unsigned long long *sizep)
ldsize <<= 9;
} else {
if (dname)
- pr_err("Cannot get size of %s: %s\b",
+ pr_err("Cannot get size of %s: %s\n",
dname, strerror(errno));
return 0;
}
@@ -1337,6 +1333,22 @@ int get_dev_size(int fd, char *dname, unsigned long long *sizep)
return 1;
}
+/* Store the sector size of fd (bytes, from BLKSSZGET) in *sectsizep: returns 1 on
+ * success, 0 if the ioctl fails (reported via pr_err only when dname is set). */
+int get_dev_sector_size(int fd, char *dname, unsigned int *sectsizep)
+{
+	unsigned int bytes;
+
+	if (ioctl(fd, BLKSSZGET, &bytes) == 0) {
+		*sectsizep = bytes;
+		return 1;
+	}
+	if (dname)
+		pr_err("Cannot get sector size of %s: %s\n",
+		       dname, strerror(errno));
+	return 0;
+}
+
/* Return true if this can only be a container, not a member device.
* i.e. is and md device and size is zero
*/
@@ -1366,12 +1378,15 @@ static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart)
unsigned long long curr_part_end;
unsigned all_partitions, entry_size;
unsigned part_nr;
+ unsigned int sector_size = 0;
*endofpart = 0;
BUILD_BUG_ON(sizeof(gpt) != 512);
/* skip protective MBR */
- lseek(fd, 512, SEEK_SET);
+ if (!get_dev_sector_size(fd, NULL, &sector_size))
+ return 0;
+ lseek(fd, sector_size, SEEK_SET);
/* read GPT header */
if (read(fd, &gpt, 512) != 512)
return 0;
@@ -1391,6 +1406,8 @@ static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart)
part = (struct GPT_part_entry *)buf;
+ /* set offset to third block (GPT entries) */
+ lseek(fd, sector_size*2, SEEK_SET);
for (part_nr = 0; part_nr < all_partitions; part_nr++) {
/* read partition entry */
if (read(fd, buf, entry_size) != (ssize_t)entry_size)
@@ -1416,9 +1433,9 @@ static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart)
static int get_last_partition_end(int fd, unsigned long long *endofpart)
{
struct MBR boot_sect;
- struct MBR_part_record *part;
unsigned long long curr_part_end;
unsigned part_nr;
+ unsigned int sector_size;
int retval = 0;
*endofpart = 0;
@@ -1433,26 +1450,34 @@ static int get_last_partition_end(int fd, unsigned long long *endofpart)
if (boot_sect.magic == MBR_SIGNATURE_MAGIC) {
retval = 1;
/* found the correct signature */
- part = boot_sect.parts;
for (part_nr = 0; part_nr < MBR_PARTITIONS; part_nr++) {
+ /*
+ * Have to make every access through boot_sect rather
+ * than using a pointer to the partition table (or an
+ * entry), since the entries are not properly aligned.
+ */
+
/* check for GPT type */
- if (part->part_type == MBR_GPT_PARTITION_TYPE) {
+ if (boot_sect.parts[part_nr].part_type ==
+ MBR_GPT_PARTITION_TYPE) {
retval = get_gpt_last_partition_end(fd, endofpart);
break;
}
/* check the last used lba for the current partition */
- curr_part_end = __le32_to_cpu(part->first_sect_lba) +
- __le32_to_cpu(part->blocks_num);
+ curr_part_end =
+ __le32_to_cpu(boot_sect.parts[part_nr].first_sect_lba) +
+ __le32_to_cpu(boot_sect.parts[part_nr].blocks_num);
if (curr_part_end > *endofpart)
*endofpart = curr_part_end;
-
- part++;
}
} else {
/* Unknown partition table */
retval = -1;
}
+ /* calculate number of 512-byte blocks */
+ if (get_dev_sector_size(fd, NULL, &sector_size))
+ *endofpart *= (sector_size / 512);
abort:
return retval;
}
@@ -1464,9 +1489,8 @@ int check_partitions(int fd, char *dname, unsigned long long freesize,
* Check where the last partition ends
*/
unsigned long long endofpart;
- int ret;
- if ((ret = get_last_partition_end(fd, &endofpart)) > 0) {
+ if (get_last_partition_end(fd, &endofpart) > 0) {
/* There appears to be a partition table here */
if (freesize == 0) {
/* partitions will not be visible in new device */
@@ -1945,6 +1969,27 @@ __u32 random32(void)
return rv;
}
+void random_uuid(__u8 *buf)
+{
+	/* Fill buf[0..15] with 16 bytes from /dev/urandom, else from random() */
+	__u32 fallback[4];
+	int urandom = open("/dev/urandom", O_RDONLY);
+	int k;
+
+	if (urandom >= 0) {
+		int got = read(urandom, buf, 16);
+
+		close(urandom);
+		if (got == 16)
+			return;
+	}
+
+	/* open failed or the read came up short: overwrite all 16 bytes */
+	for (k = 0; k < 4; k++)
+		fallback[k] = random();
+	memcpy(buf, fallback, 16);
+}
+
#ifndef MDASSEMBLE
int flush_metadata_updates(struct supertype *st)
{