summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--md.428
-rw-r--r--mdadm.8.in5
-rw-r--r--super1.c45
3 files changed, 73 insertions, 5 deletions
diff --git a/md.4 b/md.4
index 99faad1a..fa870655 100644
--- a/md.4
+++ b/md.4
@@ -551,6 +551,34 @@ intent log if one is present.
In 2.6.13, intent bitmaps are only supported with RAID1. Other levels
with redundancy are supported from 2.6.15.
+.SS BAD BLOCK LOG
+
+From Linux 3.5 each device in an
+.I md
+array can store a list of known-bad-blocks. This list is 4K in size
+and usually positioned at the end of the space between the superblock
+and the data.
+
+When a block cannot be read and cannot be repaired by writing data
+recovered from other devices, the address of the block is stored in
+the bad block log. Similarly if an attempt to write a block fails,
+the address will be recorded as a bad block. If attempting to record
+the bad block fails, the whole device will be marked faulty.
+
+Attempting to read from a known bad block will cause a read error.
+Attempting to write to a known bad block will be ignored if any write
+errors have been reported by the device. If there have been no write
+errors then the data will be written to the known bad block and if
+that succeeds, the address will be removed from the list.
+
+This allows an array to fail more gracefully - a few blocks on
+different devices can be faulty without taking the whole array out of
+action.
+
+The log is particularly useful when recovering to a spare. If a few blocks
+cannot be read from the other devices, the bulk of the recovery can
+complete and those few bad blocks will be recorded in the bad block log.
+
.SS WRITE-BEHIND
From Linux 2.6.14,
diff --git a/mdadm.8.in b/mdadm.8.in
index 38c8bc86..1fe5583f 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -1871,6 +1871,11 @@ setting.
.\".B \-\-size
.\"is given, the apparent size of the smallest drive given is used.
+If the metadata type supports it (currently only 1.x metadata), space
+will be allocated to store a bad block list. This allows a modest
+number of bad blocks to be recorded, allowing the drive to remain in
+service while only partially functional.
+
When creating an array within a
.B CONTAINER
.I mdadm
diff --git a/super1.c b/super1.c
index 0df5beea..c0e6cd79 100644
--- a/super1.c
+++ b/super1.c
@@ -70,7 +70,12 @@ struct mdp_superblock_1 {
__u8 device_uuid[16]; /* user-space setable, ignored by kernel */
__u8 devflags; /* per-device flags. Only one defined...*/
#define WriteMostly1 1 /* mask for writemostly flag in above */
- __u8 pad2[64-57]; /* set to 0 when writing */
+ /* bad block log. If there are any bad blocks the feature flag is set.
+ * if offset and size are non-zero, that space is reserved and available.
+ */
+ __u8 bblog_shift; /* shift from sectors to block size for badblocklist */
+ __u16 bblog_size; /* number of sectors reserved for badblocklist */
+ __u32 bblog_offset; /* sector offset from superblock to bblog, signed */
/* array state information - 64 bytes */
__u64 utime; /* 40 bits second, 24 btes microseconds */
@@ -106,8 +111,9 @@ struct misc_dev_info {
* must be honoured
*/
#define MD_FEATURE_RESHAPE_ACTIVE 4
+#define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */
-#define MD_FEATURE_ALL (1|2|4)
+#define MD_FEATURE_ALL (1|2|4|8)
#ifndef offsetof
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
@@ -319,7 +325,7 @@ static void examine_super1(struct supertype *st, char *homehost)
printf("Internal Bitmap : %ld sectors from superblock\n",
(long)(int32_t)__le32_to_cpu(sb->bitmap_offset));
}
- if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE)) {
+ if (sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)) {
printf(" Reshape pos'n : %llu%s\n", (unsigned long long)__le64_to_cpu(sb->reshape_position)/2,
human_size(__le64_to_cpu(sb->reshape_position)<<9));
if (__le32_to_cpu(sb->delta_disks)) {
@@ -363,6 +369,17 @@ static void examine_super1(struct supertype *st, char *homehost)
atime = __le64_to_cpu(sb->utime) & 0xFFFFFFFFFFULL;
printf(" Update Time : %.24s\n", ctime(&atime));
+ if (sb->bblog_size && sb->bblog_offset) {
+ printf(" Bad Block Log : %d entries available at offset %ld sectors",
+ __le16_to_cpu(sb->bblog_size)*512/8,
+ (long)__le32_to_cpu(sb->bblog_offset));
+ if (sb->feature_map &
+ __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
+ printf(" - bad blocks present.");
+ printf("\n");
+ }
+
+
if (calc_sb_1_csum(sb) == sb->sb_csum)
printf(" Checksum : %x - correct\n", __le32_to_cpu(sb->sb_csum));
else
@@ -1180,10 +1197,12 @@ static int write_init_super1(struct supertype *st)
* 2: 4K from start of device.
* Depending on the array size, we might leave extra space
* for a bitmap.
+ * Also leave 4K for bad-block log.
*/
array_size = __le64_to_cpu(sb->size);
- /* work out how much space we left for a bitmap */
- bm_space = choose_bm_space(array_size);
+ /* work out how much space we left for a bitmap,
+ * Add 8 sectors for bad block log */
+ bm_space = choose_bm_space(array_size) + 8;
/* We try to leave 0.1% at the start for reshape
* operations, but limit this to 128Meg (0.1% of 10Gig)
@@ -1203,6 +1222,10 @@ static int write_init_super1(struct supertype *st)
if (sb_offset < array_size + bm_space)
bm_space = sb_offset - array_size;
sb->data_size = __cpu_to_le64(sb_offset - bm_space);
+ if (bm_space >= 8) {
+ sb->bblog_size = __cpu_to_le16(8);
+ sb->bblog_offset = __cpu_to_le32((unsigned)-8);
+ }
break;
case 1:
sb->super_offset = __cpu_to_le64(0);
@@ -1221,6 +1244,10 @@ static int write_init_super1(struct supertype *st)
sb->data_offset = __cpu_to_le64(reserved);
sb->data_size = __cpu_to_le64(dsize - reserved);
+ if (reserved >= 16) {
+ sb->bblog_size = __cpu_to_le16(8);
+ sb->bblog_offset = __cpu_to_le32(reserved-8);
+ }
break;
case 2:
sb_offset = 4*2;
@@ -1245,6 +1272,14 @@ static int write_init_super1(struct supertype *st)
sb->data_offset = __cpu_to_le64(reserved);
sb->data_size = __cpu_to_le64(dsize - reserved);
+ if (reserved >= 16+16) {
+ sb->bblog_size = __cpu_to_le16(8);
+ /* '8' sectors for the bblog, and another '8'
+ * because we want offset from superblock, not
+ * start of device.
+ */
+ sb->bblog_offset = __cpu_to_le32(reserved-8-8);
+ }
break;
default:
pr_err("Failed to write invalid "