summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2014-05-22 16:00:39 +1000
committerNeilBrown <neilb@suse.de>2014-05-22 16:00:39 +1000
commitdf881f757b3d61e5dfc1a2b23436ab1cf5247157 (patch)
tree6070bc82f53e9ae3a302d9e2e45171cf0542928d
parenta740cf6432245d607ee216b5380a3215084f101d (diff)
mdcheck: new script to help with regular checks of md arrays.
This script allows arrays to be 'checked' for a limited amount of time on a regular basis. For example, running mdcheck --duration 6hours early every Sunday morning and mdcheck --continue 6hours ever other morning will check all arrays every week, but if that take more than 6 hours, will won't run into the day, but will be continued the next morning, and the next ... etc. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--misc/mdcheck158
1 files changed, 158 insertions, 0 deletions
diff --git a/misc/mdcheck b/misc/mdcheck
new file mode 100644
index 00000000..60d8501f
--- /dev/null
+++ b/misc/mdcheck
@@ -0,0 +1,158 @@
+#!/bin/bash
+
+# Copyright (C) 2014 Neil Brown <neilb@suse.de>
+#
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Author: Neil Brown
+# Email: <neilb@suse.de>
+
+# This script should be run periodically to automatically
+# perform a 'check' on any md arrays.
+#
+# It supports a 'time budget' such that any incomplete 'check'
+# will be checkpointed when that time has expired.
+# A subsequent invocation can allow the 'check' to continue.
+#
+# Options are:
+# --continue Don't start new checks, only continue old ones.
+# --duration This is passed to "date --date=+$duration" to find out
+# when to finish
+#
+# To support '--continue', arrays are identified by UUID and the 'sync_completed'
+# value is stored in /var/lib/mdcheck/$UUID
+
+# convert a /dev/md name into /sys/.../md equivalent
+sysname() {
+ set `ls -lLd $1`
+ maj=${5%,}
+ min=$6
+ readlink -f /sys/dev/block/$maj:$min
+}
+
+args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@")
+rv=$?
+if [ $rv -ne 0 ]; then exit $rv; fi
+
+eval set -- $args
+
+cont=
+endtime=
+while [ " $1" != " --" ]
+do
+ case $1 in
+ --help )
+ echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]'
+ echo >&2 ' time-offset must be understood by "date --date"'
+ exit 0
+ ;;
+ --continue ) cont=yes ;;
+ --duration ) shift; dur=$1
+ endtime=$(date --date "+$dur" "+%s")
+ ;;
+ esac
+ shift
+done
+shift
+
+# We need a temp file occasionally...
+tmp=/var/lib/mdcheck/.md-check-$$
+trap 'rm -f "$tmp"' 0
+
+
+# firstly, clean out really old state files
+mkdir -p /var/lib/mdcheck
+find /var/lib/mdcheck -name "MD_UUID*" -type f -mtime +180 -exec rm {} \;
+
+# Now look at each md device.
+cnt=0
+for dev in /dev/md?*
+do
+ sys=`sysname $dev`
+ if [ ! -f "$sys/md/sync_action" ]
+ then # cannot check this array
+ continue
+ fi
+ if [ "`cat $sys/md/sync_action`" != 'idle' ]
+ then # This array is busy
+ continue
+ fi
+
+ mdadm --detail --export "$dev" > $tmp || continue
+ source $tmp
+ fl="/var/lib/mdcheck/MD_UUID_$MD_UUID"
+ if [ -z "$cont" ]
+ then
+ start=0
+ elif [ -z "$MD_UUID" -o ! -f "$fl" ]
+ then
+ # Nothing to continue here
+ continue
+ else
+ start=`cat "$fl"`
+ fi
+
+ cnt=$[cnt+1]
+ eval MD_${cnt}_fl=\$fl
+ eval MD_${cnt}_sys=\$sys
+ echo $start > $fl
+ echo $start > $sys/md/sync_min
+ echo check > $sys/md/sync_action
+done
+
+if [ -z "$endtime" ]
+then
+ exit 0
+fi
+
+while [ `date +%s` -lt $endtime ]
+do
+ any=
+ for i in `eval echo {1..$cnt}`
+ do
+ eval fl=\$MD_${i}_fl
+ eval sys=\$MD_${i}_sys
+
+ if [ -z "$fl" ]; then continue; fi
+
+ if [ "`cat $sys/md/sync_action`" != 'check' ]
+ then
+ eval MD_${i}_fl=
+ rm -f $fl
+ continue;
+ fi
+ read a rest < $sys/md/sync_completed
+ echo $a > $fl
+ any=yes
+ done
+ if [ -z "$any" ]; then exit 0; fi
+ sleep 120
+done
+
+# We've waited, and there are still checks running.
+# Time to stop them.
+for i in `eval echo {1..$cnt}`
+do
+ eval fl=\$MD_${i}_fl
+ eval sys=\$MD_${i}_sys
+
+ if [ -z "$fl" ]; then continue; fi
+
+ if [ "`cat $sys/md/sync_action`" != 'check' ]
+ then
+ eval MD_${i}_fl=
+ rm -f $fl
+ continue;
+ fi
+ echo idle > $sys/md/sync_action
+ cat $sys/md/sync_min > $fl
+done