mdmon: handle failures versus readauto arrays

Transition readauto arrays to active before failing drives.

Hmm... why do we keep reblocking / renotifying in the readonly case? Need to bottom out on this, but not right now.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
author: Dan Williams <dan.j.williams@intel.com> 2008-07-30 04:25:15 +0200
committer: Dan Williams <dan.j.williams@intel.com> 2008-08-15 19:58:43 +0200
commit: 9296754385aa1fecb45097ba06fc82cbc0e5f14a (patch)
tree: 17b51c8dbbf6d30d28ce407378c11ebff6b08b28
parent: mdmon: allow degraded arrays to be monitored (diff)
download: mdadm-9296754385aa1fecb45097ba06fc82cbc0e5f14a.tar.xz mdadm-9296754385aa1fecb45097ba06fc82cbc0e5f14a.zip
2 files changed, 21 insertions, 4 deletions
diff --git a/mdadm.h b/mdadm.h
index 12eef2a2..80a6f92f 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -171,6 +171,7 @@ struct mdinfo {
 	#define	DS_SPARE	8
 	#define DS_BLOCKED	16
 	#define	DS_REMOVE	1024
+	#define	DS_UNBLOCK	2048
 	int prev_state, curr_state, next_state;
 
 };
diff --git a/monitor.c b/monitor.c
index 382cad44..ffb4c9c4 100644
--- a/monitor.c
+++ b/monitor.c
@@ -284,12 +284,25 @@ static int read_and_act(struct active_array *a)
 		}
 	}
 
+	/* Check for failures and if found:
+	 * 1/ Record the failure in the metadata and unblock the device.
+	 *    FIXME update the kernel to stop notifying on failed drives when
+	 *    the array is readonly and we have cleared 'blocked'
+	 * 2/ Try to remove the device if the array is writable, or can be
+	 *    made writable.
+	 */
 	for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
 		if (mdi->curr_state & DS_FAULTY) {
 			a->container->ss->set_disk(a, mdi->disk.raid_disk,
 						   mdi->curr_state);
 			check_degraded = 1;
-			mdi->next_state = DS_REMOVE;
+			mdi->next_state |= DS_UNBLOCK;
+			if (a->curr_state == read_auto) {
+				a->container->ss->set_array_state(a, 0);
+				a->next_state = active;
+			}
+			if (a->curr_state > readonly)
+				mdi->next_state |= DS_REMOVE;
 		}
 	}
 
@@ -306,15 +319,18 @@ static int read_and_act(struct active_array *a)
 		dprintf(" action:%s", array_states[a->next_state]);
 	}
 	for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
-		if (mdi->next_state == DS_REMOVE && mdi->state_fd >= 0) {
+		if (mdi->next_state & DS_UNBLOCK) {
+			dprintf(" %d:-blocked", mdi->disk.raid_disk);
+			write_attr("-blocked", mdi->state_fd);
+		}
+
+		if ((mdi->next_state & DS_REMOVE) && mdi->state_fd >= 0) {
 			int remove_result;
 
-			write_attr("-blocked", mdi->state_fd);
 			/* the kernel may not be able to immediately remove the
 			 * disk, we can simply wait until the next event to try
 			 * again.
 			 */
-			dprintf(" %d:-blocked", mdi->disk.raid_disk);
 			remove_result = write_attr("remove", mdi->state_fd);
 			if (remove_result > 0) {
 				dprintf(" %d:removed", mdi->disk.raid_disk);
author	Dan Williams <dan.j.williams@intel.com>	2008-07-30 04:25:15 +0200
committer	Dan Williams <dan.j.williams@intel.com>	2008-08-15 19:58:43 +0200
commit	9296754385aa1fecb45097ba06fc82cbc0e5f14a (patch)
tree	17b51c8dbbf6d30d28ce407378c11ebff6b08b28
parent	mdmon: allow degraded arrays to be monitored (diff)
download	mdadm-9296754385aa1fecb45097ba06fc82cbc0e5f14a.tar.xz mdadm-9296754385aa1fecb45097ba06fc82cbc0e5f14a.zip