From 9296754385aa1fecb45097ba06fc82cbc0e5f14a Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Tue, 29 Jul 2008 19:25:15 -0700
Subject: mdmon: handle failures versus readauto arrays

Transition readauto arrays to active before failing drives.

Hmm... why do we keep reblocking / renotifying in the readonly case? Need
to bottom out on this, but not right now.

Signed-off-by: Dan Williams
---
 mdadm.h   |  1 +
 monitor.c | 24 ++++++++++++++++++++----
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/mdadm.h b/mdadm.h
index 12eef2a2..80a6f92f 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -171,6 +171,7 @@ struct mdinfo {
 	#define DS_SPARE 8
 	#define DS_BLOCKED 16
 	#define DS_REMOVE 1024
+	#define DS_UNBLOCK 2048
 	int prev_state, curr_state, next_state;
 
 };
diff --git a/monitor.c b/monitor.c
index 382cad44..ffb4c9c4 100644
--- a/monitor.c
+++ b/monitor.c
@@ -284,12 +284,25 @@ static int read_and_act(struct active_array *a)
 		}
 	}
 
+	/* Check for failures and if found:
+	 * 1/ Record the failure in the metadata and unblock the device.
+	 *    FIXME update the kernel to stop notifying on failed drives when
+	 *    the array is readonly and we have cleared 'blocked'
+	 * 2/ Try to remove the device if the array is writable, or can be
+	 *    made writable.
+	 */
 	for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
 		if (mdi->curr_state & DS_FAULTY) {
 			a->container->ss->set_disk(a, mdi->disk.raid_disk,
 						   mdi->curr_state);
 			check_degraded = 1;
-			mdi->next_state = DS_REMOVE;
+			mdi->next_state |= DS_UNBLOCK;
+			if (a->curr_state == read_auto) {
+				a->container->ss->set_array_state(a, 0);
+				a->next_state = active;
+			}
+			if (a->curr_state > readonly)
+				mdi->next_state |= DS_REMOVE;
 		}
 	}
 
@@ -306,15 +319,18 @@ static int read_and_act(struct active_array *a)
 		dprintf(" action:%s", array_states[a->next_state]);
 	}
 	for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
-		if (mdi->next_state == DS_REMOVE && mdi->state_fd >= 0) {
+		if (mdi->next_state & DS_UNBLOCK) {
+			dprintf(" %d:-blocked", mdi->disk.raid_disk);
+			write_attr("-blocked", mdi->state_fd);
+		}
+
+		if ((mdi->next_state & DS_REMOVE) && mdi->state_fd >= 0) {
 			int remove_result;
 
-			write_attr("-blocked", mdi->state_fd);
 			/* the kernel may not be able to immediately remove the
 			 * disk, we can simply wait until the next event to try
 			 * again.
 			 */
-			dprintf(" %d:-blocked", mdi->disk.raid_disk);
 			remove_result = write_attr("remove", mdi->state_fd);
 			if (remove_result > 0) {
 				dprintf(" %d:removed", mdi->disk.raid_disk);
--
cgit v1.2.3