summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMariusz Tkaczyk <mariusz.tkaczyk@intel.com>2018-10-17 12:11:41 +0200
committerJes Sorensen <jsorensen@fb.com>2018-12-06 13:46:40 +0100
commitd7a1fda2769ba272d89de6caeab35d52b73a9c3c (patch)
treebc2eb8f5ac72c63a0dae76be63f7ea3e64bff287
parentMonitor: add system timer to run --oneshot periodically (diff)
downloadmdadm-d7a1fda2769ba272d89de6caeab35d52b73a9c3c.tar.xz
mdadm-d7a1fda2769ba272d89de6caeab35d52b73a9c3c.zip
imsm: update metadata correctly while raid10 double degradation
Mdmon calls end_migration() when map state changes from normal to degraded. It is not valid because in raid 10 double degradation case mdmon breaks checkpointing but array is still rebuilding. In this case mdmon has to mark map as degraded and continues marking recovery checkpoint in metadata. Migration can be finished only if newly failed device is a rebuilding device. Add catching double degraded to degraded transition. Migration is finished but map state doesn't change, array is still degraded. Update failed_disk_num correctly. If double degradation happens rebuild will start on the lowest slot, but this variable points to the first failed slot. If second fail happens while rebuild this variable shouldn't be updated until rebuild is not finished. Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com> Signed-off-by: Jes Sorensen <jsorensen@fb.com>
-rw-r--r--super-intel.c25
1 files changed, 19 insertions, 6 deletions
diff --git a/super-intel.c b/super-intel.c
index 6438987b..d2035ccd 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8136,7 +8136,8 @@ static int mark_failure(struct intel_super *super,
set_imsm_ord_tbl_ent(map2, slot2,
idx | IMSM_ORD_REBUILD);
}
- if (map->failed_disk_num == 0xff)
+ if (map->failed_disk_num == 0xff ||
+ (!is_rebuilding(dev) && map->failed_disk_num > slot))
map->failed_disk_num = slot;
clear_disk_badblocks(super->bbm_log, ord_to_idx(ord));
@@ -8558,13 +8559,25 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
break;
}
if (is_rebuilding(dev)) {
- dprintf_cont("while rebuilding.");
+ dprintf_cont("while rebuilding ");
if (map->map_state != map_state) {
- dprintf_cont(" Map state change");
- end_migration(dev, super, map_state);
+ dprintf_cont("map state change ");
+ if (n == map->failed_disk_num) {
+ dprintf_cont("end migration");
+ end_migration(dev, super, map_state);
+ } else {
+ dprintf_cont("raid10 double degradation, map state change");
+ map->map_state = map_state;
+ }
super->updates_pending++;
- } else if (!rebuild_done) {
+ } else if (!rebuild_done)
break;
+ else if (n == map->failed_disk_num) {
+ /* r10 double degraded to degraded transition */
+ dprintf_cont("raid10 double degradation end migration");
+ end_migration(dev, super, map_state);
+ a->last_checkpoint = 0;
+ super->updates_pending++;
}
/* check if recovery is really finished */
@@ -8575,7 +8588,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
}
if (recovery_not_finished) {
dprintf_cont("\n");
- dprintf("Rebuild has not finished yet, state not changed");
+ dprintf_cont("Rebuild has not finished yet, map state changes only if raid10 double degradation happens");
if (a->last_checkpoint < mdi->recovery_start) {
a->last_checkpoint =
mdi->recovery_start;