diff options
author | Sage Weil <sage@newdream.net> | 2011-05-24 18:42:39 +0200 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2011-05-24 18:42:39 +0200 |
commit | 87309e946bed4175433048276b13e94e844ef958 (patch) | |
tree | c2105597a8cc6deb850973504511a212366eb9c1 | |
parent | mds: initialize stray_index on startup (diff) | |
download | ceph-87309e946bed4175433048276b13e94e844ef958.tar.xz ceph-87309e946bed4175433048276b13e94e844ef958.zip |
osd: make automatic marking of unfound as lost optional
We may not want to do this automatically until we have more confidence in
the recovery code. Even then, possibly not. In particular, the OSDs may
believe they have contacted all possible homes for the data even though there
is some long-lost OSD that has the data on disk that is offline.
For now, we make the marking process explicit so that the administrator can
make the call.
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- | src/common/config.cc | 1 | ||||
-rw-r--r-- | src/common/config.h | 1 | ||||
-rw-r--r-- | src/osd/PG.cc | 11 |
3 files changed, 10 insertions, 3 deletions
diff --git a/src/common/config.cc b/src/common/config.cc index 0794dcaa73f..3e743157807 100644 --- a/src/common/config.cc +++ b/src/common/config.cc @@ -361,6 +361,7 @@ struct config_option config_optionsp[] = { OPTION(osd_min_down_reports, OPT_INT, 3), // number of times a down OSD must be reported for it to count OPTION(osd_replay_window, OPT_INT, 45), OPTION(osd_preserve_trimmed_log, OPT_BOOL, true), + OPTION(osd_auto_mark_unfound_lost, OPT_BOOL, false), OPTION(osd_recovery_delay_start, OPT_FLOAT, 15), OPTION(osd_recovery_max_active, OPT_INT, 5), OPTION(osd_recovery_max_chunk, OPT_U64, 1<<20), // max size of push chunk diff --git a/src/common/config.h b/src/common/config.h index 76a0674d33c..5e325af7222 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -433,6 +433,7 @@ public: int osd_min_down_reports; int osd_replay_window; bool osd_preserve_trimmed_log; + bool osd_auto_mark_unfound_lost; float osd_recovery_delay_start; int osd_recovery_max_active; diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 27e85f82227..8e58d884dd7 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -4115,10 +4115,15 @@ PG::RecoveryState::Active::react(const ActMap&) { if (g_conf.osd_check_for_log_corruption) pg->check_log_for_corruption(pg->osd->store); - if (pg->missing.num_missing() > pg->missing_loc.size()) { - if (pg->all_unfound_are_lost(pg->osd->osdmap)) { + int unfound = pg->missing.num_missing() - pg->missing_loc.size(); + if (unfound > 0 && + pg->all_unfound_are_lost(pg->osd->osdmap)) { + if (g_conf.osd_auto_mark_unfound_lost) { + pg->osd->clog.error() << pg->info.pgid << " has " << unfound + << " objects unfound and apparently lost, automatically marking lost\n"; pg->mark_all_unfound_as_lost(*context< RecoveryMachine >().get_cur_transaction()); - } + } else + pg->osd->clog.error() << pg->info.pgid << " has " << unfound << " objects unfound and apparently lost\n"; } if (!pg->snap_trimq.empty() && |