summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSage Weil <sage@newdream.net>2011-05-24 18:42:39 +0200
committerSage Weil <sage@newdream.net>2011-05-24 18:42:39 +0200
commit87309e946bed4175433048276b13e94e844ef958 (patch)
treec2105597a8cc6deb850973504511a212366eb9c1
parentmds: initialize stray_index on startup (diff)
downloadceph-87309e946bed4175433048276b13e94e844ef958.tar.xz
ceph-87309e946bed4175433048276b13e94e844ef958.zip
osd: make automatic marking of unfound objects as lost optional
We may not want to do this automatically until we have more confidence in the recovery code. Even then, possibly not. In particular, the OSDs may believe they have contacted all possible homes for the data even though there is some long-lost OSD that has the data on disk but is offline. For now, we make the marking process explicit so that the administrator can make the call. Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r--src/common/config.cc1
-rw-r--r--src/common/config.h1
-rw-r--r--src/osd/PG.cc11
3 files changed, 10 insertions, 3 deletions
diff --git a/src/common/config.cc b/src/common/config.cc
index 0794dcaa73f..3e743157807 100644
--- a/src/common/config.cc
+++ b/src/common/config.cc
@@ -361,6 +361,7 @@ struct config_option config_optionsp[] = {
OPTION(osd_min_down_reports, OPT_INT, 3), // number of times a down OSD must be reported for it to count
OPTION(osd_replay_window, OPT_INT, 45),
OPTION(osd_preserve_trimmed_log, OPT_BOOL, true),
+ OPTION(osd_auto_mark_unfound_lost, OPT_BOOL, false),
OPTION(osd_recovery_delay_start, OPT_FLOAT, 15),
OPTION(osd_recovery_max_active, OPT_INT, 5),
OPTION(osd_recovery_max_chunk, OPT_U64, 1<<20), // max size of push chunk
diff --git a/src/common/config.h b/src/common/config.h
index 76a0674d33c..5e325af7222 100644
--- a/src/common/config.h
+++ b/src/common/config.h
@@ -433,6 +433,7 @@ public:
int osd_min_down_reports;
int osd_replay_window;
bool osd_preserve_trimmed_log;
+ bool osd_auto_mark_unfound_lost;
float osd_recovery_delay_start;
int osd_recovery_max_active;
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 27e85f82227..8e58d884dd7 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -4115,10 +4115,15 @@ PG::RecoveryState::Active::react(const ActMap&) {
if (g_conf.osd_check_for_log_corruption)
pg->check_log_for_corruption(pg->osd->store);
- if (pg->missing.num_missing() > pg->missing_loc.size()) {
- if (pg->all_unfound_are_lost(pg->osd->osdmap)) {
+ int unfound = pg->missing.num_missing() - pg->missing_loc.size();
+ if (unfound > 0 &&
+ pg->all_unfound_are_lost(pg->osd->osdmap)) {
+ if (g_conf.osd_auto_mark_unfound_lost) {
+ pg->osd->clog.error() << pg->info.pgid << " has " << unfound
+ << " objects unfound and apparently lost, automatically marking lost\n";
pg->mark_all_unfound_as_lost(*context< RecoveryMachine >().get_cur_transaction());
- }
+ } else
+ pg->osd->clog.error() << pg->info.pgid << " has " << unfound << " objects unfound and apparently lost\n";
}
if (!pg->snap_trimq.empty() &&