summaryrefslogtreecommitdiffstats
path: root/src/mon
diff options
context:
space:
mode:
authorYuri Weinstein <yweinste@redhat.com>2025-01-22 01:53:51 +0100
committerGitHub <noreply@github.com>2025-01-22 01:53:51 +0100
commit2c8bfcf2094ab2af352757ac4fc2813c27c26020 (patch)
treeab53330f8796d5229ee340e788879d9acc584190 /src/mon
parentMerge pull request #60984 from ljflores/wip-fix-telemetry-tests (diff)
parentmon: Encapuslate all scrub related objects into a single atomic ScrubContext (diff)
downloadceph-2c8bfcf2094ab2af352757ac4fc2813c27c26020.tar.xz
ceph-2c8bfcf2094ab2af352757ac4fc2813c27c26020.zip
Merge pull request #59050 from mohit84/issue_67270
mon: Create a local copy of scrub_state to avoid a crash Reviewed-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
Diffstat (limited to 'src/mon')
-rw-r--r--src/mon/Monitor.cc68
-rw-r--r--src/mon/Monitor.h20
2 files changed, 56 insertions, 32 deletions
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 833bdddc71b..d64ef16e4bd 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -184,7 +184,6 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s,
leader(0),
quorum_con_features(0),
// scrub
- scrub_version(0),
scrub_event(NULL),
scrub_timeout_event(NULL),
@@ -5576,14 +5575,16 @@ int Monitor::scrub_start()
dout(10) << __func__ << dendl;
ceph_assert(is_leader());
- if (!scrub_result.empty()) {
- clog->info() << "scrub already in progress";
- return -EBUSY;
+ if (boost::shared_ptr<ScrubContext> local_ctx = scrub_ctx.load(); local_ctx) {
+ if (!local_ctx->scrub_result.empty()) {
+ clog->info() << "scrub already in progress";
+ return -EBUSY;
+ }
+ local_ctx->scrub_result.clear();
}
scrub_event_cancel();
- scrub_result.clear();
- scrub_state.reset(new ScrubState);
+ scrub_ctx.store(boost::make_shared<ScrubContext>());
scrub();
return 0;
@@ -5592,12 +5593,15 @@ int Monitor::scrub_start()
int Monitor::scrub()
{
ceph_assert(is_leader());
- ceph_assert(scrub_state);
scrub_cancel_timeout();
wait_for_paxos_write();
- scrub_version = paxos->get_version();
+ boost::shared_ptr<ScrubContext> local_ctx = scrub_ctx.load();
+ if (!local_ctx)
+ return -1; // scrub aborted
+
+ local_ctx->scrub_version = paxos->get_version();
// scrub all keys if we're the only monitor in the quorum
int32_t num_keys =
@@ -5608,18 +5612,18 @@ int Monitor::scrub()
++p) {
if (*p == rank)
continue;
- MMonScrub *r = new MMonScrub(MMonScrub::OP_SCRUB, scrub_version,
+ MMonScrub *r = new MMonScrub(MMonScrub::OP_SCRUB, local_ctx->scrub_version,
num_keys);
- r->key = scrub_state->last_key;
+ r->key = local_ctx->scrub_state.last_key;
send_mon_message(r, *p);
}
// scrub my keys
- bool r = _scrub(&scrub_result[rank],
- &scrub_state->last_key,
+ bool r = _scrub(&local_ctx->scrub_result[rank],
+ &local_ctx->scrub_state.last_key,
&num_keys);
- scrub_state->finished = !r;
+ local_ctx->scrub_state.finished = !r;
// only after we got our scrub results do we really care whether the
// other monitors are late on their results. Also, this way we avoid
@@ -5628,7 +5632,7 @@ int Monitor::scrub()
scrub_reset_timeout();
if (quorum.size() == 1) {
- ceph_assert(scrub_state->finished == true);
+ ceph_assert(local_ctx->scrub_state.finished == true);
scrub_finish();
}
return 0;
@@ -5663,28 +5667,33 @@ void Monitor::handle_scrub(MonOpRequestRef op)
{
if (!is_leader())
break;
- if (m->version != scrub_version)
+
+ boost::shared_ptr<ScrubContext> local_ctx = scrub_ctx.load();
+ if (!local_ctx)
+ break; // scrub aborted
+
+ if (m->version != local_ctx->scrub_version)
break;
// reset the timeout each time we get a result
scrub_reset_timeout();
int from = m->get_source().num();
- ceph_assert(scrub_result.count(from) == 0);
- scrub_result[from] = m->result;
+ ceph_assert(local_ctx->scrub_result.count(from) == 0);
+ local_ctx->scrub_result[from] = m->result;
- if (scrub_result.size() == quorum.size()) {
+ if (local_ctx->scrub_result.size() == quorum.size()) {
scrub_check_results();
- scrub_result.clear();
- if (scrub_state->finished) {
- const utime_t lat = ceph_clock_now() - scrub_state->start;
+ local_ctx->scrub_result.clear();
+ if (local_ctx->scrub_state.finished) {
+ const utime_t lat = ceph_clock_now() - local_ctx->scrub_state.start;
dout(10) << __func__ << " mon scrub latency: " << lat << dendl;
scrub_finish();
} else {
scrub();
}
}
+ break;
}
- break;
}
}
@@ -5767,9 +5776,11 @@ void Monitor::scrub_check_results()
// compare
int errors = 0;
- ScrubResult& mine = scrub_result[rank];
- for (map<int,ScrubResult>::iterator p = scrub_result.begin();
- p != scrub_result.end();
+
+ boost::shared_ptr<ScrubContext> local_ctx = scrub_ctx.load();
+ ScrubResult& mine = local_ctx->scrub_result[rank];
+ for (map<int,ScrubResult>::iterator p = local_ctx->scrub_result.begin();
+ p != local_ctx->scrub_result.end();
++p) {
if (p->first == rank)
continue;
@@ -5802,9 +5813,7 @@ void Monitor::scrub_reset()
{
dout(10) << __func__ << dendl;
scrub_cancel_timeout();
- scrub_version = 0;
- scrub_result.clear();
- scrub_state.reset();
+ scrub_ctx.store(nullptr);
}
inline void Monitor::scrub_update_interval(ceph::timespan interval)
@@ -5818,7 +5827,8 @@ inline void Monitor::scrub_update_interval(ceph::timespan interval)
// if scrub already in progress, all changes will already be visible during
// the next round. Nothing to do.
- if (scrub_state != NULL)
+ boost::shared_ptr<ScrubContext> local_ctx = scrub_ctx.load();
+ if (local_ctx)
return;
scrub_event_cancel();
diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h
index 557edbf2eb4..9f094448b1a 100644
--- a/src/mon/Monitor.h
+++ b/src/mon/Monitor.h
@@ -53,6 +53,8 @@
#include "messages/MMonCommand.h"
#include "mon/MonitorDBStore.h"
#include "mgr/MgrClient.h"
+#include <boost/smart_ptr/atomic_shared_ptr.hpp>
+#include <boost/smart_ptr/shared_ptr.hpp>
#include "mon/MonOpRequest.h"
#include "common/WorkQueue.h"
@@ -312,8 +314,6 @@ private:
* @defgroup Monitor_h_scrub
* @{
*/
- version_t scrub_version; ///< paxos version we are scrubbing
- std::map<int,ScrubResult> scrub_result; ///< results so far
/**
* trigger a cross-mon scrub
@@ -348,7 +348,21 @@ private:
start(ceph_clock_now()) { }
virtual ~ScrubState() { }
};
- std::shared_ptr<ScrubState> scrub_state; ///< keeps track of current scrub
+
+ struct ScrubContext {
+ ScrubState scrub_state; ///< keeps track of current scrub
+ version_t scrub_version; ///< paxos version we are scrubbing
+ std::map<int,ScrubResult> scrub_result; ///< result so far
+ ScrubContext() {
+ scrub_version = 0;
+ scrub_result.clear();
+ }
+ ~ScrubContext() {
+ scrub_version = 0;
+ scrub_result.clear();
+ }
+ };
+ boost::atomic_shared_ptr<ScrubContext> scrub_ctx; ///< keeps track of scrub_context
/**
* @defgroup Monitor_h_sync Synchronization