summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Xuehan Xu <xuxuehan@qianxin.com> 2024-10-08 06:26:41 +0200
committer Xuehan Xu <xuxuehan@qianxin.com> 2024-12-16 07:06:32 +0100
commit a34b0ce2aa6820706167e8ea3119160e6e68f157 (patch)
tree a4d720ec0d7cffd17d05bbd24480a0c51cdb701d
parent crimson/osd/backfill_state: drop Cancelled (diff)
download ceph-a34b0ce2aa6820706167e8ea3119160e6e68f157.tar.xz
ceph-a34b0ce2aa6820706167e8ea3119160e6e68f157.zip
crimson/osd/backfill_state: treat Cancelled as a pause of the ongoing backfilling
Fixes: https://tracker.ceph.com/issues/67888
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
-rw-r--r-- src/crimson/osd/backfill_state.cc 87
-rw-r--r-- src/crimson/osd/backfill_state.h 30
2 files changed, 110 insertions, 7 deletions
diff --git a/src/crimson/osd/backfill_state.cc b/src/crimson/osd/backfill_state.cc
index 62607c7fbeb..1392ee330ac 100644
--- a/src/crimson/osd/backfill_state.cc
+++ b/src/crimson/osd/backfill_state.cc
@@ -407,7 +407,34 @@ BackfillState::PrimaryScanning::react(PrimaryScanned evt)
LOG_PREFIX(BackfillState::PrimaryScanning::react::PrimaryScanned);
DEBUGDPP("", pg());
backfill_state().backfill_info = std::move(evt.result);
- return transit<Enqueuing>();
+ if (!backfill_state().is_suspended()) {
+ return transit<Enqueuing>();
+ } else {
+ DEBUGDPP("backfill suspended, not going Enqueuing", pg());
+ backfill_state().go_enqueuing_on_resume();
+ }
+ return discard_event();
+}
+
+boost::statechart::result
+BackfillState::PrimaryScanning::react(CancelBackfill evt)
+{
+ LOG_PREFIX(BackfillState::PrimaryScanning::react::SuspendBackfill);
+ DEBUGDPP("suspended within PrimaryScanning", pg());
+ backfill_state().on_suspended();
+ return discard_event();
+}
+
+boost::statechart::result
+BackfillState::PrimaryScanning::react(Triggered evt)
+{
+ LOG_PREFIX(BackfillState::PrimaryScanning::react::Triggered);
+ ceph_assert(backfill_state().is_suspended());
+ if (backfill_state().on_resumed()) {
+ DEBUGDPP("Backfill resumed, going Enqueuing", pg());
+ return transit<Enqueuing>();
+ }
+ return discard_event();
}
boost::statechart::result
@@ -470,12 +497,17 @@ BackfillState::ReplicasScanning::react(ReplicaScanned evt)
if (waiting_on_backfill.empty()) {
ceph_assert(backfill_state().peer_backfill_info.size() == \
peering_state().get_backfill_targets().size());
- return transit<Enqueuing>();
+ if (!backfill_state().is_suspended()) {
+ return transit<Enqueuing>();
+ } else {
+ DEBUGDPP("backfill suspended, not going Enqueuing", pg());
+ backfill_state().go_enqueuing_on_resume();
+ }
}
} else {
- // we canceled backfill for a while due to a too full, and this
+ // we suspended backfill for a while due to a too full, and this
// is an extra response from a non-too-full peer
- DEBUGDPP("canceled backfill (too full?)", pg());
+ DEBUGDPP("suspended backfill (too full?)", pg());
}
return discard_event();
}
@@ -483,8 +515,22 @@ BackfillState::ReplicasScanning::react(ReplicaScanned evt)
boost::statechart::result
BackfillState::ReplicasScanning::react(CancelBackfill evt)
{
- LOG_PREFIX(BackfillState::ReplicasScanning::react::CancelBackfill);
- DEBUGDPP("cancelled within ReplicasScanning", pg());
+ LOG_PREFIX(BackfillState::ReplicasScanning::react::SuspendBackfill);
+ DEBUGDPP("suspended within ReplicasScanning", pg());
+ backfill_state().on_suspended();
+ return discard_event();
+}
+
+boost::statechart::result
+BackfillState::ReplicasScanning::react(Triggered evt)
+{
+ LOG_PREFIX(BackfillState::ReplicasScanning::react::Triggered);
+ ceph_assert(backfill_state().is_suspended());
+ if (backfill_state().on_resumed()) {
+ DEBUGDPP("Backfill resumed, going Enqueuing", pg());
+ return transit<Enqueuing>();
+ }
+ return discard_event();
}
boost::statechart::result
@@ -510,7 +556,34 @@ BackfillState::Waiting::react(ObjectPushed evt)
LOG_PREFIX(BackfillState::Waiting::react::ObjectPushed);
DEBUGDPP("Waiting::react() on ObjectPushed; evt.object={}", pg(), evt.object);
backfill_state().progress_tracker->complete_to(evt.object, evt.stat, false);
- return transit<Enqueuing>();
+ if (!backfill_state().is_suspended()) {
+ return transit<Enqueuing>();
+ } else {
+ DEBUGDPP("backfill suspended, not going Enqueuing", pg());
+ backfill_state().go_enqueuing_on_resume();
+ }
+ return discard_event();
+}
+
+boost::statechart::result
+BackfillState::Waiting::react(CancelBackfill evt)
+{
+ LOG_PREFIX(BackfillState::Waiting::react::SuspendBackfill);
+ DEBUGDPP("suspended within Waiting", pg());
+ backfill_state().on_suspended();
+ return discard_event();
+}
+
+boost::statechart::result
+BackfillState::Waiting::react(Triggered evt)
+{
+ LOG_PREFIX(BackfillState::Waiting::react::Triggered);
+ ceph_assert(backfill_state().is_suspended());
+ if (backfill_state().on_resumed()) {
+ DEBUGDPP("Backfill resumed, going Enqueuing", pg());
+ return transit<Enqueuing>();
+ }
+ return discard_event();
}
// -- Done
diff --git a/src/crimson/osd/backfill_state.h b/src/crimson/osd/backfill_state.h
index 34400d930b2..463be4a7a2e 100644
--- a/src/crimson/osd/backfill_state.h
+++ b/src/crimson/osd/backfill_state.h
@@ -210,11 +210,15 @@ public:
sc::custom_reaction<ObjectPushed>,
sc::custom_reaction<PrimaryScanned>,
sc::transition<RequestDone, Done>,
+ sc::custom_reaction<CancelBackfill>,
+ sc::custom_reaction<Triggered>,
sc::transition<sc::event_base, Crashed>>;
explicit PrimaryScanning(my_context);
sc::result react(ObjectPushed);
// collect scanning result and transit to Enqueuing.
sc::result react(PrimaryScanned);
+ sc::result react(CancelBackfill);
+ sc::result react(Triggered);
};
struct ReplicasScanning : sc::state<ReplicasScanning, BackfillMachine>,
@@ -223,6 +227,7 @@ public:
sc::custom_reaction<ObjectPushed>,
sc::custom_reaction<ReplicaScanned>,
sc::custom_reaction<CancelBackfill>,
+ sc::custom_reaction<Triggered>,
sc::transition<RequestDone, Done>,
sc::transition<sc::event_base, Crashed>>;
explicit ReplicasScanning(my_context);
@@ -231,6 +236,7 @@ public:
sc::result react(ObjectPushed);
sc::result react(ReplicaScanned);
sc::result react(CancelBackfill);
+ sc::result react(Triggered);
// indicate whether a particular peer should be scanned to retrieve
// BackfillInterval for new range of hobject_t namespace.
@@ -249,9 +255,13 @@ public:
using reactions = boost::mpl::list<
sc::custom_reaction<ObjectPushed>,
sc::transition<RequestDone, Done>,
+ sc::custom_reaction<CancelBackfill>,
+ sc::custom_reaction<Triggered>,
sc::transition<sc::event_base, Crashed>>;
explicit Waiting(my_context);
sc::result react(ObjectPushed);
+ sc::result react(CancelBackfill);
+ sc::result react(Triggered);
};
struct Done : sc::state<Done, BackfillMachine>,
@@ -296,6 +306,26 @@ public:
}
}
private:
+ struct backfill_suspend_state_t {
+ bool suspended = false;
+ bool should_go_enqueuing = false;
+ } backfill_suspend_state;
+ bool is_suspended() const {
+ return backfill_suspend_state.suspended;
+ }
+ void on_suspended() {
+ ceph_assert(!is_suspended());
+ backfill_suspend_state = {true, false};
+ }
+ bool on_resumed() {
+ auto go_enqueuing = backfill_suspend_state.should_go_enqueuing;
+ backfill_suspend_state = {false, false};
+ return go_enqueuing;
+ }
+ void go_enqueuing_on_resume() {
+ ceph_assert(is_suspended());
+ backfill_suspend_state.should_go_enqueuing = true;
+ }
hobject_t last_backfill_started;
BackfillInterval backfill_info;
std::map<pg_shard_t, BackfillInterval> peer_backfill_info;