diff options
author | Xuehan Xu <xuxuehan@qianxin.com> | 2024-10-08 06:26:41 +0200 |
---|---|---|
committer | Xuehan Xu <xuxuehan@qianxin.com> | 2024-12-16 07:06:32 +0100 |
commit | a34b0ce2aa6820706167e8ea3119160e6e68f157 (patch) | |
tree | a4d720ec0d7cffd17d05bbd24480a0c51cdb701d | |
parent | crimson/osd/backfill_state: drop Cancelled (diff) | |
download | ceph-a34b0ce2aa6820706167e8ea3119160e6e68f157.tar.xz ceph-a34b0ce2aa6820706167e8ea3119160e6e68f157.zip |
crimson/osd/backfill_state: treat Cancelled as a pause of the ongoing backfilling
Fixes: https://tracker.ceph.com/issues/67888
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
-rw-r--r-- | src/crimson/osd/backfill_state.cc | 87 | ||||
-rw-r--r-- | src/crimson/osd/backfill_state.h | 30 |
2 files changed, 110 insertions, 7 deletions
diff --git a/src/crimson/osd/backfill_state.cc b/src/crimson/osd/backfill_state.cc index 62607c7fbeb..1392ee330ac 100644 --- a/src/crimson/osd/backfill_state.cc +++ b/src/crimson/osd/backfill_state.cc @@ -407,7 +407,34 @@ BackfillState::PrimaryScanning::react(PrimaryScanned evt) LOG_PREFIX(BackfillState::PrimaryScanning::react::PrimaryScanned); DEBUGDPP("", pg()); backfill_state().backfill_info = std::move(evt.result); - return transit<Enqueuing>(); + if (!backfill_state().is_suspended()) { + return transit<Enqueuing>(); + } else { + DEBUGDPP("backfill suspended, not going Enqueuing", pg()); + backfill_state().go_enqueuing_on_resume(); + } + return discard_event(); +} + +boost::statechart::result +BackfillState::PrimaryScanning::react(CancelBackfill evt) +{ + LOG_PREFIX(BackfillState::PrimaryScanning::react::SuspendBackfill); + DEBUGDPP("suspended within PrimaryScanning", pg()); + backfill_state().on_suspended(); + return discard_event(); +} + +boost::statechart::result +BackfillState::PrimaryScanning::react(Triggered evt) +{ + LOG_PREFIX(BackfillState::PrimaryScanning::react::Triggered); + ceph_assert(backfill_state().is_suspended()); + if (backfill_state().on_resumed()) { + DEBUGDPP("Backfill resumed, going Enqueuing", pg()); + return transit<Enqueuing>(); + } + return discard_event(); } boost::statechart::result @@ -470,12 +497,17 @@ BackfillState::ReplicasScanning::react(ReplicaScanned evt) if (waiting_on_backfill.empty()) { ceph_assert(backfill_state().peer_backfill_info.size() == \ peering_state().get_backfill_targets().size()); - return transit<Enqueuing>(); + if (!backfill_state().is_suspended()) { + return transit<Enqueuing>(); + } else { + DEBUGDPP("backfill suspended, not going Enqueuing", pg()); + backfill_state().go_enqueuing_on_resume(); + } } } else { - // we canceled backfill for a while due to a too full, and this + // we suspended backfill for a while due to a too full, and this // is an extra response from a non-too-full peer - DEBUGDPP("canceled backfill (too full?)", pg()); + DEBUGDPP("suspended backfill (too full?)", pg()); } return discard_event(); } @@ -483,8 +515,22 @@ BackfillState::ReplicasScanning::react(ReplicaScanned evt) boost::statechart::result BackfillState::ReplicasScanning::react(CancelBackfill evt) { - LOG_PREFIX(BackfillState::ReplicasScanning::react::CancelBackfill); - DEBUGDPP("cancelled within ReplicasScanning", pg()); + LOG_PREFIX(BackfillState::ReplicasScanning::react::SuspendBackfill); + DEBUGDPP("suspended within ReplicasScanning", pg()); + backfill_state().on_suspended(); + return discard_event(); +} + +boost::statechart::result +BackfillState::ReplicasScanning::react(Triggered evt) +{ + LOG_PREFIX(BackfillState::ReplicasScanning::react::Triggered); + ceph_assert(backfill_state().is_suspended()); + if (backfill_state().on_resumed()) { + DEBUGDPP("Backfill resumed, going Enqueuing", pg()); + return transit<Enqueuing>(); + } + return discard_event(); } boost::statechart::result @@ -510,7 +556,34 @@ BackfillState::Waiting::react(ObjectPushed evt) LOG_PREFIX(BackfillState::Waiting::react::ObjectPushed); DEBUGDPP("Waiting::react() on ObjectPushed; evt.object={}", pg(), evt.object); backfill_state().progress_tracker->complete_to(evt.object, evt.stat, false); - return transit<Enqueuing>(); + if (!backfill_state().is_suspended()) { + return transit<Enqueuing>(); + } else { + DEBUGDPP("backfill suspended, not going Enqueuing", pg()); + backfill_state().go_enqueuing_on_resume(); + } + return discard_event(); +} + +boost::statechart::result +BackfillState::Waiting::react(CancelBackfill evt) +{ + LOG_PREFIX(BackfillState::Waiting::react::SuspendBackfill); + DEBUGDPP("suspended within Waiting", pg()); + backfill_state().on_suspended(); + return discard_event(); +} + +boost::statechart::result +BackfillState::Waiting::react(Triggered evt) +{ + LOG_PREFIX(BackfillState::Waiting::react::Triggered); + ceph_assert(backfill_state().is_suspended()); + if (backfill_state().on_resumed()) { + DEBUGDPP("Backfill resumed, going Enqueuing", pg()); + return transit<Enqueuing>(); + } + return discard_event(); } // -- Done diff --git a/src/crimson/osd/backfill_state.h b/src/crimson/osd/backfill_state.h index 34400d930b2..463be4a7a2e 100644 --- a/src/crimson/osd/backfill_state.h +++ b/src/crimson/osd/backfill_state.h @@ -210,11 +210,15 @@ public: sc::custom_reaction<ObjectPushed>, sc::custom_reaction<PrimaryScanned>, sc::transition<RequestDone, Done>, + sc::custom_reaction<CancelBackfill>, + sc::custom_reaction<Triggered>, sc::transition<sc::event_base, Crashed>>; explicit PrimaryScanning(my_context); sc::result react(ObjectPushed); // collect scanning result and transit to Enqueuing. sc::result react(PrimaryScanned); + sc::result react(CancelBackfill); + sc::result react(Triggered); }; struct ReplicasScanning : sc::state<ReplicasScanning, BackfillMachine>, @@ -223,6 +227,7 @@ public: sc::custom_reaction<ObjectPushed>, sc::custom_reaction<ReplicaScanned>, sc::custom_reaction<CancelBackfill>, + sc::custom_reaction<Triggered>, sc::transition<RequestDone, Done>, sc::transition<sc::event_base, Crashed>>; explicit ReplicasScanning(my_context); @@ -231,6 +236,7 @@ public: sc::result react(ObjectPushed); sc::result react(ReplicaScanned); sc::result react(CancelBackfill); + sc::result react(Triggered); // indicate whether a particular peer should be scanned to retrieve // BackfillInterval for new range of hobject_t namespace. @@ -249,9 +255,13 @@ public: using reactions = boost::mpl::list< sc::custom_reaction<ObjectPushed>, sc::transition<RequestDone, Done>, + sc::custom_reaction<CancelBackfill>, + sc::custom_reaction<Triggered>, sc::transition<sc::event_base, Crashed>>; explicit Waiting(my_context); sc::result react(ObjectPushed); + sc::result react(CancelBackfill); + sc::result react(Triggered); }; struct Done : sc::state<Done, BackfillMachine>, @@ -296,6 +306,26 @@ public: } } private: + struct backfill_suspend_state_t { + bool suspended = false; + bool should_go_enqueuing = false; + } backfill_suspend_state; + bool is_suspended() const { + return backfill_suspend_state.suspended; + } + void on_suspended() { + ceph_assert(!is_suspended()); + backfill_suspend_state = {true, false}; + } + bool on_resumed() { + auto go_enqueuing = backfill_suspend_state.should_go_enqueuing; + backfill_suspend_state = {false, false}; + return go_enqueuing; + } + void go_enqueuing_on_resume() { + ceph_assert(is_suspended()); + backfill_suspend_state.should_go_enqueuing = true; + } hobject_t last_backfill_started; BackfillInterval backfill_info; std::map<pg_shard_t, BackfillInterval> peer_backfill_info; |