diff options
author | Xuehan Xu <xuxuehan@qianxin.com> | 2024-09-29 11:26:04 +0200 |
---|---|---|
committer | Xuehan Xu <xuxuehan@qianxin.com> | 2024-12-19 03:08:33 +0100 |
commit | 1f1051d91f320636f513faeb2d113542bcb9e5c3 (patch) | |
tree | 33b301a413651657eeb06c598c78290d4d443393 /src/crimson | |
parent | Merge pull request #60976 from rhcs-dashboard/fix-69144-main (diff) | |
download | ceph-1f1051d91f320636f513faeb2d113542bcb9e5c3.tar.xz ceph-1f1051d91f320636f513faeb2d113542bcb9e5c3.zip |
crimson/osd/pg_shard_manager: discard outdated operations when the
corresponding PGs have already been removed
Fixes: https://tracker.ceph.com/issues/68286
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
Diffstat (limited to 'src/crimson')
-rw-r--r-- | src/crimson/osd/osd_operation.h | 3 | ||||
-rw-r--r-- | src/crimson/osd/osd_operations/client_request.h | 4 | ||||
-rw-r--r-- | src/crimson/osd/osd_operations/logmissing_request.h | 3 | ||||
-rw-r--r-- | src/crimson/osd/osd_operations/logmissing_request_reply.h | 3 | ||||
-rw-r--r-- | src/crimson/osd/osd_operations/peering_event.h | 8 | ||||
-rw-r--r-- | src/crimson/osd/osd_operations/pg_advance_map.h | 4 | ||||
-rw-r--r-- | src/crimson/osd/osd_operations/recovery_subrequest.h | 3 | ||||
-rw-r--r-- | src/crimson/osd/osd_operations/replicated_request.h | 3 | ||||
-rw-r--r-- | src/crimson/osd/osd_operations/scrub_events.h | 12 | ||||
-rw-r--r-- | src/crimson/osd/pg_shard_manager.h | 46 | ||||
-rw-r--r-- | src/crimson/osd/shard_services.cc | 5 | ||||
-rw-r--r-- | src/crimson/osd/shard_services.h | 2 |
12 files changed, 82 insertions, 14 deletions
diff --git a/src/crimson/osd/osd_operation.h b/src/crimson/osd/osd_operation.h index 2897a7e1623..8f525c6a8a4 100644 --- a/src/crimson/osd/osd_operation.h +++ b/src/crimson/osd/osd_operation.h @@ -211,6 +211,9 @@ protected: public: static constexpr bool is_trackable = true; + virtual bool requires_pg() const { + return true; + } }; template <class T> diff --git a/src/crimson/osd/osd_operations/client_request.h b/src/crimson/osd/osd_operations/client_request.h index 98443bdfc0f..91a6728fd4b 100644 --- a/src/crimson/osd/osd_operations/client_request.h +++ b/src/crimson/osd/osd_operations/client_request.h @@ -42,6 +42,10 @@ class ClientRequest final : public PhasedOperationT<ClientRequest>, unsigned instance_id = 0; public: + epoch_t get_epoch_sent_at() const { + return m->get_map_epoch(); + } + /** * instance_handle_t * diff --git a/src/crimson/osd/osd_operations/logmissing_request.h b/src/crimson/osd/osd_operations/logmissing_request.h index e12243ce430..fe4761c4ab4 100644 --- a/src/crimson/osd/osd_operations/logmissing_request.h +++ b/src/crimson/osd/osd_operations/logmissing_request.h @@ -36,6 +36,9 @@ public: } PipelineHandle &get_handle() { return handle; } epoch_t get_epoch() const { return req->get_min_epoch(); } + epoch_t get_epoch_sent_at() const { + return req->get_map_epoch(); + } ConnectionPipeline &get_connection_pipeline(); diff --git a/src/crimson/osd/osd_operations/logmissing_request_reply.h b/src/crimson/osd/osd_operations/logmissing_request_reply.h index 71651d16789..bdb6c2ac6ac 100644 --- a/src/crimson/osd/osd_operations/logmissing_request_reply.h +++ b/src/crimson/osd/osd_operations/logmissing_request_reply.h @@ -36,6 +36,9 @@ public: } PipelineHandle &get_handle() { return handle; } epoch_t get_epoch() const { return req->get_min_epoch(); } + epoch_t get_epoch_sent_at() const { + return req->get_map_epoch(); + } ConnectionPipeline &get_connection_pipeline(); diff --git a/src/crimson/osd/osd_operations/peering_event.h 
b/src/crimson/osd/osd_operations/peering_event.h index 85de5c711d6..aa6b8a95a94 100644 --- a/src/crimson/osd/osd_operations/peering_event.h +++ b/src/crimson/osd/osd_operations/peering_event.h @@ -44,6 +44,10 @@ protected: float delay = 0; PGPeeringEvent evt; + epoch_t get_epoch_sent_at() const { + return evt.get_epoch_sent(); + } + const pg_shard_t get_from() const { return from; } @@ -84,6 +88,10 @@ public: evt(std::forward<Args>(args)...) {} + bool requires_pg() const final { + return evt.requires_pg; + } + void print(std::ostream &) const final; void dump_detail(ceph::Formatter* f) const final; seastar::future<> with_pg( diff --git a/src/crimson/osd/osd_operations/pg_advance_map.h b/src/crimson/osd/osd_operations/pg_advance_map.h index 43be7319545..21702f6ff4f 100644 --- a/src/crimson/osd/osd_operations/pg_advance_map.h +++ b/src/crimson/osd/osd_operations/pg_advance_map.h @@ -50,6 +50,10 @@ public: PGPeeringPipeline::Process::BlockingEvent > tracking_events; + epoch_t get_epoch_sent_at() const { + return to; + } + private: PGPeeringPipeline &peering_pp(PG &pg); }; diff --git a/src/crimson/osd/osd_operations/recovery_subrequest.h b/src/crimson/osd/osd_operations/recovery_subrequest.h index 17c2faf97ea..2fe8ff372b3 100644 --- a/src/crimson/osd/osd_operations/recovery_subrequest.h +++ b/src/crimson/osd/osd_operations/recovery_subrequest.h @@ -39,6 +39,9 @@ public: } PipelineHandle &get_handle() { return handle; } epoch_t get_epoch() const { return m->get_min_epoch(); } + epoch_t get_epoch_sent_at() const { + return m->get_map_epoch(); + } ConnectionPipeline &get_connection_pipeline(); diff --git a/src/crimson/osd/osd_operations/replicated_request.h b/src/crimson/osd/osd_operations/replicated_request.h index 1e84fd108e2..05724943cf0 100644 --- a/src/crimson/osd/osd_operations/replicated_request.h +++ b/src/crimson/osd/osd_operations/replicated_request.h @@ -36,6 +36,9 @@ public: } PipelineHandle &get_handle() { return handle; } epoch_t get_epoch() const { return 
req->get_min_epoch(); } + epoch_t get_epoch_sent_at() const { + return req->get_map_epoch(); + } ConnectionPipeline &get_connection_pipeline(); diff --git a/src/crimson/osd/osd_operations/scrub_events.h b/src/crimson/osd/osd_operations/scrub_events.h index 02a5d852bb7..8bed90e4c14 100644 --- a/src/crimson/osd/osd_operations/scrub_events.h +++ b/src/crimson/osd/osd_operations/scrub_events.h @@ -27,11 +27,11 @@ class RemoteScrubEventBaseT : public PhasedOperationT<T> { crimson::net::ConnectionRef l_conn; crimson::net::ConnectionXcoreRef r_conn; - epoch_t epoch; spg_t pgid; protected: using interruptor = InterruptibleOperation::interruptor; + epoch_t epoch; template <typename U=void> using ifut = InterruptibleOperation::interruptible_future<U>; @@ -40,7 +40,7 @@ protected: public: RemoteScrubEventBaseT( crimson::net::ConnectionRef conn, epoch_t epoch, spg_t pgid) - : l_conn(std::move(conn)), epoch(epoch), pgid(pgid) {} + : l_conn(std::move(conn)), pgid(pgid), epoch(epoch) {} PGPeeringPipeline &get_peering_pipeline(PG &pg); @@ -117,6 +117,10 @@ public: : RemoteScrubEventBaseT<ScrubRequested>(std::forward<Args>(base_args)...), deep(deep) {} + epoch_t get_epoch_sent_at() const { + return epoch; + } + void print(std::ostream &out) const final { out << "(deep=" << deep << ")"; } @@ -141,6 +145,10 @@ public: ceph_assert(scrub::PGScrubber::is_scrub_message(*m)); } + epoch_t get_epoch_sent_at() const { + return epoch; + } + void print(std::ostream &out) const final { out << "(m=" << *m << ")"; } diff --git a/src/crimson/osd/pg_shard_manager.h b/src/crimson/osd/pg_shard_manager.h index b9879c8c9dd..f7bd7a6c08e 100644 --- a/src/crimson/osd/pg_shard_manager.h +++ b/src/crimson/osd/pg_shard_manager.h @@ -256,18 +256,40 @@ public: auto &opref = *op; return opref.template with_blocking_event< PGMap::PGCreationBlockingEvent - >([&target_shard_services, &opref](auto &&trigger) { - return target_shard_services.wait_for_pg( - std::move(trigger), opref.get_pgid()); - 
}).safe_then([&logger, &target_shard_services, &opref](Ref<PG> pgref) { - logger.debug("{}: have_pg", opref); - return opref.with_pg(target_shard_services, pgref); - }).handle_error( - crimson::ct_error::ecanceled::handle([&logger, &opref](auto) { - logger.debug("{}: pg creation canceled, dropping", opref); - return seastar::now(); - }) - ).then([op=std::move(op)] {}); + >([&target_shard_services, &opref, &logger](auto &&trigger) mutable { + auto pg = target_shard_services.get_pg(opref.get_pgid()); + auto fut = ShardServices::wait_for_pg_ertr::make_ready_future<Ref<PG>>(pg); + if (!pg) { + if (opref.requires_pg()) { + auto osdmap = target_shard_services.get_map(); + if (!osdmap->is_up_acting_osd_shard( + opref.get_pgid(), target_shard_services.local_state.whoami)) { + logger.debug( + "pg {} for {} is no longer here, discarding", + opref.get_pgid(), opref); + opref.get_handle().exit(); + auto _fut = seastar::now(); + if (osdmap->get_epoch() > opref.get_epoch_sent_at()) { + _fut = target_shard_services.send_incremental_map( + std::ref(opref.get_foreign_connection()), + opref.get_epoch_sent_at() + 1); + } + return _fut; + } + } + fut = target_shard_services.wait_for_pg( + std::move(trigger), opref.get_pgid()); + } + return fut.safe_then([&logger, &target_shard_services, &opref](Ref<PG> pgref) { + logger.debug("{}: have_pg", opref); + return opref.with_pg(target_shard_services, pgref); + }).handle_error( + crimson::ct_error::ecanceled::handle([&logger, &opref](auto) { + logger.debug("{}: pg creation canceled, dropping", opref); + return seastar::now(); + }) + ); + }).then([op=std::move(op)] {}); } seastar::future<> load_pgs(crimson::os::FuturizedStore& store); diff --git a/src/crimson/osd/shard_services.cc b/src/crimson/osd/shard_services.cc index c2340898929..e1acb34636f 100644 --- a/src/crimson/osd/shard_services.cc +++ b/src/crimson/osd/shard_services.cc @@ -783,6 +783,11 @@ seastar::future<> ShardServices::dispatch_context_transaction( co_return; } +Ref<PG> 
ShardServices::get_pg(spg_t pgid) +{ + return local_state.get_pg(pgid); +} + seastar::future<> ShardServices::dispatch_context_messages( BufferedRecoveryMessages &&ctx) { diff --git a/src/crimson/osd/shard_services.h b/src/crimson/osd/shard_services.h index 56ac4963fff..f4d4b4c2eb4 100644 --- a/src/crimson/osd/shard_services.h +++ b/src/crimson/osd/shard_services.h @@ -483,6 +483,8 @@ public: return pg_to_shard_mapping.remove_pg_mapping(pgid); } + Ref<PG> get_pg(spg_t pgid); + crimson::common::CephContext *get_cct() { return &(local_state.cct); } |