summaryrefslogtreecommitdiffstats
path: root/src/crimson
diff options
context:
space:
mode:
authorXuehan Xu <xuxuehan@qianxin.com>2024-09-29 11:26:04 +0200
committerXuehan Xu <xuxuehan@qianxin.com>2024-12-19 03:08:33 +0100
commit1f1051d91f320636f513faeb2d113542bcb9e5c3 (patch)
tree33b301a413651657eeb06c598c78290d4d443393 /src/crimson
parentMerge pull request #60976 from rhcs-dashboard/fix-69144-main (diff)
downloadceph-1f1051d91f320636f513faeb2d113542bcb9e5c3.tar.xz
ceph-1f1051d91f320636f513faeb2d113542bcb9e5c3.zip
crimson/osd/pg_shard_manager: discard outdated operations when the
corresponding pgs are already removed Fixes: https://tracker.ceph.com/issues/68286 Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
Diffstat (limited to 'src/crimson')
-rw-r--r--src/crimson/osd/osd_operation.h3
-rw-r--r--src/crimson/osd/osd_operations/client_request.h4
-rw-r--r--src/crimson/osd/osd_operations/logmissing_request.h3
-rw-r--r--src/crimson/osd/osd_operations/logmissing_request_reply.h3
-rw-r--r--src/crimson/osd/osd_operations/peering_event.h8
-rw-r--r--src/crimson/osd/osd_operations/pg_advance_map.h4
-rw-r--r--src/crimson/osd/osd_operations/recovery_subrequest.h3
-rw-r--r--src/crimson/osd/osd_operations/replicated_request.h3
-rw-r--r--src/crimson/osd/osd_operations/scrub_events.h12
-rw-r--r--src/crimson/osd/pg_shard_manager.h46
-rw-r--r--src/crimson/osd/shard_services.cc5
-rw-r--r--src/crimson/osd/shard_services.h2
12 files changed, 82 insertions, 14 deletions
diff --git a/src/crimson/osd/osd_operation.h b/src/crimson/osd/osd_operation.h
index 2897a7e1623..8f525c6a8a4 100644
--- a/src/crimson/osd/osd_operation.h
+++ b/src/crimson/osd/osd_operation.h
@@ -211,6 +211,9 @@ protected:
public:
static constexpr bool is_trackable = true;
+ virtual bool requires_pg() const {
+ return true;
+ }
};
template <class T>
diff --git a/src/crimson/osd/osd_operations/client_request.h b/src/crimson/osd/osd_operations/client_request.h
index 98443bdfc0f..91a6728fd4b 100644
--- a/src/crimson/osd/osd_operations/client_request.h
+++ b/src/crimson/osd/osd_operations/client_request.h
@@ -42,6 +42,10 @@ class ClientRequest final : public PhasedOperationT<ClientRequest>,
unsigned instance_id = 0;
public:
+ epoch_t get_epoch_sent_at() const {
+ return m->get_map_epoch();
+ }
+
/**
* instance_handle_t
*
diff --git a/src/crimson/osd/osd_operations/logmissing_request.h b/src/crimson/osd/osd_operations/logmissing_request.h
index e12243ce430..fe4761c4ab4 100644
--- a/src/crimson/osd/osd_operations/logmissing_request.h
+++ b/src/crimson/osd/osd_operations/logmissing_request.h
@@ -36,6 +36,9 @@ public:
}
PipelineHandle &get_handle() { return handle; }
epoch_t get_epoch() const { return req->get_min_epoch(); }
+ epoch_t get_epoch_sent_at() const {
+ return req->get_map_epoch();
+ }
ConnectionPipeline &get_connection_pipeline();
diff --git a/src/crimson/osd/osd_operations/logmissing_request_reply.h b/src/crimson/osd/osd_operations/logmissing_request_reply.h
index 71651d16789..bdb6c2ac6ac 100644
--- a/src/crimson/osd/osd_operations/logmissing_request_reply.h
+++ b/src/crimson/osd/osd_operations/logmissing_request_reply.h
@@ -36,6 +36,9 @@ public:
}
PipelineHandle &get_handle() { return handle; }
epoch_t get_epoch() const { return req->get_min_epoch(); }
+ epoch_t get_epoch_sent_at() const {
+ return req->get_map_epoch();
+ }
ConnectionPipeline &get_connection_pipeline();
diff --git a/src/crimson/osd/osd_operations/peering_event.h b/src/crimson/osd/osd_operations/peering_event.h
index 85de5c711d6..aa6b8a95a94 100644
--- a/src/crimson/osd/osd_operations/peering_event.h
+++ b/src/crimson/osd/osd_operations/peering_event.h
@@ -44,6 +44,10 @@ protected:
float delay = 0;
PGPeeringEvent evt;
+ epoch_t get_epoch_sent_at() const {
+ return evt.get_epoch_sent();
+ }
+
const pg_shard_t get_from() const {
return from;
}
@@ -84,6 +88,10 @@ public:
evt(std::forward<Args>(args)...)
{}
+ bool requires_pg() const final {
+ return evt.requires_pg;
+ }
+
void print(std::ostream &) const final;
void dump_detail(ceph::Formatter* f) const final;
seastar::future<> with_pg(
diff --git a/src/crimson/osd/osd_operations/pg_advance_map.h b/src/crimson/osd/osd_operations/pg_advance_map.h
index 43be7319545..21702f6ff4f 100644
--- a/src/crimson/osd/osd_operations/pg_advance_map.h
+++ b/src/crimson/osd/osd_operations/pg_advance_map.h
@@ -50,6 +50,10 @@ public:
PGPeeringPipeline::Process::BlockingEvent
> tracking_events;
+ epoch_t get_epoch_sent_at() const {
+ return to;
+ }
+
private:
PGPeeringPipeline &peering_pp(PG &pg);
};
diff --git a/src/crimson/osd/osd_operations/recovery_subrequest.h b/src/crimson/osd/osd_operations/recovery_subrequest.h
index 17c2faf97ea..2fe8ff372b3 100644
--- a/src/crimson/osd/osd_operations/recovery_subrequest.h
+++ b/src/crimson/osd/osd_operations/recovery_subrequest.h
@@ -39,6 +39,9 @@ public:
}
PipelineHandle &get_handle() { return handle; }
epoch_t get_epoch() const { return m->get_min_epoch(); }
+ epoch_t get_epoch_sent_at() const {
+ return m->get_map_epoch();
+ }
ConnectionPipeline &get_connection_pipeline();
diff --git a/src/crimson/osd/osd_operations/replicated_request.h b/src/crimson/osd/osd_operations/replicated_request.h
index 1e84fd108e2..05724943cf0 100644
--- a/src/crimson/osd/osd_operations/replicated_request.h
+++ b/src/crimson/osd/osd_operations/replicated_request.h
@@ -36,6 +36,9 @@ public:
}
PipelineHandle &get_handle() { return handle; }
epoch_t get_epoch() const { return req->get_min_epoch(); }
+ epoch_t get_epoch_sent_at() const {
+ return req->get_map_epoch();
+ }
ConnectionPipeline &get_connection_pipeline();
diff --git a/src/crimson/osd/osd_operations/scrub_events.h b/src/crimson/osd/osd_operations/scrub_events.h
index 02a5d852bb7..8bed90e4c14 100644
--- a/src/crimson/osd/osd_operations/scrub_events.h
+++ b/src/crimson/osd/osd_operations/scrub_events.h
@@ -27,11 +27,11 @@ class RemoteScrubEventBaseT : public PhasedOperationT<T> {
crimson::net::ConnectionRef l_conn;
crimson::net::ConnectionXcoreRef r_conn;
- epoch_t epoch;
spg_t pgid;
protected:
using interruptor = InterruptibleOperation::interruptor;
+ epoch_t epoch;
template <typename U=void>
using ifut = InterruptibleOperation::interruptible_future<U>;
@@ -40,7 +40,7 @@ protected:
public:
RemoteScrubEventBaseT(
crimson::net::ConnectionRef conn, epoch_t epoch, spg_t pgid)
- : l_conn(std::move(conn)), epoch(epoch), pgid(pgid) {}
+ : l_conn(std::move(conn)), pgid(pgid), epoch(epoch) {}
PGPeeringPipeline &get_peering_pipeline(PG &pg);
@@ -117,6 +117,10 @@ public:
: RemoteScrubEventBaseT<ScrubRequested>(std::forward<Args>(base_args)...),
deep(deep) {}
+ epoch_t get_epoch_sent_at() const {
+ return epoch;
+ }
+
void print(std::ostream &out) const final {
out << "(deep=" << deep << ")";
}
@@ -141,6 +145,10 @@ public:
ceph_assert(scrub::PGScrubber::is_scrub_message(*m));
}
+ epoch_t get_epoch_sent_at() const {
+ return epoch;
+ }
+
void print(std::ostream &out) const final {
out << "(m=" << *m << ")";
}
diff --git a/src/crimson/osd/pg_shard_manager.h b/src/crimson/osd/pg_shard_manager.h
index b9879c8c9dd..f7bd7a6c08e 100644
--- a/src/crimson/osd/pg_shard_manager.h
+++ b/src/crimson/osd/pg_shard_manager.h
@@ -256,18 +256,40 @@ public:
auto &opref = *op;
return opref.template with_blocking_event<
PGMap::PGCreationBlockingEvent
- >([&target_shard_services, &opref](auto &&trigger) {
- return target_shard_services.wait_for_pg(
- std::move(trigger), opref.get_pgid());
- }).safe_then([&logger, &target_shard_services, &opref](Ref<PG> pgref) {
- logger.debug("{}: have_pg", opref);
- return opref.with_pg(target_shard_services, pgref);
- }).handle_error(
- crimson::ct_error::ecanceled::handle([&logger, &opref](auto) {
- logger.debug("{}: pg creation canceled, dropping", opref);
- return seastar::now();
- })
- ).then([op=std::move(op)] {});
+ >([&target_shard_services, &opref, &logger](auto &&trigger) mutable {
+ auto pg = target_shard_services.get_pg(opref.get_pgid());
+ auto fut = ShardServices::wait_for_pg_ertr::make_ready_future<Ref<PG>>(pg);
+ if (!pg) {
+ if (opref.requires_pg()) {
+ auto osdmap = target_shard_services.get_map();
+ if (!osdmap->is_up_acting_osd_shard(
+ opref.get_pgid(), target_shard_services.local_state.whoami)) {
+ logger.debug(
+ "pg {} for {} is no longer here, discarding",
+ opref.get_pgid(), opref);
+ opref.get_handle().exit();
+ auto _fut = seastar::now();
+ if (osdmap->get_epoch() > opref.get_epoch_sent_at()) {
+ _fut = target_shard_services.send_incremental_map(
+ std::ref(opref.get_foreign_connection()),
+ opref.get_epoch_sent_at() + 1);
+ }
+ return _fut;
+ }
+ }
+ fut = target_shard_services.wait_for_pg(
+ std::move(trigger), opref.get_pgid());
+ }
+ return fut.safe_then([&logger, &target_shard_services, &opref](Ref<PG> pgref) {
+ logger.debug("{}: have_pg", opref);
+ return opref.with_pg(target_shard_services, pgref);
+ }).handle_error(
+ crimson::ct_error::ecanceled::handle([&logger, &opref](auto) {
+ logger.debug("{}: pg creation canceled, dropping", opref);
+ return seastar::now();
+ })
+ );
+ }).then([op=std::move(op)] {});
}
seastar::future<> load_pgs(crimson::os::FuturizedStore& store);
diff --git a/src/crimson/osd/shard_services.cc b/src/crimson/osd/shard_services.cc
index c2340898929..e1acb34636f 100644
--- a/src/crimson/osd/shard_services.cc
+++ b/src/crimson/osd/shard_services.cc
@@ -783,6 +783,11 @@ seastar::future<> ShardServices::dispatch_context_transaction(
co_return;
}
+Ref<PG> ShardServices::get_pg(spg_t pgid)
+{
+ return local_state.get_pg(pgid);
+}
+
seastar::future<> ShardServices::dispatch_context_messages(
BufferedRecoveryMessages &&ctx)
{
diff --git a/src/crimson/osd/shard_services.h b/src/crimson/osd/shard_services.h
index 56ac4963fff..f4d4b4c2eb4 100644
--- a/src/crimson/osd/shard_services.h
+++ b/src/crimson/osd/shard_services.h
@@ -483,6 +483,8 @@ public:
return pg_to_shard_mapping.remove_pg_mapping(pgid);
}
+ Ref<PG> get_pg(spg_t pgid);
+
crimson::common::CephContext *get_cct() {
return &(local_state.cct);
}