summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Matan Breizman <mbreizma@redhat.com> 2023-12-03 09:06:33 +0100
committer GitHub <noreply@github.com> 2023-12-03 09:06:33 +0100
commit 67b75549b33f045a83f5bf55b677d8842cb7bac4 (patch)
tree 04c6d5f487cea7b0e9a3f8d07c71ff5a214fd478
parent Merge pull request #53712 from ronen-fr/wip-rf-move-1 (diff)
parent crimson/osd: update osd_beacon_report_interval on conf change (diff)
download ceph-67b75549b33f045a83f5bf55b677d8842cb7bac4.tar.xz
ceph-67b75549b33f045a83f5bf55b677d8842cb7bac4.zip
Merge pull request #54437 from Matan-B/wip-matanb-crimson-osdmap-trimming
crimson/osd: introduce osdmap trimming

Reviewed-by: Samuel Just <sjust@redhat.com>
-rw-r--r-- src/crimson/common/shared_lru.h 10
-rw-r--r-- src/crimson/mgr/client.h 2
-rw-r--r-- src/crimson/osd/osd.cc 72
-rw-r--r-- src/crimson/osd/osd.h 12
-rw-r--r-- src/crimson/osd/osd_meta.cc 5
-rw-r--r-- src/crimson/osd/osd_meta.h 1
-rw-r--r-- src/crimson/osd/pg.h 3
-rw-r--r-- src/crimson/osd/pg_shard_manager.cc 9
-rw-r--r-- src/crimson/osd/pg_shard_manager.h 5
-rw-r--r-- src/crimson/osd/shard_services.cc 65
-rw-r--r-- src/crimson/osd/shard_services.h 15
11 files changed, 164 insertions, 35 deletions
diff --git a/src/crimson/common/shared_lru.h b/src/crimson/common/shared_lru.h
index 186f02a614c..92d99d332c4 100644
--- a/src/crimson/common/shared_lru.h
+++ b/src/crimson/common/shared_lru.h
@@ -83,6 +83,7 @@ public:
cache.clear();
}
shared_ptr_t find(const K& key);
+ K cached_key_lower_bound();
// return the last element that is not greater than key
shared_ptr_t lower_bound(const K& key);
// return the first element that is greater than key
@@ -147,6 +148,15 @@ SharedLRU<K,V>::find(const K& key)
}
template<class K, class V>
+K SharedLRU<K,V>::cached_key_lower_bound()
+{
+ if (weak_refs.empty()) {
+ return {};
+ }
+ return weak_refs.begin()->first;
+}
+
+template<class K, class V>
typename SharedLRU<K,V>::shared_ptr_t
SharedLRU<K,V>::lower_bound(const K& key)
{
diff --git a/src/crimson/mgr/client.h b/src/crimson/mgr/client.h
index 501949768dd..b88c60c5e4a 100644
--- a/src/crimson/mgr/client.h
+++ b/src/crimson/mgr/client.h
@@ -24,7 +24,7 @@ namespace crimson::mgr
// implement WithStats if you want to report stats to mgr periodically
class WithStats {
public:
- virtual seastar::future<MessageURef> get_stats() const = 0;
+ virtual seastar::future<MessageURef> get_stats() = 0;
virtual ~WithStats() {}
};
diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc
index 8da2d566e6b..f3648c6df27 100644
--- a/src/crimson/osd/osd.cc
+++ b/src/crimson/osd/osd.cc
@@ -400,7 +400,11 @@ seastar::future<> OSD::start()
);
}).then([this](OSDSuperblock&& sb) {
superblock = std::move(sb);
- pg_shard_manager.set_superblock(superblock);
+ if (!superblock.cluster_osdmap_trim_lower_bound) {
+ superblock.cluster_osdmap_trim_lower_bound = superblock.get_oldest_map();
+ }
+ return pg_shard_manager.set_superblock(superblock);
+ }).then([this] {
return pg_shard_manager.get_local_map(superblock.current_epoch);
}).then([this](OSDMapService::local_cached_map_t&& map) {
osdmap = make_local_shared_foreign(OSDMapService::local_cached_map_t(map));
@@ -864,6 +868,25 @@ void OSD::handle_authentication(const EntityName& name,
}
}
+const char** OSD::get_tracked_conf_keys() const
+{
+ static const char* KEYS[] = {
+ "osd_beacon_report_interval",
+ nullptr
+ };
+ return KEYS;
+}
+
+void OSD::handle_conf_change(
+ const crimson::common::ConfigProxy& conf,
+ const std::set <std::string> &changed)
+{
+ if (changed.count("osd_beacon_report_interval")) {
+ beacon_timer.rearm_periodic(
+ std::chrono::seconds(conf->osd_beacon_report_interval));
+ }
+}
+
void OSD::update_stats()
{
osd_stat_seq++;
@@ -879,13 +902,20 @@ void OSD::update_stats()
});
}
-seastar::future<MessageURef> OSD::get_stats() const
+seastar::future<MessageURef> OSD::get_stats()
{
// MPGStats::had_map_for is not used since PGMonitor was removed
auto m = crimson::make_message<MPGStats>(monc->get_fsid(), osdmap->get_epoch());
m->osd_stat = osd_stat;
return pg_shard_manager.get_pg_stats(
- ).then([m=std::move(m)](auto &&stats) mutable {
+ ).then([this, m=std::move(m)](auto &&stats) mutable {
+ min_last_epoch_clean = osdmap->get_epoch();
+ min_last_epoch_clean_pgs.clear();
+ for (auto [pgid, stat] : stats) {
+ min_last_epoch_clean = std::min(min_last_epoch_clean,
+ stat.get_effective_last_epoch_clean());
+ min_last_epoch_clean_pgs.push_back(pgid);
+ }
m->pg_stat = std::move(stats);
return seastar::make_ready_future<MessageURef>(std::move(m));
});
@@ -934,6 +964,16 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m)
logger().info("handle_osd_map epochs [{}..{}], i have {}, src has [{}..{}]",
first, last, superblock.get_newest_map(),
m->cluster_osdmap_trim_lower_bound, m->newest_map);
+
+ if (superblock.cluster_osdmap_trim_lower_bound <
+ m->cluster_osdmap_trim_lower_bound) {
+ superblock.cluster_osdmap_trim_lower_bound =
+ m->cluster_osdmap_trim_lower_bound;
+ logger().debug("{} superblock cluster_osdmap_trim_lower_bound new epoch is: {}",
+ __func__, superblock.cluster_osdmap_trim_lower_bound);
+ ceph_assert(
+ superblock.cluster_osdmap_trim_lower_bound >= superblock.get_oldest_map());
+ }
// make sure there is something new, here, before we bother flushing
// the queues and such
if (last <= superblock.get_newest_map()) {
@@ -964,8 +1004,9 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m)
monc->sub_got("osdmap", last);
if (!superblock.maps.empty()) {
- // TODO: support osdmap trimming
- // See: <tracker>
+ pg_shard_manager.trim_maps(t, superblock);
+ // TODO: once we support pg splitting, update pg_num_history here
+ //pg_num_history.prune(superblock.get_oldest_map());
}
superblock.insert_osdmap_epochs(first, last);
@@ -977,11 +1018,13 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m)
superblock.clean_thru = last;
}
pg_shard_manager.get_meta_coll().store_superblock(t, superblock);
- pg_shard_manager.set_superblock(superblock);
- logger().debug("OSD::handle_osd_map: do_transaction...");
- return store.get_sharded_store().do_transaction(
- pg_shard_manager.get_meta_coll().collection(),
- std::move(t));
+ return pg_shard_manager.set_superblock(superblock).then(
+ [this, &t] {
+ logger().debug("OSD::handle_osd_map: do_transaction...");
+ return store.get_sharded_store().do_transaction(
+ pg_shard_manager.get_meta_coll().collection(),
+ std::move(t));
+ });
});
}).then([=, this] {
// TODO: write to superblock and commit the transaction
@@ -1266,14 +1309,13 @@ seastar::future<> OSD::send_beacon()
if (!pg_shard_manager.is_active()) {
return seastar::now();
}
- // FIXME: min lec should be calculated from pg_stat
- // and should set m->pgs
- epoch_t min_last_epoch_clean = osdmap->get_epoch();
- auto m = crimson::make_message<MOSDBeacon>(osdmap->get_epoch(),
+ auto beacon = crimson::make_message<MOSDBeacon>(osdmap->get_epoch(),
min_last_epoch_clean,
superblock.last_purged_snaps_scrub,
local_conf()->osd_beacon_report_interval);
- return monc->send_message(std::move(m));
+ beacon->pgs = min_last_epoch_clean_pgs;
+ logger().debug("{} {}", __func__, *beacon);
+ return monc->send_message(std::move(beacon));
}
seastar::future<> OSD::update_heartbeat_peers()
diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h
index 10ff60d4701..134376ad947 100644
--- a/src/crimson/osd/osd.h
+++ b/src/crimson/osd/osd.h
@@ -61,7 +61,8 @@ class PG;
class OSD final : public crimson::net::Dispatcher,
private crimson::common::AuthHandler,
- private crimson::mgr::WithStats {
+ private crimson::mgr::WithStats,
+ public md_config_obs_t {
const int whoami;
const uint32_t nonce;
seastar::abort_source& abort_source;
@@ -106,8 +107,11 @@ class OSD final : public crimson::net::Dispatcher,
// pg statistics including osd ones
osd_stat_t osd_stat;
uint32_t osd_stat_seq = 0;
+ epoch_t min_last_epoch_clean = 0;
+ // which pgs were scanned for min_lec
+ std::vector<pg_t> min_last_epoch_clean_pgs;
void update_stats();
- seastar::future<MessageURef> get_stats() const final;
+ seastar::future<MessageURef> get_stats() final;
// AuthHandler methods
void handle_authentication(const EntityName& name,
@@ -123,6 +127,10 @@ class OSD final : public crimson::net::Dispatcher,
std::unique_ptr<Heartbeat> heartbeat;
seastar::timer<seastar::lowres_clock> tick_timer;
+ const char** get_tracked_conf_keys() const final;
+ void handle_conf_change(const ConfigProxy& conf,
+ const std::set<std::string> &changed) final;
+
// admin-socket
seastar::lw_shared_ptr<crimson::admin::AdminSocket> asok;
diff --git a/src/crimson/osd/osd_meta.cc b/src/crimson/osd/osd_meta.cc
index e40b2b2464b..54785a975f0 100644
--- a/src/crimson/osd/osd_meta.cc
+++ b/src/crimson/osd/osd_meta.cc
@@ -24,6 +24,11 @@ void OSDMeta::store_map(ceph::os::Transaction& t,
t.write(coll->get_cid(), osdmap_oid(e), 0, m.length(), m);
}
+void OSDMeta::remove_map(ceph::os::Transaction& t, epoch_t e)
+{
+ t.remove(coll->get_cid(), osdmap_oid(e));
+}
+
seastar::future<bufferlist> OSDMeta::load_map(epoch_t e)
{
return store.read(coll,
diff --git a/src/crimson/osd/osd_meta.h b/src/crimson/osd/osd_meta.h
index 652266d9e20..3632a9feef3 100644
--- a/src/crimson/osd/osd_meta.h
+++ b/src/crimson/osd/osd_meta.h
@@ -40,6 +40,7 @@ public:
void store_map(ceph::os::Transaction& t,
epoch_t e, const bufferlist& m);
+ void remove_map(ceph::os::Transaction& t, epoch_t e);
seastar::future<bufferlist> load_map(epoch_t e);
void store_superblock(ceph::os::Transaction& t,
diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h
index 056d5e9e1c0..f56f2dc6627 100644
--- a/src/crimson/osd/pg.h
+++ b/src/crimson/osd/pg.h
@@ -348,8 +348,7 @@ public:
void on_active_advmap(const OSDMapRef &osdmap) final;
epoch_t cluster_osdmap_trim_lower_bound() final {
- // TODO
- return 0;
+ return shard_services.get_osdmap_tlb();
}
void on_backfill_reserved() final {
diff --git a/src/crimson/osd/pg_shard_manager.cc b/src/crimson/osd/pg_shard_manager.cc
index 6061c856be2..1980b36271c 100644
--- a/src/crimson/osd/pg_shard_manager.cc
+++ b/src/crimson/osd/pg_shard_manager.cc
@@ -105,4 +105,13 @@ seastar::future<> PGShardManager::set_up_epoch(epoch_t e) {
});
}
+seastar::future<> PGShardManager::set_superblock(OSDSuperblock superblock) {
+ ceph_assert(seastar::this_shard_id() == PRIMARY_CORE);
+ get_osd_singleton_state().set_singleton_superblock(superblock);
+ return shard_services.invoke_on_all(
+ [superblock = std::move(superblock)](auto &local_service) {
+ return local_service.local_state.update_shard_superblock(superblock);
+ });
+}
+
}
diff --git a/src/crimson/osd/pg_shard_manager.h b/src/crimson/osd/pg_shard_manager.h
index 74154499c8a..1559dde0cb1 100644
--- a/src/crimson/osd/pg_shard_manager.h
+++ b/src/crimson/osd/pg_shard_manager.h
@@ -129,16 +129,17 @@ public:
FORWARD_TO_OSD_SINGLETON(init_meta_coll)
FORWARD_TO_OSD_SINGLETON(get_meta_coll)
- FORWARD_TO_OSD_SINGLETON(set_superblock)
-
// Core OSDMap methods
FORWARD_TO_OSD_SINGLETON(get_local_map)
FORWARD_TO_OSD_SINGLETON(load_map_bl)
FORWARD_TO_OSD_SINGLETON(load_map_bls)
FORWARD_TO_OSD_SINGLETON(store_maps)
+ FORWARD_TO_OSD_SINGLETON(trim_maps)
seastar::future<> set_up_epoch(epoch_t e);
+ seastar::future<> set_superblock(OSDSuperblock superblock);
+
template <typename F>
auto with_remote_shard_state(core_id_t core, F &&f) {
return shard_services.invoke_on(
diff --git a/src/crimson/osd/shard_services.cc b/src/crimson/osd/shard_services.cc
index c2438809020..c321439df7a 100644
--- a/src/crimson/osd/shard_services.cc
+++ b/src/crimson/osd/shard_services.cc
@@ -72,7 +72,7 @@ seastar::future<> PerShardState::stop_pgs()
});
}
-std::map<pg_t, pg_stat_t> PerShardState::get_pg_stats() const
+std::map<pg_t, pg_stat_t> PerShardState::get_pg_stats()
{
assert_core();
std::map<pg_t, pg_stat_t> ret;
@@ -119,6 +119,13 @@ HeartbeatStampsRef PerShardState::get_hb_stamps(int peer)
return stamps->second;
}
+seastar::future<> PerShardState::update_shard_superblock(OSDSuperblock superblock)
+{
+ assert_core();
+ per_shard_superblock = std::move(superblock);
+ return seastar::now();
+}
+
OSDSingletonState::OSDSingletonState(
int whoami,
crimson::net::Messenger &cluster_msgr,
@@ -352,7 +359,6 @@ void OSDSingletonState::handle_conf_change(
seastar::future<OSDSingletonState::local_cached_map_t>
OSDSingletonState::get_local_map(epoch_t e)
{
- // TODO: use LRU cache for managing osdmap, fallback to disk if we have to
if (auto found = osdmaps.find(e); found) {
logger().debug("{} osdmap.{} found in cache", __func__, e);
return seastar::make_ready_future<local_cached_map_t>(std::move(found));
@@ -392,6 +398,9 @@ seastar::future<std::map<epoch_t, bufferlist>> OSDSingletonState::load_map_bls(
logger().debug("{} loading maps [{},{}]",
__func__, first, last);
ceph_assert(first <= last);
+ // TODO: take osd_map_max into account
+ //int max = cct->_conf->osd_map_message_max;
+ //ssize_t max_bytes = cct->_conf->osd_map_message_max_bytes;
return seastar::map_reduce(boost::make_counting_iterator<epoch_t>(first),
boost::make_counting_iterator<epoch_t>(last + 1),
[this](epoch_t e) {
@@ -458,6 +467,34 @@ seastar::future<> OSDSingletonState::store_maps(ceph::os::Transaction& t,
});
}
+// Note: store/set_superblock is called in later OSD::handle_osd_map
+// so we use the OSD's superblock reference meanwhile.
+void OSDSingletonState::trim_maps(ceph::os::Transaction& t,
+ OSDSuperblock& superblock)
+{
+ epoch_t min =
+ std::min(superblock.cluster_osdmap_trim_lower_bound,
+ osdmaps.cached_key_lower_bound());
+
+ if (min <= superblock.get_oldest_map()) {
+ return;
+ }
+ logger().debug("{}: min={} oldest_map={}", __func__, min, superblock.get_oldest_map());
+
+ // Trim from the superblock's oldest_map up to `min`.
+ // Break if we have exceeded the txn target size.
+ while (superblock.get_oldest_map() < min &&
+ t.get_num_ops() < crimson::common::local_conf()->osd_target_transaction_size) {
+ logger().debug("{}: removing old osdmap epoch {}", __func__, superblock.get_oldest_map());
+ meta_coll->remove_map(t, superblock.get_oldest_map());
+ superblock.maps.erase(superblock.get_oldest_map());
+ }
+
+ // we should not trim past osdmaps.cached_key_lower_bound()
+ // as there may still be PGs with those map epochs recorded.
+ ceph_assert(min <= osdmaps.cached_key_lower_bound());
+}
+
seastar::future<Ref<PG>> ShardServices::make_pg(
OSDMapService::cached_map_t create_map,
spg_t pgid,
@@ -716,30 +753,36 @@ seastar::future<> OSDSingletonState::send_incremental_map(
"superblock's oldest map: {}",
__func__, first, superblock.get_oldest_map());
if (first >= superblock.get_oldest_map()) {
+ // TODO: osd_map_share_max_epochs
+ // See OSDService::build_incremental_map_msg
+ if (first < superblock.cluster_osdmap_trim_lower_bound) {
+ logger().info("{}: cluster osdmap lower bound: {} "
+ " > first {}, starting with full map",
+ __func__, superblock.cluster_osdmap_trim_lower_bound, first);
+ // we don't have the next map the target wants,
+ // so start with a full map.
+ first = superblock.cluster_osdmap_trim_lower_bound;
+ }
return load_map_bls(
first, superblock.get_newest_map()
- ).then([this, &conn, first](auto&& bls) {
+ ).then([this, &conn](auto&& bls) {
auto m = crimson::make_message<MOSDMap>(
monc.get_fsid(),
osdmap->get_encoding_features());
- m->cluster_osdmap_trim_lower_bound = first;
+ m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound;
m->newest_map = superblock.get_newest_map();
m->maps = std::move(bls);
return conn.send(std::move(m));
});
} else {
+ // See OSDService::send_incremental_map
+ // just send latest full map
return load_map_bl(osdmap->get_epoch()
).then([this, &conn](auto&& bl) mutable {
auto m = crimson::make_message<MOSDMap>(
monc.get_fsid(),
osdmap->get_encoding_features());
- /* TODO: once we support the tracking of superblock's
- * cluster_osdmap_trim_lower_bound, the MOSDMap should
- * be populated with this value instead of the oldest_map.
- * See: OSD::handle_osd_map for how classic updates the
- * cluster's trim lower bound.
- */
- m->cluster_osdmap_trim_lower_bound = superblock.get_oldest_map();
+ m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound;
m->newest_map = superblock.get_newest_map();
m->maps.emplace(osdmap->get_epoch(), std::move(bl));
return conn.send(std::move(m));
diff --git a/src/crimson/osd/shard_services.h b/src/crimson/osd/shard_services.h
index d71513a6645..37993a4f679 100644
--- a/src/crimson/osd/shard_services.h
+++ b/src/crimson/osd/shard_services.h
@@ -77,6 +77,10 @@ class PerShardState {
PerfCounters *perf = nullptr;
PerfCounters *recoverystate_perf = nullptr;
+ const epoch_t& get_osdmap_tlb() {
+ return per_shard_superblock.cluster_osdmap_trim_lower_bound;
+ }
+
// Op Management
OSDOperationRegistry registry;
OperationThrottler throttler;
@@ -115,7 +119,7 @@ class PerShardState {
PGMap pg_map;
seastar::future<> stop_pgs();
- std::map<pg_t, pg_stat_t> get_pg_stats() const;
+ std::map<pg_t, pg_stat_t> get_pg_stats();
seastar::future<> broadcast_map_to_pgs(
ShardServices &shard_services,
epoch_t epoch);
@@ -181,6 +185,8 @@ class PerShardState {
HeartbeatStampsRef get_hb_stamps(int peer);
std::map<int, HeartbeatStampsRef> heartbeat_stamps;
+ seastar::future<> update_shard_superblock(OSDSuperblock superblock);
+
// Time state
const ceph::mono_time startup_time;
ceph::signedspan get_mnow() const {
@@ -188,6 +194,8 @@ class PerShardState {
return ceph::mono_clock::now() - startup_time;
}
+ OSDSuperblock per_shard_superblock;
+
public:
PerShardState(
int whoami,
@@ -256,7 +264,7 @@ private:
}
OSDSuperblock superblock;
- void set_superblock(OSDSuperblock _superblock) {
+ void set_singleton_superblock(OSDSuperblock _superblock) {
superblock = std::move(_superblock);
}
@@ -316,6 +324,7 @@ private:
epoch_t e, bufferlist&& bl);
seastar::future<> store_maps(ceph::os::Transaction& t,
epoch_t start, Ref<MOSDMap> m);
+ void trim_maps(ceph::os::Transaction& t, OSDSuperblock& superblock);
};
/**
@@ -508,6 +517,8 @@ public:
FORWARD_TO_OSD_SINGLETON(send_pg_temp)
FORWARD_TO_LOCAL_CONST(get_mnow)
FORWARD_TO_LOCAL(get_hb_stamps)
+ FORWARD_TO_LOCAL(update_shard_superblock)
+ FORWARD_TO_LOCAL(get_osdmap_tlb)
FORWARD(pg_created, pg_created, local_state.pg_map)