diff options
author | Matan Breizman <mbreizma@redhat.com> | 2023-12-03 09:06:33 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-03 09:06:33 +0100 |
commit | 67b75549b33f045a83f5bf55b677d8842cb7bac4 (patch) | |
tree | 04c6d5f487cea7b0e9a3f8d07c71ff5a214fd478 | |
parent | Merge pull request #53712 from ronen-fr/wip-rf-move-1 (diff) | |
parent | crimson/osd: update osd_beacon_report_interval on conf change (diff) | |
download | ceph-67b75549b33f045a83f5bf55b677d8842cb7bac4.tar.xz ceph-67b75549b33f045a83f5bf55b677d8842cb7bac4.zip |
Merge pull request #54437 from Matan-B/wip-matanb-crimson-osdmap-trimming
crimson/osd: introduce osdmap trimming
Reviewed-by: Samuel Just <sjust@redhat.com>
-rw-r--r-- | src/crimson/common/shared_lru.h | 10 | ||||
-rw-r--r-- | src/crimson/mgr/client.h | 2 | ||||
-rw-r--r-- | src/crimson/osd/osd.cc | 72 | ||||
-rw-r--r-- | src/crimson/osd/osd.h | 12 | ||||
-rw-r--r-- | src/crimson/osd/osd_meta.cc | 5 | ||||
-rw-r--r-- | src/crimson/osd/osd_meta.h | 1 | ||||
-rw-r--r-- | src/crimson/osd/pg.h | 3 | ||||
-rw-r--r-- | src/crimson/osd/pg_shard_manager.cc | 9 | ||||
-rw-r--r-- | src/crimson/osd/pg_shard_manager.h | 5 | ||||
-rw-r--r-- | src/crimson/osd/shard_services.cc | 65 | ||||
-rw-r--r-- | src/crimson/osd/shard_services.h | 15 |
11 files changed, 164 insertions, 35 deletions
diff --git a/src/crimson/common/shared_lru.h b/src/crimson/common/shared_lru.h index 186f02a614c..92d99d332c4 100644 --- a/src/crimson/common/shared_lru.h +++ b/src/crimson/common/shared_lru.h @@ -83,6 +83,7 @@ public: cache.clear(); } shared_ptr_t find(const K& key); + K cached_key_lower_bound(); // return the last element that is not greater than key shared_ptr_t lower_bound(const K& key); // return the first element that is greater than key @@ -147,6 +148,15 @@ SharedLRU<K,V>::find(const K& key) } template<class K, class V> +K SharedLRU<K,V>::cached_key_lower_bound() +{ + if (weak_refs.empty()) { + return {}; + } + return weak_refs.begin()->first; +} + +template<class K, class V> typename SharedLRU<K,V>::shared_ptr_t SharedLRU<K,V>::lower_bound(const K& key) { diff --git a/src/crimson/mgr/client.h b/src/crimson/mgr/client.h index 501949768dd..b88c60c5e4a 100644 --- a/src/crimson/mgr/client.h +++ b/src/crimson/mgr/client.h @@ -24,7 +24,7 @@ namespace crimson::mgr // implement WithStats if you want to report stats to mgr periodically class WithStats { public: - virtual seastar::future<MessageURef> get_stats() const = 0; + virtual seastar::future<MessageURef> get_stats() = 0; virtual ~WithStats() {} }; diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 8da2d566e6b..f3648c6df27 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -400,7 +400,11 @@ seastar::future<> OSD::start() ); }).then([this](OSDSuperblock&& sb) { superblock = std::move(sb); - pg_shard_manager.set_superblock(superblock); + if (!superblock.cluster_osdmap_trim_lower_bound) { + superblock.cluster_osdmap_trim_lower_bound = superblock.get_oldest_map(); + } + return pg_shard_manager.set_superblock(superblock); + }).then([this] { return pg_shard_manager.get_local_map(superblock.current_epoch); }).then([this](OSDMapService::local_cached_map_t&& map) { osdmap = make_local_shared_foreign(OSDMapService::local_cached_map_t(map)); @@ -864,6 +868,25 @@ void OSD::handle_authentication(const EntityName& name, } } +const char** OSD::get_tracked_conf_keys() const +{ + static const char* KEYS[] = { + "osd_beacon_report_interval", + nullptr + }; + return KEYS; +} + +void OSD::handle_conf_change( + const crimson::common::ConfigProxy& conf, + const std::set <std::string> &changed) +{ + if (changed.count("osd_beacon_report_interval")) { + beacon_timer.rearm_periodic( + std::chrono::seconds(conf->osd_beacon_report_interval)); + } +} + void OSD::update_stats() { osd_stat_seq++; @@ -879,13 +902,20 @@ void OSD::update_stats() }); } -seastar::future<MessageURef> OSD::get_stats() const +seastar::future<MessageURef> OSD::get_stats() { // MPGStats::had_map_for is not used since PGMonitor was removed auto m = crimson::make_message<MPGStats>(monc->get_fsid(), osdmap->get_epoch()); m->osd_stat = osd_stat; return pg_shard_manager.get_pg_stats( - ).then([m=std::move(m)](auto &&stats) mutable { + ).then([this, m=std::move(m)](auto &&stats) mutable { + min_last_epoch_clean = osdmap->get_epoch(); + min_last_epoch_clean_pgs.clear(); + for (auto [pgid, stat] : stats) { + min_last_epoch_clean = std::min(min_last_epoch_clean, + stat.get_effective_last_epoch_clean()); + min_last_epoch_clean_pgs.push_back(pgid); + } m->pg_stat = std::move(stats); return seastar::make_ready_future<MessageURef>(std::move(m)); }); @@ -934,6 +964,16 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m) logger().info("handle_osd_map epochs [{}..{}], i have {}, src has [{}..{}]", first, last, superblock.get_newest_map(), m->cluster_osdmap_trim_lower_bound, m->newest_map); + + if (superblock.cluster_osdmap_trim_lower_bound < + m->cluster_osdmap_trim_lower_bound) { + superblock.cluster_osdmap_trim_lower_bound = + m->cluster_osdmap_trim_lower_bound; + logger().debug("{} superblock cluster_osdmap_trim_lower_bound new epoch is: {}", + __func__, superblock.cluster_osdmap_trim_lower_bound); + ceph_assert( + superblock.cluster_osdmap_trim_lower_bound >= superblock.get_oldest_map()); + } // make sure there is something new, here, before we bother flushing // the queues and such if (last <= superblock.get_newest_map()) { @@ -964,8 +1004,9 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m) monc->sub_got("osdmap", last); if (!superblock.maps.empty()) { - // TODO: support osdmap trimming - // See: <tracker> + pg_shard_manager.trim_maps(t, superblock); + // TODO: once we support pg splitting, update pg_num_history here + //pg_num_history.prune(superblock.get_oldest_map()); } superblock.insert_osdmap_epochs(first, last); @@ -977,11 +1018,13 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m) superblock.clean_thru = last; } pg_shard_manager.get_meta_coll().store_superblock(t, superblock); - pg_shard_manager.set_superblock(superblock); - logger().debug("OSD::handle_osd_map: do_transaction..."); - return store.get_sharded_store().do_transaction( - pg_shard_manager.get_meta_coll().collection(), - std::move(t)); + return pg_shard_manager.set_superblock(superblock).then( + [this, &t] { + logger().debug("OSD::handle_osd_map: do_transaction..."); + return store.get_sharded_store().do_transaction( + pg_shard_manager.get_meta_coll().collection(), + std::move(t)); + }); }); }).then([=, this] { // TODO: write to superblock and commit the transaction @@ -1266,14 +1309,13 @@ seastar::future<> OSD::send_beacon() if (!pg_shard_manager.is_active()) { return seastar::now(); } - // FIXME: min lec should be calculated from pg_stat - // and should set m->pgs - epoch_t min_last_epoch_clean = osdmap->get_epoch(); - auto m = crimson::make_message<MOSDBeacon>(osdmap->get_epoch(), + auto beacon = crimson::make_message<MOSDBeacon>(osdmap->get_epoch(), min_last_epoch_clean, superblock.last_purged_snaps_scrub, local_conf()->osd_beacon_report_interval); - return monc->send_message(std::move(m)); + beacon->pgs = min_last_epoch_clean_pgs; + logger().debug("{} {}", __func__, *beacon); + return monc->send_message(std::move(beacon)); } seastar::future<> OSD::update_heartbeat_peers() diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h index 10ff60d4701..134376ad947 100644 --- a/src/crimson/osd/osd.h +++ b/src/crimson/osd/osd.h @@ -61,7 +61,8 @@ class PG; class OSD final : public crimson::net::Dispatcher, private crimson::common::AuthHandler, - private crimson::mgr::WithStats { + private crimson::mgr::WithStats, + public md_config_obs_t { const int whoami; const uint32_t nonce; seastar::abort_source& abort_source; @@ -106,8 +107,11 @@ class OSD final : public crimson::net::Dispatcher, // pg statistics including osd ones osd_stat_t osd_stat; uint32_t osd_stat_seq = 0; + epoch_t min_last_epoch_clean = 0; + // which pgs were scanned for min_lec + std::vector<pg_t> min_last_epoch_clean_pgs; void update_stats(); - seastar::future<MessageURef> get_stats() const final; + seastar::future<MessageURef> get_stats() final; // AuthHandler methods void handle_authentication(const EntityName& name, @@ -123,6 +127,10 @@ class OSD final : public crimson::net::Dispatcher, std::unique_ptr<Heartbeat> heartbeat; seastar::timer<seastar::lowres_clock> tick_timer; + const char** get_tracked_conf_keys() const final; + void handle_conf_change(const ConfigProxy& conf, + const std::set<std::string> &changed) final; + // admin-socket seastar::lw_shared_ptr<crimson::admin::AdminSocket> asok; diff --git a/src/crimson/osd/osd_meta.cc b/src/crimson/osd/osd_meta.cc index e40b2b2464b..54785a975f0 100644 --- a/src/crimson/osd/osd_meta.cc +++ b/src/crimson/osd/osd_meta.cc @@ -24,6 +24,11 @@ void OSDMeta::store_map(ceph::os::Transaction& t, t.write(coll->get_cid(), osdmap_oid(e), 0, m.length(), m); } +void OSDMeta::remove_map(ceph::os::Transaction& t, epoch_t e) +{ + t.remove(coll->get_cid(), osdmap_oid(e)); +} + seastar::future<bufferlist> OSDMeta::load_map(epoch_t e) { return store.read(coll, diff --git a/src/crimson/osd/osd_meta.h b/src/crimson/osd/osd_meta.h index 652266d9e20..3632a9feef3 100644 --- a/src/crimson/osd/osd_meta.h +++ b/src/crimson/osd/osd_meta.h @@ -40,6 +40,7 @@ public: void store_map(ceph::os::Transaction& t, epoch_t e, const bufferlist& m); + void remove_map(ceph::os::Transaction& t, epoch_t e); seastar::future<bufferlist> load_map(epoch_t e); void store_superblock(ceph::os::Transaction& t, diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index 056d5e9e1c0..f56f2dc6627 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -348,8 +348,7 @@ public: void on_active_advmap(const OSDMapRef &osdmap) final; epoch_t cluster_osdmap_trim_lower_bound() final { - // TODO - return 0; + return shard_services.get_osdmap_tlb(); } void on_backfill_reserved() final { diff --git a/src/crimson/osd/pg_shard_manager.cc b/src/crimson/osd/pg_shard_manager.cc index 6061c856be2..1980b36271c 100644 --- a/src/crimson/osd/pg_shard_manager.cc +++ b/src/crimson/osd/pg_shard_manager.cc @@ -105,4 +105,13 @@ seastar::future<> PGShardManager::set_up_epoch(epoch_t e) { }); } +seastar::future<> PGShardManager::set_superblock(OSDSuperblock superblock) { + ceph_assert(seastar::this_shard_id() == PRIMARY_CORE); + get_osd_singleton_state().set_singleton_superblock(superblock); + return shard_services.invoke_on_all( + [superblock = std::move(superblock)](auto &local_service) { + return local_service.local_state.update_shard_superblock(superblock); + }); +} + } diff --git a/src/crimson/osd/pg_shard_manager.h b/src/crimson/osd/pg_shard_manager.h index 74154499c8a..1559dde0cb1 100644 --- a/src/crimson/osd/pg_shard_manager.h +++ b/src/crimson/osd/pg_shard_manager.h @@ -129,16 +129,17 @@ public: FORWARD_TO_OSD_SINGLETON(init_meta_coll) FORWARD_TO_OSD_SINGLETON(get_meta_coll) - FORWARD_TO_OSD_SINGLETON(set_superblock) - // Core OSDMap methods FORWARD_TO_OSD_SINGLETON(get_local_map) FORWARD_TO_OSD_SINGLETON(load_map_bl) FORWARD_TO_OSD_SINGLETON(load_map_bls) FORWARD_TO_OSD_SINGLETON(store_maps) + FORWARD_TO_OSD_SINGLETON(trim_maps) seastar::future<> set_up_epoch(epoch_t e); + seastar::future<> set_superblock(OSDSuperblock superblock); + template <typename F> auto with_remote_shard_state(core_id_t core, F &&f) { return shard_services.invoke_on( diff --git a/src/crimson/osd/shard_services.cc b/src/crimson/osd/shard_services.cc index c2438809020..c321439df7a 100644 --- a/src/crimson/osd/shard_services.cc +++ b/src/crimson/osd/shard_services.cc @@ -72,7 +72,7 @@ seastar::future<> PerShardState::stop_pgs() }); } -std::map<pg_t, pg_stat_t> PerShardState::get_pg_stats() const +std::map<pg_t, pg_stat_t> PerShardState::get_pg_stats() { assert_core(); std::map<pg_t, pg_stat_t> ret; @@ -119,6 +119,13 @@ HeartbeatStampsRef PerShardState::get_hb_stamps(int peer) return stamps->second; } +seastar::future<> PerShardState::update_shard_superblock(OSDSuperblock superblock) +{ + assert_core(); + per_shard_superblock = std::move(superblock); + return seastar::now(); +} + OSDSingletonState::OSDSingletonState( int whoami, crimson::net::Messenger &cluster_msgr, @@ -352,7 +359,6 @@ void OSDSingletonState::handle_conf_change( seastar::future<OSDSingletonState::local_cached_map_t> OSDSingletonState::get_local_map(epoch_t e) { - // TODO: use LRU cache for managing osdmap, fallback to disk if we have to if (auto found = osdmaps.find(e); found) { logger().debug("{} osdmap.{} found in cache", __func__, e); return seastar::make_ready_future<local_cached_map_t>(std::move(found)); @@ -392,6 +398,9 @@ seastar::future<std::map<epoch_t, bufferlist>> OSDSingletonState::load_map_bls( logger().debug("{} loading maps [{},{}]", __func__, first, last); ceph_assert(first <= last); + // TODO: take osd_map_max into account + //int max = cct->_conf->osd_map_message_max; + //ssize_t max_bytes = cct->_conf->osd_map_message_max_bytes; return seastar::map_reduce(boost::make_counting_iterator<epoch_t>(first), boost::make_counting_iterator<epoch_t>(last + 1), [this](epoch_t e) { @@ -458,6 +467,34 @@ seastar::future<> OSDSingletonState::store_maps(ceph::os::Transaction& t, }); } +// Note: store/set_superblock is called in later OSD::handle_osd_map +// so we use the OSD's superblock reference meanwhile. +void OSDSingletonState::trim_maps(ceph::os::Transaction& t, + OSDSuperblock& superblock) +{ + epoch_t min = + std::min(superblock.cluster_osdmap_trim_lower_bound, + osdmaps.cached_key_lower_bound()); + + if (min <= superblock.get_oldest_map()) { + return; + } + logger().debug("{}: min={} oldest_map={}", __func__, min, superblock.get_oldest_map()); + + // Trim from the superblock's oldest_map up to `min`. + // Break if we have exceeded the txn target size. + while (superblock.get_oldest_map() < min && + t.get_num_ops() < crimson::common::local_conf()->osd_target_transaction_size) { + logger().debug("{}: removing old osdmap epoch {}", __func__, superblock.get_oldest_map()); + meta_coll->remove_map(t, superblock.get_oldest_map()); + superblock.maps.erase(superblock.get_oldest_map()); + } + + // we should not trim past osdmaps.cached_key_lower_bound() + // as there may still be PGs with those map epochs recorded. + ceph_assert(min <= osdmaps.cached_key_lower_bound()); +} + seastar::future<Ref<PG>> ShardServices::make_pg( OSDMapService::cached_map_t create_map, spg_t pgid, @@ -716,30 +753,36 @@ seastar::future<> OSDSingletonState::send_incremental_map( "superblock's oldest map: {}", __func__, first, superblock.get_oldest_map()); if (first >= superblock.get_oldest_map()) { + // TODO: osd_map_share_max_epochs + // See OSDService::build_incremental_map_msg + if (first < superblock.cluster_osdmap_trim_lower_bound) { + logger().info("{}: cluster osdmap lower bound: {} " + " > first {}, starting with full map", + __func__, superblock.cluster_osdmap_trim_lower_bound, first); + // we don't have the next map the target wants, + // so start with a full map. + first = superblock.cluster_osdmap_trim_lower_bound; + } return load_map_bls( first, superblock.get_newest_map() - ).then([this, &conn, first](auto&& bls) { + ).then([this, &conn](auto&& bls) { auto m = crimson::make_message<MOSDMap>( monc.get_fsid(), osdmap->get_encoding_features()); - m->cluster_osdmap_trim_lower_bound = first; + m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound; m->newest_map = superblock.get_newest_map(); m->maps = std::move(bls); return conn.send(std::move(m)); }); } else { + // See OSDService::send_incremental_map + // just send latest full map return load_map_bl(osdmap->get_epoch() ).then([this, &conn](auto&& bl) mutable { auto m = crimson::make_message<MOSDMap>( monc.get_fsid(), osdmap->get_encoding_features()); - /* TODO: once we support the tracking of superblock's - * cluster_osdmap_trim_lower_bound, the MOSDMap should - * be populated with this value instead of the oldest_map. - * See: OSD::handle_osd_map for how classic updates the - * cluster's trim lower bound. - */ - m->cluster_osdmap_trim_lower_bound = superblock.get_oldest_map(); + m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound; m->newest_map = superblock.get_newest_map(); m->maps.emplace(osdmap->get_epoch(), std::move(bl)); return conn.send(std::move(m)); diff --git a/src/crimson/osd/shard_services.h b/src/crimson/osd/shard_services.h index d71513a6645..37993a4f679 100644 --- a/src/crimson/osd/shard_services.h +++ b/src/crimson/osd/shard_services.h @@ -77,6 +77,10 @@ class PerShardState { PerfCounters *perf = nullptr; PerfCounters *recoverystate_perf = nullptr; + const epoch_t& get_osdmap_tlb() { + return per_shard_superblock.cluster_osdmap_trim_lower_bound; + } + // Op Management OSDOperationRegistry registry; OperationThrottler throttler; @@ -115,7 +119,7 @@ class PerShardState { PGMap pg_map; seastar::future<> stop_pgs(); - std::map<pg_t, pg_stat_t> get_pg_stats() const; + std::map<pg_t, pg_stat_t> get_pg_stats(); seastar::future<> broadcast_map_to_pgs( ShardServices &shard_services, epoch_t epoch); @@ -181,6 +185,8 @@ class PerShardState { HeartbeatStampsRef get_hb_stamps(int peer); std::map<int, HeartbeatStampsRef> heartbeat_stamps; + seastar::future<> update_shard_superblock(OSDSuperblock superblock); + // Time state const ceph::mono_time startup_time; ceph::signedspan get_mnow() const { @@ -188,6 +194,8 @@ class PerShardState { return ceph::mono_clock::now() - startup_time; } + OSDSuperblock per_shard_superblock; + public: PerShardState( int whoami, @@ -256,7 +264,7 @@ private: } OSDSuperblock superblock; - void set_superblock(OSDSuperblock _superblock) { + void set_singleton_superblock(OSDSuperblock _superblock) { superblock = std::move(_superblock); } @@ -316,6 +324,7 @@ private: epoch_t e, bufferlist&& bl); seastar::future<> store_maps(ceph::os::Transaction& t, epoch_t start, Ref<MOSDMap> m); + void trim_maps(ceph::os::Transaction& t, OSDSuperblock& superblock); }; /** @@ -508,6 +517,8 @@ public: FORWARD_TO_OSD_SINGLETON(send_pg_temp) FORWARD_TO_LOCAL_CONST(get_mnow) FORWARD_TO_LOCAL(get_hb_stamps) + FORWARD_TO_LOCAL(update_shard_superblock) + FORWARD_TO_LOCAL(get_osdmap_tlb) FORWARD(pg_created, pg_created, local_state.pg_map) |