diff options
author | Venky Shankar <vshankar@redhat.com> | 2024-08-08 07:44:41 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-08 07:44:41 +0200 |
commit | 5ba8c92b978cfb311a630097ef43883219a2cb6d (patch) | |
tree | 2e1ef7bd2f35f338ce28e7d9a3de7d0a668b5c3b /src/mds | |
parent | Merge pull request #58983 from cyx1231st/wip-seastore-cleanup-cache (diff) | |
parent | mds: use intrusive set for ClientLease tracking (diff) | |
download | ceph-5ba8c92b978cfb311a630097ef43883219a2cb6d.tar.xz ceph-5ba8c92b978cfb311a630097ef43883219a2cb6d.zip |
Merge pull request #57911 from gardran/wip-gdran-mds-better-clease-handling
mds: some optimizations around client Capability and Lease tracking
Reviewed-by: Venky Shankar <vshankar@redhat.com>
Diffstat (limited to 'src/mds')
-rw-r--r-- | src/mds/CDentry.cc | 41 | ||||
-rw-r--r-- | src/mds/CDentry.h | 59 | ||||
-rw-r--r-- | src/mds/CDir.cc | 2 | ||||
-rw-r--r-- | src/mds/Capability.cc | 3 | ||||
-rw-r--r-- | src/mds/Capability.h | 6 | ||||
-rw-r--r-- | src/mds/Locker.cc | 77 | ||||
-rw-r--r-- | src/mds/Locker.h | 7 | ||||
-rw-r--r-- | src/mds/MDCache.cc | 6 | ||||
-rw-r--r-- | src/mds/MDSCacheObject.h | 19 | ||||
-rw-r--r-- | src/mds/SnapRealm.h | 20 |
10 files changed, 122 insertions, 118 deletions
diff --git a/src/mds/CDentry.cc b/src/mds/CDentry.cc index b9a232798d8..942bd9a9ec6 100644 --- a/src/mds/CDentry.cc +++ b/src/mds/CDentry.cc @@ -500,22 +500,30 @@ void CDentry::decode_lock_state(int type, const bufferlist& bl) } -ClientLease *CDentry::add_client_lease(client_t c, Session *session) -{ - ClientLease *l; - if (client_lease_map.count(c)) - l = client_lease_map[c]; - else { - dout(20) << __func__ << " client." << c << " on " << lock << dendl; - if (client_lease_map.empty()) { +MEMPOOL_DEFINE_OBJECT_FACTORY(ClientLease, mds_client_lease, mds_co); + +client_t ClientLease::get_client() const +{ + return session->get_client(); +} + +ClientLease *CDentry::add_client_lease(Session *session) +{ + client_t client = session->get_client(); + ClientLease* l = nullptr; + auto it = client_leases.lower_bound(client); + if (it == client_leases.end() || it->get_client() != client) { + l = new ClientLease(this, session); + dout(20) << __func__ << " client." << client << " on " << lock << dendl; + if (client_leases.empty()) { get(PIN_CLIENTLEASE); lock.get_client_lease(); } - l = client_lease_map[c] = new ClientLease(c, this); + client_leases.insert_before(it, *l); l->seq = ++session->lease_seq; - + } else { + l = &(*it); } - return l; } @@ -524,15 +532,14 @@ void CDentry::remove_client_lease(ClientLease *l, Locker *locker) ceph_assert(l->parent == this); bool gather = false; + dout(20) << __func__ << " client." << l->get_client() << " on " << lock << dendl; - dout(20) << __func__ << " client." << l->client << " on " << lock << dendl; - - client_lease_map.erase(l->client); l->item_lease.remove_myself(); l->item_session_lease.remove_myself(); + client_leases.erase(client_leases.iterator_to(*l)); delete l; - if (client_lease_map.empty()) { + if (client_leases.empty()) { gather = !lock.is_stable(); lock.put_client_lease(); put(PIN_CLIENTLEASE); @@ -544,8 +551,8 @@ void CDentry::remove_client_lease(ClientLease *l, Locker *locker) void CDentry::remove_client_leases(Locker *locker) { - while (!client_lease_map.empty()) - remove_client_lease(client_lease_map.begin()->second, locker); + while (!client_leases.empty()) + remove_client_lease(&(*client_leases.begin()), locker); } void CDentry::_put() diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h index 2566395d185..ca36da0354f 100644 --- a/src/mds/CDentry.h +++ b/src/mds/CDentry.h @@ -17,7 +17,6 @@ #include <string> #include <string_view> -#include <set> #include "include/counter.h" #include "include/types.h" @@ -25,6 +24,7 @@ #include "include/lru.h" #include "include/elist.h" #include "include/filepath.h" +#include <boost/intrusive/set.hpp> #include "BatchOp.h" #include "MDSCacheObject.h" @@ -38,9 +38,35 @@ class CDir; class Locker; class CDentry; class LogSegment; - class Session; +struct ClientLease : public boost::intrusive::set_base_hook<> +{ + MEMPOOL_CLASS_HELPERS(); + + ClientLease(CDentry *p, Session *s) : + parent(p), session(s), + item_session_lease(this), + item_lease(this) { } + ClientLease() = delete; + client_t get_client() const; + + CDentry *parent; + Session *session; + + ceph_seq_t seq = 0; + utime_t ttl; + xlist<ClientLease*>::item item_session_lease; // per-session list + xlist<ClientLease*>::item item_lease; // global list +}; +struct client_is_key +{ + typedef client_t type; + const type operator() (const ClientLease& l) const { + return l.get_client(); + } +}; + // define an ordering bool operator<(const CDentry& l, const CDentry& r); @@ -324,27 +350,25 @@ public: // replicas (on clients) bool is_any_leases() const { - return !client_lease_map.empty(); + return !client_leases.empty(); } const ClientLease *get_client_lease(client_t c) const { - if (client_lease_map.count(c)) - return client_lease_map.find(c)->second; - return 0; + auto it = client_leases.find(c); + if (it != client_leases.end()) + return &(*it); + return nullptr; } ClientLease *get_client_lease(client_t c) { - if (client_lease_map.count(c)) - return client_lease_map.find(c)->second; - return 0; + auto it = client_leases.find(c); + if (it != client_leases.end()) + return &(*it); + return nullptr; } bool have_client_lease(client_t c) const { - const ClientLease *l = get_client_lease(c); - if (l) - return true; - else - return false; + return client_leases.count(c); } - ClientLease *add_client_lease(client_t c, Session *session); + ClientLease *add_client_lease(Session *session); void remove_client_lease(ClientLease *r, Locker *locker); // returns remaining mask (if any), and kicks locker eval_gathers void remove_client_leases(Locker *locker); @@ -373,7 +397,10 @@ public: SimpleLock lock; // FIXME referenced containers not in mempool LocalLockC versionlock; // FIXME referenced containers not in mempool - mempool::mds_co::map<client_t,ClientLease*> client_lease_map; + typedef boost::intrusive::set< + ClientLease, boost::intrusive::key_of_value<client_is_key>> ClientLeaseMap; + ClientLeaseMap client_leases; + std::map<int, std::unique_ptr<BatchOp>> batch_ops; ceph_tid_t reintegration_reqid = 0; diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index acddeb4f1d1..89e2e5e4be9 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -510,7 +510,7 @@ void CDir::remove_dentry(CDentry *dn) dout(12) << __func__ << " " << *dn << dendl; // there should be no client leases at this point! - ceph_assert(dn->client_lease_map.empty()); + ceph_assert(dn->client_leases.empty()); if (state_test(CDir::STATE_DNPINNEDFRAG)) { dn->put(CDentry::PIN_FRAGMENTING); diff --git a/src/mds/Capability.cc b/src/mds/Capability.cc index de2a16e1ab1..9a3d093f9eb 100644 --- a/src/mds/Capability.cc +++ b/src/mds/Capability.cc @@ -151,8 +151,7 @@ void Capability::revoke_info::generate_test_instances(std::list<Capability::revo * Capability */ Capability::Capability(CInode *i, Session *s, uint64_t id) : - item_session_caps(this), item_snaprealm_caps(this), - item_revoking_caps(this), item_client_revoking_caps(this), + item_session_caps(this), lock_caches(member_offset(MDLockCache, item_cap_lock_cache)), inode(i), session(s), cap_id(id) { diff --git a/src/mds/Capability.h b/src/mds/Capability.h index ebc626a2294..9680895a5c8 100644 --- a/src/mds/Capability.h +++ b/src/mds/Capability.h @@ -336,9 +336,9 @@ public: int64_t last_rsize = 0; xlist<Capability*>::item item_session_caps; - xlist<Capability*>::item item_snaprealm_caps; - xlist<Capability*>::item item_revoking_caps; - xlist<Capability*>::item item_client_revoking_caps; + elist<Capability*>::item item_snaprealm_caps; + elist<Capability*>::item item_revoking_caps; + elist<Capability*>::item item_client_revoking_caps; elist<MDLockCache*> lock_caches; int get_lock_cache_allowed() const { return lock_cache_allowed; } diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index f4fb1a114d9..9668da03278 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -72,7 +72,9 @@ public: }; Locker::Locker(MDSRank *m, MDCache *c) : - need_snapflush_inodes(member_offset(CInode, item_to_flush)), mds(m), mdcache(c) {} + revoking_caps(member_offset(Capability, item_revoking_caps)), + need_snapflush_inodes(member_offset(CInode, item_to_flush)), + mds(m), mdcache(c) {} void Locker::dispatch(const cref_t<Message> &m) @@ -2629,9 +2631,11 @@ int Locker::issue_caps(CInode *in, Capability *only_cap) int op = (before & ~after) ? CEPH_CAP_OP_REVOKE : CEPH_CAP_OP_GRANT; if (op == CEPH_CAP_OP_REVOKE) { - if (mds->logger) mds->logger->inc(l_mdss_ceph_cap_op_revoke); + if (mds->logger) mds->logger->inc(l_mdss_ceph_cap_op_revoke); revoking_caps.push_back(&cap->item_revoking_caps); - revoking_caps_by_client[cap->get_client()].push_back(&cap->item_client_revoking_caps); + auto em = revoking_caps_by_client.emplace(cap->get_client(), + member_offset(Capability, item_client_revoking_caps)); + em.first->second.push_back(&cap->item_client_revoking_caps); cap->set_last_revoke_stamp(ceph_clock_now()); cap->reset_num_revoke_warnings(); } else { @@ -2670,7 +2674,7 @@ void Locker::issue_truncate(CInode *in) cap->get_mseq(), mds->get_osd_epoch_barrier()); in->encode_cap_message(m, cap); - mds->send_message_client_counted(m, p.first); + mds->send_message_client_counted(m, cap->get_session()); } // should we increase max_size? @@ -3160,7 +3164,7 @@ void Locker::share_inode_max_size(CInode *in, Capability *only_cap) cap->get_mseq(), mds->get_osd_epoch_barrier()); in->encode_cap_message(m, cap); - mds->send_message_client_counted(m, client); + mds->send_message_client_counted(m, cap->get_session()); } if (only_cap) break; @@ -4311,42 +4315,33 @@ void Locker::remove_client_cap(CInode *in, Capability *cap, bool kill) try_eval(in, CEPH_CAP_LOCKS); } - -/** - * Return true if any currently revoking caps exceed the - * session_timeout threshold. - */ -bool Locker::any_late_revoking_caps(xlist<Capability*> const &revoking, - double timeout) const +std::set<client_t> Locker::get_late_revoking_clients(double timeout) { - xlist<Capability*>::const_iterator p = revoking.begin(); - if (p.end()) { + auto any_late_revoking = [timeout](elist<Capability*> &revoking) { + auto p = revoking.begin(); + if (p.end()) // No revoking caps at the moment return false; - } else { - utime_t now = ceph_clock_now(); - utime_t age = now - (*p)->get_last_revoke_stamp(); - if (age <= timeout) { - return false; - } else { - return true; - } - } -} -std::set<client_t> Locker::get_late_revoking_clients(double timeout) const -{ - std::set<client_t> result; + utime_t now = ceph_clock_now(); + return now - (*p)->get_last_revoke_stamp() > timeout; + }; - if (any_late_revoking_caps(revoking_caps, timeout)) { + std::set<client_t> result; + if (!any_late_revoking(revoking_caps)) { + // Fast path: no misbehaving clients, execute in O(1) + } else { // Slow path: execute in O(N_clients) - for (auto &p : revoking_caps_by_client) { - if (any_late_revoking_caps(p.second, timeout)) { - result.insert(p.first); + for (auto it = revoking_caps_by_client.begin(); + it != revoking_caps_by_client.end(); ) { + if (it->second.empty()) { + revoking_caps_by_client.erase(it++); + continue; } + if (any_late_revoking(it->second)) + result.insert(it->first); + ++it; } - } else { - // Fast path: no misbehaving clients, execute in O(1) } return result; } @@ -4378,11 +4373,10 @@ void Locker::caps_tick() } } - dout(20) << __func__ << " " << revoking_caps.size() << " revoking caps" << dendl; now = ceph_clock_now(); int n = 0; - for (xlist<Capability*>::iterator p = revoking_caps.begin(); !p.end(); ++p) { + for (auto p = revoking_caps.begin(); !p.end(); ++p) { Capability *cap = *p; utime_t age = now - cap->get_last_revoke_stamp(); @@ -4508,7 +4502,7 @@ void Locker::issue_client_lease(CDentry *dn, CInode *in, const MDRequestRef& mdr ceph_assert(!in); } // issue a dentry lease - ClientLease *l = dn->add_client_lease(client, session); + ClientLease *l = dn->add_client_lease(session); session->touch_lease(l); int pool = 1; // fixme.. do something smart! @@ -4537,11 +4531,8 @@ void Locker::issue_client_lease(CDentry *dn, CInode *in, const MDRequestRef& mdr void Locker::revoke_client_leases(SimpleLock *lock) { CDentry *dn = static_cast<CDentry*>(lock->get_parent()); - for (map<client_t, ClientLease*>::iterator p = dn->client_lease_map.begin(); - p != dn->client_lease_map.end(); - ++p) { - ClientLease *l = p->second; - + for (ClientLease& l : dn->client_leases) { + ceph_assert(lock->get_type() == CEPH_LOCK_DN); CDentry *dn = static_cast<CDentry*>(lock->get_parent()); @@ -4549,8 +4540,8 @@ void Locker::revoke_client_leases(SimpleLock *lock) // i should also revoke the dir ICONTENT lease, if they have it! CInode *diri = dn->get_dir()->get_inode(); - auto lease = make_message<MClientLease>(CEPH_MDS_LEASE_REVOKE, l->seq, mask, diri->ino(), diri->first, CEPH_NOSNAP, dn->get_name()); - mds->send_message_client_counted(lease, l->client); + auto lease = make_message<MClientLease>(CEPH_MDS_LEASE_REVOKE, l.seq, mask, diri->ino(), diri->first, CEPH_NOSNAP, dn->get_name()); + mds->send_message_client_counted(lease, l.session); } } diff --git a/src/mds/Locker.h b/src/mds/Locker.h index aa037ac6abd..eab345984c8 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -151,7 +151,7 @@ public: void remove_client_cap(CInode *in, Capability *cap, bool kill=false); - std::set<client_t> get_late_revoking_clients(double timeout) const; + std::set<client_t> get_late_revoking_clients(double timeout); void snapflush_nudge(CInode *in); void mark_need_snapflush_inode(CInode *in); @@ -249,9 +249,9 @@ protected: xlist<ScatterLock*> updated_scatterlocks; // Maintain a global list to quickly find if any caps are late revoking - xlist<Capability*> revoking_caps; + elist<Capability*> revoking_caps; // Maintain a per-client list to find clients responsible for late ones quickly - std::map<client_t, xlist<Capability*> > revoking_caps_by_client; + std::map<client_t, elist<Capability*> > revoking_caps_by_client; elist<CInode*> need_snapflush_inodes; @@ -267,7 +267,6 @@ private: void handle_quiesce_failure(const MDRequestRef& mdr, std::string_view& marker); - bool any_late_revoking_caps(xlist<Capability*> const &revoking, double timeout) const; uint64_t calc_new_max_size(const CInode::inode_const_ptr& pi, uint64_t size); __u32 get_xattr_total_length(CInode::mempool_xattr_map &xattr); void decode_new_xattrs(CInode::mempool_inode *inode, diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 94b6af0b4e7..e9cb50c6e00 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -5696,7 +5696,7 @@ void MDCache::prepare_realm_merge(SnapRealm *realm, SnapRealm *parent_realm, split_realms.push_back((*p)->inode->ino()); for (const auto& p : realm->client_caps) { - ceph_assert(!p.second->empty()); + ceph_assert(!p.second.empty()); auto em = splits.emplace(std::piecewise_construct, std::forward_as_tuple(p.first), std::forward_as_tuple()); if (em.second) { auto update = make_message<MClientSnap>(CEPH_SNAP_OP_SPLIT); @@ -7820,7 +7820,7 @@ void MDCache::trim_client_leases() ClientLease *r = list.front(); if (r->ttl > now) break; CDentry *dn = static_cast<CDentry*>(r->parent); - dout(10) << " expiring client." << r->client << " lease of " << *dn << dendl; + dout(10) << " expiring client." << r->get_client() << " lease of " << *dn << dendl; dn->remove_client_lease(r, mds->locker); } auto after = list.size(); @@ -10087,7 +10087,7 @@ void MDCache::do_realm_invalidate_and_update_notify(CInode *in, int snapop, bool for (const auto& p : realm->client_caps) { const auto& client = p.first; const auto& caps = p.second; - ceph_assert(!caps->empty()); + ceph_assert(!caps.empty()); auto em = updates.emplace(std::piecewise_construct, std::forward_as_tuple(client), std::forward_as_tuple()); if (em.second) { diff --git a/src/mds/MDSCacheObject.h b/src/mds/MDSCacheObject.h index d322a05851a..be84d142e9a 100644 --- a/src/mds/MDSCacheObject.h +++ b/src/mds/MDSCacheObject.h @@ -20,9 +20,6 @@ //#define MDS_AUTHPIN_SET // define me for debugging auth pin leaks //#define MDS_VERIFY_FRAGSTAT // do (slow) sanity checking on frags -/* - * for metadata leases to clients - */ class MLock; class SimpleLock; class MDSCacheObject; @@ -32,22 +29,6 @@ namespace ceph { class Formatter; } -struct ClientLease { - ClientLease(client_t c, MDSCacheObject *p) : - client(c), parent(p), - item_session_lease(this), - item_lease(this) { } - ClientLease() = delete; - - client_t client; - MDSCacheObject *parent; - - ceph_seq_t seq = 0; - utime_t ttl; - xlist<ClientLease*>::item item_session_lease; // per-session list - xlist<ClientLease*>::item item_lease; // global list -}; - // print hack struct mdsco_db_line_prefix { explicit mdsco_db_line_prefix(MDSCacheObject *o) : object(o) {} diff --git a/src/mds/SnapRealm.h b/src/mds/SnapRealm.h index 700c1d81e3b..b1c38d03fcb 100644 --- a/src/mds/SnapRealm.h +++ b/src/mds/SnapRealm.h @@ -103,18 +103,18 @@ public: void merge_to(SnapRealm *newparent); void add_cap(client_t client, Capability *cap) { - auto client_caps_entry = client_caps.find(client); - if (client_caps_entry == client_caps.end()) - client_caps_entry = client_caps.emplace(client, - new xlist<Capability*>).first; - client_caps_entry->second->push_back(&cap->item_snaprealm_caps); + auto em = client_caps.emplace(cap->get_client(), + member_offset(Capability, item_snaprealm_caps)); + em.first->second.push_back(&cap->item_snaprealm_caps); } void remove_cap(client_t client, Capability *cap) { + bool last_cap = cap->item_snaprealm_caps.is_singular(); cap->item_snaprealm_caps.remove_myself(); - auto found = client_caps.find(client); - if (found != client_caps.end() && found->second->empty()) { - delete found->second; - client_caps.erase(found); + if (last_cap) { + auto it = client_caps.find(client); + ceph_assert(it != client_caps.end()); + ceph_assert(it->second.empty()); + client_caps.erase(it); } } @@ -129,7 +129,7 @@ public: std::set<SnapRealm*> open_children; // active children that are currently open elist<CInode*> inodes_with_caps; // for efficient realm splits - std::map<client_t, xlist<Capability*>* > client_caps; // to identify clients who need snap notifications + std::map<client_t, elist<Capability*> > client_caps; // to identify clients who need snap notifications protected: void check_cache() const; |