summary | refs | log | tree | commit | diff | stats
path: root/src/mds
diff options
context:
space:
mode:
author: Venky Shankar <vshankar@redhat.com> 2024-08-08 07:44:41 +0200
committer: GitHub <noreply@github.com> 2024-08-08 07:44:41 +0200
commit: 5ba8c92b978cfb311a630097ef43883219a2cb6d (patch)
tree: 2e1ef7bd2f35f338ce28e7d9a3de7d0a668b5c3b /src/mds
parent: Merge pull request #58983 from cyx1231st/wip-seastore-cleanup-cache (diff)
parent: mds: use intrusive set for ClientLease tracking (diff)
download: ceph-5ba8c92b978cfb311a630097ef43883219a2cb6d.tar.xz
          ceph-5ba8c92b978cfb311a630097ef43883219a2cb6d.zip
Merge pull request #57911 from gardran/wip-gdran-mds-better-clease-handling

mds: some optimizations around client Capability and Lease tracking

Reviewed-by: Venky Shankar <vshankar@redhat.com>
Diffstat (limited to 'src/mds')
-rw-r--r-- src/mds/CDentry.cc 41
-rw-r--r-- src/mds/CDentry.h 59
-rw-r--r-- src/mds/CDir.cc 2
-rw-r--r-- src/mds/Capability.cc 3
-rw-r--r-- src/mds/Capability.h 6
-rw-r--r-- src/mds/Locker.cc 77
-rw-r--r-- src/mds/Locker.h 7
-rw-r--r-- src/mds/MDCache.cc 6
-rw-r--r-- src/mds/MDSCacheObject.h 19
-rw-r--r-- src/mds/SnapRealm.h 20
10 files changed, 122 insertions, 118 deletions
diff --git a/src/mds/CDentry.cc b/src/mds/CDentry.cc
index b9a232798d8..942bd9a9ec6 100644
--- a/src/mds/CDentry.cc
+++ b/src/mds/CDentry.cc
@@ -500,22 +500,30 @@ void CDentry::decode_lock_state(int type, const bufferlist& bl)
}
-ClientLease *CDentry::add_client_lease(client_t c, Session *session)
-{
- ClientLease *l;
- if (client_lease_map.count(c))
- l = client_lease_map[c];
- else {
- dout(20) << __func__ << " client." << c << " on " << lock << dendl;
- if (client_lease_map.empty()) {
+MEMPOOL_DEFINE_OBJECT_FACTORY(ClientLease, mds_client_lease, mds_co);
+
+client_t ClientLease::get_client() const
+{
+ return session->get_client();
+}
+
+ClientLease *CDentry::add_client_lease(Session *session)
+{
+ client_t client = session->get_client();
+ ClientLease* l = nullptr;
+ auto it = client_leases.lower_bound(client);
+ if (it == client_leases.end() || it->get_client() != client) {
+ l = new ClientLease(this, session);
+ dout(20) << __func__ << " client." << client << " on " << lock << dendl;
+ if (client_leases.empty()) {
get(PIN_CLIENTLEASE);
lock.get_client_lease();
}
- l = client_lease_map[c] = new ClientLease(c, this);
+ client_leases.insert_before(it, *l);
l->seq = ++session->lease_seq;
-
+ } else {
+ l = &(*it);
}
-
return l;
}
@@ -524,15 +532,14 @@ void CDentry::remove_client_lease(ClientLease *l, Locker *locker)
ceph_assert(l->parent == this);
bool gather = false;
+ dout(20) << __func__ << " client." << l->get_client() << " on " << lock << dendl;
- dout(20) << __func__ << " client." << l->client << " on " << lock << dendl;
-
- client_lease_map.erase(l->client);
l->item_lease.remove_myself();
l->item_session_lease.remove_myself();
+ client_leases.erase(client_leases.iterator_to(*l));
delete l;
- if (client_lease_map.empty()) {
+ if (client_leases.empty()) {
gather = !lock.is_stable();
lock.put_client_lease();
put(PIN_CLIENTLEASE);
@@ -544,8 +551,8 @@ void CDentry::remove_client_lease(ClientLease *l, Locker *locker)
void CDentry::remove_client_leases(Locker *locker)
{
- while (!client_lease_map.empty())
- remove_client_lease(client_lease_map.begin()->second, locker);
+ while (!client_leases.empty())
+ remove_client_lease(&(*client_leases.begin()), locker);
}
void CDentry::_put()
diff --git a/src/mds/CDentry.h b/src/mds/CDentry.h
index 2566395d185..ca36da0354f 100644
--- a/src/mds/CDentry.h
+++ b/src/mds/CDentry.h
@@ -17,7 +17,6 @@
#include <string>
#include <string_view>
-#include <set>
#include "include/counter.h"
#include "include/types.h"
@@ -25,6 +24,7 @@
#include "include/lru.h"
#include "include/elist.h"
#include "include/filepath.h"
+#include <boost/intrusive/set.hpp>
#include "BatchOp.h"
#include "MDSCacheObject.h"
@@ -38,9 +38,35 @@ class CDir;
class Locker;
class CDentry;
class LogSegment;
-
class Session;
+struct ClientLease : public boost::intrusive::set_base_hook<>
+{
+ MEMPOOL_CLASS_HELPERS();
+
+ ClientLease(CDentry *p, Session *s) :
+ parent(p), session(s),
+ item_session_lease(this),
+ item_lease(this) { }
+ ClientLease() = delete;
+ client_t get_client() const;
+
+ CDentry *parent;
+ Session *session;
+
+ ceph_seq_t seq = 0;
+ utime_t ttl;
+ xlist<ClientLease*>::item item_session_lease; // per-session list
+ xlist<ClientLease*>::item item_lease; // global list
+};
+struct client_is_key
+{
+ typedef client_t type;
+ const type operator() (const ClientLease& l) const {
+ return l.get_client();
+ }
+};
+
// define an ordering
bool operator<(const CDentry& l, const CDentry& r);
@@ -324,27 +350,25 @@ public:
// replicas (on clients)
bool is_any_leases() const {
- return !client_lease_map.empty();
+ return !client_leases.empty();
}
const ClientLease *get_client_lease(client_t c) const {
- if (client_lease_map.count(c))
- return client_lease_map.find(c)->second;
- return 0;
+ auto it = client_leases.find(c);
+ if (it != client_leases.end())
+ return &(*it);
+ return nullptr;
}
ClientLease *get_client_lease(client_t c) {
- if (client_lease_map.count(c))
- return client_lease_map.find(c)->second;
- return 0;
+ auto it = client_leases.find(c);
+ if (it != client_leases.end())
+ return &(*it);
+ return nullptr;
}
bool have_client_lease(client_t c) const {
- const ClientLease *l = get_client_lease(c);
- if (l)
- return true;
- else
- return false;
+ return client_leases.count(c);
}
- ClientLease *add_client_lease(client_t c, Session *session);
+ ClientLease *add_client_lease(Session *session);
void remove_client_lease(ClientLease *r, Locker *locker); // returns remaining mask (if any), and kicks locker eval_gathers
void remove_client_leases(Locker *locker);
@@ -373,7 +397,10 @@ public:
SimpleLock lock; // FIXME referenced containers not in mempool
LocalLockC versionlock; // FIXME referenced containers not in mempool
- mempool::mds_co::map<client_t,ClientLease*> client_lease_map;
+ typedef boost::intrusive::set<
+ ClientLease, boost::intrusive::key_of_value<client_is_key>> ClientLeaseMap;
+ ClientLeaseMap client_leases;
+
std::map<int, std::unique_ptr<BatchOp>> batch_ops;
ceph_tid_t reintegration_reqid = 0;
diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc
index acddeb4f1d1..89e2e5e4be9 100644
--- a/src/mds/CDir.cc
+++ b/src/mds/CDir.cc
@@ -510,7 +510,7 @@ void CDir::remove_dentry(CDentry *dn)
dout(12) << __func__ << " " << *dn << dendl;
// there should be no client leases at this point!
- ceph_assert(dn->client_lease_map.empty());
+ ceph_assert(dn->client_leases.empty());
if (state_test(CDir::STATE_DNPINNEDFRAG)) {
dn->put(CDentry::PIN_FRAGMENTING);
diff --git a/src/mds/Capability.cc b/src/mds/Capability.cc
index de2a16e1ab1..9a3d093f9eb 100644
--- a/src/mds/Capability.cc
+++ b/src/mds/Capability.cc
@@ -151,8 +151,7 @@ void Capability::revoke_info::generate_test_instances(std::list<Capability::revo
* Capability
*/
Capability::Capability(CInode *i, Session *s, uint64_t id) :
- item_session_caps(this), item_snaprealm_caps(this),
- item_revoking_caps(this), item_client_revoking_caps(this),
+ item_session_caps(this),
lock_caches(member_offset(MDLockCache, item_cap_lock_cache)),
inode(i), session(s), cap_id(id)
{
diff --git a/src/mds/Capability.h b/src/mds/Capability.h
index ebc626a2294..9680895a5c8 100644
--- a/src/mds/Capability.h
+++ b/src/mds/Capability.h
@@ -336,9 +336,9 @@ public:
int64_t last_rsize = 0;
xlist<Capability*>::item item_session_caps;
- xlist<Capability*>::item item_snaprealm_caps;
- xlist<Capability*>::item item_revoking_caps;
- xlist<Capability*>::item item_client_revoking_caps;
+ elist<Capability*>::item item_snaprealm_caps;
+ elist<Capability*>::item item_revoking_caps;
+ elist<Capability*>::item item_client_revoking_caps;
elist<MDLockCache*> lock_caches;
int get_lock_cache_allowed() const { return lock_cache_allowed; }
diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
index f4fb1a114d9..9668da03278 100644
--- a/src/mds/Locker.cc
+++ b/src/mds/Locker.cc
@@ -72,7 +72,9 @@ public:
};
Locker::Locker(MDSRank *m, MDCache *c) :
- need_snapflush_inodes(member_offset(CInode, item_to_flush)), mds(m), mdcache(c) {}
+ revoking_caps(member_offset(Capability, item_revoking_caps)),
+ need_snapflush_inodes(member_offset(CInode, item_to_flush)),
+ mds(m), mdcache(c) {}
void Locker::dispatch(const cref_t<Message> &m)
@@ -2629,9 +2631,11 @@ int Locker::issue_caps(CInode *in, Capability *only_cap)
int op = (before & ~after) ? CEPH_CAP_OP_REVOKE : CEPH_CAP_OP_GRANT;
if (op == CEPH_CAP_OP_REVOKE) {
- if (mds->logger) mds->logger->inc(l_mdss_ceph_cap_op_revoke);
+ if (mds->logger) mds->logger->inc(l_mdss_ceph_cap_op_revoke);
revoking_caps.push_back(&cap->item_revoking_caps);
- revoking_caps_by_client[cap->get_client()].push_back(&cap->item_client_revoking_caps);
+ auto em = revoking_caps_by_client.emplace(cap->get_client(),
+ member_offset(Capability, item_client_revoking_caps));
+ em.first->second.push_back(&cap->item_client_revoking_caps);
cap->set_last_revoke_stamp(ceph_clock_now());
cap->reset_num_revoke_warnings();
} else {
@@ -2670,7 +2674,7 @@ void Locker::issue_truncate(CInode *in)
cap->get_mseq(),
mds->get_osd_epoch_barrier());
in->encode_cap_message(m, cap);
- mds->send_message_client_counted(m, p.first);
+ mds->send_message_client_counted(m, cap->get_session());
}
// should we increase max_size?
@@ -3160,7 +3164,7 @@ void Locker::share_inode_max_size(CInode *in, Capability *only_cap)
cap->get_mseq(),
mds->get_osd_epoch_barrier());
in->encode_cap_message(m, cap);
- mds->send_message_client_counted(m, client);
+ mds->send_message_client_counted(m, cap->get_session());
}
if (only_cap)
break;
@@ -4311,42 +4315,33 @@ void Locker::remove_client_cap(CInode *in, Capability *cap, bool kill)
try_eval(in, CEPH_CAP_LOCKS);
}
-
-/**
- * Return true if any currently revoking caps exceed the
- * session_timeout threshold.
- */
-bool Locker::any_late_revoking_caps(xlist<Capability*> const &revoking,
- double timeout) const
+std::set<client_t> Locker::get_late_revoking_clients(double timeout)
{
- xlist<Capability*>::const_iterator p = revoking.begin();
- if (p.end()) {
+ auto any_late_revoking = [timeout](elist<Capability*> &revoking) {
+ auto p = revoking.begin();
+ if (p.end())
// No revoking caps at the moment
return false;
- } else {
- utime_t now = ceph_clock_now();
- utime_t age = now - (*p)->get_last_revoke_stamp();
- if (age <= timeout) {
- return false;
- } else {
- return true;
- }
- }
-}
-std::set<client_t> Locker::get_late_revoking_clients(double timeout) const
-{
- std::set<client_t> result;
+ utime_t now = ceph_clock_now();
+ return now - (*p)->get_last_revoke_stamp() > timeout;
+ };
- if (any_late_revoking_caps(revoking_caps, timeout)) {
+ std::set<client_t> result;
+ if (!any_late_revoking(revoking_caps)) {
+ // Fast path: no misbehaving clients, execute in O(1)
+ } else {
// Slow path: execute in O(N_clients)
- for (auto &p : revoking_caps_by_client) {
- if (any_late_revoking_caps(p.second, timeout)) {
- result.insert(p.first);
+ for (auto it = revoking_caps_by_client.begin();
+ it != revoking_caps_by_client.end(); ) {
+ if (it->second.empty()) {
+ revoking_caps_by_client.erase(it++);
+ continue;
}
+ if (any_late_revoking(it->second))
+ result.insert(it->first);
+ ++it;
}
- } else {
- // Fast path: no misbehaving clients, execute in O(1)
}
return result;
}
@@ -4378,11 +4373,10 @@ void Locker::caps_tick()
}
}
- dout(20) << __func__ << " " << revoking_caps.size() << " revoking caps" << dendl;
now = ceph_clock_now();
int n = 0;
- for (xlist<Capability*>::iterator p = revoking_caps.begin(); !p.end(); ++p) {
+ for (auto p = revoking_caps.begin(); !p.end(); ++p) {
Capability *cap = *p;
utime_t age = now - cap->get_last_revoke_stamp();
@@ -4508,7 +4502,7 @@ void Locker::issue_client_lease(CDentry *dn, CInode *in, const MDRequestRef& mdr
ceph_assert(!in);
}
// issue a dentry lease
- ClientLease *l = dn->add_client_lease(client, session);
+ ClientLease *l = dn->add_client_lease(session);
session->touch_lease(l);
int pool = 1; // fixme.. do something smart!
@@ -4537,11 +4531,8 @@ void Locker::issue_client_lease(CDentry *dn, CInode *in, const MDRequestRef& mdr
void Locker::revoke_client_leases(SimpleLock *lock)
{
CDentry *dn = static_cast<CDentry*>(lock->get_parent());
- for (map<client_t, ClientLease*>::iterator p = dn->client_lease_map.begin();
- p != dn->client_lease_map.end();
- ++p) {
- ClientLease *l = p->second;
-
+ for (ClientLease& l : dn->client_leases) {
+
ceph_assert(lock->get_type() == CEPH_LOCK_DN);
CDentry *dn = static_cast<CDentry*>(lock->get_parent());
@@ -4549,8 +4540,8 @@ void Locker::revoke_client_leases(SimpleLock *lock)
// i should also revoke the dir ICONTENT lease, if they have it!
CInode *diri = dn->get_dir()->get_inode();
- auto lease = make_message<MClientLease>(CEPH_MDS_LEASE_REVOKE, l->seq, mask, diri->ino(), diri->first, CEPH_NOSNAP, dn->get_name());
- mds->send_message_client_counted(lease, l->client);
+ auto lease = make_message<MClientLease>(CEPH_MDS_LEASE_REVOKE, l.seq, mask, diri->ino(), diri->first, CEPH_NOSNAP, dn->get_name());
+ mds->send_message_client_counted(lease, l.session);
}
}
diff --git a/src/mds/Locker.h b/src/mds/Locker.h
index aa037ac6abd..eab345984c8 100644
--- a/src/mds/Locker.h
+++ b/src/mds/Locker.h
@@ -151,7 +151,7 @@ public:
void remove_client_cap(CInode *in, Capability *cap, bool kill=false);
- std::set<client_t> get_late_revoking_clients(double timeout) const;
+ std::set<client_t> get_late_revoking_clients(double timeout);
void snapflush_nudge(CInode *in);
void mark_need_snapflush_inode(CInode *in);
@@ -249,9 +249,9 @@ protected:
xlist<ScatterLock*> updated_scatterlocks;
// Maintain a global list to quickly find if any caps are late revoking
- xlist<Capability*> revoking_caps;
+ elist<Capability*> revoking_caps;
// Maintain a per-client list to find clients responsible for late ones quickly
- std::map<client_t, xlist<Capability*> > revoking_caps_by_client;
+ std::map<client_t, elist<Capability*> > revoking_caps_by_client;
elist<CInode*> need_snapflush_inodes;
@@ -267,7 +267,6 @@ private:
void handle_quiesce_failure(const MDRequestRef& mdr, std::string_view& marker);
- bool any_late_revoking_caps(xlist<Capability*> const &revoking, double timeout) const;
uint64_t calc_new_max_size(const CInode::inode_const_ptr& pi, uint64_t size);
__u32 get_xattr_total_length(CInode::mempool_xattr_map &xattr);
void decode_new_xattrs(CInode::mempool_inode *inode,
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 94b6af0b4e7..e9cb50c6e00 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -5696,7 +5696,7 @@ void MDCache::prepare_realm_merge(SnapRealm *realm, SnapRealm *parent_realm,
split_realms.push_back((*p)->inode->ino());
for (const auto& p : realm->client_caps) {
- ceph_assert(!p.second->empty());
+ ceph_assert(!p.second.empty());
auto em = splits.emplace(std::piecewise_construct, std::forward_as_tuple(p.first), std::forward_as_tuple());
if (em.second) {
auto update = make_message<MClientSnap>(CEPH_SNAP_OP_SPLIT);
@@ -7820,7 +7820,7 @@ void MDCache::trim_client_leases()
ClientLease *r = list.front();
if (r->ttl > now) break;
CDentry *dn = static_cast<CDentry*>(r->parent);
- dout(10) << " expiring client." << r->client << " lease of " << *dn << dendl;
+ dout(10) << " expiring client." << r->get_client() << " lease of " << *dn << dendl;
dn->remove_client_lease(r, mds->locker);
}
auto after = list.size();
@@ -10087,7 +10087,7 @@ void MDCache::do_realm_invalidate_and_update_notify(CInode *in, int snapop, bool
for (const auto& p : realm->client_caps) {
const auto& client = p.first;
const auto& caps = p.second;
- ceph_assert(!caps->empty());
+ ceph_assert(!caps.empty());
auto em = updates.emplace(std::piecewise_construct, std::forward_as_tuple(client), std::forward_as_tuple());
if (em.second) {
diff --git a/src/mds/MDSCacheObject.h b/src/mds/MDSCacheObject.h
index d322a05851a..be84d142e9a 100644
--- a/src/mds/MDSCacheObject.h
+++ b/src/mds/MDSCacheObject.h
@@ -20,9 +20,6 @@
//#define MDS_AUTHPIN_SET // define me for debugging auth pin leaks
//#define MDS_VERIFY_FRAGSTAT // do (slow) sanity checking on frags
-/*
- * for metadata leases to clients
- */
class MLock;
class SimpleLock;
class MDSCacheObject;
@@ -32,22 +29,6 @@ namespace ceph {
class Formatter;
}
-struct ClientLease {
- ClientLease(client_t c, MDSCacheObject *p) :
- client(c), parent(p),
- item_session_lease(this),
- item_lease(this) { }
- ClientLease() = delete;
-
- client_t client;
- MDSCacheObject *parent;
-
- ceph_seq_t seq = 0;
- utime_t ttl;
- xlist<ClientLease*>::item item_session_lease; // per-session list
- xlist<ClientLease*>::item item_lease; // global list
-};
-
// print hack
struct mdsco_db_line_prefix {
explicit mdsco_db_line_prefix(MDSCacheObject *o) : object(o) {}
diff --git a/src/mds/SnapRealm.h b/src/mds/SnapRealm.h
index 700c1d81e3b..b1c38d03fcb 100644
--- a/src/mds/SnapRealm.h
+++ b/src/mds/SnapRealm.h
@@ -103,18 +103,18 @@ public:
void merge_to(SnapRealm *newparent);
void add_cap(client_t client, Capability *cap) {
- auto client_caps_entry = client_caps.find(client);
- if (client_caps_entry == client_caps.end())
- client_caps_entry = client_caps.emplace(client,
- new xlist<Capability*>).first;
- client_caps_entry->second->push_back(&cap->item_snaprealm_caps);
+ auto em = client_caps.emplace(cap->get_client(),
+ member_offset(Capability, item_snaprealm_caps));
+ em.first->second.push_back(&cap->item_snaprealm_caps);
}
void remove_cap(client_t client, Capability *cap) {
+ bool last_cap = cap->item_snaprealm_caps.is_singular();
cap->item_snaprealm_caps.remove_myself();
- auto found = client_caps.find(client);
- if (found != client_caps.end() && found->second->empty()) {
- delete found->second;
- client_caps.erase(found);
+ if (last_cap) {
+ auto it = client_caps.find(client);
+ ceph_assert(it != client_caps.end());
+ ceph_assert(it->second.empty());
+ client_caps.erase(it);
}
}
@@ -129,7 +129,7 @@ public:
std::set<SnapRealm*> open_children; // active children that are currently open
elist<CInode*> inodes_with_caps; // for efficient realm splits
- std::map<client_t, xlist<Capability*>* > client_caps; // to identify clients who need snap notifications
+ std::map<client_t, elist<Capability*> > client_caps; // to identify clients who need snap notifications
protected:
void check_cache() const;