summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVenky Shankar <vshankar@redhat.com>2023-01-17 11:45:04 +0100
committerVenky Shankar <vshankar@redhat.com>2023-01-17 12:58:22 +0100
commitd01acd531a0694514f7e7e11087ce7a9d16e94d9 (patch)
tree94a4da53d2ffd85c6edfa540faf4b24fca7cf233
parentMerge pull request #49710 from cyx1231st/wip-seastore-cleanup-overwrite (diff)
downloadceph-d01acd531a0694514f7e7e11087ce7a9d16e94d9.tar.xz
ceph-d01acd531a0694514f7e7e11087ce7a9d16e94d9.zip
mds: record and dump last tid for trimming completed requests (or flushes)
CephFS clients include `oldest_tid` as part of the client request to the MDS. This field is the tid of the oldest incomplete MDS request (excluding setfilelock requests). The MDS uses this to trim completed requests (and flushes). In one case, a Ceph cluster had an extremely high completed-requests count, meaning that, for some reason, the client was not advancing its `oldest_tid` field even though the MDS had successfully "safe replied" to the client's requests. This change adds a debug aid for recording and dumping this field. It might be possible to fetch this from clients (if not, we should add that!), but it makes sense to have this information available from the MDS. Partially-Fixes: http://tracker.ceph.com/issues/57985 Signed-off-by: Venky Shankar <vshankar@redhat.com>
-rw-r--r--doc/cephfs/health-messages.rst4
-rw-r--r--src/mds/SessionMap.cc3
-rw-r--r--src/mds/SessionMap.h5
3 files changed, 11 insertions, 1 deletions
diff --git a/doc/cephfs/health-messages.rst b/doc/cephfs/health-messages.rst
index 7edc1262f5c..d833d0e6fc6 100644
--- a/doc/cephfs/health-messages.rst
+++ b/doc/cephfs/health-messages.rst
@@ -130,7 +130,9 @@ other daemons, please see :ref:`health-checks`.
from properly cleaning up resources used by client requests. This message
appears if a client appears to have more than ``max_completed_requests``
(default 100000) requests that are complete on the MDS side but haven't
- yet been accounted for in the client's *oldest tid* value.
+ yet been accounted for in the client's *oldest tid* value. The last tid
+ used by the MDS to trim completed client requests (or flushes) is included
+ in the output of the ``session ls`` (or ``client ls``) command as a debug aid.
``MDS_DAMAGE``
--------------
diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc
index 2364c973e67..c21ac469356 100644
--- a/src/mds/SessionMap.cc
+++ b/src/mds/SessionMap.cc
@@ -601,6 +601,9 @@ void Session::dump(Formatter *f, bool cap_dump) const
f->dump_object("session_cache_liveness", session_cache_liveness);
f->dump_object("cap_acquisition", cap_acquisition);
+ f->dump_unsigned("last_trim_completed_requests_tid", last_trim_completed_requests_tid);
+ f->dump_unsigned("last_trim_completed_flushes_tid", last_trim_completed_flushes_tid);
+
f->open_array_section("delegated_inos");
for (const auto& [start, len] : delegated_inos) {
f->open_object_section("ino_range");
diff --git a/src/mds/SessionMap.h b/src/mds/SessionMap.h
index e59f7f26484..067e1474cc3 100644
--- a/src/mds/SessionMap.h
+++ b/src/mds/SessionMap.h
@@ -313,6 +313,7 @@ public:
bool trim_completed_requests(ceph_tid_t mintid) {
// trim
bool erased_any = false;
+ last_trim_completed_requests_tid = mintid;
while (!info.completed_requests.empty() &&
(mintid == 0 || info.completed_requests.begin()->first < mintid)) {
info.completed_requests.erase(info.completed_requests.begin());
@@ -338,6 +339,7 @@ public:
}
bool trim_completed_flushes(ceph_tid_t mintid) {
bool erased_any = false;
+ last_trim_completed_flushes_tid = mintid;
while (!info.completed_flushes.empty() &&
(mintid == 0 || *info.completed_flushes.begin() < mintid)) {
info.completed_flushes.erase(info.completed_flushes.begin());
@@ -492,6 +494,9 @@ private:
unsigned num_trim_flushes_warnings = 0;
unsigned num_trim_requests_warnings = 0;
+
+ ceph_tid_t last_trim_completed_requests_tid = 0;
+ ceph_tid_t last_trim_completed_flushes_tid = 0;
};
class SessionFilter