summaryrefslogtreecommitdiffstats
path: root/src/blk
diff options
context:
space:
mode:
authorsajibreadd <sajibreadd@gmail.com>2024-05-27 09:30:06 +0200
committersajibreadd <sajibreadd@gmail.com>2024-06-26 10:47:03 +0200
commit73b80a9a2c38259346fb646f85fa2ba4dcbb1329 (patch)
tree589c9f9aefc75b54f77226c2643f42432d7d14d0 /src/blk
parentMerge pull request #57729 from idryomov/wip-66232 (diff)
downloadceph-73b80a9a2c38259346fb646f85fa2ba4dcbb1329.tar.xz
ceph-73b80a9a2c38259346fb646f85fa2ba4dcbb1329.zip
Warning added for slow operations and stalled reads in BlueStore. Users can control how long the warning persists after the last occurrence, and the maximum number of operations that is used as the threshold for raising the warning.
Fixes: https://tracker.ceph.com/issues/62500 Signed-off-by: Md Mahamudur Rahaman Sajib <mahamudur.sajib@croit.io>
Diffstat (limited to 'src/blk')
-rw-r--r--src/blk/BlockDevice.cc37
-rw-r--r--src/blk/BlockDevice.h9
-rw-r--r--src/blk/kernel/KernelDevice.cc4
3 files changed, 49 insertions, 1 deletions
diff --git a/src/blk/BlockDevice.cc b/src/blk/BlockDevice.cc
index 33ceacc8909..7510fb89c8b 100644
--- a/src/blk/BlockDevice.cc
+++ b/src/blk/BlockDevice.cc
@@ -42,6 +42,7 @@
#define dout_prefix *_dout << "bdev "
using std::string;
+using ceph::mono_clock;
blk_access_mode_t buffermode(bool buffered)
@@ -191,3 +192,39 @@ bool BlockDevice::is_valid_io(uint64_t off, uint64_t len) const {
}
return ret;
}
+
+// Prune the stalled-read event queue and report how many events remain.
+//
+// Under stalled_read_event_queue_lock, entries are removed from the front
+// for as long as the front entry is older than
+// cur_time - bdev_stalled_read_warn_lifetime seconds, or the queue holds
+// more than bdev_stalled_read_warn_threshold entries.
+//
+// @param cur_time  reference time used to compute the expiry cutoff
+// @return          number of entries left in the queue after pruning
+size_t BlockDevice::trim_stalled_read_event_queue(mono_clock::time_point cur_time) {
+  std::lock_guard guard(stalled_read_event_queue_lock);
+  const auto lifetime = std::chrono::seconds(cct->_conf->bdev_stalled_read_warn_lifetime);
+  const auto cutoff = cur_time - lifetime;
+  for (;;) {
+    if (stalled_read_event_queue.empty()) {
+      break;
+    }
+    // Keep the front entry only if it is neither expired nor in excess of
+    // the configured threshold.
+    if (!(stalled_read_event_queue.front() < cutoff) &&
+        !(stalled_read_event_queue.size() > cct->_conf->bdev_stalled_read_warn_threshold)) {
+      break;
+    }
+    stalled_read_event_queue.pop();
+  }
+  return stalled_read_event_queue.size();
+}
+
+// Record one stalled-read event stamped with the current monotonic time,
+// then prune the queue. Does nothing when
+// bdev_stalled_read_warn_threshold is zero.
+void BlockDevice::add_stalled_read_event() {
+  if (cct->_conf->bdev_stalled_read_warn_threshold) {
+    const auto now = mono_clock::now();
+    {
+      // Release the lock before trimming: the trim helper acquires it itself.
+      std::lock_guard guard(stalled_read_event_queue_lock);
+      stalled_read_event_queue.push(now);
+    }
+    trim_stalled_read_event_queue(now);
+  }
+}
+
+// Add a stalled-read alert for this device to `alerts` when warranted.
+//
+// When bdev_stalled_read_warn_threshold is non-zero, the event queue is
+// pruned against the current time; if the number of remaining events is at
+// least the threshold, an entry keyed "<device_name>_DEVICE_STALLED_READ_ALERT"
+// is emplaced into `alerts`.
+//
+// @param alerts       alert collection to add to
+// @param device_name  name used in both the alert key and its message
+void BlockDevice::collect_alerts(osd_alert_list_t& alerts, const std::string& device_name) {
+  if (!cct->_conf->bdev_stalled_read_warn_threshold) {
+    return;
+  }
+  const size_t remaining = trim_stalled_read_event_queue(mono_clock::now());
+  if (remaining < cct->_conf->bdev_stalled_read_warn_threshold) {
+    return;
+  }
+  std::ostringstream msg;
+  msg << "observed stalled read indications in " << device_name << " device";
+  alerts.emplace(device_name + "_DEVICE_STALLED_READ_ALERT", msg.str());
+}
+
diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h
index 6c55646fc76..115f7e6c1b5 100644
--- a/src/blk/BlockDevice.h
+++ b/src/blk/BlockDevice.h
@@ -25,11 +25,13 @@
#include <set>
#include <string>
#include <vector>
+#include <queue>
#include "acconfig.h"
#include "common/ceph_mutex.h"
#include "include/common_fwd.h"
#include "extblkdev/ExtBlkDevInterface.h"
+#include "osd/osd_types.h"
#if defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO)
#include "aio/aio.h"
@@ -148,6 +150,8 @@ class BlockDevice {
public:
CephContext* cct;
typedef void (*aio_callback_t)(void *handle, void *aio);
+ void collect_alerts(osd_alert_list_t& alerts, const std::string& device_name);
+
private:
ceph::mutex ioc_reap_lock = ceph::make_mutex("BlockDevice::ioc_reap_lock");
std::vector<IOContext*> ioc_reap_queue;
@@ -164,12 +168,14 @@ private:
pmem,
#endif
};
+ // Timestamps of recorded stalled-read events; access is serialized by
+ // stalled_read_event_queue_lock.
+ std::queue<ceph::mono_clock::time_point> stalled_read_event_queue;
+ ceph::mutex stalled_read_event_queue_lock = ceph::make_mutex("BlockDevice::stalled_read_event_queue_lock");
+ // Removes expired or excess events and returns the number remaining.
+ // The clock type is fully qualified so this header does not depend on a
+ // using-declaration for mono_clock being visible at the point of inclusion.
+ size_t trim_stalled_read_event_queue(ceph::mono_clock::time_point cur_time);
static block_device_t detect_device_type(const std::string& path);
static block_device_t device_type_from_name(const std::string& blk_dev_name);
static BlockDevice *create_with_type(block_device_t device_type,
CephContext* cct, const std::string& path, aio_callback_t cb,
void *cbpriv, aio_callback_t d_cb, void *d_cbpriv);
-
protected:
uint64_t size = 0;
uint64_t block_size = 0;
@@ -187,6 +193,7 @@ protected:
// of the drive. The zones 524-52155 are sequential zones.
uint64_t conventional_region_size = 0;
uint64_t zone_size = 0;
+ void add_stalled_read_event();
public:
aio_callback_t aio_callback;
diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc
index 6337292f5de..f6edaf98e83 100644
--- a/src/blk/kernel/KernelDevice.cc
+++ b/src/blk/kernel/KernelDevice.cc
@@ -1310,6 +1310,7 @@ int KernelDevice::read(uint64_t off, uint64_t len, bufferlist *pbl,
<< " since " << start1 << ", timeout is "
<< age
<< "s" << dendl;
+ add_stalled_read_event();
}
if (r < 0) {
if (ioc->allow_eio && is_expected_ioerr(-errno)) {
@@ -1383,6 +1384,7 @@ int KernelDevice::direct_read_unaligned(uint64_t off, uint64_t len, char *buf)
<< " since " << start1 << ", timeout is "
<< age
<< "s" << dendl;
+ add_stalled_read_event();
}
if (r < 0) {
@@ -1446,6 +1448,7 @@ int KernelDevice::read_random(uint64_t off, uint64_t len, char *buf,
<< " (buffered) since " << start1 << ", timeout is "
<< age
<< "s" << dendl;
+ add_stalled_read_event();
}
} else {
//direct and aligned read
@@ -1456,6 +1459,7 @@ int KernelDevice::read_random(uint64_t off, uint64_t len, char *buf,
<< " (direct) since " << start1 << ", timeout is "
<< age
<< "s" << dendl;
+ add_stalled_read_event();
}
if (r < 0) {
r = -errno;