diff options
author | sajibreadd <sajibreadd@gmail.com> | 2024-05-27 09:30:06 +0200 |
---|---|---|
committer | sajibreadd <sajibreadd@gmail.com> | 2024-06-26 10:47:03 +0200 |
commit | 73b80a9a2c38259346fb646f85fa2ba4dcbb1329 (patch) | |
tree | 589c9f9aefc75b54f77226c2643f42432d7d14d0 /src/blk | |
parent | Merge pull request #57729 from idryomov/wip-66232 (diff) | |
download | ceph-73b80a9a2c38259346fb646f85fa2ba4dcbb1329.tar.xz ceph-73b80a9a2c38259346fb646f85fa2ba4dcbb1329.zip |
Warnings added for slow operations and stalled reads in BlueStore. The user can control how long a warning persists after the last occurrence, and the maximum number of operations that is considered as the threshold for the warning.
Fixes: https://tracker.ceph.com/issues/62500
Signed-off-by: Md Mahamudur Rahaman Sajib <mahamudur.sajib@croit.io>
Diffstat (limited to 'src/blk')
-rw-r--r-- | src/blk/BlockDevice.cc | 37 | ||||
-rw-r--r-- | src/blk/BlockDevice.h | 9 | ||||
-rw-r--r-- | src/blk/kernel/KernelDevice.cc | 4 |
3 files changed, 49 insertions, 1 deletions
diff --git a/src/blk/BlockDevice.cc b/src/blk/BlockDevice.cc index 33ceacc8909..7510fb89c8b 100644 --- a/src/blk/BlockDevice.cc +++ b/src/blk/BlockDevice.cc @@ -42,6 +42,7 @@ #define dout_prefix *_dout << "bdev " using std::string; +using ceph::mono_clock; blk_access_mode_t buffermode(bool buffered) @@ -191,3 +192,39 @@ bool BlockDevice::is_valid_io(uint64_t off, uint64_t len) const { } return ret; } + +size_t BlockDevice::trim_stalled_read_event_queue(mono_clock::time_point cur_time) { + std::lock_guard lock(stalled_read_event_queue_lock); + auto warn_duration = std::chrono::seconds(cct->_conf->bdev_stalled_read_warn_lifetime); + while (!stalled_read_event_queue.empty() && + ((stalled_read_event_queue.front() < cur_time - warn_duration) || + (stalled_read_event_queue.size() > cct->_conf->bdev_stalled_read_warn_threshold))) { + stalled_read_event_queue.pop(); + } + return stalled_read_event_queue.size(); +} + +void BlockDevice::add_stalled_read_event() { + if (!cct->_conf->bdev_stalled_read_warn_threshold) { + return; + } + auto cur_time = mono_clock::now(); + { + std::lock_guard lock(stalled_read_event_queue_lock); + stalled_read_event_queue.push(cur_time); + } + trim_stalled_read_event_queue(cur_time); +} + +void BlockDevice::collect_alerts(osd_alert_list_t& alerts, const std::string& device_name) { + if (cct->_conf->bdev_stalled_read_warn_threshold) { + size_t qsize = trim_stalled_read_event_queue(mono_clock::now()); + if (qsize >= cct->_conf->bdev_stalled_read_warn_threshold) { + std::ostringstream ss; + ss << "observed stalled read indications in " + << device_name << " device"; + alerts.emplace(device_name + "_DEVICE_STALLED_READ_ALERT", ss.str()); + } + } +} + diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h index 6c55646fc76..115f7e6c1b5 100644 --- a/src/blk/BlockDevice.h +++ b/src/blk/BlockDevice.h @@ -25,11 +25,13 @@ #include <set> #include <string> #include <vector> +#include <queue> #include "acconfig.h" #include "common/ceph_mutex.h" #include 
"include/common_fwd.h" #include "extblkdev/ExtBlkDevInterface.h" +#include "osd/osd_types.h" #if defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO) #include "aio/aio.h" @@ -148,6 +150,8 @@ class BlockDevice { public: CephContext* cct; typedef void (*aio_callback_t)(void *handle, void *aio); + void collect_alerts(osd_alert_list_t& alerts, const std::string& device_name); + private: ceph::mutex ioc_reap_lock = ceph::make_mutex("BlockDevice::ioc_reap_lock"); std::vector<IOContext*> ioc_reap_queue; @@ -164,12 +168,14 @@ private: pmem, #endif }; + std::queue <ceph::mono_clock::time_point> stalled_read_event_queue; + ceph::mutex stalled_read_event_queue_lock = ceph::make_mutex("BlockDevice::stalled_read_event_queue_lock"); + size_t trim_stalled_read_event_queue(mono_clock::time_point cur_time); static block_device_t detect_device_type(const std::string& path); static block_device_t device_type_from_name(const std::string& blk_dev_name); static BlockDevice *create_with_type(block_device_t device_type, CephContext* cct, const std::string& path, aio_callback_t cb, void *cbpriv, aio_callback_t d_cb, void *d_cbpriv); - protected: uint64_t size = 0; uint64_t block_size = 0; @@ -187,6 +193,7 @@ protected: // of the drive. The zones 524-52155 are sequential zones. 
uint64_t conventional_region_size = 0; uint64_t zone_size = 0; + void add_stalled_read_event(); public: aio_callback_t aio_callback; diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc index 6337292f5de..f6edaf98e83 100644 --- a/src/blk/kernel/KernelDevice.cc +++ b/src/blk/kernel/KernelDevice.cc @@ -1310,6 +1310,7 @@ int KernelDevice::read(uint64_t off, uint64_t len, bufferlist *pbl, << " since " << start1 << ", timeout is " << age << "s" << dendl; + add_stalled_read_event(); } if (r < 0) { if (ioc->allow_eio && is_expected_ioerr(-errno)) { @@ -1383,6 +1384,7 @@ int KernelDevice::direct_read_unaligned(uint64_t off, uint64_t len, char *buf) << " since " << start1 << ", timeout is " << age << "s" << dendl; + add_stalled_read_event(); } if (r < 0) { @@ -1446,6 +1448,7 @@ int KernelDevice::read_random(uint64_t off, uint64_t len, char *buf, << " (buffered) since " << start1 << ", timeout is " << age << "s" << dendl; + add_stalled_read_event(); } } else { //direct and aligned read @@ -1456,6 +1459,7 @@ int KernelDevice::read_random(uint64_t off, uint64_t len, char *buf, << " (direct) since " << start1 << ", timeout is " << age << "s" << dendl; + add_stalled_read_event(); } if (r < 0) { r = -errno; |