summaryrefslogtreecommitdiffstats
path: root/src/os/bluestore/BlueStore.cc
diff options
context:
space:
mode:
authorXiaoguang Wang <xiaoguang.wang@easystack.cn>2018-08-30 04:26:41 +0200
committerXiaoguang Wang <xiaoguang.wang@easystack.cn>2018-08-31 08:11:18 +0200
commita7f1af25dd2ba88a322ed21828f073a277b09d02 (patch)
treef82f63ddc047d4db4ed73edc07d6f3d58c2041d1 /src/os/bluestore/BlueStore.cc
parentcore: add missing flag name for CEPH_OSD_OP_FLAG_WITH_REFERENCE (diff)
downloadceph-a7f1af25dd2ba88a322ed21828f073a277b09d02.tar.xz
ceph-a7f1af25dd2ba88a322ed21828f073a277b09d02.zip
os/bluestore: fix deep-scrub operation against silent disk errors
Suppose an object has cached data, and some time later the physical device underlying those caches suffers silent disk errors, so the cached data and the on-disk data are no longer the same. In that case, the deep-scrub operation still tries to read the caches first and does not perform the crc checksum, so deep-scrub does not find such data corruption in a timely manner. This patch introduces a new flag, 'CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE', which tells deep-scrub to bypass object caches. Note that we only bypass caches that are in the STATE_CLEAN state. STATE_WRITING caches have not yet been written to the physical device, so the deep-scrub operation cannot read them from the physical device and can safely read these dirty caches instead. Once they are in the STATE_CLEAN state (or are not added to the bluestore cache), the next round of deep-scrub can check them correctly. Following the above discussion, I refactor BlueStore::BufferSpace::read slightly, adding a new 'flags' argument, whose value will be 0 or: enum { BYPASS_CLEAN_CACHE = 0x1, // bypass clean cache }; flags 0: normal read, do not bypass clean or dirty cache; flags BYPASS_CLEAN_CACHE: bypass clean cache, currently only for the deep-scrub operation. Test: I deliberately corrupted an object that had cached data; with this patch, deep-scrub finds the data error promptly. Signed-off-by: Xiaoguang Wang <xiaoguang.wang@easystack.cn>
Diffstat (limited to 'src/os/bluestore/BlueStore.cc')
-rw-r--r--src/os/bluestore/BlueStore.cc22
1 files changed, 19 insertions, 3 deletions
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 90f2a0e8690..25d09c8648f 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -1306,7 +1306,8 @@ void BlueStore::BufferSpace::read(
uint32_t offset,
uint32_t length,
BlueStore::ready_regions_t& res,
- interval_set<uint32_t>& res_intervals)
+ interval_set<uint32_t>& res_intervals,
+ int flags)
{
res.clear();
res_intervals.clear();
@@ -1320,7 +1321,13 @@ void BlueStore::BufferSpace::read(
++i) {
Buffer *b = i->second.get();
ceph_assert(b->end() > offset);
- if (b->is_writing() || b->is_clean()) {
+
+ bool val = false;
+ if (flags & BYPASS_CLEAN_CACHE)
+ val = b->is_writing();
+ else
+ val = b->is_writing() || b->is_clean();
+ if (val) {
if (b->offset < offset) {
uint32_t skip = offset - b->offset;
uint32_t l = min(length, b->length - skip);
@@ -7408,6 +7415,7 @@ int BlueStore::_do_read(
{
FUNCTRACE(cct);
int r = 0;
+ int read_cache_policy = 0; // do not bypass clean or dirty cache
dout(20) << __func__ << " 0x" << std::hex << offset << "~" << length
<< " size 0x" << o->onode.size << " (" << std::dec
@@ -7442,6 +7450,13 @@ int BlueStore::_do_read(
ready_regions_t ready_regions;
+ // for deep-scrub, we only read dirty cache and bypass clean cache in
+ // order to read underlying block device in case there are silent disk errors.
+ if (op_flags & CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE) {
+ dout(20) << __func__ << " will bypass cache and do direct read" << dendl;
+ read_cache_policy = BufferSpace::BYPASS_CLEAN_CACHE;
+ }
+
// build blob-wise list to of stuff read (that isn't cached)
blobs2read_t blobs2read;
unsigned left = length;
@@ -7467,7 +7482,8 @@ int BlueStore::_do_read(
ready_regions_t cache_res;
interval_set<uint32_t> cache_interval;
bptr->shared_blob->bc.read(
- bptr->shared_blob->get_cache(), b_off, b_len, cache_res, cache_interval);
+ bptr->shared_blob->get_cache(), b_off, b_len, cache_res, cache_interval,
+ read_cache_policy);
dout(20) << __func__ << " blob " << *bptr << std::hex
<< " need 0x" << b_off << "~" << b_len
<< " cache has 0x" << cache_interval