summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorSage Weil <sage@redhat.com>2018-08-13 19:10:31 +0200
committerSage Weil <sage@redhat.com>2018-08-13 19:10:59 +0200
commitf21f1f14f2d2a465ba072118bd8e32271bf8906e (patch)
tree3dc5e08210336d02da8981f1e67219d4bf0ef4b1 /src
parentMerge PR #22879 into master (diff)
downloadceph-f21f1f14f2d2a465ba072118bd8e32271bf8906e.tar.xz
ceph-f21f1f14f2d2a465ba072118bd8e32271bf8906e.zip
os/bluestore: fix split vs finish_write race
In _tcx_finish(), we were looking at the right Cache for the collection, and then calling finish_write with that Cache and taking the lock. This could race with a split_cache() such that after we got the lock the collection was not on a different cache. This would in turn lead to a failed assertion later on in _rm_buffer when the sharedblob was trimmed. Fixes: http://tracker.ceph.com/issues/24439 Signed-off-by: Sage Weil <sage@redhat.com>
Diffstat (limited to 'src')
-rw-r--r--src/os/bluestore/BlueStore.cc23
-rw-r--r--src/os/bluestore/BlueStore.h4
2 files changed, 22 insertions, 5 deletions
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 9c8f0ecafaf..ad36982a9c5 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -1367,10 +1367,8 @@ void BlueStore::BufferSpace::read(
cache->logger->inc(l_bluestore_buffer_miss_bytes, miss_bytes);
}
-void BlueStore::BufferSpace::finish_write(Cache* cache, uint64_t seq)
+void BlueStore::BufferSpace::_finish_write(Cache* cache, uint64_t seq)
{
- std::lock_guard<std::recursive_mutex> l(cache->lock);
-
auto i = writing.begin();
while (i != writing.end()) {
if (i->seq > seq) {
@@ -1641,6 +1639,23 @@ void BlueStore::SharedBlob::put_ref(uint64_t offset, uint32_t length,
unshare && !*unshare ? unshare : nullptr);
}
+void BlueStore::SharedBlob::finish_write(uint64_t seq)
+{
+ while (true) {
+ Cache *cache = coll->cache;
+ std::lock_guard<std::recursive_mutex> l(cache->lock);
+ if (coll->cache != cache) {
+ ldout(coll->store->cct, 20) << __func__
+ << " raced with sb cache update, was " << cache
+ << ", now " << coll->cache << ", retrying"
+ << dendl;
+ continue;
+ }
+ bc._finish_write(cache, seq);
+ break;
+ }
+}
+
// SharedBlobSet
#undef dout_prefix
@@ -8864,7 +8879,7 @@ void BlueStore::_txc_finish(TransContext *txc)
assert(txc->state == TransContext::STATE_FINISHING);
for (auto& sb : txc->shared_blobs_written) {
- sb->bc.finish_write(sb->get_cache(), txc->seq);
+ sb->finish_write(txc->seq);
}
txc->shared_blobs_written.clear();
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index 0785eb0a151..bd63d6acca4 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -336,7 +336,7 @@ public:
b->cache_private = _discard(cache, offset, bl.length());
_add_buffer(cache, b, (flags & Buffer::FLAG_NOCACHE) ? 0 : 1, nullptr);
}
- void finish_write(Cache* cache, uint64_t seq);
+ void _finish_write(Cache* cache, uint64_t seq);
void did_read(Cache* cache, uint32_t offset, bufferlist& bl) {
std::lock_guard<std::recursive_mutex> l(cache->lock);
Buffer *b = new Buffer(this, Buffer::STATE_CLEAN, 0, offset, bl);
@@ -412,6 +412,8 @@ public:
void put_ref(uint64_t offset, uint32_t length,
PExtentVector *r, bool *unshare);
+ void finish_write(uint64_t seq);
+
friend bool operator==(const SharedBlob &l, const SharedBlob &r) {
return l.get_sbid() == r.get_sbid();
}