diff options
author | Jason Dillaman <dillaman@redhat.com> | 2019-05-29 15:37:34 +0200 |
---|---|---|
committer | Jason Dillaman <dillaman@redhat.com> | 2019-05-30 16:32:20 +0200 |
commit | c8ce520870ef46ac00dfea8acfbff46f8b869913 (patch) | |
tree | 61173282f95a68675ce1f1ece7c912a955c815c7 /src/librbd | |
parent | mgr/dashboard: Optimize iSCSI target edition (#27272) (diff) | |
download | ceph-c8ce520870ef46ac00dfea8acfbff46f8b869913.tar.xz ceph-c8ce520870ef46ac00dfea8acfbff46f8b869913.zip |
librbd: do not unblock IO prior to growing object map during resize
This could result in a small race condition where IO is able to write
beyond the current extent of the object map, resulting in an assertion
failure.
Fixes: http://tracker.ceph.com/issues/39952
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
Diffstat (limited to 'src/librbd')
-rw-r--r-- | src/librbd/operation/ResizeRequest.cc | 59 | ||||
-rw-r--r-- | src/librbd/operation/ResizeRequest.h | 8 |
2 files changed, 34 insertions, 33 deletions
diff --git a/src/librbd/operation/ResizeRequest.cc b/src/librbd/operation/ResizeRequest.cc index 65b3899c09e..7450bd4bd98 100644 --- a/src/librbd/operation/ResizeRequest.cc +++ b/src/librbd/operation/ResizeRequest.cc @@ -17,7 +17,8 @@ #define dout_subsys ceph_subsys_rbd #undef dout_prefix -#define dout_prefix *_dout << "librbd::ResizeRequest: " +#define dout_prefix *_dout << "librbd::operation::ResizeRequest: " << this \ + << " " << __func__ << ": " namespace librbd { namespace operation { @@ -93,7 +94,7 @@ template <typename I> void ResizeRequest<I>::send_pre_block_writes() { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << dendl; + ldout(cct, 5) << dendl; image_ctx.io_work_queue->block_writes(create_context_callback< ResizeRequest<I>, &ResizeRequest<I>::handle_pre_block_writes>(this)); @@ -103,7 +104,7 @@ template <typename I> Context *ResizeRequest<I>::handle_pre_block_writes(int *result) { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + ldout(cct, 5) << "r=" << *result << dendl; if (*result < 0) { lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl; @@ -120,7 +121,7 @@ Context *ResizeRequest<I>::send_append_op_event() { CephContext *cct = image_ctx.cct; if (m_new_size < m_original_size && !m_allow_shrink) { - ldout(cct, 1) << " shrinking the image is not permitted" << dendl; + ldout(cct, 1) << "shrinking the image is not permitted" << dendl; image_ctx.io_work_queue->unblock_writes(); this->async_complete(-EINVAL); return nullptr; @@ -131,7 +132,7 @@ Context *ResizeRequest<I>::send_append_op_event() { return send_grow_object_map(); } - ldout(cct, 5) << this << " " << __func__ << dendl; + ldout(cct, 5) << dendl; return nullptr; } @@ -139,7 +140,7 @@ template <typename I> Context *ResizeRequest<I>::handle_append_op_event(int *result) { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + ldout(cct, 5) << "r=" << *result << dendl; if (*result < 0) { lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result) @@ -155,7 +156,7 @@ template <typename I> void ResizeRequest<I>::send_trim_image() { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << dendl; + ldout(cct, 5) << dendl; RWLock::RLocker owner_locker(image_ctx.owner_lock); TrimRequest<I> *req = TrimRequest<I>::create( @@ -169,7 +170,7 @@ template <typename I> Context *ResizeRequest<I>::handle_trim_image(int *result) { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + ldout(cct, 5) << "r=" << *result << dendl; if (*result == -ERESTART) { ldout(cct, 5) << "resize operation interrupted" << dendl; @@ -188,7 +189,7 @@ void ResizeRequest<I>::send_flush_cache() { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << dendl; + ldout(cct, 5) << dendl; RWLock::RLocker owner_locker(image_ctx.owner_lock); auto ctx = create_context_callback< @@ -205,7 +206,7 @@ template <typename I> Context *ResizeRequest<I>::handle_flush_cache(int *result) { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + ldout(cct, 5) << "r=" << *result << dendl; if (*result < 0) { lderr(cct) << "failed to flush cache: " << cpp_strerror(*result) << dendl; @@ -220,7 +221,7 @@ template <typename I> void ResizeRequest<I>::send_invalidate_cache() { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << dendl; + ldout(cct, 5) << dendl; // need to invalidate since we're deleting objects, and // ObjectCacher doesn't track non-existent objects @@ -233,7 +234,7 @@ template <typename I> Context *ResizeRequest<I>::handle_invalidate_cache(int *result) { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + ldout(cct, 5) << "r=" << *result << dendl; // ignore busy error -- writeback was successfully flushed so we might be // wasting some cache space for trimmed objects, but they will get purged @@ -256,11 +257,12 @@ Context *ResizeRequest<I>::send_grow_object_map() { RWLock::WLocker image_locker(image_ctx.image_lock); m_shrink_size_visible = true; } - image_ctx.io_work_queue->unblock_writes(); if (m_original_size == m_new_size) { + image_ctx.io_work_queue->unblock_writes(); return this->create_context_finisher(0); } else if (m_new_size < m_original_size) { + image_ctx.io_work_queue->unblock_writes(); send_flush_cache(); return nullptr; } @@ -271,12 +273,13 @@ Context *ResizeRequest<I>::send_grow_object_map() { image_ctx.image_lock.put_read(); image_ctx.owner_lock.put_read(); - send_post_block_writes(); + // IO is still blocked + send_update_header(); return nullptr; } CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << dendl; + ldout(cct, 5) << dendl; // should have been canceled prior to releasing lock ceph_assert(image_ctx.exclusive_lock == nullptr || @@ -294,15 +297,17 @@ template <typename I> Context *ResizeRequest<I>::handle_grow_object_map(int *result) { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + ldout(cct, 5) << "r=" << *result << dendl; if (*result < 0) { - lderr(cct) << this << " " << __func__ << ": failed to resize object map: " + lderr(cct) << "failed to resize object map: " << cpp_strerror(*result) << dendl; + image_ctx.io_work_queue->unblock_writes(); return this->create_context_finisher(*result); } - send_post_block_writes(); + // IO is still blocked + send_update_header(); return nullptr; } @@ -321,8 +326,7 @@ Context *ResizeRequest<I>::send_shrink_object_map() { } CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << " " - << "original_size=" << m_original_size << ", " + ldout(cct, 5) << "original_size=" << m_original_size << ", " << "new_size=" << m_new_size << dendl; // should have been canceled prior to releasing lock @@ -341,10 +345,10 @@ template <typename I> Context *ResizeRequest<I>::handle_shrink_object_map(int *result) { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + ldout(cct, 5) << "r=" << *result << dendl; if (*result < 0) { - lderr(cct) << this << " " << __func__ << ": failed to resize object map: " + lderr(cct) << "failed to resize object map: " << cpp_strerror(*result) << dendl; image_ctx.io_work_queue->unblock_writes(); return this->create_context_finisher(*result); @@ -358,7 +362,7 @@ template <typename I> void ResizeRequest<I>::send_post_block_writes() { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << dendl; + ldout(cct, 5) << dendl; RWLock::RLocker owner_locker(image_ctx.owner_lock); image_ctx.io_work_queue->block_writes(create_context_callback< @@ -369,7 +373,7 @@ template <typename I> Context *ResizeRequest<I>::handle_post_block_writes(int *result) { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + ldout(cct, 5) << "r=" << *result << dendl; if (*result < 0) { image_ctx.io_work_queue->unblock_writes(); @@ -386,8 +390,7 @@ template <typename I> void ResizeRequest<I>::send_update_header() { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << " " - << "original_size=" << m_original_size << ", " + ldout(cct, 5) << "original_size=" << m_original_size << ", " << "new_size=" << m_new_size << dendl;; // should have been canceled prior to releasing lock @@ -418,7 +421,7 @@ template <typename I> Context *ResizeRequest<I>::handle_update_header(int *result) { I &image_ctx = this->m_image_ctx; CephContext *cct = image_ctx.cct; - ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl; + ldout(cct, 5) << "r=" << *result << dendl; if (*result < 0) { lderr(cct) << "failed to update image header: " << cpp_strerror(*result) @@ -454,7 +457,7 @@ void ResizeRequest<I>::update_size_and_overlap() { } } - // blocked by POST_BLOCK_WRITES state + // blocked by PRE_BLOCK_WRITES (grow) or POST_BLOCK_WRITES (shrink) state image_ctx.io_work_queue->unblock_writes(); } diff --git a/src/librbd/operation/ResizeRequest.h b/src/librbd/operation/ResizeRequest.h index 4ec79c373be..f5e2f807fde 100644 --- a/src/librbd/operation/ResizeRequest.h +++ b/src/librbd/operation/ResizeRequest.h @@ -67,17 +67,15 @@ private: * v * STATE_APPEND_OP_EVENT (skip if journaling * | disabled) - * | (unblock writes) - * | * | * | (grow) * |\--------> STATE_GROW_OBJECT_MAP (skip if object map * | | disabled) * | v - * | STATE_POST_BLOCK_WRITES - * | | - * | v * | STATE_UPDATE_HEADER ----------------------------\ + * | (unblock writes) | + * | | + * | (unblock writes) | * | | * | (shrink) | * |\--------> STATE_FLUSH_CACHE | |