diff options
Diffstat (limited to 'src/librbd')
-rw-r--r-- | src/librbd/Journal.cc | 86 | ||||
-rw-r--r-- | src/librbd/Journal.h | 23 | ||||
-rw-r--r-- | src/librbd/io/ImageRequest.cc | 54 | ||||
-rw-r--r-- | src/librbd/io/ImageRequest.h | 21 | ||||
-rw-r--r-- | src/librbd/io/ObjectRequest.cc | 15 | ||||
-rw-r--r-- | src/librbd/librbd.cc | 6 |
6 files changed, 119 insertions, 86 deletions
diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc index 8ddce2e8f7d..1b37a30c17c 100644 --- a/src/librbd/Journal.cc +++ b/src/librbd/Journal.cc @@ -39,6 +39,7 @@ using util::create_async_context_callback; using util::create_context_callback; using journal::util::C_DecodeTag; using journal::util::C_DecodeTags; +using io::Extents; namespace { @@ -760,36 +761,87 @@ void Journal<I>::user_flushed() { } template <typename I> -uint64_t Journal<I>::append_write_event(uint64_t offset, size_t length, - const bufferlist &bl, - bool flush_entry) { +void Journal<I>::add_write_event_entries(uint64_t offset, size_t length, + const bufferlist &bl, + uint64_t buffer_offset, + Bufferlists *bufferlists) { ceph_assert(m_max_append_size > journal::AioWriteEvent::get_fixed_size()); - uint64_t max_write_data_size = + const uint64_t max_write_data_size = m_max_append_size - journal::AioWriteEvent::get_fixed_size(); // ensure that the write event fits within the journal entry - Bufferlists bufferlists; uint64_t bytes_remaining = length; uint64_t event_offset = 0; do { uint64_t event_length = std::min(bytes_remaining, max_write_data_size); bufferlist event_bl; - event_bl.substr_of(bl, event_offset, event_length); + event_bl.substr_of(bl, buffer_offset + event_offset, event_length); journal::EventEntry event_entry(journal::AioWriteEvent(offset + event_offset, event_length, event_bl), ceph_clock_now()); - bufferlists.emplace_back(); - encode(event_entry, bufferlists.back()); + bufferlists->emplace_back(); + encode(event_entry, bufferlists->back()); event_offset += event_length; bytes_remaining -= event_length; } while (bytes_remaining > 0); +} - return append_io_events(journal::EVENT_TYPE_AIO_WRITE, bufferlists, offset, - length, flush_entry, 0); +template <typename I> +uint64_t Journal<I>::append_write_event(const Extents &image_extents, + const bufferlist &bl, + bool flush_entry) { + Bufferlists bufferlists; + uint64_t buffer_offset = 0; + for (auto &extent : image_extents) { + add_write_event_entries(extent.first, extent.second, bl, buffer_offset, + &bufferlists); + + buffer_offset += extent.second; + } + + return append_io_events(journal::EVENT_TYPE_AIO_WRITE, bufferlists, + image_extents, flush_entry, 0); +} + +template <typename I> +uint64_t Journal<I>::append_write_same_event(const Extents &image_extents, + const bufferlist &bl, + bool flush_entry) { + Bufferlists bufferlists; + for (auto &extent : image_extents) { + journal::EventEntry event_entry( + journal::AioWriteSameEvent(extent.first, extent.second, bl), + ceph_clock_now()); + + bufferlists.emplace_back(); + encode(event_entry, bufferlists.back()); + } + + return append_io_events(journal::EVENT_TYPE_AIO_WRITESAME, bufferlists, + image_extents, flush_entry, 0); +} + +template <typename I> +uint64_t Journal<I>::append_discard_event(const Extents &image_extents, + uint32_t discard_granularity_bytes, + bool flush_entry) { + Bufferlists bufferlists; + for (auto &extent : image_extents) { + journal::EventEntry event_entry( + journal::AioDiscardEvent(extent.first, extent.second, + discard_granularity_bytes), + ceph_clock_now()); + + bufferlists.emplace_back(); + encode(event_entry, bufferlists.back()); + } + + return append_io_events(journal::EVENT_TYPE_AIO_DISCARD, bufferlists, + image_extents, flush_entry, 0); } template <typename I> @@ -832,7 +884,8 @@ uint64_t Journal<I>::append_compare_and_write_event(uint64_t offset, } while (bytes_remaining > 0); return append_io_events(journal::EVENT_TYPE_AIO_COMPARE_AND_WRITE, - bufferlists, offset, length, flush_entry, -EILSEQ); + bufferlists, {{offset, length}}, flush_entry, + -EILSEQ); } template <typename I> @@ -842,14 +895,14 @@ uint64_t Journal<I>::append_io_event(journal::EventEntry &&event_entry, bufferlist bl; event_entry.timestamp = ceph_clock_now(); encode(event_entry, bl); - return append_io_events(event_entry.get_event_type(), {bl}, offset, length, - flush_entry, filter_ret_val); + return append_io_events(event_entry.get_event_type(), {bl}, + {{offset, length}}, flush_entry, filter_ret_val); } template <typename I> uint64_t Journal<I>::append_io_events(journal::EventType event_type, const Bufferlists &bufferlists, - uint64_t offset, size_t length, + const Extents &image_extents, bool flush_entry, int filter_ret_val) { ceph_assert(!bufferlists.empty()); @@ -870,14 +923,13 @@ uint64_t Journal<I>::append_io_events(journal::EventType event_type, { std::lock_guard event_locker{m_event_lock}; - m_events[tid] = Event(futures, offset, length, filter_ret_val); + m_events[tid] = Event(futures, image_extents, filter_ret_val); } CephContext *cct = m_image_ctx.cct; ldout(cct, 20) << this << " " << __func__ << ": " << "event=" << event_type << ", " - << "offset=" << offset << ", " - << "length=" << length << ", " + << "image_extents=" << image_extents << ", " << "flush=" << flush_entry << ", tid=" << tid << dendl; Context *on_safe = create_async_context_callback( diff --git a/src/librbd/Journal.h b/src/librbd/Journal.h index 1ef9ffa8830..5327adac719 100644 --- a/src/librbd/Journal.h +++ b/src/librbd/Journal.h @@ -18,6 +18,7 @@ #include "journal/ReplayHandler.h" #include "librbd/Utils.h" #include "librbd/asio/ContextWQ.h" +#include "librbd/io/Types.h" #include "librbd/journal/Types.h" #include "librbd/journal/TypeTraits.h" @@ -133,14 +134,20 @@ public: void user_flushed(); - uint64_t append_write_event(uint64_t offset, size_t length, + uint64_t append_write_event(const io::Extents &image_extents, const bufferlist &bl, bool flush_entry); + uint64_t append_write_same_event(const io::Extents &image_extents, + const bufferlist &bl, + bool flush_entry); uint64_t append_compare_and_write_event(uint64_t offset, size_t length, const bufferlist &cmp_bl, const bufferlist &write_bl, bool flush_entry); + uint64_t append_discard_event(const io::Extents &image_extents, + uint32_t discard_granularity_bytes, + bool flush_entry); uint64_t append_io_event(journal::EventEntry &&event_entry, uint64_t offset, size_t length, bool flush_entry, int filter_ret_val); @@ -200,11 +207,13 @@ private: Event() { } - Event(const Futures &_futures, uint64_t offset, size_t length, + Event(const Futures &_futures, const io::Extents &image_extents, int filter_ret_val) : futures(_futures), filter_ret_val(filter_ret_val) { - if (length > 0) { - pending_extents.insert(offset, length); + for (auto &extent : image_extents) { + if (extent.second > 0) { + pending_extents.insert(extent.first, extent.second); + } } } }; @@ -322,9 +331,13 @@ private: bool is_journal_replaying(const ceph::mutex &) const; bool is_tag_owner(const ceph::mutex &) const; + void add_write_event_entries(uint64_t offset, size_t length, + const bufferlist &bl, + uint64_t buffer_offset, + Bufferlists *bufferlists); uint64_t append_io_events(journal::EventType event_type, const Bufferlists &bufferlists, - uint64_t offset, size_t length, bool flush_entry, + const io::Extents &extents, bool flush_entry, int filter_ret_val); Future wait_event(ceph::mutex &lock, uint64_t tid, Context *on_safe); diff --git a/src/librbd/io/ImageRequest.cc b/src/librbd/io/ImageRequest.cc index e4c41c22976..fb9f8944ed8 100644 --- a/src/librbd/io/ImageRequest.cc +++ b/src/librbd/io/ImageRequest.cc @@ -473,7 +473,7 @@ void AbstractImageWriteRequest<I>::send_request() { if (journaling) { // in-flight ops are flushed prior to closing the journal ceph_assert(image_ctx.journal != NULL); - journal_tid = append_journal_event(m_synchronous); + journal_tid = append_journal_event(); } // it's very important that IOContext is captured here instead of @@ -518,22 +518,12 @@ void ImageWriteRequest<I>::assemble_extent( } template <typename I> -uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) { +uint64_t ImageWriteRequest<I>::append_journal_event() { I &image_ctx = this->m_image_ctx; - uint64_t tid = 0; - uint64_t buffer_offset = 0; ceph_assert(!this->m_image_extents.empty()); - for (auto &extent : this->m_image_extents) { - bufferlist sub_bl; - sub_bl.substr_of(m_bl, buffer_offset, extent.second); - buffer_offset += extent.second; - - tid = image_ctx.journal->append_write_event(extent.first, extent.second, - sub_bl, synchronous); - } - - return tid; + return image_ctx.journal->append_write_event( + this->m_image_extents, m_bl, false); } template <typename I> @@ -566,22 +556,12 @@ void ImageWriteRequest<I>::update_stats(size_t length) { } template <typename I> -uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) { +uint64_t ImageDiscardRequest<I>::append_journal_event() { I &image_ctx = this->m_image_ctx; - uint64_t tid = 0; ceph_assert(!this->m_image_extents.empty()); - for (auto &extent : this->m_image_extents) { - journal::EventEntry event_entry( - journal::AioDiscardEvent(extent.first, - extent.second, - this->m_discard_granularity_bytes)); - tid = image_ctx.journal->append_io_event(std::move(event_entry), - extent.first, extent.second, - synchronous, 0); - } - - return tid; + return image_ctx.journal->append_discard_event( + this->m_image_extents, m_discard_granularity_bytes, false); } template <typename I> @@ -717,21 +697,12 @@ void ImageFlushRequest<I>::send_request() { } template <typename I> -uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) { +uint64_t ImageWriteSameRequest<I>::append_journal_event() { I &image_ctx = this->m_image_ctx; - uint64_t tid = 0; ceph_assert(!this->m_image_extents.empty()); - for (auto &extent : this->m_image_extents) { - journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first, - extent.second, - m_data_bl)); - tid = image_ctx.journal->append_io_event(std::move(event_entry), - extent.first, extent.second, - synchronous, 0); - } - - return tid; + return image_ctx.journal->append_write_same_event( + this->m_image_extents, m_data_bl, false); } template <typename I> @@ -768,8 +739,7 @@ void ImageWriteSameRequest<I>::update_stats(size_t length) { } template <typename I> -uint64_t ImageCompareAndWriteRequest<I>::append_journal_event( - bool synchronous) { +uint64_t ImageCompareAndWriteRequest<I>::append_journal_event() { I &image_ctx = this->m_image_ctx; uint64_t tid = 0; @@ -779,7 +749,7 @@ uint64_t ImageCompareAndWriteRequest<I>::append_journal_event( extent.second, m_cmp_bl, m_bl, - synchronous); + false); return tid; } diff --git a/src/librbd/io/ImageRequest.h b/src/librbd/io/ImageRequest.h index 2668c1acb2c..996c90a11f2 100644 --- a/src/librbd/io/ImageRequest.h +++ b/src/librbd/io/ImageRequest.h @@ -114,11 +114,6 @@ private: template <typename ImageCtxT = ImageCtx> class AbstractImageWriteRequest : public ImageRequest<ImageCtxT> { -public: - inline void flag_synchronous() { - m_synchronous = true; - } - protected: using typename ImageRequest<ImageCtxT>::ObjectRequests; @@ -127,8 +122,7 @@ protected: const char *trace_name, const ZTracer::Trace &parent_trace) : ImageRequest<ImageCtxT>(image_ctx, aio_comp, std::move(image_extents), - area, trace_name, parent_trace), - m_synchronous(false) { + area, trace_name, parent_trace) { } void send_request() override; @@ -144,11 +138,8 @@ protected: const LightweightObjectExtent &object_extent, IOContext io_context, uint64_t journal_tid, bool single_extent, Context *on_finish) = 0; - virtual uint64_t append_journal_event(bool synchronous) = 0; + virtual uint64_t append_journal_event() = 0; virtual void update_stats(size_t length) = 0; - -private: - bool m_synchronous; }; template <typename ImageCtxT = ImageCtx> @@ -180,7 +171,7 @@ protected: const LightweightObjectExtent &object_extent, IOContext io_context, uint64_t journal_tid, bool single_extent, Context *on_finish) override; - uint64_t append_journal_event(bool synchronous) override; + uint64_t append_journal_event() override; void update_stats(size_t length) override; private: @@ -215,7 +206,7 @@ protected: const LightweightObjectExtent &object_extent, IOContext io_context, uint64_t journal_tid, bool single_extent, Context *on_finish) override; - uint64_t append_journal_event(bool synchronous) override; + uint64_t append_journal_event() override; void update_stats(size_t length) override; int prune_object_extents( @@ -283,7 +274,7 @@ protected: const LightweightObjectExtent &object_extent, IOContext io_context, uint64_t journal_tid, bool single_extent, Context *on_finish) override; - uint64_t append_journal_event(bool synchronous) override; + uint64_t append_journal_event() override; void update_stats(size_t length) override; private: bufferlist m_data_bl; @@ -315,7 +306,7 @@ protected: const LightweightObjectExtent &object_extent, IOContext io_context, uint64_t journal_tid, bool single_extent, Context *on_finish) override; - uint64_t append_journal_event(bool synchronous) override; + uint64_t append_journal_event() override; void update_stats(size_t length) override; aio_type_t get_aio_type() const override { diff --git a/src/librbd/io/ObjectRequest.cc b/src/librbd/io/ObjectRequest.cc index 827f551d1f7..2bf06966c36 100644 --- a/src/librbd/io/ObjectRequest.cc +++ b/src/librbd/io/ObjectRequest.cc @@ -834,16 +834,17 @@ void ObjectListSnapsRequest<I>::handle_list_snaps(int r) { end_snap_id, &diff, &end_size, &exists, &clone_end_snap_id, &read_whole_object); - if (read_whole_object || - (!diff.empty() && - ((m_list_snaps_flags & LIST_SNAPS_FLAG_WHOLE_OBJECT) != 0))) { + if (read_whole_object) { ldout(cct, 1) << "need to read full object" << dendl; - diff.clear(); diff.insert(0, image_ctx->layout.object_size); + exists = true; end_size = image_ctx->layout.object_size; clone_end_snap_id = end_snap_id; - } else if (!exists) { - end_size = 0; + } else if ((m_list_snaps_flags & LIST_SNAPS_FLAG_WHOLE_OBJECT) != 0 && + !diff.empty()) { + ldout(cct, 20) << "expanding diff from " << diff << dendl; + diff.clear(); + diff.insert(0, image_ctx->layout.object_size); } if (exists) { @@ -884,7 +885,7 @@ void ObjectListSnapsRequest<I>::handle_list_snaps(int r) { << "end_size=" << end_size << ", " << "prev_end_size=" << prev_end_size << ", " << "exists=" << exists << ", " - << "whole_object=" << read_whole_object << dendl; + << "read_whole_object=" << read_whole_object << dendl; // check if object exists prior to start of incremental snap delta so that // we don't DNE the object if no additional deltas exist diff --git a/src/librbd/librbd.cc b/src/librbd/librbd.cc index 1272d95dd7e..132a0084a9f 100644 --- a/src/librbd/librbd.cc +++ b/src/librbd/librbd.cc @@ -15,6 +15,12 @@ #include <errno.h> +// these strand headers declare static variables that need to be shared between +// librbd.so and librados.so. referencing them here causes librbd.so to link +// their symbols as 'global unique'. see https://tracker.ceph.com/issues/63682 +#include <boost/asio/strand.hpp> +#include <boost/asio/io_context_strand.hpp> + #include "common/deleter.h" #include "common/dout.h" #include "common/errno.h" |