summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jason Dillaman <dillaman@redhat.com> 2017-02-03 03:31:54 +0100
committer: Jason Dillaman <dillaman@redhat.com> 2017-03-08 17:59:56 +0100
commit: 56b17f4165c854555e38a7398fd0162945b0f56d (patch)
tree: d78a0d0b6af9fe4f43657383b544278f24a86dd8
parent: librbd: move group and diff API functions (diff)
download: ceph-56b17f4165c854555e38a7398fd0162945b0f56d.tar.xz
ceph-56b17f4165c854555e38a7398fd0162945b0f56d.zip
librbd: asynchronous journal demote state machine
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
-rw-r--r-- src/librbd/CMakeLists.txt 3
-rw-r--r-- src/librbd/Journal.cc 108
-rw-r--r-- src/librbd/Journal.h 2
-rw-r--r-- src/librbd/api/Mirror.cc 2
-rw-r--r-- src/librbd/journal/DemoteRequest.cc 255
-rw-r--r-- src/librbd/journal/DemoteRequest.h 107
6 files changed, 386 insertions, 91 deletions
diff --git a/src/librbd/CMakeLists.txt b/src/librbd/CMakeLists.txt
index 456c2d8f582..dc5dda37014 100644
--- a/src/librbd/CMakeLists.txt
+++ b/src/librbd/CMakeLists.txt
@@ -47,10 +47,11 @@ set(librbd_internal_srcs
io/ImageRequestWQ.cc
io/ObjectRequest.cc
io/ReadResult.cc
- journal/RemoveRequest.cc
journal/CreateRequest.cc
+ journal/DemoteRequest.cc
journal/OpenRequest.cc
journal/PromoteRequest.cc
+ journal/RemoveRequest.cc
journal/Replay.cc
journal/StandardPolicy.cc
journal/Utils.cc
diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc
index e75f3bfc75c..476f3e00fb2 100644
--- a/src/librbd/Journal.cc
+++ b/src/librbd/Journal.cc
@@ -2,25 +2,26 @@
// vim: ts=8 sw=2 smarttab
#include "librbd/Journal.h"
-#include "librbd/ExclusiveLock.h"
-#include "librbd/ImageCtx.h"
+#include "include/rados/librados.hpp"
+#include "common/errno.h"
+#include "common/Timer.h"
+#include "common/WorkQueue.h"
#include "cls/journal/cls_journal_types.h"
#include "journal/Journaler.h"
#include "journal/Policy.h"
#include "journal/ReplayEntry.h"
#include "journal/Settings.h"
#include "journal/Utils.h"
-#include "common/errno.h"
-#include "common/Timer.h"
-#include "common/WorkQueue.h"
-#include "include/rados/librados.hpp"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
#include "librbd/io/ImageRequestWQ.h"
#include "librbd/io/ObjectRequest.h"
#include "librbd/journal/CreateRequest.h"
+#include "librbd/journal/DemoteRequest.h"
#include "librbd/journal/OpenRequest.h"
-#include "librbd/journal/PromoteRequest.h"
#include "librbd/journal/RemoveRequest.h"
#include "librbd/journal/Replay.h"
+#include "librbd/journal/PromoteRequest.h"
#include <boost/scope_exit.hpp>
#include <utility>
@@ -558,6 +559,18 @@ int Journal<I>::promote(I *image_ctx) {
}
template <typename I>
+int Journal<I>::demote(I *image_ctx) {
+ CephContext *cct = image_ctx->cct;
+ ldout(cct, 20) << __func__ << dendl;
+
+ C_SaferCond ctx;
+ auto req = journal::DemoteRequest<I>::create(*image_ctx, &ctx);
+ req->send();
+
+ return ctx.wait();
+}
+
+template <typename I>
bool Journal<I>::is_journal_ready() const {
Mutex::Locker locker(m_lock);
return (m_state == STATE_READY);
@@ -673,87 +686,6 @@ journal::TagData Journal<I>::get_tag_data() const {
}
template <typename I>
-int Journal<I>::demote() {
- CephContext *cct = m_image_ctx.cct;
- ldout(cct, 20) << __func__ << dendl;
-
- int r;
- C_SaferCond ctx;
- Future future;
- C_SaferCond flush_ctx;
-
- {
- Mutex::Locker locker(m_lock);
- assert(m_journaler != nullptr && is_tag_owner(m_lock));
-
- cls::journal::Client client;
- r = m_journaler->get_cached_client(IMAGE_CLIENT_ID, &client);
- if (r < 0) {
- lderr(cct) << this << " " << __func__ << ": "
- << "failed to retrieve client: " << cpp_strerror(r) << dendl;
- return r;
- }
-
- assert(m_tag_data.mirror_uuid == LOCAL_MIRROR_UUID);
- journal::TagPredecessor predecessor;
- predecessor.mirror_uuid = LOCAL_MIRROR_UUID;
- if (!client.commit_position.object_positions.empty()) {
- auto position = client.commit_position.object_positions.front();
- predecessor.commit_valid = true;
- predecessor.tag_tid = position.tag_tid;
- predecessor.entry_tid = position.entry_tid;
- }
-
- cls::journal::Tag new_tag;
- r = allocate_journaler_tag(cct, m_journaler, m_tag_class, predecessor,
- ORPHAN_MIRROR_UUID, &new_tag);
- if (r < 0) {
- return r;
- }
-
- bufferlist::iterator tag_data_bl_it = new_tag.data.begin();
- r = C_DecodeTag::decode(&tag_data_bl_it, &m_tag_data);
- if (r < 0) {
- lderr(cct) << this << " " << __func__ << ": "
- << "failed to decode newly allocated tag" << dendl;
- return r;
- }
-
- journal::EventEntry event_entry{journal::DemoteEvent{}, ceph_clock_now()};
- bufferlist event_entry_bl;
- ::encode(event_entry, event_entry_bl);
-
- m_tag_tid = new_tag.tid;
- future = m_journaler->append(m_tag_tid, event_entry_bl);
- future.flush(&ctx);
- }
-
- r = ctx.wait();
- if (r < 0) {
- lderr(cct) << this << " " << __func__ << ": "
- << "failed to append demotion journal event: " << cpp_strerror(r)
- << dendl;
- return r;
- }
-
- {
- Mutex::Locker l(m_lock);
- m_journaler->committed(future);
- m_journaler->flush_commit_position(&flush_ctx);
- }
-
- r = flush_ctx.wait();
- if (r < 0) {
- lderr(cct) << this << " " << __func__ << ": "
- << "failed to flush demotion commit position: "
- << cpp_strerror(r) << dendl;
- return r;
- }
-
- return 0;
-}
-
-template <typename I>
void Journal<I>::allocate_local_tag(Context *on_finish) {
CephContext *cct = m_image_ctx.cct;
ldout(cct, 20) << this << " " << __func__ << dendl;
diff --git a/src/librbd/Journal.h b/src/librbd/Journal.h
index 022ba9c2832..6a60826dc97 100644
--- a/src/librbd/Journal.h
+++ b/src/librbd/Journal.h
@@ -115,6 +115,7 @@ public:
std::string *mirror_uuid);
static int request_resync(ImageCtxT *image_ctx);
static int promote(ImageCtxT *image_ctx);
+ static int demote(ImageCtxT *image_ctx);
bool is_journal_ready() const;
bool is_journal_replaying() const;
@@ -128,7 +129,6 @@ public:
bool is_tag_owner() const;
uint64_t get_tag_tid() const;
journal::TagData get_tag_data() const;
- int demote();
void allocate_local_tag(Context *on_finish);
void allocate_tag(const std::string &mirror_uuid,
diff --git a/src/librbd/api/Mirror.cc b/src/librbd/api/Mirror.cc
index dfed5d7d38f..81386af70d2 100644
--- a/src/librbd/api/Mirror.cc
+++ b/src/librbd/api/Mirror.cc
@@ -374,7 +374,7 @@ int Mirror<I>::image_demote(I *ictx) {
return -EINVAL;
}
- r = ictx->journal->demote();
+ r = Journal<I>::demote(ictx);
if (r < 0) {
lderr(cct) << "failed to demote image: " << cpp_strerror(r)
<< dendl;
diff --git a/src/librbd/journal/DemoteRequest.cc b/src/librbd/journal/DemoteRequest.cc
new file mode 100644
index 00000000000..c41961d73ba
--- /dev/null
+++ b/src/librbd/journal/DemoteRequest.cc
@@ -0,0 +1,255 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/journal/DemoteRequest.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "journal/Journaler.h"
+#include "journal/Settings.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/journal/OpenRequest.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::journal::DemoteRequest: " << this \
+ << " " << __func__ << ": "
+
+namespace librbd {
+namespace journal {
+
+using librbd::util::create_async_context_callback;
+using librbd::util::create_context_callback;
+
+template <typename I>
+DemoteRequest<I>::DemoteRequest(I &image_ctx, Context *on_finish)
+ : m_image_ctx(image_ctx), m_on_finish(on_finish),
+ m_lock("DemoteRequest::m_lock") {
+}
+
+template <typename I>
+DemoteRequest<I>::~DemoteRequest() {
+ assert(m_journaler == nullptr);
+}
+
+template <typename I>
+void DemoteRequest<I>::send() {
+ open_journaler();
+}
+
+template <typename I>
+void DemoteRequest<I>::open_journaler() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << dendl;
+
+ m_journaler = new Journaler(m_image_ctx.md_ctx, m_image_ctx.id,
+ Journal<>::IMAGE_CLIENT_ID, {});
+ auto ctx = create_async_context_callback(
+ m_image_ctx, create_context_callback<
+ DemoteRequest<I>, &DemoteRequest<I>::handle_open_journaler>(this));
+ auto req = OpenRequest<I>::create(&m_image_ctx, m_journaler, &m_lock,
+ &m_client_meta, &m_tag_tid, &m_tag_data,
+ ctx);
+ req->send();
+}
+
+template <typename I>
+void DemoteRequest<I>::handle_open_journaler(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ m_ret_val = r;
+ lderr(cct) << "failed to open journal: " << cpp_strerror(r) << dendl;
+ shut_down_journaler();
+ return;
+ } else if (m_tag_data.mirror_uuid != Journal<>::LOCAL_MIRROR_UUID) {
+ m_ret_val = -EINVAL;
+ lderr(cct) << "image is not currently the primary" << dendl;
+ shut_down_journaler();
+ return;
+ }
+
+ allocate_tag();
+}
+
+template <typename I>
+void DemoteRequest<I>::allocate_tag() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << dendl;
+
+ cls::journal::Client client;
+ int r = m_journaler->get_cached_client(Journal<>::IMAGE_CLIENT_ID, &client);
+ if (r < 0) {
+ m_ret_val = r;
+ lderr(cct) << "failed to retrieve client: " << cpp_strerror(r) << dendl;
+ shut_down_journaler();
+ return;
+ }
+
+ TagPredecessor predecessor;
+ predecessor.mirror_uuid = Journal<>::LOCAL_MIRROR_UUID;
+ if (!client.commit_position.object_positions.empty()) {
+ auto position = client.commit_position.object_positions.front();
+ predecessor.commit_valid = true;
+ predecessor.tag_tid = position.tag_tid;
+ predecessor.entry_tid = position.entry_tid;
+ }
+
+ TagData tag_data;
+ tag_data.mirror_uuid = Journal<>::ORPHAN_MIRROR_UUID;
+ tag_data.predecessor = std::move(predecessor);
+
+ bufferlist tag_bl;
+ ::encode(tag_data, tag_bl);
+
+ auto ctx = create_context_callback<
+ DemoteRequest<I>, &DemoteRequest<I>::handle_allocate_tag>(this);
+ m_journaler->allocate_tag(m_client_meta.tag_class, tag_bl, &m_tag, ctx);
+}
+
+template <typename I>
+void DemoteRequest<I>::handle_allocate_tag(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ m_ret_val = r;
+ lderr(cct) << "failed to allocate tag: " << cpp_strerror(r) << dendl;
+ shut_down_journaler();
+ return;
+ }
+
+ m_tag_tid = m_tag.tid;
+ append_event();
+}
+
+template <typename I>
+void DemoteRequest<I>::append_event() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << dendl;
+
+ EventEntry event_entry{DemoteEvent{}, ceph_clock_now()};
+ bufferlist event_entry_bl;
+ ::encode(event_entry, event_entry_bl);
+
+ m_journaler->start_append(0, 0, 0);
+ m_future = m_journaler->append(m_tag_tid, event_entry_bl);
+
+ auto ctx = create_context_callback<
+ DemoteRequest<I>, &DemoteRequest<I>::handle_append_event>(this);
+ m_future.flush(ctx);
+
+}
+
+template <typename I>
+void DemoteRequest<I>::handle_append_event(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ m_ret_val = r;
+ lderr(cct) << "failed to append demotion journal event: " << cpp_strerror(r)
+ << dendl;
+ stop_append();
+ return;
+ }
+
+ commit_event();
+}
+
+template <typename I>
+void DemoteRequest<I>::commit_event() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << dendl;
+
+ m_journaler->committed(m_future);
+
+ auto ctx = create_context_callback<
+ DemoteRequest<I>, &DemoteRequest<I>::handle_commit_event>(this);
+ m_journaler->flush_commit_position(ctx);
+}
+
+template <typename I>
+void DemoteRequest<I>::handle_commit_event(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ m_ret_val = r;
+ lderr(cct) << "failed to flush demotion commit position: "
+ << cpp_strerror(r) << dendl;
+ }
+
+ stop_append();
+}
+
+template <typename I>
+void DemoteRequest<I>::stop_append() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << dendl;
+
+ auto ctx = create_context_callback<
+ DemoteRequest<I>, &DemoteRequest<I>::handle_stop_append>(this);
+ m_journaler->stop_append(ctx);
+}
+
+template <typename I>
+void DemoteRequest<I>::handle_stop_append(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ if (m_ret_val == 0) {
+ m_ret_val = r;
+ }
+ lderr(cct) << "failed to stop journal append: " << cpp_strerror(r) << dendl;
+ }
+
+ shut_down_journaler();
+}
+
+template <typename I>
+void DemoteRequest<I>::shut_down_journaler() {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << dendl;
+
+ Context *ctx = create_async_context_callback(
+ m_image_ctx, create_context_callback<
+ DemoteRequest<I>, &DemoteRequest<I>::handle_shut_down_journaler>(this));
+ m_journaler->shut_down(ctx);
+}
+
+template <typename I>
+void DemoteRequest<I>::handle_shut_down_journaler(int r) {
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << "r=" << r << dendl;
+
+ if (r < 0) {
+ lderr(cct) << "failed to shut down journal: " << cpp_strerror(r) << dendl;
+ }
+
+ delete m_journaler;
+ m_journaler = nullptr;
+ finish(r);
+}
+
+template <typename I>
+void DemoteRequest<I>::finish(int r) {
+ if (m_ret_val < 0) {
+ r = m_ret_val;
+ }
+
+ CephContext *cct = m_image_ctx.cct;
+ ldout(cct, 20) << "r=" << r << dendl;
+
+ m_on_finish->complete(r);
+ delete this;
+}
+
+} // namespace journal
+} // namespace librbd
+
+template class librbd::journal::DemoteRequest<librbd::ImageCtx>;
diff --git a/src/librbd/journal/DemoteRequest.h b/src/librbd/journal/DemoteRequest.h
new file mode 100644
index 00000000000..5fea7f47b30
--- /dev/null
+++ b/src/librbd/journal/DemoteRequest.h
@@ -0,0 +1,107 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_JOURNAL_DEMOTE_REQUEST_H
+#define CEPH_LIBRBD_JOURNAL_DEMOTE_REQUEST_H
+
+#include "common/Mutex.h"
+#include "cls/journal/cls_journal_types.h"
+#include "journal/Future.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+
+struct Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace journal {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class DemoteRequest {
+public:
+ static DemoteRequest *create(ImageCtxT &image_ctx, Context *on_finish) {
+ return new DemoteRequest(image_ctx, on_finish);
+ }
+
+ DemoteRequest(ImageCtxT &image_ctx, Context *on_finish);
+ ~DemoteRequest();
+
+ void send();
+
+private:
+ /**
+ * @verbatim
+ *
+ * <start>
+ * |
+ * v
+ * OPEN_JOURNALER * * * * *
+ * | *
+ * v *
+ * ALLOCATE_TAG * * * * * *
+ * | *
+ * v *
+ * APPEND_EVENT * * * *
+ * | * *
+ * v * *
+ * COMMIT_EVENT * *
+ * | * *
+ * v * *
+ * STOP_APPEND <* * * *
+ * | *
+ * v *
+ * SHUT_DOWN_JOURNALER <* *
+ * |
+ * v
+ * <finish>
+ *
+ * @endverbatim
+ */
+
+ typedef typename TypeTraits<ImageCtxT>::Journaler Journaler;
+ typedef typename TypeTraits<ImageCtxT>::Future Future;
+
+ ImageCtxT &m_image_ctx;
+ Context *m_on_finish;
+
+ Journaler *m_journaler = nullptr;
+ int m_ret_val = 0;
+
+ Mutex m_lock;
+ ImageClientMeta m_client_meta;
+ uint64_t m_tag_tid = 0;
+ TagData m_tag_data;
+
+ cls::journal::Tag m_tag;
+ Future m_future;
+
+ void open_journaler();
+ void handle_open_journaler(int r);
+
+ void allocate_tag();
+ void handle_allocate_tag(int r);
+
+ void append_event();
+ void handle_append_event(int r);
+
+ void commit_event();
+ void handle_commit_event(int r);
+
+ void stop_append();
+ void handle_stop_append(int r);
+
+ void shut_down_journaler();
+ void handle_shut_down_journaler(int r);
+
+ void finish(int r);
+
+};
+
+} // namespace journal
+} // namespace librbd
+
+extern template class librbd::journal::DemoteRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_JOURNAL_DEMOTE_REQUEST_H