diff options
author | Sage Weil <sage@inktank.com> | 2013-04-17 00:48:15 +0200 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-04-17 00:48:15 +0200 |
commit | 4865fb73c66ebbd4e2330ccf22dcbbdc0f749f13 (patch) | |
tree | 15f8ea49868d94dad152e3c909e3ff67f3315247 /src/librbd | |
parent | librbd: flush on diff_iterate (diff) | |
parent | LibrbdWriteback: complete writes strictly in order (diff) | |
download | ceph-4865fb73c66ebbd4e2330ccf22dcbbdc0f749f13.tar.xz ceph-4865fb73c66ebbd4e2330ccf22dcbbdc0f749f13.zip |
Merge pull request #214 from ceph/wip-objectcacher-handler-ordered
keep write responses to clones in order
Reviewed-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'src/librbd')
-rw-r--r-- | src/librbd/LibrbdWriteback.cc | 69 | ||||
-rw-r--r-- | src/librbd/LibrbdWriteback.h | 28 |
2 files changed, 83 insertions, 14 deletions
diff --git a/src/librbd/LibrbdWriteback.cc b/src/librbd/LibrbdWriteback.cc
index 1689ad91860..237901dc61f 100644
--- a/src/librbd/LibrbdWriteback.cc
+++ b/src/librbd/LibrbdWriteback.cc
@@ -48,7 +48,6 @@ namespace librbd {
     C_Request(CephContext *cct, Context *c, Mutex *l)
       : m_cct(cct), m_ctx(c), m_lock(l) {}
     virtual ~C_Request() {}
-    void set_req(AioRequest *req);
     virtual void finish(int r) {
       ldout(m_cct, 20) << "aio_cb completing " << dendl;
       {
@@ -63,16 +62,39 @@ namespace librbd {
     Mutex *m_lock;
   };
 
+  class C_OrderedWrite : public Context {
+  public:
+    C_OrderedWrite(CephContext *cct, LibrbdWriteback::write_result_d *result,
+                   LibrbdWriteback *wb)
+      : m_cct(cct), m_result(result), m_wb_handler(wb) {}
+    virtual ~C_OrderedWrite() {}
+    virtual void finish(int r) {
+      ldout(m_cct, 20) << "C_OrderedWrite completing " << m_result << dendl;
+      {
+        Mutex::Locker l(m_wb_handler->m_lock);
+        assert(!m_result->done);
+        m_result->done = true;
+        m_result->ret = r;
+        m_wb_handler->complete_writes(m_result->oid);
+      }
+      ldout(m_cct, 20) << "C_OrderedWrite finished " << m_result << dendl;
+    }
+  private:
+    CephContext *m_cct;
+    LibrbdWriteback::write_result_d *m_result;
+    LibrbdWriteback *m_wb_handler;
+  };
+
   LibrbdWriteback::LibrbdWriteback(ImageCtx *ictx, Mutex& lock)
     : m_tid(0), m_lock(lock), m_ictx(ictx)
   {
   }
 
-  tid_t LibrbdWriteback::read(const object_t& oid,
-                              const object_locator_t& oloc,
-                              uint64_t off, uint64_t len, snapid_t snapid,
-                              bufferlist *pbl, uint64_t trunc_size,
-                              __u32 trunc_seq, Context *onfinish)
+  void LibrbdWriteback::read(const object_t& oid,
+                             const object_locator_t& oloc,
+                             uint64_t off, uint64_t len, snapid_t snapid,
+                             bufferlist *pbl, uint64_t trunc_size,
+                             __u32 trunc_seq, Context *onfinish)
   {
     // on completion, take the mutex and then call onfinish.
     Context *req = new C_Request(m_ictx->cct, onfinish, &m_lock);
@@ -82,7 +104,6 @@ namespace librbd {
                                          len, off);
     rados_completion->release();
     assert(r >= 0);
-    return ++m_tid;
   }
 
   bool LibrbdWriteback::may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid)
@@ -132,8 +153,10 @@ namespace librbd {
                             object_no, 0, m_ictx->layout.fl_object_size,
                             objectx);
     uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, overlap);
-
-    C_Request *req_comp = new C_Request(m_ictx->cct, oncommit, &m_lock);
+    write_result_d *result = new write_result_d(oid.name, oncommit);
+    m_writes[oid.name].push(result);
+    ldout(m_ictx->cct, 20) << "write will wait for result " << result << dendl;
+    C_OrderedWrite *req_comp = new C_OrderedWrite(m_ictx->cct, result, this);
     AioWrite *req = new AioWrite(m_ictx, oid.name,
                                  object_no, off, objectx, object_overlap,
                                  bl, snapc, snap_id,
@@ -141,4 +164,32 @@ namespace librbd {
     req->send();
     return ++m_tid;
   }
+
+  void LibrbdWriteback::complete_writes(const std::string& oid)
+  {
+    assert(m_lock.is_locked());
+    std::queue<write_result_d*>& results = m_writes[oid];
+    ldout(m_ictx->cct, 20) << "complete_writes() oid " << oid << dendl;
+    std::list<write_result_d*> finished;
+
+    while (!results.empty()) {
+      write_result_d *result = results.front();
+      if (!result->done)
+        break;
+      finished.push_back(result);
+      results.pop();
+    }
+
+    if (results.empty())
+      m_writes.erase(oid);
+
+    for (std::list<write_result_d*>::iterator it = finished.begin();
+         it != finished.end(); ++it) {
+      write_result_d *result = *it;
+      ldout(m_ictx->cct, 20) << "complete_writes() completing " << result
+                             << dendl;
+      result->oncommit->complete(result->ret);
+      delete result;
+    }
+  }
 }
diff --git a/src/librbd/LibrbdWriteback.h b/src/librbd/LibrbdWriteback.h
index b054dbc5950..ba8ff1f114d 100644
--- a/src/librbd/LibrbdWriteback.h
+++ b/src/librbd/LibrbdWriteback.h
@@ -3,6 +3,8 @@
 #ifndef CEPH_LIBRBD_LIBRBDWRITEBACKHANDLER_H
 #define CEPH_LIBRBD_LIBRBDWRITEBACKHANDLER_H
 
+#include <queue>
+
 #include "include/Context.h"
 #include "include/types.h"
 #include "include/rados/librados.hpp"
@@ -21,10 +23,10 @@ namespace librbd {
     virtual ~LibrbdWriteback() {}
 
     // Note that oloc, trunc_size, and trunc_seq are ignored
-    virtual tid_t read(const object_t& oid, const object_locator_t& oloc,
-                       uint64_t off, uint64_t len, snapid_t snapid,
-                       bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
-                       Context *onfinish);
+    virtual void read(const object_t& oid, const object_locator_t& oloc,
+                      uint64_t off, uint64_t len, snapid_t snapid,
+                      bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
+                      Context *onfinish);
 
     // Determine whether a read to this extent could be affected by a write-triggered copy-on-write
     virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid);
@@ -35,10 +37,26 @@ namespace librbd {
                         const bufferlist &bl, utime_t mtime, uint64_t trunc_size,
                         __u32 trunc_seq, Context *oncommit);
 
+    struct write_result_d {
+      bool done;
+      int ret;
+      std::string oid;
+      Context *oncommit;
+      write_result_d(const std::string& oid, Context *oncommit) :
+        done(false), ret(0), oid(oid), oncommit(oncommit) {}
+    private:
+      write_result_d(const write_result_d& rhs);
+      const write_result_d& operator=(const write_result_d& rhs);
+    };
+
   private:
-    int m_tid;
+    void complete_writes(const std::string& oid);
+
+    tid_t m_tid;
     Mutex& m_lock;
     librbd::ImageCtx *m_ictx;
+    hash_map<std::string, std::queue<write_result_d*> > m_writes;
+    friend class C_OrderedWrite;
   };
 }
 