summary | refs | log | tree | commit | diff | stats
path: root/src/librbd
diff options
context:
space:
mode:
author: Sage Weil <sage@inktank.com> 2013-04-17 00:48:15 +0200
committer: Sage Weil <sage@inktank.com> 2013-04-17 00:48:15 +0200
commit: 4865fb73c66ebbd4e2330ccf22dcbbdc0f749f13 (patch)
tree: 15f8ea49868d94dad152e3c909e3ff67f3315247 /src/librbd
parent: librbd: flush on diff_iterate (diff)
parent: LibrbdWriteback: complete writes strictly in order (diff)
download: ceph-4865fb73c66ebbd4e2330ccf22dcbbdc0f749f13.tar.xz
ceph-4865fb73c66ebbd4e2330ccf22dcbbdc0f749f13.zip
Merge pull request #214 from ceph/wip-objectcacher-handler-ordered
keep write responses to clones in order
Reviewed-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'src/librbd')
-rw-r--r-- src/librbd/LibrbdWriteback.cc 69
-rw-r--r-- src/librbd/LibrbdWriteback.h 28
2 files changed, 83 insertions, 14 deletions
diff --git a/src/librbd/LibrbdWriteback.cc b/src/librbd/LibrbdWriteback.cc
index 1689ad91860..237901dc61f 100644
--- a/src/librbd/LibrbdWriteback.cc
+++ b/src/librbd/LibrbdWriteback.cc
@@ -48,7 +48,6 @@ namespace librbd {
C_Request(CephContext *cct, Context *c, Mutex *l)
: m_cct(cct), m_ctx(c), m_lock(l) {}
virtual ~C_Request() {}
- void set_req(AioRequest *req);
virtual void finish(int r) {
ldout(m_cct, 20) << "aio_cb completing " << dendl;
{
@@ -63,16 +62,39 @@ namespace librbd {
Mutex *m_lock;
};
+ class C_OrderedWrite : public Context { // completion context for one write: records its result, then lets the handler deliver finished writes for that object
+ public:
+ C_OrderedWrite(CephContext *cct, LibrbdWriteback::write_result_d *result, // result: the per-write record this context will mark done
+ LibrbdWriteback *wb) // wb: handler whose lock and complete_writes() are used in finish()
+ : m_cct(cct), m_result(result), m_wb_handler(wb) {}
+ virtual ~C_OrderedWrite() {}
+ virtual void finish(int r) { // r: return code of the write, stored in m_result->ret
+ ldout(m_cct, 20) << "C_OrderedWrite completing " << m_result << dendl;
+ {
+ Mutex::Locker l(m_wb_handler->m_lock); // complete_writes() asserts that this lock is held
+ assert(!m_result->done); // a result must not be completed twice
+ m_result->done = true;
+ m_result->ret = r;
+ m_wb_handler->complete_writes(m_result->oid); // may run the oncommit callback and delete m_result
+ }
+ ldout(m_cct, 20) << "C_OrderedWrite finished " << m_result << dendl; // logs the pointer value only; m_result is not dereferenced here
+ }
+ private:
+ CephContext *m_cct; // used for ldout logging
+ LibrbdWriteback::write_result_d *m_result; // record updated in finish(); freed by complete_writes(), not by this class
+ LibrbdWriteback *m_wb_handler; // handler that owns the per-object result queues
+ };
+
LibrbdWriteback::LibrbdWriteback(ImageCtx *ictx, Mutex& lock) // keeps the image context pointer and a reference to the caller-supplied lock
: m_tid(0), m_lock(lock), m_ictx(ictx) // m_tid starts at 0; write() returns ++m_tid
{
}
- tid_t LibrbdWriteback::read(const object_t& oid,
- const object_locator_t& oloc,
- uint64_t off, uint64_t len, snapid_t snapid,
- bufferlist *pbl, uint64_t trunc_size,
- __u32 trunc_seq, Context *onfinish)
+ void LibrbdWriteback::read(const object_t& oid,
+ const object_locator_t& oloc,
+ uint64_t off, uint64_t len, snapid_t snapid,
+ bufferlist *pbl, uint64_t trunc_size,
+ __u32 trunc_seq, Context *onfinish)
{
// on completion, take the mutex and then call onfinish.
Context *req = new C_Request(m_ictx->cct, onfinish, &m_lock);
@@ -82,7 +104,6 @@ namespace librbd {
len, off);
rados_completion->release();
assert(r >= 0);
- return ++m_tid;
}
bool LibrbdWriteback::may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid)
@@ -132,8 +153,10 @@ namespace librbd {
object_no, 0, m_ictx->layout.fl_object_size,
objectx);
uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, overlap);
-
- C_Request *req_comp = new C_Request(m_ictx->cct, oncommit, &m_lock);
+ write_result_d *result = new write_result_d(oid.name, oncommit);
+ m_writes[oid.name].push(result);
+ ldout(m_ictx->cct, 20) << "write will wait for result " << result << dendl;
+ C_OrderedWrite *req_comp = new C_OrderedWrite(m_ictx->cct, result, this);
AioWrite *req = new AioWrite(m_ictx, oid.name,
object_no, off, objectx, object_overlap,
bl, snapc, snap_id,
@@ -141,4 +164,32 @@ namespace librbd {
req->send();
return ++m_tid;
}
+
+ void LibrbdWriteback::complete_writes(const std::string& oid) // deliver, in queue order, every leading write to oid that is already done; stop at the first one still pending
+ {
+ assert(m_lock.is_locked()); // caller must already hold m_lock
+ std::queue<write_result_d*>& results = m_writes[oid]; // per-object FIFO that write() pushes results onto
+ ldout(m_ictx->cct, 20) << "complete_writes() oid " << oid << dendl;
+ std::list<write_result_d*> finished; // results popped from the queue, completed further below
+
+ while (!results.empty()) {
+ write_result_d *result = results.front();
+ if (!result->done) // oldest write still outstanding: later results must wait behind it
+ break;
+ finished.push_back(result);
+ results.pop();
+ }
+
+ if (results.empty())
+ m_writes.erase(oid); // drop the drained queue; 'results' is not used after this point
+
+ for (std::list<write_result_d*>::iterator it = finished.begin(); // callbacks run only after the queue bookkeeping above is finished
+ it != finished.end(); ++it) {
+ write_result_d *result = *it;
+ ldout(m_ictx->cct, 20) << "complete_writes() completing " << result
+ << dendl;
+ result->oncommit->complete(result->ret); // hand the stored return code to the write's commit callback
+ delete result; // record was allocated with new in write()
+ }
+ }
}
diff --git a/src/librbd/LibrbdWriteback.h b/src/librbd/LibrbdWriteback.h
index b054dbc5950..ba8ff1f114d 100644
--- a/src/librbd/LibrbdWriteback.h
+++ b/src/librbd/LibrbdWriteback.h
@@ -3,6 +3,8 @@
#ifndef CEPH_LIBRBD_LIBRBDWRITEBACKHANDLER_H
#define CEPH_LIBRBD_LIBRBDWRITEBACKHANDLER_H
+#include <queue>
+
#include "include/Context.h"
#include "include/types.h"
#include "include/rados/librados.hpp"
@@ -21,10 +23,10 @@ namespace librbd {
virtual ~LibrbdWriteback() {}
// Note that oloc, trunc_size, and trunc_seq are ignored
- virtual tid_t read(const object_t& oid, const object_locator_t& oloc,
- uint64_t off, uint64_t len, snapid_t snapid,
- bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
- Context *onfinish);
+ virtual void read(const object_t& oid, const object_locator_t& oloc,
+ uint64_t off, uint64_t len, snapid_t snapid,
+ bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
+ Context *onfinish);
// Determine whether a read to this extent could be affected by a write-triggered copy-on-write
virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid);
@@ -35,10 +37,26 @@ namespace librbd {
const bufferlist &bl, utime_t mtime, uint64_t trunc_size,
__u32 trunc_seq, Context *oncommit);
+ struct write_result_d { // bookkeeping record for one in-flight write, queued per object in m_writes
+ bool done; // set to true by C_OrderedWrite::finish()
+ int ret; // return code stored by C_OrderedWrite::finish(), later passed to oncommit->complete()
+ std::string oid; // object name; key used to look up the queue in m_writes
+ Context *oncommit; // callback completed by complete_writes()
+ write_result_d(const std::string& oid, Context *oncommit) :
+ done(false), ret(0), oid(oid), oncommit(oncommit) {} // starts out not done, with ret 0
+ private:
+ write_result_d(const write_result_d& rhs); // declared private, no body shown: copying is not available to outside code
+ const write_result_d& operator=(const write_result_d& rhs); // likewise for assignment
+ };
+
private:
- int m_tid;
+ void complete_writes(const std::string& oid);
+
+ tid_t m_tid;
Mutex& m_lock;
librbd::ImageCtx *m_ictx;
+ hash_map<std::string, std::queue<write_result_d*> > m_writes;
+ friend class C_OrderedWrite;
};
}