summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/libradosstriper/RadosStriperImpl.cc 197
-rw-r--r-- src/libradosstriper/RadosStriperImpl.h 94
2 files changed, 262 insertions, 29 deletions
diff --git a/src/libradosstriper/RadosStriperImpl.cc b/src/libradosstriper/RadosStriperImpl.cc
index 26833eab75d..d33aa522c30 100644
--- a/src/libradosstriper/RadosStriperImpl.cc
+++ b/src/libradosstriper/RadosStriperImpl.cc
@@ -76,10 +76,7 @@
*
* There are a number of missing features/improvements that could be implemented.
* Here are some ideas :
- * - asynchronous stat and deletion
- * - improvement of the synchronous deletion to launch asynchrously
- * the deletion of the rados objects
- * - make the truncation asynchronous in aio_write_full
+ * - asynchronous stat
* - implementation of missing entry points (compared to rados)
* In particular : clone_range, sparse_read, exec, aio_flush_async, tmaps, omaps, ...
*
@@ -190,6 +187,32 @@ void libradosstriper::RadosStriperImpl::WriteCompletionData::safe(int r) {
if (m_safe) m_safe->finish(r);
}
+/**
+ * Builds the data handed to the completion callback of an asynchronous
+ * removal.
+ * Takes ownership of lock (it is deleted by the destructor) and records
+ * the removal flags, which the completion callback reads when it removes
+ * the first rados object of the striped object.
+ * @param striper        striper the removal runs on
+ * @param soid           name of the striped object being removed
+ * @param lockCookie     cookie forwarded to CompletionData
+ * @param userCompletion user completion forwarded to CompletionData
+ * @param lock           exclusive lock, owned by the new object from now on
+ * @param flags          removal flags
+ */
+libradosstriper::RadosStriperImpl::RemoveCompletionData::RemoveCompletionData
+(libradosstriper::RadosStriperImpl* striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion,
+ RadosExclusiveLock *lock,
+ int flags) :
+  CompletionData(striper, soid, lockCookie, userCompletion),
+  // the flags member was previously left uninitialized although the
+  // completion callback reads it
+  flags(flags),
+  m_lock(lock) {}
+
+// Destroys the exclusive lock object owned by this completion data.
+libradosstriper::RadosStriperImpl::RemoveCompletionData::~RemoveCompletionData() {
+  // deleting a null pointer is a no-op, so no explicit check is needed
+  delete m_lock;
+}
+
+/**
+ * Builds the data handed to the completion callback of an asynchronous
+ * truncation and takes a reference on the striper; that reference is
+ * dropped again by the destructor.
+ * @param striper striper the truncation runs on
+ * @param soid    name of the striped object being truncated
+ * @param size    size stored for the completion callback
+ */
+libradosstriper::RadosStriperImpl::TruncateCompletionData::TruncateCompletionData
+(libradosstriper::RadosStriperImpl* striper,
+ const std::string& soid,
+ uint64_t size) :
+  RefCountedObject(striper->cct()),
+  m_striper(striper),
+  m_soid(soid),
+  m_size(size)
+{
+  // same object as m_striper, which was just initialised from this pointer
+  striper->get();
+}
+
+libradosstriper::RadosStriperImpl::TruncateCompletionData::~TruncateCompletionData() {
+ m_striper->put(); // drops the striper reference taken by m_striper->get() in the constructor
+}
+
///////////////////////// RadosExclusiveLock /////////////////////////////
libradosstriper::RadosStriperImpl::RadosExclusiveLock::RadosExclusiveLock(librados::IoCtx* ioCtx,
@@ -562,12 +585,88 @@ int libradosstriper::RadosStriperImpl::stat(const std::string& soid, uint64_t *p
return 0;
}
+// Completion callback for the asynchronous removal of a single rados object.
+// Reports the outcome to the multi completion stored in the callback data,
+// then drops one reference on that data.
+static void rados_req_remove_complete(rados_completion_t c, void *arg)
+{
+  libradosstriper::RadosStriperImpl::RadosRemoveCompletionData *data =
+    static_cast<libradosstriper::RadosStriperImpl::RadosRemoveCompletionData*>(arg);
+  const int status = rados_aio_get_return_value(c);
+  // a missing object only means the striped file was sparse: report success
+  data->m_multiAioCompl->complete_request(status == -ENOENT ? 0 : status);
+  data->put();
+}
+
+// Completion callback of the multi completion used by asynchronous removals.
+// When all asynchronous removals succeeded, the first rados object is removed
+// synchronously; otherwise the failure is logged and the first object is kept.
+// In both cases the outcome is passed to the completion data, and one
+// reference on that data is dropped.
+static void striper_remove_aio_req_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  libradosstriper::RadosStriperImpl::RemoveCompletionData *cdata =
+    static_cast<libradosstriper::RadosStriperImpl::RemoveCompletionData*>(arg);
+  libradosstriper::MultiAioCompletionImpl *multi =
+    reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  ldout(cdata->m_striper->cct(), 10)
+    << "RadosStriperImpl : striper_remove_aio_req_complete called for "
+    << cdata->m_soid << dendl;
+  int result = multi->rval;
+  if (result != 0) {
+    // at least one removal failed: keep the first object and report the error
+    lderr(cdata->m_striper->cct())
+      << "RadosStriperImpl : deletion/truncation incomplete for " << cdata->m_soid
+      << ", as errors were encountered. The file is left present but it's content "
+      << " has been partially removed"
+      << dendl;
+  } else {
+    // everything else is gone: synchronously remove the first object
+    result = cdata->m_striper->m_ioCtx.remove(cdata->m_striper->getObjectId(cdata->m_soid, 0),
+                                              cdata->flags);
+  }
+  cdata->complete(result);
+  cdata->put();
+}
+
int libradosstriper::RadosStriperImpl::remove(const std::string& soid, int flags)
{
+ // create a completion object
+ librados::AioCompletionImpl c;
+ // call asynchronous version of remove
+ int rc = aio_remove(soid, &c, flags);
+ if (rc == 0) {
+ // wait for completion of the remove
+ c.wait_for_complete_and_cb();
+ // get result
+ rc = c.get_return_value();
+ }
+ return rc;
+}
+
+/**
+ * Asynchronously removes the striped object soid.
+ * A RadosExclusiveLock is created on the first rados object and handed to
+ * the RemoveCompletionData passed to the completion callback, so that it
+ * lives for the duration of the removal.
+ * @param soid  name of the striped object to remove
+ * @param c     user completion handed to the RemoveCompletionData
+ * @param flags removal flags, used for every rados object of the striped object
+ * @return the value returned by internal_aio_remove, or the code carried by
+ *         an ErrorCode thrown while setting the removal up
+ */
+int libradosstriper::RadosStriperImpl::aio_remove(const std::string& soid,
+ librados::AioCompletionImpl *c,
+ int flags)
+{
+  // the RemoveCompletionData object will lock the given soid for the duration
+  // of the removal
+  try {
+    std::string lockCookie = getUUID();
+    RadosExclusiveLock *lock = new RadosExclusiveLock(&m_ioCtx, getObjectId(soid, 0));
+    // lock ownership is transferred to RemoveCompletionData here
+    RemoveCompletionData *cdata = new RemoveCompletionData(this, soid, lockCookie, c, lock, flags);
+    cdata->get();
+    libradosstriper::MultiAioCompletionImpl *multi_completion =
+      new libradosstriper::MultiAioCompletionImpl;
+    multi_completion->set_complete_callback(cdata, striper_remove_aio_req_complete);
+    // call asynchronous internal version of remove; the flags are forwarded so
+    // that they apply to the asynchronous removals too, not only to the final
+    // removal of the first object
+    return internal_aio_remove(soid, multi_completion, flags);
+  } catch (ErrorCode &e) {
+    return e.m_code;
+  }
+}
+
+int libradosstriper::RadosStriperImpl::internal_aio_remove
+(const std::string& soid,
+ libradosstriper::MultiAioCompletionImpl *multi_completion,
+ int flags)
+{
std::string firstObjOid = getObjectId(soid, 0);
try {
- // lock the object in exclusive mode. Will be released when leaving the scope
- RadosExclusiveLock lock(&m_ioCtx, firstObjOid);
// check size and get number of rados objects to delete
uint64_t nb_objects = 0;
bufferlist bl2;
@@ -601,13 +700,22 @@ int libradosstriper::RadosStriperImpl::remove(const std::string& soid, int flags
nb_objects = nb_complete_sets * stripe_count + remaining_objects;
}
// delete rados objects in reverse order
+ // Note that we do not drop the first object. This one will only be dropped
+ // if all other removals have been successful, and this is done in the
+ // callback of the multi_completion object
int rcr = 0;
- for (int i = nb_objects-1; i >= 0; i--) {
+ for (int i = nb_objects-1; i >= 1; i--) {
+ multi_completion->add_request();
+ RadosRemoveCompletionData *data =
+ new RadosRemoveCompletionData(multi_completion, cct());
+ librados::AioCompletion *rados_completion =
+ librados::Rados::aio_create_completion(data, rados_req_remove_complete, 0);
if (flags == 0) {
- rcr = m_ioCtx.remove(getObjectId(soid, i));
+ rcr = m_ioCtx.aio_remove(getObjectId(soid, i), rados_completion);
} else {
- rcr = m_ioCtx.remove(getObjectId(soid, i), flags);
+ rcr = m_ioCtx.aio_remove(getObjectId(soid, i), rados_completion, flags);
}
+ rados_completion->release();
if (rcr < 0 and -ENOENT != rcr) {
lderr(cct()) << "RadosStriperImpl::remove : deletion incomplete for " << soid
<< ", as " << getObjectId(soid, i) << " could not be deleted (rc=" << rc << ")"
@@ -615,6 +723,8 @@ int libradosstriper::RadosStriperImpl::remove(const std::string& soid, int flags
break;
}
}
+ // we are done adding requests to the multi_completion object
+ multi_completion->finish_adding_requests();
// return
return rcr;
} catch (ErrorCode &e) {
@@ -1001,11 +1111,52 @@ int libradosstriper::RadosStriperImpl::createAndOpenStripedObject(const std::str
return openStripedObjectForWrite(soid, layout, &fileSize, lockCookie, isFileSizeAbsolute);
}
+// Completion callback of the multi completion used by truncations.
+// When the multi completion reports success, the size stored in the callback
+// data is written, as decimal text, to the XATTR_SIZE attribute of the striped
+// object. One reference on the callback data is dropped in every case.
+static void striper_truncate_aio_req_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  libradosstriper::RadosStriperImpl::TruncateCompletionData *data =
+    static_cast<libradosstriper::RadosStriperImpl::TruncateCompletionData*>(arg);
+  libradosstriper::MultiAioCompletionImpl *multi =
+    reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  if (multi->rval == 0) {
+    // all went fine: publish the new size in the external attributes
+    std::ostringstream size_text;
+    size_text << data->m_size;
+    bufferlist size_bl;
+    size_bl.append(size_text.str());
+    data->m_striper->setxattr(data->m_soid, XATTR_SIZE, size_bl);
+  }
+  data->put();
+}
+
int libradosstriper::RadosStriperImpl::truncate(const std::string& soid,
uint64_t original_size,
uint64_t size,
ceph_file_layout &layout)
{
+ TruncateCompletionData *cdata = new TruncateCompletionData(this, soid, size);
+ cdata->get();
+ libradosstriper::MultiAioCompletionImpl *multi_completion =
+ new libradosstriper::MultiAioCompletionImpl;
+ multi_completion->set_complete_callback(cdata, striper_truncate_aio_req_complete);
+ // call asynchrous version of truncate
+ int rc = aio_truncate(soid, multi_completion, original_size, size, layout);
+ // wait for completion of the truncation
+ multi_completion->wait_for_complete_and_cb();
+ // return result
+ if (rc == 0) {
+ rc = multi_completion->get_return_value();
+ }
+ multi_completion->put();
+ return rc;
+}
+
+int libradosstriper::RadosStriperImpl::aio_truncate
+(const std::string& soid,
+ libradosstriper::MultiAioCompletionImpl *multi_completion,
+ uint64_t original_size,
+ uint64_t size,
+ ceph_file_layout &layout)
+{
// handle the underlying rados objects. 3 cases here :
// -- the objects belonging to object sets entirely located
// before the truncation are unchanged
@@ -1032,8 +1183,13 @@ int libradosstriper::RadosStriperImpl::truncate(const std::string& soid,
exists = (original_size > object_start_off);
}
if (exists) {
- // remove
- int rc = m_ioCtx.remove(getObjectId(soid, objectno));
+ // remove asynchronously
+ librados::AioCompletion *rados_completion =
+ m_radosCluster.aio_create_completion(multi_completion,
+ rados_req_remove_complete,
+ 0);
+ int rc = m_ioCtx.aio_remove(getObjectId(soid, objectno), rados_completion);
+ rados_completion->release();
// in case the object did not exist, it means we had a sparse file, all is fine
if (rc && rc != -ENOENT) return rc;
}
@@ -1054,21 +1210,24 @@ int libradosstriper::RadosStriperImpl::truncate(const std::string& soid,
uint64_t new_object_size = Striper::object_truncate_size(cct(), &l, objectno, size);
int rc;
if (new_object_size > 0 or 0 == objectno) {
+ // trunc is synchronous as there is no async version,
+ // but note that only a single object will be truncated,
+ // limiting the overhead to a fixed amount
rc = m_ioCtx.trunc(getObjectId(soid, objectno), new_object_size);
} else {
- rc = m_ioCtx.remove(getObjectId(soid, objectno));
+ // removes are asynchronous in order to speed up truncations of big files
+ librados::AioCompletion *rados_completion =
+ m_radosCluster.aio_create_completion(multi_completion,
+ rados_req_remove_complete,
+ 0);
+ rc = m_ioCtx.aio_remove(getObjectId(soid, objectno), rados_completion);
+ rados_completion->release();
}
// in case the object did not exist, it means we had a sparse file, all is fine
if (rc && rc != -ENOENT) return rc;
}
}
- // all went fine, change size in the external attributes
- std::ostringstream oss;
- oss << size;
- bufferlist bl;
- bl.append(oss.str());
- int rc = m_ioCtx.setxattr(getObjectId(soid, 0), XATTR_SIZE, bl);
- return rc;
+ return 0;
}
int libradosstriper::RadosStriperImpl::grow(const std::string& soid,
diff --git a/src/libradosstriper/RadosStriperImpl.h b/src/libradosstriper/RadosStriperImpl.h
index c2bb8ba321b..c71952abebf 100644
--- a/src/libradosstriper/RadosStriperImpl.h
+++ b/src/libradosstriper/RadosStriperImpl.h
@@ -121,13 +121,19 @@ struct libradosstriper::RadosStriperImpl {
};
/**
- * exception wrapper around an error code
+ * struct handling the data needed to pass to the call back
+ * function in asynchronous remove operations of a Rados File
*/
- struct ErrorCode {
- ErrorCode(int error) : m_code(error) {};
- int m_code;
+ struct RadosRemoveCompletionData : RefCountedObject {
+   /// multi asynchronous io completion to notify once the removal is done
+   MultiAioCompletionImpl *m_multiAioCompl;
+   /// constructor, passes context and the value 1 to the RefCountedObject base
+   RadosRemoveCompletionData(MultiAioCompletionImpl *multiAioCompl,
+                             CephContext *context)
+     : RefCountedObject(context, 1),
+       m_multiAioCompl(multiAioCompl) {}
+ };
-
+
/**
* Helper struct to handle simple locks on objects
*/
@@ -144,6 +150,52 @@ struct libradosstriper::RadosStriperImpl {
~RadosExclusiveLock();
};
+ struct RemoveCompletionData : CompletionData { // data handed to the callback of asynchronous remove operations
+ /// removal flags, used by the completion callback when removing the first rados object
+ int flags;
+ /// exclusive lock, owned by this object and deleted by its destructor
+ RadosExclusiveLock *m_lock;
+ /**
+ * constructor
+ * note that the constructed object will take ownership of the lock
+ * @param striper striper the removal runs on
+ * @param soid name of the striped object being removed
+ * @param lockCookie cookie forwarded to CompletionData
+ * @param userCompletion user completion forwarded to CompletionData
+ * @param lock exclusive lock to take ownership of
+ * @param flags removal flags
+ */
+ RemoveCompletionData(libradosstriper::RadosStriperImpl * striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion,
+ RadosExclusiveLock *lock,
+ int flags = 0);
+ /// destructor, deletes the owned lock if there is one
+ ~RemoveCompletionData();
+ };
+
+ /**
+ * struct handling the data needed to pass to the call back
+ * function in asynchronous truncate operations
+ */
+ struct TruncateCompletionData : RefCountedObject {
+ /// constructor, takes a reference on the striper via get()
+ TruncateCompletionData(libradosstriper::RadosStriperImpl* striper,
+ const std::string& soid,
+ uint64_t size);
+ /// destructor, releases the reference on the striper via put()
+ virtual ~TruncateCompletionData();
+ /// striper to be used, kept referenced for the lifetime of this object
+ libradosstriper::RadosStriperImpl *m_striper;
+ /// striped object concerned by the truncate operation
+ std::string m_soid;
+ /// the final size of the truncated object, written to the size attribute by the completion callback
+ uint64_t m_size;
+ };
+
+ /**
+ * exception wrapper around an error code
+ */
+ struct ErrorCode {
+   /// error code carried by the exception
+   int m_code;
+   /// wraps the given error code
+   ErrorCode(int error) : m_code(error) {}
+ };
+
/*
* Constructor
* @param cluster_name name of the cluster, can be NULL
@@ -165,7 +217,7 @@ struct libradosstriper::RadosStriperImpl {
int setxattr(const object_t& soid, const char *name, bufferlist& bl);
int getxattrs(const object_t& soid, map<string, bufferlist>& attrset);
int rmxattr(const object_t& soid, const char *name);
-
+
// io
int write(const std::string& soid, const bufferlist& bl, size_t len, uint64_t off);
int append(const std::string& soid, const bufferlist& bl, size_t len);
@@ -190,6 +242,12 @@ struct libradosstriper::RadosStriperImpl {
int remove(const std::string& soid, int flags=0);
int trunc(const std::string& soid, uint64_t size);
+ // asynchronous remove. Note that the removal is not 100% parallelized:
+ // the removal of the first rados object of the striped object will be
+ // done via a synchronous call after the completion of all other removals.
+ // These are done asynchronously and in parallel
+ int aio_remove(const std::string& soid, librados::AioCompletionImpl *c, int flags=0);
+
// reference counting
void get() {
lock.Lock();
@@ -250,6 +308,10 @@ struct libradosstriper::RadosStriperImpl {
ceph_file_layout *layout,
uint64_t *size);
+ int internal_aio_remove(const std::string& soid,
+ libradosstriper::MultiAioCompletionImpl *multi_completion,
+ int flags=0);
+
/**
* opens an existing striped object and takes a shared lock on it
* @return 0 if everything is ok and the lock was taken. -errcode otherwise
@@ -266,7 +328,7 @@ struct libradosstriper::RadosStriperImpl {
* and sets its size to the size it will have after the write.
* In case the striped object does not exists, it will create it by
* calling createOrOpenStripedObject.
- * @param layout this is filled with the layout of the file
+ * @param layout this is filled with the layout of the file
* @param size new size of the file (together with isFileSizeAbsolute)
* In case of success, this is filled with the size of the file before the opening
* @param isFileSizeAbsolute if false, this means that the given size should
@@ -296,7 +358,7 @@ struct libradosstriper::RadosStriperImpl {
bool isFileSizeAbsolute);
/**
- * truncates an object. Should only be called with size < original_size
+ * truncates an object synchronously. Should only be called with size < original_size
*/
int truncate(const std::string& soid,
uint64_t original_size,
@@ -304,18 +366,30 @@ struct libradosstriper::RadosStriperImpl {
ceph_file_layout &layout);
/**
+ * truncates an object asynchronously. Should only be called with size < original_size
+ * note that the method is not 100% asynchronous, only the removal of rados objects
+ * is, the (potential) truncation of the rados object residing just at the truncation
+ * point is synchronous for lack of asynchronous truncation in the rados layer
+ */
+ int aio_truncate(const std::string& soid,
+ libradosstriper::MultiAioCompletionImpl *c,
+ uint64_t original_size,
+ uint64_t size,
+ ceph_file_layout &layout);
+
+ /**
* grows an object (adding 0s). Should only be called with size > original_size
*/
int grow(const std::string& soid,
uint64_t original_size,
uint64_t size,
ceph_file_layout &layout);
-
+
/**
* creates a unique identifier
*/
static std::string getUUID();
-
+
CephContext *cct() {
return (CephContext*)m_radosCluster.cct();
}