summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorSebastien Ponce <sebastien.ponce@cern.ch>2016-04-01 14:46:57 +0200
committerroot <root@lxbre43a05.cern.ch>2016-11-01 16:36:05 +0100
commit7a50ea479e7e5c2909d899d89d33d3fb082257f8 (patch)
tree225e8364c4ea87516e4c55692388959fec885b56 /src
parentrados : added an aio_remove version taking flags (diff)
downloadceph-7a50ea479e7e5c2909d899d89d33d3fb082257f8.tar.xz
ceph-7a50ea479e7e5c2909d899d89d33d3fb082257f8.zip
radosstriper : optimized truncation and removal of striped objects
Striped objects' truncation and removal now use the rados asynchronous APIs and benefit from the parallelization they allow. Until now, the different rados objects inside a striped object were removed one by one via synchronous calls, which typically took ages for big ones. Signed-off-by: Sebastien Ponce <sebastien.ponce@cern.ch>
Diffstat (limited to '')
-rw-r--r--src/libradosstriper/RadosStriperImpl.cc197
-rw-r--r--src/libradosstriper/RadosStriperImpl.h94
2 files changed, 262 insertions, 29 deletions
diff --git a/src/libradosstriper/RadosStriperImpl.cc b/src/libradosstriper/RadosStriperImpl.cc
index 26833eab75d..d33aa522c30 100644
--- a/src/libradosstriper/RadosStriperImpl.cc
+++ b/src/libradosstriper/RadosStriperImpl.cc
@@ -76,10 +76,7 @@
*
* There are a number of missing features/improvements that could be implemented.
* Here are some ideas :
- * - asynchronous stat and deletion
- * - improvement of the synchronous deletion to launch asynchrously
- * the deletion of the rados objects
- * - make the truncation asynchronous in aio_write_full
+ * - asynchronous stat
* - implementation of missing entry points (compared to rados)
* In particular : clone_range, sparse_read, exec, aio_flush_async, tmaps, omaps, ...
*
@@ -190,6 +187,32 @@ void libradosstriper::RadosStriperImpl::WriteCompletionData::safe(int r) {
if (m_safe) m_safe->finish(r);
}
+/**
+ * Builds the data handed to the callback of an asynchronous remove.
+ * @param striper        striper the removal is issued on (forwarded to CompletionData)
+ * @param soid           name of the striped object (forwarded to CompletionData)
+ * @param lockCookie     lock cookie (forwarded to CompletionData)
+ * @param userCompletion user completion (forwarded to CompletionData)
+ * @param lock           exclusive lock; the constructed object takes ownership
+ *                       of it and deletes it in its destructor
+ * @param flags          removal flags, read back by the remove callback when
+ *                       it removes the first rados object
+ */
+libradosstriper::RadosStriperImpl::RemoveCompletionData::RemoveCompletionData
+(libradosstriper::RadosStriperImpl* striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion,
+ RadosExclusiveLock *lock,
+ int flags) :
+  CompletionData(striper, soid, lockCookie, userCompletion),
+  // the parameter shadows the member of the same name : without this explicit
+  // initialization the member would be left with an indeterminate value
+  flags(flags),
+  m_lock(lock) {}
+
+/// Destroys the exclusive lock object owned by this completion data, if any.
+libradosstriper::RadosStriperImpl::RemoveCompletionData::~RemoveCompletionData()
+{
+  // deleting a null pointer is a no-op, so no explicit check is needed
+  delete m_lock;
+}
+
+/**
+ * Stores the striper, the name of the striped object and the target size
+ * needed by the truncate callback, and takes a reference on the striper.
+ * @param striper striper the truncation is issued on
+ * @param soid    name of the striped object being truncated
+ * @param size    size of the striped object after truncation
+ */
+libradosstriper::RadosStriperImpl::TruncateCompletionData::TruncateCompletionData
+(libradosstriper::RadosStriperImpl* striper,
+ const std::string& soid,
+ uint64_t size)
+  : RefCountedObject(striper->cct()),
+    m_striper(striper),
+    m_soid(soid),
+    m_size(size)
+{
+  // keep the striper referenced for as long as this object exists
+  m_striper->get();
+}
+
+/// Drops the reference held on the striper.
+libradosstriper::RadosStriperImpl::TruncateCompletionData::~TruncateCompletionData()
+{
+  m_striper->put();
+}
+
///////////////////////// RadosExclusiveLock /////////////////////////////
libradosstriper::RadosStriperImpl::RadosExclusiveLock::RadosExclusiveLock(librados::IoCtx* ioCtx,
@@ -562,12 +585,88 @@ int libradosstriper::RadosStriperImpl::stat(const std::string& soid, uint64_t *p
return 0;
}
+/**
+ * Completion callback for the asynchronous removal of a single rados object.
+ * Reports the result to the multi completion stored in the callback data,
+ * then drops one reference on that data.
+ * @param c   rados completion whose return value is reported
+ * @param arg callback data, interpreted as a RadosRemoveCompletionData
+ */
+static void rados_req_remove_complete(rados_completion_t c, void *arg)
+{
+  typedef libradosstriper::RadosStriperImpl::RadosRemoveCompletionData RemoveData;
+  RemoveData *removeData = static_cast<RemoveData*>(arg);
+  const int status = rados_aio_get_return_value(c);
+  // a missing object only means that the striped object was sparse :
+  // this is reported as a success
+  removeData->m_multiAioCompl->complete_request(status == -ENOENT ? 0 : status);
+  removeData->put();
+}
+
+/**
+ * Callback of the multi completion used for the removal of a striped object.
+ * When the aggregated result is a success, the first rados object is removed
+ * synchronously; otherwise an error is logged and the first object is kept.
+ * In both cases the final result is passed to the completion data, on which
+ * one reference is then dropped.
+ * @param c   the multi completion, interpreted as a MultiAioCompletionImpl
+ * @param arg callback data, interpreted as a RemoveCompletionData
+ */
+static void striper_remove_aio_req_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  typedef libradosstriper::RadosStriperImpl::RemoveCompletionData RemoveData;
+  RemoveData *cdata = static_cast<RemoveData*>(arg);
+  libradosstriper::MultiAioCompletionImpl *multiCompl =
+    reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  ldout(cdata->m_striper->cct(), 10)
+    << "RadosStriperImpl : striper_remove_aio_req_complete called for "
+    << cdata->m_soid << dendl;
+  int rc = multiCompl->rval;
+  if (rc != 0) {
+    // some removals failed : keep the first object and only report the problem
+    lderr(cdata->m_striper->cct())
+      << "RadosStriperImpl : deletion/truncation incomplete for " << cdata->m_soid
+      << ", as errors were encountered. The file is left present but it's content "
+      << " has been partially removed"
+      << dendl;
+  } else {
+    // all went fine : the first object can now be removed, synchronously
+    rc = cdata->m_striper->m_ioCtx.remove(cdata->m_striper->getObjectId(cdata->m_soid, 0),
+                                          cdata->flags);
+  }
+  cdata->complete(rc);
+  cdata->put();
+}
+
int libradosstriper::RadosStriperImpl::remove(const std::string& soid, int flags)
{
+  // synchronous removal, implemented on top of aio_remove : the removal is
+  // launched through the asynchronous entry point and this call blocks
+  // until the completion (and its callback) is done
+  librados::AioCompletionImpl completion;
+  const int rc = aio_remove(soid, &completion, flags);
+  if (rc != 0) {
+    // aio_remove reported an error : there is nothing to wait for
+    return rc;
+  }
+  completion.wait_for_complete_and_cb();
+  return completion.get_return_value();
+}
+
+/**
+ * Asynchronous removal of the striped object soid.
+ * An exclusive lock object is created on the first rados object and handed
+ * over to the RemoveCompletionData, which deletes it in its destructor.
+ * @param soid  name of the striped object to remove
+ * @param c     completion given to the RemoveCompletionData
+ * @param flags removal flags, applied to all rados objects of the striped object
+ * @return the return code of internal_aio_remove, or the code carried by an
+ *         ErrorCode exception if one is thrown
+ */
+int libradosstriper::RadosStriperImpl::aio_remove(const std::string& soid,
+                                                  librados::AioCompletionImpl *c,
+                                                  int flags)
+{
+  // the RemoveCompletionData object will lock the given soid for the duration
+  // of the removal
+  try {
+    std::string lockCookie = getUUID();
+    RadosExclusiveLock *lock = new RadosExclusiveLock(&m_ioCtx, getObjectId(soid, 0));
+    // lock ownership is transferred to RemoveCompletionData here
+    RemoveCompletionData *cdata = new RemoveCompletionData(this, soid, lockCookie, c, lock, flags);
+    cdata->get();
+    libradosstriper::MultiAioCompletionImpl *multi_completion =
+      new libradosstriper::MultiAioCompletionImpl;
+    multi_completion->set_complete_callback(cdata, striper_remove_aio_req_complete);
+    // call asynchronous internal version of remove. The flags have to be
+    // forwarded : otherwise they would only apply to the first rados object,
+    // which is removed by the callback, and not to all the other ones
+    // NOTE(review): when internal_aio_remove fails, cdata and multi_completion
+    // do not appear to be released here - confirm that the callback still runs
+    return internal_aio_remove(soid, multi_completion, flags);
+  } catch (ErrorCode &e) {
+    return e.m_code;
+  }
+}
+
+int libradosstriper::RadosStriperImpl::internal_aio_remove
+(const std::string& soid,
+ libradosstriper::MultiAioCompletionImpl *multi_completion,
+ int flags)
+{
std::string firstObjOid = getObjectId(soid, 0);
try {
- // lock the object in exclusive mode. Will be released when leaving the scope
- RadosExclusiveLock lock(&m_ioCtx, firstObjOid);
// check size and get number of rados objects to delete
uint64_t nb_objects = 0;
bufferlist bl2;
@@ -601,13 +700,22 @@ int libradosstriper::RadosStriperImpl::remove(const std::string& soid, int flags
nb_objects = nb_complete_sets * stripe_count + remaining_objects;
}
// delete rados objects in reverse order
+ // Note that we do not drop the first object. This one will only be dropped
+ // if all other removals have been successful, and this is done in the
+ // callback of the multi_completion object
int rcr = 0;
- for (int i = nb_objects-1; i >= 0; i--) {
+ for (int i = nb_objects-1; i >= 1; i--) {
+ multi_completion->add_request();
+ RadosRemoveCompletionData *data =
+ new RadosRemoveCompletionData(multi_completion, cct());
+ librados::AioCompletion *rados_completion =
+ librados::Rados::aio_create_completion(data, rados_req_remove_complete, 0);
if (flags == 0) {
- rcr = m_ioCtx.remove(getObjectId(soid, i));
+ rcr = m_ioCtx.aio_remove(getObjectId(soid, i), rados_completion);
} else {
- rcr = m_ioCtx.remove(getObjectId(soid, i), flags);
+ rcr = m_ioCtx.aio_remove(getObjectId(soid, i), rados_completion, flags);
}
+ rados_completion->release();
if (rcr < 0 and -ENOENT != rcr) {
lderr(cct()) << "RadosStriperImpl::remove : deletion incomplete for " << soid
<< ", as " << getObjectId(soid, i) << " could not be deleted (rc=" << rc << ")"
@@ -615,6 +723,8 @@ int libradosstriper::RadosStriperImpl::remove(const std::string& soid, int flags
break;
}
}
+ // we are done adding requests to the multi_completion object
+ multi_completion->finish_adding_requests();
// return
return rcr;
} catch (ErrorCode &e) {
@@ -1001,11 +1111,52 @@ int libradosstriper::RadosStriperImpl::createAndOpenStripedObject(const std::str
return openStripedObjectForWrite(soid, layout, &fileSize, lockCookie, isFileSizeAbsolute);
}
+/**
+ * Callback of the multi completion used for the truncation of a striped object.
+ * When the aggregated result is a success, the new size is written to the
+ * XATTR_SIZE attribute of the striped object (the return code of setxattr
+ * is not checked). One reference on the callback data is dropped in all cases.
+ * @param c   the multi completion, interpreted as a MultiAioCompletionImpl
+ * @param arg callback data, interpreted as a TruncateCompletionData
+ */
+static void striper_truncate_aio_req_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  typedef libradosstriper::RadosStriperImpl::TruncateCompletionData TruncateData;
+  TruncateData *truncData = static_cast<TruncateData*>(arg);
+  libradosstriper::MultiAioCompletionImpl *multiCompl =
+    reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  const bool succeeded = (multiCompl->rval == 0);
+  if (succeeded) {
+    // the size is stored in its textual form in the external attributes
+    std::ostringstream sizeStream;
+    sizeStream << truncData->m_size;
+    bufferlist sizeBl;
+    sizeBl.append(sizeStream.str());
+    truncData->m_striper->setxattr(truncData->m_soid, XATTR_SIZE, sizeBl);
+  }
+  truncData->put();
+}
+
int libradosstriper::RadosStriperImpl::truncate(const std::string& soid,
uint64_t original_size,
uint64_t size,
ceph_file_layout &layout)
{
+  // synchronous truncation, implemented on top of aio_truncate.
+  // The callback data carries what striper_truncate_aio_req_complete needs
+  // in order to update the size attribute
+  TruncateCompletionData *cdata = new TruncateCompletionData(this, soid, size);
+  cdata->get();
+  libradosstriper::MultiAioCompletionImpl *multi_completion =
+    new libradosstriper::MultiAioCompletionImpl;
+  multi_completion->set_complete_callback(cdata, striper_truncate_aio_req_complete);
+  // launch the truncation through the asynchronous entry point
+  const int launch_rc = aio_truncate(soid, multi_completion, original_size, size, layout);
+  // wait for completion of the truncation. Note that the wait is
+  // unconditional : it also takes place when aio_truncate reported an error
+  multi_completion->wait_for_complete_and_cb();
+  // an error of aio_truncate takes precedence over the result of the completion
+  const int rc = (launch_rc == 0) ? multi_completion->get_return_value() : launch_rc;
+  multi_completion->put();
+  return rc;
+}
+
+int libradosstriper::RadosStriperImpl::aio_truncate
+(const std::string& soid,
+ libradosstriper::MultiAioCompletionImpl *multi_completion,
+ uint64_t original_size,
+ uint64_t size,
+ ceph_file_layout &layout)
+{
// handle the underlying rados objects. 3 cases here :
// -- the objects belonging to object sets entirely located
// before the truncation are unchanged
@@ -1032,8 +1183,13 @@ int libradosstriper::RadosStriperImpl::truncate(const std::string& soid,
exists = (original_size > object_start_off);
}
if (exists) {
- // remove
- int rc = m_ioCtx.remove(getObjectId(soid, objectno));
+ // remove asynchronously
+ librados::AioCompletion *rados_completion =
+ m_radosCluster.aio_create_completion(multi_completion,
+ rados_req_remove_complete,
+ 0);
+ int rc = m_ioCtx.aio_remove(getObjectId(soid, objectno), rados_completion);
+ rados_completion->release();
// in case the object did not exist, it means we had a sparse file, all is fine
if (rc && rc != -ENOENT) return rc;
}
@@ -1054,21 +1210,24 @@ int libradosstriper::RadosStriperImpl::truncate(const std::string& soid,
uint64_t new_object_size = Striper::object_truncate_size(cct(), &l, objectno, size);
int rc;
if (new_object_size > 0 or 0 == objectno) {
+ // trunc is synchronous as there is no async version,
+ // but note that only a single object will be truncated,
+ // limiting the overhead to a fixed amount
rc = m_ioCtx.trunc(getObjectId(soid, objectno), new_object_size);
} else {
- rc = m_ioCtx.remove(getObjectId(soid, objectno));
+ // removes are asynchronous in order to speed up truncations of big files
+ librados::AioCompletion *rados_completion =
+ m_radosCluster.aio_create_completion(multi_completion,
+ rados_req_remove_complete,
+ 0);
+ rc = m_ioCtx.aio_remove(getObjectId(soid, objectno), rados_completion);
+ rados_completion->release();
}
// in case the object did not exist, it means we had a sparse file, all is fine
if (rc && rc != -ENOENT) return rc;
}
}
- // all went fine, change size in the external attributes
- std::ostringstream oss;
- oss << size;
- bufferlist bl;
- bl.append(oss.str());
- int rc = m_ioCtx.setxattr(getObjectId(soid, 0), XATTR_SIZE, bl);
- return rc;
+ return 0;
}
int libradosstriper::RadosStriperImpl::grow(const std::string& soid,
diff --git a/src/libradosstriper/RadosStriperImpl.h b/src/libradosstriper/RadosStriperImpl.h
index c2bb8ba321b..c71952abebf 100644
--- a/src/libradosstriper/RadosStriperImpl.h
+++ b/src/libradosstriper/RadosStriperImpl.h
@@ -121,13 +121,19 @@ struct libradosstriper::RadosStriperImpl {
};
/**
- * exception wrapper around an error code
+ * struct handling the data needed to pass to the call back
+ * function in asynchronous remove operations of a Rados File
*/
- struct ErrorCode {
- ErrorCode(int error) : m_code(error) {};
- int m_code;
+  struct RadosRemoveCompletionData : RefCountedObject {
+    /**
+     * constructor
+     * @param multiAioCompl multi completion to be notified by the callback;
+     *                      only the pointer is stored
+     * @param context       ceph context, given to RefCountedObject together
+     *                      with the value 1 (presumably the initial
+     *                      reference count - to be confirmed)
+     */
+    RadosRemoveCompletionData(MultiAioCompletionImpl *multiAioCompl,
+                              CephContext *context)
+      : RefCountedObject(context, 1),
+        m_multiAioCompl(multiAioCompl) {}
+    /// the multi asynch io completion object to be used
+    MultiAioCompletionImpl *m_multiAioCompl;
+  };
-
+
/**
* Helper struct to handle simple locks on objects
*/
@@ -144,6 +150,52 @@ struct libradosstriper::RadosStriperImpl {
~RadosExclusiveLock();
};
+  /**
+   * struct handling the data needed to pass to the call back
+   * function in asynchronous remove operations of a striped object
+   */
+  struct RemoveCompletionData : CompletionData {
+    /// removal flags, used by the callback when removing the first rados object
+    int flags;
+    /// exclusive lock, owned by this object
+    RadosExclusiveLock *m_lock;
+    /**
+     * constructor
+     * note that the constructed object will take ownership of the lock
+     */
+    RemoveCompletionData(RadosStriperImpl *striper,
+                         const std::string& soid,
+                         const std::string& lockCookie,
+                         librados::AioCompletionImpl *userCompletion,
+                         RadosExclusiveLock *lock,
+                         int flags = 0);
+    /// destructor, deletes the lock if one is held
+    ~RemoveCompletionData();
+  };
+
+ /**
+ * struct handling the data needed to pass to the call back
+ * function in asynchronous truncate operations
+ */
+  struct TruncateCompletionData : RefCountedObject {
+    /**
+     * constructor, takes a reference on the striper
+     * @param striper striper to be used
+     * @param soid    striped object concerned by the truncate operation
+     * @param size    final size of the truncated object
+     */
+    TruncateCompletionData(RadosStriperImpl *striper,
+                           const std::string& soid,
+                           uint64_t size);
+    /// destructor, drops the reference on the striper
+    virtual ~TruncateCompletionData();
+    /// striper to be used
+    RadosStriperImpl *m_striper;
+    /// striped object concerned by the truncate operation
+    std::string m_soid;
+    /// the final size of the truncated object
+    uint64_t m_size;
+  };
+
+ /**
+ * exception wrapper around an error code
+ */
+  struct ErrorCode {
+    /// wraps the given error code
+    ErrorCode(int error) : m_code(error) {}
+    /// the wrapped error code
+    int m_code;
+  };
+
/*
* Constructor
* @param cluster_name name of the cluster, can be NULL
@@ -165,7 +217,7 @@ struct libradosstriper::RadosStriperImpl {
int setxattr(const object_t& soid, const char *name, bufferlist& bl);
int getxattrs(const object_t& soid, map<string, bufferlist>& attrset);
int rmxattr(const object_t& soid, const char *name);
-
+
// io
int write(const std::string& soid, const bufferlist& bl, size_t len, uint64_t off);
int append(const std::string& soid, const bufferlist& bl, size_t len);
@@ -190,6 +242,12 @@ struct libradosstriper::RadosStriperImpl {
int remove(const std::string& soid, int flags=0);
int trunc(const std::string& soid, uint64_t size);
+ // asynchronous remove. Note that the removal is not 100% parallelized :
+ // the removal of the first rados object of the striped object will be
+ // done via a synchronous call after the completion of all other removals.
+ // These are done asynchronously and in parallel
+ int aio_remove(const std::string& soid, librados::AioCompletionImpl *c, int flags=0);
+
// reference counting
void get() {
lock.Lock();
@@ -250,6 +308,10 @@ struct libradosstriper::RadosStriperImpl {
ceph_file_layout *layout,
uint64_t *size);
+ int internal_aio_remove(const std::string& soid,
+ libradosstriper::MultiAioCompletionImpl *multi_completion,
+ int flags=0);
+
/**
* opens an existing striped object and takes a shared lock on it
* @return 0 if everything is ok and the lock was taken. -errcode otherwise
@@ -266,7 +328,7 @@ struct libradosstriper::RadosStriperImpl {
* and sets its size to the size it will have after the write.
* In case the striped object does not exists, it will create it by
* calling createOrOpenStripedObject.
- * @param layout this is filled with the layout of the file
+ * @param layout this is filled with the layout of the file
* @param size new size of the file (together with isFileSizeAbsolute)
* In case of success, this is filled with the size of the file before the opening
* @param isFileSizeAbsolute if false, this means that the given size should
@@ -296,7 +358,7 @@ struct libradosstriper::RadosStriperImpl {
bool isFileSizeAbsolute);
/**
- * truncates an object. Should only be called with size < original_size
+ * truncates an object synchronously. Should only be called with size < original_size
*/
int truncate(const std::string& soid,
uint64_t original_size,
@@ -304,18 +366,30 @@ struct libradosstriper::RadosStriperImpl {
ceph_file_layout &layout);
/**
+ * truncates an object asynchronously. Should only be called with size < original_size
+ * note that the method is not 100% asynchronous, only the removal of rados objects
+ * is, the (potential) truncation of the rados object residing just at the truncation
+ * point is synchronous for lack of asynchronous truncation in the rados layer
+ */
+ int aio_truncate(const std::string& soid,
+ libradosstriper::MultiAioCompletionImpl *c,
+ uint64_t original_size,
+ uint64_t size,
+ ceph_file_layout &layout);
+
+ /**
* grows an object (adding 0s). Should only be called with size > original_size
*/
int grow(const std::string& soid,
uint64_t original_size,
uint64_t size,
ceph_file_layout &layout);
-
+
/**
* creates a unique identifier
*/
static std::string getUUID();
-
+
CephContext *cct() {
return (CephContext*)m_radosCluster.cct();
}