diff options
author | David Zafman <dzafman@redhat.com> | 2014-09-25 07:43:05 +0200 |
---|---|---|
committer | David Zafman <dzafman@redhat.com> | 2014-10-20 19:47:50 +0200 |
commit | a03f85a8e7fab296ea2df70a929a1c5e4aa0f7fb (patch) | |
tree | 0c31947fa08adbf78a89ef20820affc2e6bdd9d2 /src | |
parent | rados: Fix ObjectIterator instantiation to specify correct value_type (diff) | |
download | ceph-a03f85a8e7fab296ea2df70a929a1c5e4aa0f7fb.tar.xz ceph-a03f85a8e7fab296ea2df70a929a1c5e4aa0f7fb.zip |
osd, osdc, librados, tools, rgw: Implement pgls of all namespaces
Add release note
New librados interface
New pg_nls_response_t over the wire protocol
Ignore internal namespace (.ceph_internal)
Enhance ObjListCtx to keep independent IoCtxImpl so nspace won't change out from under listing code
Add ListObject with private implementation ListObjectImpl to return from iterator
Add EINVAL error for old librados interface when LIBRADOS_ALL_NSPACES set
Add throw to old librados c++ interface when all_nspaces set
Fixes: #9031
Signed-off-by: David Zafman <dzafman@redhat.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/include/rados.h | 4 | ||||
-rw-r--r-- | src/include/rados/librados.h | 55 | ||||
-rw-r--r-- | src/include/rados/librados.hpp | 60 | ||||
-rw-r--r-- | src/include/rados/rados_types.h | 9 | ||||
-rw-r--r-- | src/include/rados/rados_types.hpp | 11 | ||||
-rw-r--r-- | src/key_value_store/kv_flat_btree_async.cc | 12 | ||||
-rw-r--r-- | src/librados/IoCtxImpl.cc | 31 | ||||
-rw-r--r-- | src/librados/IoCtxImpl.h | 2 | ||||
-rw-r--r-- | src/librados/ListObjectImpl.h | 78 | ||||
-rw-r--r-- | src/librados/Makefile.am | 3 | ||||
-rw-r--r-- | src/librados/librados.cc | 426 | ||||
-rw-r--r-- | src/osd/ReplicatedPG.cc | 160 | ||||
-rw-r--r-- | src/osd/osd_types.cc | 2 | ||||
-rw-r--r-- | src/osd/osd_types.h | 70 | ||||
-rw-r--r-- | src/osdc/Objecter.cc | 139 | ||||
-rw-r--r-- | src/osdc/Objecter.h | 79 | ||||
-rw-r--r-- | src/rgw/rgw_rados.cc | 32 | ||||
-rw-r--r-- | src/rgw/rgw_rados.h | 2 | ||||
-rw-r--r-- | src/tools/rados/rados_export.cc | 6 | ||||
-rw-r--r-- | src/tools/rados/rados_import.cc | 6 | ||||
-rw-r--r-- | src/tools/scratchtool.c | 10 | ||||
-rw-r--r-- | src/tools/scratchtoolpp.cc | 4 | ||||
-rw-r--r-- | src/tracing/librados.tp | 87 |
23 files changed, 1237 insertions, 51 deletions
diff --git a/src/include/rados.h b/src/include/rados.h index 2ab26dc8f13..21e994db036 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -294,7 +294,9 @@ extern const char *ceph_osd_state_name(int s); f(PGLS, __CEPH_OSD_OP(RD, PG, 1), "pgls") \ f(PGLS_FILTER, __CEPH_OSD_OP(RD, PG, 2), "pgls-filter") \ f(PG_HITSET_LS, __CEPH_OSD_OP(RD, PG, 3), "pg-hitset-ls") \ - f(PG_HITSET_GET, __CEPH_OSD_OP(RD, PG, 4), "pg-hitset-get") + f(PG_HITSET_GET, __CEPH_OSD_OP(RD, PG, 4), "pg-hitset-get") \ + f(PGNLS, __CEPH_OSD_OP(RD, PG, 5), "pgnls") \ + f(PGNLS_FILTER, __CEPH_OSD_OP(RD, PG, 6), "pgnls-filter") enum { #define GENERATE_ENUM_ENTRY(op, opcode, str) CEPH_OSD_OP_##op = (opcode), diff --git a/src/include/rados/librados.h b/src/include/rados/librados.h index 71fa8e70679..0658bd339e6 100644 --- a/src/include/rados/librados.h +++ b/src/include/rados/librados.h @@ -150,7 +150,9 @@ typedef void *rados_config_t; * - object locator for all single-object operations (see * rados_ioctx_locator_set_key()) * - namespace for all single-object operations (see - * rados_ioctx_set_namespace()) + * rados_ioctx_set_namespace()). Set to LIBRADOS_ALL_NSPACES + * before rados_nobjects_list_open() will list all objects in all + * namespaces. * * @warning changing any of these settings is not thread-safe - * librados users must synchronize any of these changes on their own, @@ -162,9 +164,9 @@ typedef void *rados_ioctx_t; * @typedef rados_list_ctx_t * * An iterator for listing the objects in a pool. - * Used with rados_objects_list_open(), - * rados_objects_list_next(), and - * rados_objects_list_close(). + * Used with rados_nobjects_list_open(), + * rados_nobjects_list_next(), and + * rados_nobjects_list_close(). */ typedef void *rados_list_ctx_t; @@ -818,7 +820,7 @@ void rados_ioctx_set_namespace(rados_ioctx_t io, const char *nspace); /** @} obj_loc */ /** - * @defgroup librados_h_list_obj Listing Objects + * @defgroup librados_h_list_nobj New Listing Objects * @{ */ /** @@ -828,7 +830,7 @@ void rados_ioctx_set_namespace(rados_ioctx_t io, const char *nspace); * @param ctx the handle to store list context in * @returns 0 on success, negative error code on failure */ -int rados_objects_list_open(rados_ioctx_t io, rados_list_ctx_t *ctx); +int rados_nobjects_list_open(rados_ioctx_t io, rados_list_ctx_t *ctx); /** * Return hash position of iterator, rounded to the current PG @@ -836,7 +838,7 @@ int rados_objects_list_open(rados_ioctx_t io, rados_list_ctx_t *ctx); * @param ctx iterator marking where you are in the listing * @returns current hash position, rounded to the current pg */ -uint32_t rados_objects_list_get_pg_hash_position(rados_list_ctx_t ctx); +uint32_t rados_nobjects_list_get_pg_hash_position(rados_list_ctx_t ctx); /** * Reposition object iterator to a different hash position @@ -845,7 +847,7 @@ uint32_t rados_objects_list_get_pg_hash_position(rados_list_ctx_t ctx); * @param pos hash position to move to * @returns actual (rounded) position we moved to */ -uint32_t rados_objects_list_seek(rados_list_ctx_t ctx, uint32_t pos); +uint32_t rados_nobjects_list_seek(rados_list_ctx_t ctx, uint32_t pos); /** * Get the next object name and locator in the pool @@ -855,10 +857,12 @@ uint32_t rados_objects_list_seek(rados_list_ctx_t ctx, uint32_t pos); * @param ctx iterator marking where you are in the listing * @param entry where to store the name of the entry * @param key where to store the object locator (set to NULL to ignore) + * @param nspace where to store the object namespace (set to NULL to ignore) * @returns 0 on success, negative error code on failure * @returns -ENOENT when there are no more objects to list */ -int rados_objects_list_next(rados_list_ctx_t ctx, const char **entry, const char **key); +int rados_nobjects_list_next(rados_list_ctx_t ctx, const char **entry, + const char **key, const char **nspace); /** * Close the object listing handle. @@ -868,6 +872,39 @@ int rados_objects_list_next(rados_list_ctx_t ctx, const char **entry, const char * * @param ctx the handle to close */ +void rados_nobjects_list_close(rados_list_ctx_t ctx); + +/** @} New Listing Objects */ + +/** + * @defgroup librados_h_list_obj Deprecated Listing Objects + * + * Older listing objects interface. Please use the new interface. + * @{ + */ +/** + * @warning Deprecated: Use rados_nobjects_list_open() instead + */ +int rados_objects_list_open(rados_ioctx_t io, rados_list_ctx_t *ctx); + +/** + * @warning Deprecated: Use rados_nobjects_list_get_pg_hash_position() instead + */ +uint32_t rados_objects_list_get_pg_hash_position(rados_list_ctx_t ctx); + +/** + * @warning Deprecated: Use rados_nobjects_list_seek() instead + */ +uint32_t rados_objects_list_seek(rados_list_ctx_t ctx, uint32_t pos); + +/** + * @warning Deprecated: Use rados_nobjects_list_next() instead + */ +int rados_objects_list_next(rados_list_ctx_t ctx, const char **entry, const char **key); + +/** + * @warning Deprecated: Use rados_nobjects_list_close() instead + */ void rados_objects_list_close(rados_list_ctx_t ctx); /** @} Listing Objects */ diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp index c3882bf0c09..cabe63f97b5 100644 --- a/src/include/rados/librados.hpp +++ b/src/include/rados/librados.hpp @@ -30,6 +30,8 @@ namespace librados struct ObjListCtx; struct PoolAsyncCompletionImpl; class RadosClient; + class ListObjectImpl; + struct NObjectIteratorImpl; typedef void *list_ctx_t; typedef uint64_t auid_t; @@ -63,6 +65,57 @@ namespace librados typedef void *completion_t; typedef void (*callback_t)(completion_t cb, void *arg); + class ListObject + { + public: + const std::string& get_nspace() const; + const std::string& get_oid() const; + const std::string& get_locator() const; + + ListObject(); + ~ListObject(); + ListObject( const ListObject&); + ListObject& operator=(const ListObject& rhs); + private: + ListObject(ListObjectImpl *impl); + + friend class NObjectIteratorImpl; + friend std::ostream& operator<<(std::ostream& out, const ListObject& lop); + + ListObjectImpl *impl; + }; + std::ostream& operator<<(std::ostream& out, const librados::ListObject& lop); + + class NObjectIterator : public std::iterator <std::forward_iterator_tag, ListObject> { + public: + static const NObjectIterator __EndObjectIterator; + NObjectIterator(): impl(NULL) {} + ~NObjectIterator(); + NObjectIterator(const NObjectIterator &rhs); + NObjectIterator& operator=(const NObjectIterator& rhs); + + bool operator==(const NObjectIterator& rhs) const; + bool operator!=(const NObjectIterator& rhs) const; + const ListObject& operator*() const; + const ListObject* operator->() const; + NObjectIterator &operator++(); // Preincrement + NObjectIterator operator++(int); // Postincrement + friend class IoCtx; + friend class NObjectIteratorImpl; + + /// get current hash position of the iterator, rounded to the current pg + uint32_t get_pg_hash_position() const; + + /// move the iterator to a given hash position. this may (will!) be rounded to the nearest pg. + uint32_t seek(uint32_t pos); + + private: + NObjectIterator(ObjListCtx *ctx_); + void get_next(); + NObjectIteratorImpl *impl; + }; + + // DEPRECATED; Use NObjectIterator class ObjectIterator : public std::iterator <std::forward_iterator_tag, std::pair<std::string, std::string> > { public: static const ObjectIterator __EndObjectIterator; @@ -662,6 +715,13 @@ namespace librados /// Start enumerating objects for a pool + NObjectIterator nobjects_begin(); + /// Start enumerating objects for a pool starting from a hash position + NObjectIterator nobjects_begin(uint32_t start_hash_position); + /// Iterator indicating the end of a pool + const NObjectIterator& nobjects_end() const; + + // DEPRECATED ObjectIterator objects_begin(); /// Start enumerating objects for a pool starting from a hash position ObjectIterator objects_begin(uint32_t start_hash_position); diff --git a/src/include/rados/rados_types.h b/src/include/rados/rados_types.h index fa60bd266c7..5156b7562fa 100644 --- a/src/include/rados/rados_types.h +++ b/src/include/rados/rados_types.h @@ -14,4 +14,13 @@ struct obj_watch_t { uint32_t timeout_seconds; }; +/** + * @defines + * + * Pass as nspace argument to rados_ioctx_set_namespace() + * before calling rados_nobjects_list_open() to return + * all objects in all namespaces. + */ +#define LIBRADOS_ALL_NSPACES "\001" + #endif diff --git a/src/include/rados/rados_types.hpp b/src/include/rados/rados_types.hpp index ec70f9dc18a..6afb44cd0ab 100644 --- a/src/include/rados/rados_types.hpp +++ b/src/include/rados/rados_types.hpp @@ -4,6 +4,9 @@ #include <utility> #include <vector> #include <stdint.h> +#include <string> + +#include <include/rados/rados_types.h> namespace librados { @@ -28,5 +31,13 @@ struct snap_set_t { snap_set_t() : seq(0) {} }; +/** + * @var all_nspaces + * Pass as nspace argument to IoCtx::set_namespace() + * before calling nobjects_begin() to iterate + * through all objects in all namespaces. + */ +const std::string all_nspaces(LIBRADOS_ALL_NSPACES); + } #endif diff --git a/src/key_value_store/kv_flat_btree_async.cc b/src/key_value_store/kv_flat_btree_async.cc index 03f274edef6..df54384b525 100644 --- a/src/key_value_store/kv_flat_btree_async.cc +++ b/src/key_value_store/kv_flat_btree_async.cc @@ -2052,10 +2052,10 @@ bool KvFlatBtreeAsync::is_consistent() { io_ctx.operate(index_name, &oro, NULL); if (err < 0){ //probably because the index doesn't exist - this might be ok. - for (librados::ObjectIterator oit = io_ctx.objects_begin(); - oit != io_ctx.objects_end(); ++oit) { + for (librados::NObjectIterator oit = io_ctx.nobjects_begin(); + oit != io_ctx.nobjects_end(); ++oit) { //if this executes, there are floating objects. - cerr << "Not consistent! found floating object " << oit->first + cerr << "Not consistent! found floating object " << oit->get_oid() << std::endl; ret = false; } @@ -2116,9 +2116,9 @@ bool KvFlatBtreeAsync::is_consistent() { //make sure that an object exists iff it either is the index //or is listed in the index - for (librados::ObjectIterator oit = io_ctx.objects_begin(); - oit != io_ctx.objects_end(); ++oit) { - string name = oit->first; + for (librados::NObjectIterator oit = io_ctx.nobjects_begin(); + oit != io_ctx.nobjects_end(); ++oit) { + string name = oit->get_oid(); if (name != index_name && onames.count(name) == 0 && special_names.count(name) == 0) { cerr << "Not consistent! found floating object " << name << std::endl; diff --git a/src/librados/IoCtxImpl.cc b/src/librados/IoCtxImpl.cc index 964597b88c7..35e15a163e3 100644 --- a/src/librados/IoCtxImpl.cc +++ b/src/librados/IoCtxImpl.cc @@ -330,6 +330,37 @@ int librados::IoCtxImpl::snap_get_stamp(uint64_t snapid, time_t *t) // IO +int librados::IoCtxImpl::nlist(Objecter::NListContext *context, int max_entries) +{ + Cond cond; + bool done; + int r = 0; + object_t oid; + Mutex mylock("IoCtxImpl::nlist::mylock"); + + if (context->at_end()) + return 0; + + context->max_entries = max_entries; + context->nspace = oloc.nspace; + + objecter->list_nobjects(context, new C_SafeCond(&mylock, &cond, &done, &r)); + + mylock.Lock(); + while(!done) + cond.Wait(mylock); + mylock.Unlock(); + + return r; +} + +uint32_t librados::IoCtxImpl::nlist_seek(Objecter::NListContext *context, + uint32_t pos) +{ + context->list.clear(); + return objecter->list_nobjects_seek(context, pos); +} + int librados::IoCtxImpl::list(Objecter::ListContext *context, int max_entries) { Cond cond; diff --git a/src/librados/IoCtxImpl.h b/src/librados/IoCtxImpl.h index 4d79e7b16f9..7e899a91b80 100644 --- a/src/librados/IoCtxImpl.h +++ b/src/librados/IoCtxImpl.h @@ -110,6 +110,8 @@ struct librados::IoCtxImpl { ::SnapContext& snapc, uint64_t snapid); // io + int nlist(Objecter::NListContext *context, int max_entries); + uint32_t nlist_seek(Objecter::NListContext *context, uint32_t pos); int list(Objecter::ListContext *context, int max_entries); uint32_t list_seek(Objecter::ListContext *context, uint32_t pos); int create(const object_t& oid, bool exclusive); diff --git a/src/librados/ListObjectImpl.h b/src/librados/ListObjectImpl.h new file mode 100644 index 00000000000..08754dfad0b --- /dev/null +++ b/src/librados/ListObjectImpl.h @@ -0,0 +1,78 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 David Zafman <dzafman@redhat.com> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ +#include <string> + +#ifndef CEPH_LIBRADOS_LISTOBJECTIMPL_H +#define CEPH_LIBRADOS_LISTOBJECTIMPL_H + +#include <include/rados/librados.hpp> + +namespace librados { +struct ListObjectImpl { + std::string nspace; + std::string oid; + std::string locator; + + ListObjectImpl() {} + ListObjectImpl(std::string n, std::string o, std::string l): + nspace(n), oid(o), locator(l) {} + + const std::string& get_nspace() { return nspace; } + const std::string& get_oid() { return oid; } + const std::string& get_locator() { return locator; } +}; +WRITE_EQ_OPERATORS_3(ListObjectImpl, nspace, oid, locator) +WRITE_CMP_OPERATORS_3(ListObjectImpl, nspace, oid, locator) +inline std::ostream& operator<<(std::ostream& out, const struct ListObjectImpl& lop) { + out << (lop.nspace.size() ? lop.nspace + "/" : "") << lop.oid + << (lop.locator.size() ? "@" + lop.locator : ""); + return out; +} + +class ObjListCtx; + +struct NObjectIteratorImpl { + public: + NObjectIteratorImpl() {} + ~NObjectIteratorImpl(); + NObjectIteratorImpl(const NObjectIteratorImpl &rhs); + NObjectIteratorImpl& operator=(const NObjectIteratorImpl& rhs); + + bool operator==(const NObjectIteratorImpl& rhs) const; + bool operator!=(const NObjectIteratorImpl& rhs) const; + const ListObject& operator*() const; + const ListObject* operator->() const; + NObjectIteratorImpl &operator++(); // Preincrement + NObjectIteratorImpl operator++(int); // Postincrement + const ListObject *get_listobjectp() { return &cur_obj; } + friend class IoCtx; + friend class ListObjectImpl; + //friend class ListObject; + friend class NObjectIterator; + + /// get current hash position of the iterator, rounded to the current pg + uint32_t get_pg_hash_position() const; + + /// move the iterator to a given hash position. this may (will!) be rounded to the nearest pg. + uint32_t seek(uint32_t pos); + + private: + NObjectIteratorImpl(ObjListCtx *ctx_); + void get_next(); + ceph::shared_ptr < ObjListCtx > ctx; + ListObject cur_obj; +}; + +} +#endif diff --git a/src/librados/Makefile.am b/src/librados/Makefile.am index 75a1683dc5a..c9ad50e94f4 100644 --- a/src/librados/Makefile.am +++ b/src/librados/Makefile.am @@ -26,4 +26,5 @@ noinst_HEADERS += \ librados/IoCtxImpl.h \ librados/PoolAsyncCompletionImpl.h \ librados/RadosClient.h \ - librados/RadosXattrIter.h + librados/RadosXattrIter.h \ + librados/ListObjectImpl.h diff --git a/src/librados/librados.cc b/src/librados/librados.cc index ffa72c31e36..07d642cf20a 100644 --- a/src/librados/librados.cc +++ b/src/librados/librados.cc @@ -28,6 +28,7 @@ #include "librados/PoolAsyncCompletionImpl.h" #include "librados/RadosClient.h" #include "librados/RadosXattrIter.h" +#include "librados/ListObjectImpl.h" #include <cls/lock/cls_lock_client.h> #include <string> @@ -480,15 +481,235 @@ librados::WatchCtx:: struct librados::ObjListCtx { + bool new_request; + librados::IoCtxImpl dupctx; librados::IoCtxImpl *ctx; Objecter::ListContext *lc; + Objecter::NListContext *nlc; - ObjListCtx(IoCtxImpl *c, Objecter::ListContext *l) : ctx(c), lc(l) {} + ObjListCtx(IoCtxImpl *c, Objecter::ListContext *l) : new_request(false), lc(l), nlc(NULL) { + // Get our own private IoCtxImpl so that namespace setting isn't changed by caller + // between uses. + ctx = &dupctx; + dupctx.dup(*c); + } + ObjListCtx(IoCtxImpl *c, Objecter::NListContext *nl) : new_request(true), lc(NULL), nlc(nl) { + // Get our own private IoCtxImpl so that namespace setting isn't changed by caller + // between uses. + ctx = &dupctx; + dupctx.dup(*c); + } ~ObjListCtx() { - delete lc; + ctx = NULL; + if (new_request) + delete nlc; + else + delete lc; } }; +///////////////////////////// NObjectIteratorImpl ///////////////////////////// +librados::NObjectIteratorImpl::NObjectIteratorImpl(ObjListCtx *ctx_) + : ctx(ctx_) +{ +} + +librados::NObjectIteratorImpl::~NObjectIteratorImpl() +{ + ctx.reset(); +} + +librados::NObjectIteratorImpl::NObjectIteratorImpl(const NObjectIteratorImpl &rhs) +{ + *this = rhs; +} + +librados::NObjectIteratorImpl& librados::NObjectIteratorImpl::operator=(const librados::NObjectIteratorImpl &rhs) +{ + if (&rhs == this) + return *this; + if (rhs.ctx.get() == NULL) { + ctx.reset(); + return *this; + } + if (rhs.ctx->new_request) { + Objecter::NListContext *list_ctx = new Objecter::NListContext(*rhs.ctx->nlc); + ctx.reset(new ObjListCtx(rhs.ctx->ctx, list_ctx)); + cur_obj = rhs.cur_obj; + } else { + Objecter::ListContext *list_ctx = new Objecter::ListContext(*rhs.ctx->lc); + ctx.reset(new ObjListCtx(rhs.ctx->ctx, list_ctx)); + cur_obj = rhs.cur_obj; + } + return *this; +} + +bool librados::NObjectIteratorImpl::operator==(const librados::NObjectIteratorImpl& rhs) const { + + if (ctx.get() == NULL) { + if (rhs.ctx.get() == NULL) + return true; + if (rhs.ctx->new_request) + return rhs.ctx->nlc->at_end(); + else + return rhs.ctx->lc->at_end(); + } + if (rhs.ctx.get() == NULL) { + // Redundant but same as ObjectIterator version + if (ctx.get() == NULL) + return true; + if (ctx->new_request) + return ctx->nlc->at_end(); + else + return ctx->lc->at_end(); + } + return ctx.get() == rhs.ctx.get(); +} + +bool librados::NObjectIteratorImpl::operator!=(const librados::NObjectIteratorImpl& rhs) const { + return !(*this == rhs); +} + +const librados::ListObject& librados::NObjectIteratorImpl::operator*() const { + return cur_obj; +} + +const librados::ListObject* librados::NObjectIteratorImpl::operator->() const { + return &cur_obj; +} + +librados::NObjectIteratorImpl& librados::NObjectIteratorImpl::operator++() +{ + get_next(); + return *this; +} + +librados::NObjectIteratorImpl librados::NObjectIteratorImpl::operator++(int) +{ + librados::NObjectIteratorImpl ret(*this); + get_next(); + return ret; +} + +uint32_t librados::NObjectIteratorImpl::seek(uint32_t pos) +{ + uint32_t r = rados_nobjects_list_seek(ctx.get(), pos); + get_next(); + return r; +} + +void librados::NObjectIteratorImpl::get_next() +{ + const char *entry, *key, *nspace; + if (ctx->new_request) { + if (ctx->nlc->at_end()) + return; + } else { + if (ctx->lc->at_end()) + return; + } + int ret = rados_nobjects_list_next(ctx.get(), &entry, &key, &nspace); + if (ret == -ENOENT) { + return; + } + else if (ret) { + ostringstream oss; + oss << "rados returned " << cpp_strerror(ret); + throw std::runtime_error(oss.str()); + } + + if (cur_obj.impl == NULL) + cur_obj.impl = new ListObjectImpl(); + cur_obj.impl->nspace = nspace; + cur_obj.impl->oid = entry; + cur_obj.impl->locator = key ? key : string(); +} + +uint32_t librados::NObjectIteratorImpl::get_pg_hash_position() const +{ + if (ctx->new_request) + return ctx->nlc->get_pg_hash_position(); + else + return ctx->lc->get_pg_hash_position(); +} + +///////////////////////////// NObjectIterator ///////////////////////////// +librados::NObjectIterator::NObjectIterator(ObjListCtx *ctx_) +{ + impl = new NObjectIteratorImpl(ctx_); +} + +librados::NObjectIterator::~NObjectIterator() +{ + delete impl; +} + +librados::NObjectIterator::NObjectIterator(const NObjectIterator &rhs) +{ + if (rhs.impl == NULL) { + impl = NULL; + return; + } + impl = new NObjectIteratorImpl(); + *impl = *(rhs.impl); +} + +librados::NObjectIterator& librados::NObjectIterator::operator=(const librados::NObjectIterator &rhs) +{ + if (impl == NULL) + impl = new NObjectIteratorImpl(); + *impl = *(rhs.impl); + return *this; +} + +bool librados::NObjectIterator::operator==(const librados::NObjectIterator& rhs) const +{ + return *impl == *(rhs.impl); +} + +bool librados::NObjectIterator::operator!=(const librados::NObjectIterator& rhs) const { + return !(*impl == *(rhs.impl)); +} + +const librados::ListObject& librados::NObjectIterator::operator*() const { + return *(impl->get_listobjectp()); +} + +const librados::ListObject* librados::NObjectIterator::operator->() const { + return impl->get_listobjectp(); +} + +librados::NObjectIterator& librados::NObjectIterator::operator++() +{ + impl->get_next(); + return *this; +} + +librados::NObjectIterator librados::NObjectIterator::operator++(int) +{ + librados::NObjectIterator ret(*this); + impl->get_next(); + return ret; +} + +uint32_t librados::NObjectIterator::seek(uint32_t pos) +{ + return impl->seek(pos); +} + +void librados::NObjectIterator::get_next() +{ + impl->get_next(); +} + +uint32_t librados::NObjectIterator::get_pg_hash_position() const +{ + return impl->get_pg_hash_position(); +} + +const librados::NObjectIterator librados::NObjectIterator::__EndObjectIterator(NULL); + +// DEPRECATED; Use NObjectIterator instead ///////////////////////////// ObjectIterator ///////////////////////////// librados::ObjectIterator::ObjectIterator(ObjListCtx *ctx_) : ctx(ctx_) @@ -1261,9 +1482,38 @@ int librados::IoCtx::list_lockers(const std::string &oid, const std::string &nam return tmp_lockers.size(); } +librados::NObjectIterator librados::IoCtx::nobjects_begin() +{ + rados_list_ctx_t listh; + rados_nobjects_list_open(io_ctx_impl, &listh); + NObjectIterator iter((ObjListCtx*)listh); + iter.get_next(); + return iter; +} + +librados::NObjectIterator librados::IoCtx::nobjects_begin(uint32_t pos) +{ + rados_list_ctx_t listh; + rados_nobjects_list_open(io_ctx_impl, &listh); + NObjectIterator iter((ObjListCtx*)listh); + iter.seek(pos); + return iter; +} + +const librados::NObjectIterator& librados::IoCtx::nobjects_end() const +{ + return NObjectIterator::__EndObjectIterator; +} + +// DEPRECATED; use n versions above librados::ObjectIterator librados::IoCtx::objects_begin() { rados_list_ctx_t listh; + if (io_ctx_impl->oloc.nspace == librados::all_nspaces) { + ostringstream oss; + oss << "rados returned " << cpp_strerror(-EINVAL); + throw std::runtime_error(oss.str()); + } rados_objects_list_open(io_ctx_impl, &listh); ObjectIterator iter((ObjListCtx*)listh); iter.get_next(); @@ -1273,6 +1523,11 @@ librados::ObjectIterator librados::IoCtx::objects_begin() librados::ObjectIterator librados::IoCtx::objects_begin(uint32_t pos) { rados_list_ctx_t listh; + if (io_ctx_impl->oloc.nspace == librados::all_nspaces) { + ostringstream oss; + oss << "rados returned " << cpp_strerror(-EINVAL); + throw std::runtime_error(oss.str()); + } rados_objects_list_open(io_ctx_impl, &listh); ObjectIterator iter((ObjListCtx*)listh); iter.seek(pos); @@ -3060,10 +3315,69 @@ extern "C" int rados_exec(rados_ioctx_t io, const char *o, const char *cls, cons /* list objects */ +extern "C" int rados_nobjects_list_open(rados_ioctx_t io, rados_list_ctx_t *listh) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + + // Let's do it the old way for backward compatbility if not using ANY_NSPACES + if (ctx->oloc.nspace != librados::all_nspaces) + return rados_objects_list_open(io, listh); + + tracepoint(librados, rados_nobjects_list_open_enter, io); + + Objecter::NListContext *h = new Objecter::NListContext; + h->pool_id = ctx->poolid; + h->pool_snap_seq = ctx->snap_seq; + h->nspace = ctx->oloc.nspace; // After dropping compatibility need nspace + *listh = (void *)new librados::ObjListCtx(ctx, h); + int retval = 0; + tracepoint(librados, rados_nobjects_list_open_exit, retval, *listh); + return retval; +} + +extern "C" void rados_nobjects_list_close(rados_list_ctx_t h) +{ + tracepoint(librados, rados_nobjects_list_close_enter, h); + librados::ObjListCtx *lh = (librados::ObjListCtx *)h; + delete lh; + tracepoint(librados, rados_nobjects_list_close_exit); +} + +extern "C" uint32_t rados_nobjects_list_seek(rados_list_ctx_t listctx, + uint32_t pos) +{ + librados::ObjListCtx *lh = (librados::ObjListCtx *)listctx; + + // Let's do it the old way for backward compatbility if not using ANY_NSPACES + if (!lh->new_request) + return rados_objects_list_seek(listctx, pos); + + tracepoint(librados, rados_nobjects_list_seek_enter, listctx, pos); + uint32_t r = lh->ctx->nlist_seek(lh->nlc, pos); + tracepoint(librados, rados_nobjects_list_seek_exit, r); + return r; +} + +extern "C" uint32_t rados_nobjects_list_get_pg_hash_position( + rados_list_ctx_t listctx) +{ + librados::ObjListCtx *lh = (librados::ObjListCtx *)listctx; + if (!lh->new_request) + return rados_objects_list_get_pg_hash_position(listctx); + + tracepoint(librados, rados_nobjects_list_get_pg_hash_position_enter, listctx); + uint32_t retval = lh->nlc->get_pg_hash_position(); + tracepoint(librados, rados_nobjects_list_get_pg_hash_position_exit, retval); + return retval; +} + +// Deprecated, but using it for compatibility with older OSDs extern "C" int rados_objects_list_open(rados_ioctx_t io, rados_list_ctx_t *listh) { tracepoint(librados, rados_objects_list_open_enter, io); librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + if (ctx->oloc.nspace == librados::all_nspaces) + return -EINVAL; Objecter::ListContext *h = new Objecter::ListContext; h->pool_id = ctx->poolid; h->pool_snap_seq = ctx->snap_seq; @@ -3074,6 +3388,7 @@ extern "C" int rados_objects_list_open(rados_ioctx_t io, rados_list_ctx_t *listh return retval; } +// Deprecated, but using it for compatibility with older OSDs extern "C" void rados_objects_list_close(rados_list_ctx_t h) { tracepoint(librados, rados_objects_list_close_enter, h); @@ -3102,12 +3417,63 @@ extern "C" uint32_t rados_objects_list_get_pg_hash_position( return retval; } +extern "C" int rados_nobjects_list_next(rados_list_ctx_t listctx, const char **entry, const char **key, const char **nspace) +{ + tracepoint(librados, rados_nobjects_list_next_enter, listctx); + librados::ObjListCtx *lh = (librados::ObjListCtx *)listctx; + if (!lh->new_request) { + int retval = rados_objects_list_next(listctx, entry, key); + // Let's return nspace as you would expect even when asking + // for a specific one, since you know what it must be. + if (retval == 0 && nspace) + *nspace = lh->ctx->oloc.nspace.c_str(); + return retval; + } + Objecter::NListContext *h = lh->nlc; + + // if the list is non-empty, this method has been called before + if (!h->list.empty()) + // so let's kill the previously-returned object + h->list.pop_front(); + + if (h->list.empty()) { + int ret = lh->ctx->nlist(lh->nlc, RADOS_LIST_MAX_ENTRIES); + if (ret < 0) { + tracepoint(librados, rados_nobjects_list_next_exit, ret, NULL, NULL, NULL); + return ret; + } + if (h->list.empty()) { + tracepoint(librados, rados_nobjects_list_next_exit, -ENOENT, NULL, NULL, NULL); + return -ENOENT; + } + } + + *entry = h->list.front().oid.c_str(); + + if (key) { + if (h->list.front().locator.size()) + *key = h->list.front().locator.c_str(); + else + *key = NULL; + } + if (nspace) + *nspace = h->list.front().nspace.c_str(); + int retval = 0; + tracepoint(librados, rados_nobjects_list_next_exit, 0, *entry, key, nspace); + return retval; +} + +// DEPRECATED extern "C" int rados_objects_list_next(rados_list_ctx_t listctx, const char **entry, const char **key) { tracepoint(librados, rados_objects_list_next_enter, listctx); librados::ObjListCtx *lh = (librados::ObjListCtx *)listctx; Objecter::ListContext *h = lh->lc; + // Calling wrong interface after rados_nobjects_list_open() + if (lh->new_request) + return -EINVAL; + // if the list is non-empty, this method has been called before if (!h->list.empty()) // so let's kill the previously-returned object @@ -4127,3 +4493,59 @@ extern "C" int rados_aio_read_op_operate(rados_read_op_t read_op, tracepoint(librados, rados_aio_read_op_operate_exit, retval); return retval; } + + +///////////////////////////// ListObject ////////////////////////////// +librados::ListObject::ListObject() : impl(NULL) +{ +} + +librados::ListObject::ListObject(librados::ListObjectImpl *i): impl(i) +{ +} + +librados::ListObject::ListObject(const ListObject& rhs) +{ + if (rhs.impl == NULL) { + impl = NULL; + return; + } + impl = new ListObjectImpl(); + *impl = *(rhs.impl); +} + +librados::ListObject& librados::ListObject::operator=(const ListObject& rhs) +{ + if (impl == NULL) + impl = new ListObjectImpl(); + *impl = *(rhs.impl); + return *this; +} + +librados::ListObject::~ListObject() +{ + if (impl) + delete impl; + impl = NULL; +} + +const std::string& librados::ListObject::get_nspace() const +{ + return impl->get_nspace(); +} + +const std::string& librados::ListObject::get_oid() const +{ + return impl->get_oid(); +} + +const std::string& librados::ListObject::get_locator() const +{ + return impl->get_locator(); +} + +std::ostream& librados::operator<<(std::ostream& out, const librados::ListObject& lop) +{ + out << *(lop.impl); + return out; +} diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index dcef9a76ed9..9dcdb4197fe 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -57,6 +57,7 @@ #include "json_spirit/json_spirit_value.h" #include "json_spirit/json_spirit_reader.h" #include "include/assert.h" // json_spirit clobbers it +#include "include/rados/rados_types.hpp" #ifdef WITH_LTTNG #include "tracing/osd.h" @@ -775,7 +776,8 @@ bool ReplicatedPG::pg_op_must_wait(MOSDOp *op) if (pg_log.get_missing().missing.empty()) return false; for (vector<OSDOp>::iterator p = op->ops.begin(); p != op->ops.end(); ++p) { - if (p->op.op == CEPH_OSD_OP_PGLS) { + // TODO: Need CEPH_OSD_OP_PGLS_FILTER and CEPH_OSD_OP_PGNLS_FILTER, See bug #9439 + if (p->op.op == CEPH_OSD_OP_PGLS || p->op.op == CEPH_OSD_OP_PGNLS) { if (op->get_snapid() != CEPH_NOSNAP) { return true; } @@ -805,6 +807,162 @@ void ReplicatedPG::do_pg_op(OpRequestRef op) OSDOp& osd_op = *p; bufferlist::iterator bp = p->indata.begin(); switch (p->op.op) { + case CEPH_OSD_OP_PGNLS_FILTER: + try { + ::decode(cname, bp); + ::decode(mname, bp); + } + catch (const buffer::error& e) { + dout(0) << "unable to decode PGLS_FILTER description in " << *m << dendl; + result = -EINVAL; + break; + } + result = get_pgls_filter(bp, &filter); + if (result < 0) + break; + + assert(filter); + + // fall through + + case CEPH_OSD_OP_PGNLS: + if (m->get_pg() != info.pgid.pgid) { + dout(10) << " pgnls pg=" << m->get_pg() << " != " << info.pgid << dendl; + result = 0; // hmm? + } else { + unsigned list_size = MIN(cct->_conf->osd_max_pgls, p->op.pgls.count); + + dout(10) << " pgnls pg=" << m->get_pg() << " count " << list_size << dendl; + // read into a buffer + vector<hobject_t> sentries; + pg_nls_response_t response; + try { + ::decode(response.handle, bp); + } + catch (const buffer::error& e) { + dout(0) << "unable to decode PGNLS handle in " << *m << dendl; + result = -EINVAL; + break; + } + + hobject_t next; + hobject_t current = response.handle; + osr->flush(); + int r = pgbackend->objects_list_partial( + current, + list_size, + list_size, + snapid, + &sentries, + &next); + if (r != 0) { + result = -EINVAL; + break; + } + + assert(snapid == CEPH_NOSNAP || pg_log.get_missing().missing.empty()); + map<hobject_t, pg_missing_t::item>::const_iterator missing_iter = + pg_log.get_missing().missing.lower_bound(current); + vector<hobject_t>::iterator ls_iter = sentries.begin(); + hobject_t _max = hobject_t::get_max(); + while (1) { + const hobject_t &mcand = + missing_iter == pg_log.get_missing().missing.end() ? + _max : + missing_iter->first; + const hobject_t &lcand = + ls_iter == sentries.end() ? + _max : + *ls_iter; + + hobject_t candidate; + if (mcand == lcand) { + candidate = mcand; + if (!mcand.is_max()) { + ++ls_iter; + ++missing_iter; + } + } else if (mcand < lcand) { + candidate = mcand; + assert(!mcand.is_max()); + ++missing_iter; + } else { + candidate = lcand; + assert(!lcand.is_max()); + ++ls_iter; + } + + if (candidate >= next) { + break; + } + + if (response.entries.size() == list_size) { + next = candidate; + break; + } + + // skip snapdir objects + if (candidate.snap == CEPH_SNAPDIR) + continue; + + if (candidate.snap < snapid) + continue; + + if (snapid != CEPH_NOSNAP) { + bufferlist bl; + if (candidate.snap == CEPH_NOSNAP) { + pgbackend->objects_get_attr( + candidate, + SS_ATTR, + &bl); + SnapSet snapset(bl); + if (snapid <= snapset.seq) + continue; + } else { + bufferlist attr_bl; + pgbackend->objects_get_attr( + candidate, OI_ATTR, &attr_bl); + object_info_t oi(attr_bl); + vector<snapid_t>::iterator i = find(oi.snaps.begin(), + oi.snaps.end(), + snapid); + if (i == oi.snaps.end()) + continue; + } + } + + // skip internal namespace + if (candidate.get_namespace() == cct->_conf->osd_hit_set_namespace) + continue; + + // skip wrong namespace + if (m->get_object_locator().nspace != librados::all_nspaces && + candidate.get_namespace() != m->get_object_locator().nspace) + continue; + + if (filter && !pgls_filter(filter, candidate, filter_out)) + continue; + + librados::ListObjectImpl item; + item.nspace = candidate.get_namespace(); + item.oid = candidate.oid.name; + item.locator = candidate.get_key(); + response.entries.push_back(item); + } + if (next.is_max() && + missing_iter == pg_log.get_missing().missing.end() && + ls_iter == sentries.end()) { + result = 1; + } + response.handle = next; + ::encode(response, osd_op.outdata); + if (filter) + ::encode(filter_out, osd_op.outdata); + dout(10) << " pgnls result=" << result << " outdata.length()=" + << osd_op.outdata.length() << dendl; + } + break; + case CEPH_OSD_OP_PGLS_FILTER: try { ::decode(cname, bp); diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index d7081b97ede..deca0f8493f 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -4568,6 +4568,8 @@ ostream& operator<<(ostream& out, const OSDOp& op) switch (op.op.op) { case CEPH_OSD_OP_PGLS: case CEPH_OSD_OP_PGLS_FILTER: + case CEPH_OSD_OP_PGNLS: + case CEPH_OSD_OP_PGNLS_FILTER: out << " start_epoch " << op.op.pgls.start_epoch; break; case CEPH_OSD_OP_PG_HITSET_LS: diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 9840bf16cfe..8c3be4064a9 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -40,6 +40,7 @@ #include "Watch.h" #include "OpRequest.h" #include "include/cmp.h" +#include "librados/ListObjectImpl.h" #define CEPH_OSD_ONDISK_MAGIC "ceph osd volume v026" @@ -2321,6 +2322,75 @@ ostream& operator<<(ostream& out, const pg_missing_t& missing); * pg list objects response format * */ +struct pg_nls_response_t { + collection_list_handle_t handle; + list<librados::ListObjectImpl> entries; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + ::encode(handle, bl); + __u32 n = (__u32)entries.size(); + ::encode(n, bl); + for (list<librados::ListObjectImpl>::const_iterator i = entries.begin(); i != entries.end(); ++i) { + ::encode(i->nspace, bl); + ::encode(i->oid, bl); + ::encode(i->locator, bl); + } + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + DECODE_START(1, bl); + ::decode(handle, bl); + __u32 n; + ::decode(n, bl); + entries.clear(); + while (n--) { + librados::ListObjectImpl i; + ::decode(i.nspace, bl); + ::decode(i.oid, bl); + ::decode(i.locator, bl); + entries.push_back(i); + } + DECODE_FINISH(bl); + } + void dump(Formatter *f) const { + f->dump_stream("handle") << handle; + f->open_array_section("entries"); + for (list<librados::ListObjectImpl>::const_iterator p = entries.begin(); p != entries.end(); ++p) { + f->open_object_section("object"); + f->dump_string("namespace", p->nspace); + f->dump_string("object", p->oid); + f->dump_string("key", p->locator); + f->close_section(); + } + f->close_section(); + } + static void generate_test_instances(list<pg_nls_response_t*>& o) { + o.push_back(new pg_nls_response_t); + o.push_back(new pg_nls_response_t); + o.back()->handle = hobject_t(object_t("hi"), "key", 1, 2, -1, ""); + o.back()->entries.push_back(librados::ListObjectImpl("", "one", "")); + o.back()->entries.push_back(librados::ListObjectImpl("", "two", "twokey")); + o.back()->entries.push_back(librados::ListObjectImpl("", "three", "")); + o.push_back(new pg_nls_response_t); + o.back()->handle = hobject_t(object_t("hi"), "key", 3, 4, -1, ""); + o.back()->entries.push_back(librados::ListObjectImpl("n1", "n1one", "")); + o.back()->entries.push_back(librados::ListObjectImpl("n1", "n1two", "n1twokey")); + o.back()->entries.push_back(librados::ListObjectImpl("n1", "n1three", "")); + o.push_back(new pg_nls_response_t); + o.back()->handle = hobject_t(object_t("hi"), "key", 5, 6, -1, ""); + o.back()->entries.push_back(librados::ListObjectImpl("", "one", "")); + o.back()->entries.push_back(librados::ListObjectImpl("", "two", "twokey")); + o.back()->entries.push_back(librados::ListObjectImpl("", "three", "")); + o.back()->entries.push_back(librados::ListObjectImpl("n1", "n1one", "")); + o.back()->entries.push_back(librados::ListObjectImpl("n1", "n1two", "n1twokey")); + o.back()->entries.push_back(librados::ListObjectImpl("n1", "n1three", "")); + } +}; + +WRITE_CLASS_ENCODER(pg_nls_response_t) + +// For backwards compatibility with older OSD requests struct pg_ls_response_t { collection_list_handle_t handle; list<pair<object_t, string> > entries; diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index a2e03bbd96f..f241d431aaa 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -2677,6 +2677,145 @@ void Objecter::handle_osd_op_reply(MOSDOpReply *m) } +uint32_t Objecter::list_nobjects_seek(NListContext *list_context, + uint32_t pos) +{ + RWLock::RLocker rl(rwlock); + pg_t actual = osdmap->raw_pg_to_pg(pg_t(pos, list_context->pool_id)); + ldout(cct, 10) << "list_objects_seek " << list_context + << " pos " << pos << " -> " << actual << dendl; + list_context->current_pg = actual.ps(); + list_context->cookie = collection_list_handle_t(); + list_context->at_end_of_pg = false; + list_context->at_end_of_pool = false; + list_context->current_pg_epoch = 0; + return list_context->current_pg; +} + +void Objecter::list_nobjects(NListContext *list_context, Context *onfinish) +{ + ldout(cct, 10) << "list_objects" << dendl; + ldout(cct, 20) << " pool_id " << list_context->pool_id + << " pool_snap_seq " << list_context->pool_snap_seq + << " max_entries " << list_context->max_entries + << " list_context " << list_context + << " onfinish " << onfinish + << " list_context->current_pg " << list_context->current_pg + << " list_context->cookie " << list_context->cookie << dendl; + + if (list_context->at_end_of_pg) { + list_context->at_end_of_pg = false; + ++list_context->current_pg; + list_context->current_pg_epoch = 0; + list_context->cookie = collection_list_handle_t(); + if (list_context->current_pg >= list_context->starting_pg_num) { + list_context->at_end_of_pool = true; + ldout(cct, 20) << " no more pgs; reached end of pool" << dendl; + } else { + ldout(cct, 20) << " move to next pg " << list_context->current_pg << dendl; + } + } + if (list_context->at_end_of_pool) { + // release the listing context's budget once all + // OPs (in the session) are finished + put_nlist_context_budget(list_context); + + onfinish->complete(0); + return; + } + + rwlock.get_read(); + const pg_pool_t *pool = osdmap->get_pg_pool(list_context->pool_id); + int pg_num = pool->get_pg_num(); + rwlock.unlock(); + + if (list_context->starting_pg_num == 0) { // there can't be zero pgs! + list_context->starting_pg_num = pg_num; + ldout(cct, 20) << pg_num << " placement groups" << dendl; + } + if (list_context->starting_pg_num != pg_num) { + // start reading from the beginning; the pgs have changed + ldout(cct, 10) << " pg_num changed; restarting with " << pg_num << dendl; + list_context->current_pg = 0; + list_context->cookie = collection_list_handle_t(); + list_context->current_pg_epoch = 0; + list_context->starting_pg_num = pg_num; + } + assert(list_context->current_pg <= pg_num); + + ObjectOperation op; + op.pg_nls(list_context->max_entries, list_context->filter, list_context->cookie, + list_context->current_pg_epoch); + list_context->bl.clear(); + C_NList *onack = new C_NList(list_context, onfinish, this); + object_locator_t oloc(list_context->pool_id, list_context->nspace); + + pg_read(list_context->current_pg, oloc, op, + &list_context->bl, 0, onack, &onack->epoch, &list_context->ctx_budget); +} + +void Objecter::_nlist_reply(NListContext *list_context, int r, + Context *final_finish, epoch_t reply_epoch) +{ + ldout(cct, 10) << "_list_reply" << dendl; + + bufferlist::iterator iter = list_context->bl.begin(); + pg_nls_response_t response; + bufferlist extra_info; + ::decode(response, iter); + if (!iter.end()) { + ::decode(extra_info, iter); + } + list_context->cookie = response.handle; + if (!list_context->current_pg_epoch) { + // first pgls result, set epoch marker + ldout(cct, 20) << " first pgls piece, reply_epoch is " + << reply_epoch << dendl; + list_context->current_pg_epoch = reply_epoch; + } + + int response_size = response.entries.size(); + ldout(cct, 20) << " response.entries.size " << response_size + << ", response.entries " << response.entries << dendl; + list_context->extra_info.append(extra_info); + if (response_size) { + list_context->list.merge(response.entries); + } + + // if the osd returns 1 (newer code), or no entries, it means we + // hit the end of the pg. + if (response_size == 0 || r == 1) { + ldout(cct, 20) << " at end of pg" << dendl; + list_context->at_end_of_pg = true; + } else { + // there is more for this pg; get it? + if (response_size < list_context->max_entries) { + list_context->max_entries -= response_size; + list_nobjects(list_context, final_finish); + return; + } + } + if (!list_context->list.empty()) { + ldout(cct, 20) << " returning results so far" << dendl; + // release the listing context's budget once all + // OPs (in the session) are finished + put_nlist_context_budget(list_context); + final_finish->complete(0); + return; + } + + // continue! + list_nobjects(list_context, final_finish); +} + +void Objecter::put_nlist_context_budget(NListContext *list_context) { + if (list_context->ctx_budget >= 0) { + ldout(cct, 10) << " release listing context's budget " << list_context->ctx_budget << dendl; + put_op_budget_bytes(list_context->ctx_budget); + list_context->ctx_budget = -1; + } + } + uint32_t Objecter::list_objects_seek(ListContext *list_context, uint32_t pos) { diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 32089f8d89f..28b725e1f1b 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -24,7 +24,6 @@ #include "common/admin_socket.h" #include "common/Timer.h" #include "common/RWLock.h" -#include "include/rados/rados_types.h" #include "include/rados/rados_types.hpp" #include <list> @@ -216,6 +215,14 @@ struct ObjectOperation { flags |= CEPH_OSD_FLAG_PGOP; } + void pg_nls(uint64_t count, bufferlist& filter, collection_list_handle_t cookie, epoch_t start_epoch) { + if (filter.length() == 0) + add_pgls(CEPH_OSD_OP_PGNLS, count, cookie, start_epoch); + else + add_pgls_filter(CEPH_OSD_OP_PGNLS_FILTER, count, filter, cookie, start_epoch); + flags |= CEPH_OSD_FLAG_PGOP; + } + void create(bool excl) { OSDOp& o = add_op(CEPH_OSD_OP_CREATE); o.op.flags = (excl ? CEPH_OSD_OP_FLAG_EXCL : 0); @@ -1242,6 +1249,71 @@ public: // Pools and statistics + struct NListContext { + int current_pg; + collection_list_handle_t cookie; + epoch_t current_pg_epoch; + int starting_pg_num; + bool at_end_of_pool; + bool at_end_of_pg; + + int64_t pool_id; + int pool_snap_seq; + int max_entries; + string nspace; + + bufferlist bl; // raw data read to here + std::list<librados::ListObjectImpl> list; + + bufferlist filter; + + bufferlist extra_info; + + // The budget associated with this context, once it is set (>= 0), + // the budget is not get/released on OP basis, instead the budget + // is acquired before sending the first OP and released upon receiving + // the last op reply. + int ctx_budget; + + NListContext() : current_pg(0), current_pg_epoch(0), starting_pg_num(0), + at_end_of_pool(false), + at_end_of_pg(false), + pool_id(0), + pool_snap_seq(0), + max_entries(0), + nspace(), + bl(), + list(), + filter(), + extra_info(), + ctx_budget(-1) {} + + bool at_end() const { + return at_end_of_pool; + } + + uint32_t get_pg_hash_position() const { + return current_pg; + } + }; + + struct C_NList : public Context { + NListContext *list_context; + Context *final_finish; + Objecter *objecter; + epoch_t epoch; + C_NList(NListContext *lc, Context * finish, Objecter *ob) : + list_context(lc), final_finish(finish), objecter(ob), epoch(0) {} + void finish(int r) { + if (r >= 0) { + objecter->_nlist_reply(list_context, r, final_finish, epoch); + } else { + final_finish->complete(r); + } + } + }; + + // Old pgls context we still use for talking to older OSDs struct ListContext { int current_pg; collection_list_handle_t cookie; @@ -1574,6 +1646,8 @@ public: void _reopen_session(OSDSession *session); void close_session(OSDSession *session); + void _nlist_reply(NListContext *list_context, int r, Context *final_finish, + epoch_t reply_epoch); void _list_reply(ListContext *list_context, int r, Context *final_finish, epoch_t reply_epoch); @@ -1610,6 +1684,7 @@ public: put_op_budget_bytes(op_budget); } void put_list_context_budget(ListContext *list_context); + void put_nlist_context_budget(NListContext *list_context); Throttle op_throttle_bytes, op_throttle_ops; public: @@ -2192,6 +2267,8 @@ public: return op_submit(o); } + void list_nobjects(NListContext *p, Context *onfinish); + uint32_t list_nobjects_seek(NListContext *p, uint32_t pos); void list_objects(ListContext *p, Context *onfinish); uint32_t list_objects_seek(ListContext *p, uint32_t pos); diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 1d05509d6c8..0a5ab2e6160 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -1673,7 +1673,7 @@ int RGWRados::open_bucket_index_ctx(rgw_bucket& bucket, librados::IoCtx& index_c */ int RGWRados::list_buckets_init(RGWAccessHandle *handle) { - librados::ObjectIterator *state = new librados::ObjectIterator(root_pool_ctx.objects_begin()); + librados::NObjectIterator *state = new librados::NObjectIterator(root_pool_ctx.nobjects_begin()); *handle = (RGWAccessHandle)state; return 0; } @@ -1686,15 +1686,15 @@ int RGWRados::list_buckets_init(RGWAccessHandle *handle) */ int RGWRados::list_buckets_next(RGWObjEnt& obj, RGWAccessHandle *handle) { - librados::ObjectIterator *state = (librados::ObjectIterator *)*handle; + librados::NObjectIterator *state = (librados::NObjectIterator *)*handle; do { - if (*state == root_pool_ctx.objects_end()) { + if (*state == root_pool_ctx.nobjects_end()) { delete state; return -ENOENT; } - obj.name = (*state)->first; + obj.name = (*state)->get_oid(); (*state)++; } while (obj.name[0] == '.'); /* skip all entries starting with '.' */ @@ -1707,7 +1707,7 @@ int RGWRados::list_buckets_next(RGWObjEnt& obj, RGWAccessHandle *handle) struct log_list_state { string prefix; librados::IoCtx io_ctx; - librados::ObjectIterator obit; + librados::NObjectIterator obit; }; int RGWRados::log_list_init(const string& prefix, RGWAccessHandle *handle) @@ -1720,7 +1720,7 @@ int RGWRados::log_list_init(const string& prefix, RGWAccessHandle *handle) return r; } state->prefix = prefix; - state->obit = state->io_ctx.objects_begin(); + state->obit = state->io_ctx.nobjects_begin(); *handle = (RGWAccessHandle)state; return 0; } @@ -1729,16 +1729,16 @@ int RGWRados::log_list_next(RGWAccessHandle handle, string *name) { log_list_state *state = static_cast<log_list_state *>(handle); while (true) { - if (state->obit == state->io_ctx.objects_end()) { + if (state->obit == state->io_ctx.nobjects_end()) { delete state; return -ENOENT; } if (state->prefix.length() && - state->obit->first.find(state->prefix) != 0) { + state->obit->get_oid().find(state->prefix) != 0) { state->obit++; continue; } - *name = state->obit->first; + *name = state->obit->get_oid(); state->obit++; break; } @@ -5767,13 +5767,13 @@ int RGWRados::distribute(const string& key, bufferlist& bl) int RGWRados::pool_iterate_begin(rgw_bucket& bucket, RGWPoolIterCtx& ctx) { librados::IoCtx& io_ctx = ctx.io_ctx; - librados::ObjectIterator& iter = ctx.iter; + librados::NObjectIterator& iter = ctx.iter; int r = open_bucket_data_ctx(bucket, io_ctx); if (r < 0) return r; - iter = io_ctx.objects_begin(); + iter = io_ctx.nobjects_begin(); return 0; } @@ -5782,17 +5782,17 @@ int RGWRados::pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<RGWObjEnt>& bool *is_truncated, RGWAccessListFilter *filter) { librados::IoCtx& io_ctx = ctx.io_ctx; - librados::ObjectIterator& iter = ctx.iter; + librados::NObjectIterator& iter = ctx.iter; - if (iter == io_ctx.objects_end()) + if (iter == io_ctx.nobjects_end()) return -ENOENT; uint32_t i; - for (i = 0; i < num && iter != io_ctx.objects_end(); ++i, ++iter) { + for (i = 0; i < num && iter != io_ctx.nobjects_end(); ++i, ++iter) { RGWObjEnt e; - string oid = iter->first; + string oid = iter->get_oid(); ldout(cct, 20) << "RGWRados::pool_iterate: got " << oid << dendl; // fill it in with initial values; we may correct later @@ -5804,7 +5804,7 @@ int RGWRados::pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<RGWObjEnt>& } if (is_truncated) - *is_truncated = (iter != io_ctx.objects_end()); + *is_truncated = (iter != io_ctx.nobjects_end()); return objs.size(); } diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 669cb2d71ec..4d6267cd331 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -781,7 +781,7 @@ struct RGWRadosCtx { struct RGWPoolIterCtx { librados::IoCtx io_ctx; - librados::ObjectIterator iter; + librados::NObjectIterator iter; }; struct RGWListRawObjsCtx { diff --git a/src/tools/rados/rados_export.cc b/src/tools/rados/rados_export.cc index bf6654114c5..0519fce6ffd 100644 --- a/src/tools/rados/rados_export.cc +++ b/src/tools/rados/rados_export.cc @@ -179,8 +179,8 @@ int do_rados_export(ThreadPool *tp, IoCtx& io_ctx, IoCtxDistributor *io_ctx_dist, const char *dir_name, bool create, bool force, bool delete_after) { - librados::ObjectIterator oi = io_ctx.objects_begin(); - librados::ObjectIterator oi_end = io_ctx.objects_end(); + librados::NObjectIterator oi = io_ctx.nobjects_begin(); + librados::NObjectIterator oi_end = io_ctx.nobjects_end(); auto_ptr <ExportDir> export_dir; export_dir.reset(ExportDir::create_for_writing(dir_name, 1, create)); if (!export_dir.get()) @@ -188,7 +188,7 @@ int do_rados_export(ThreadPool *tp, IoCtx& io_ctx, ExportLocalFileWQ export_object_wq(io_ctx_dist, time(NULL), tp, export_dir.get(), force); for (; oi != oi_end; ++oi) { - export_object_wq.queue(new std::string((*oi).first)); + export_object_wq.queue(new std::string((*oi).get_oid())); } export_object_wq.drain(); diff --git a/src/tools/rados/rados_import.cc b/src/tools/rados/rados_import.cc index a6a398d767b..7c06b2963d0 100644 --- a/src/tools/rados/rados_import.cc +++ b/src/tools/rados/rados_import.cc @@ -227,10 +227,10 @@ int do_rados_import(ThreadPool *tp, IoCtx &io_ctx, IoCtxDistributor* io_ctx_dist if (delete_after) { ImportValidateExistingWQ import_val_wq(export_dir.get(), io_ctx_dist, time(NULL), tp); - librados::ObjectIterator oi = io_ctx.objects_begin(); - librados::ObjectIterator oi_end = io_ctx.objects_end(); + librados::NObjectIterator oi = io_ctx.nobjects_begin(); + librados::NObjectIterator oi_end = io_ctx.nobjects_end(); for (; oi != oi_end; ++oi) { - import_val_wq.queue(new std::string((*oi).first)); + import_val_wq.queue(new std::string((*oi).get_oid())); } import_val_wq.drain(); } diff --git a/src/tools/scratchtool.c b/src/tools/scratchtool.c index 9f717903ce2..17d5c6b09b9 100644 --- a/src/tools/scratchtool.c +++ b/src/tools/scratchtool.c @@ -288,12 +288,12 @@ static int testrados(void) /* list objects */ rados_list_ctx_t h; - r = rados_objects_list_open(io_ctx, &h); - printf("rados_list_objects_open = %d, h = %p\n", r, h); + r = rados_nobjects_list_open(io_ctx, &h); + printf("rados_nobjects_list_open = %d, h = %p\n", r, h); const char *poolname; - while (rados_objects_list_next(h, &poolname, NULL) == 0) - printf("rados_list_objects_next got object '%s'\n", poolname); - rados_objects_list_close(h); + while (rados_nobjects_list_next(h, &poolname, NULL, NULL) == 0) + printf("rados_nobjects_list_next got object '%s'\n", poolname); + rados_nobjects_list_close(h); /* stat */ r = rados_ioctx_pool_stat(io_ctx, &st); diff --git a/src/tools/scratchtoolpp.cc b/src/tools/scratchtoolpp.cc index 9dc25c13dcd..03839777e1d 100644 --- a/src/tools/scratchtoolpp.cc +++ b/src/tools/scratchtoolpp.cc @@ -288,8 +288,8 @@ int main(int argc, const char **argv) cout << "iterating over objects..." << std::endl; int num_objs = 0; - for (ObjectIterator iter = io_ctx.objects_begin(); - iter != io_ctx.objects_end(); ++iter) { + for (NObjectIterator iter = io_ctx.nobjects_begin(); + iter != io_ctx.nobjects_end(); ++iter) { num_objs++; cout << "'" << *iter << "'" << std::endl; } diff --git a/src/tracing/librados.tp b/src/tracing/librados.tp index f3189a120c6..ba138e7d330 100644 --- a/src/tracing/librados.tp +++ b/src/tracing/librados.tp @@ -1625,6 +1625,93 @@ TRACEPOINT_EVENT(librados, rados_exec_exit, ) ) +TRACEPOINT_EVENT(librados, rados_nobjects_list_open_enter, + TP_ARGS( + rados_ioctx_t, ioctx), + TP_FIELDS( + ctf_integer_hex(rados_ioctx_t, ioctx, ioctx) + ) +) + +TRACEPOINT_EVENT(librados, rados_nobjects_list_open_exit, + TP_ARGS( + int, retval, + rados_list_ctx_t, listctx), + TP_FIELDS( + ctf_integer(int, retval, retval) + ctf_integer_hex(rados_list_ctx_t, listctx, listctx) + ) +) + +TRACEPOINT_EVENT(librados, rados_nobjects_list_close_enter, + TP_ARGS( + rados_list_ctx_t, listctx), + TP_FIELDS( + ctf_integer_hex(rados_list_ctx_t, listctx, listctx) + ) +) + +TRACEPOINT_EVENT(librados, rados_nobjects_list_close_exit, + TP_ARGS(), + TP_FIELDS() +) + +TRACEPOINT_EVENT(librados, rados_nobjects_list_seek_enter, + TP_ARGS( + rados_list_ctx_t, listctx, + uint32_t, pos), + TP_FIELDS( + ctf_integer_hex(rados_list_ctx_t, listctx, listctx) + ctf_integer(uint32_t, pos, pos) + ) +) + +TRACEPOINT_EVENT(librados, rados_nobjects_list_seek_exit, + TP_ARGS( + uint32_t, retval), + TP_FIELDS( + ctf_integer(uint32_t, retval, retval) + ) +) + +TRACEPOINT_EVENT(librados, rados_nobjects_list_get_pg_hash_position_enter, + TP_ARGS( + rados_list_ctx_t, listctx), + TP_FIELDS( + ctf_integer_hex(rados_list_ctx_t, listctx, listctx) + ) +) + +TRACEPOINT_EVENT(librados, rados_nobjects_list_get_pg_hash_position_exit, + TP_ARGS( + uint32_t, retval), + TP_FIELDS( + ctf_integer(uint32_t, retval, retval) + ) +) + +TRACEPOINT_EVENT(librados, rados_nobjects_list_next_enter, + TP_ARGS( + rados_list_ctx_t, listctx), + TP_FIELDS( + ctf_integer_hex(rados_list_ctx_t, listctx, listctx) + ) +) + +TRACEPOINT_EVENT(librados, rados_nobjects_list_next_exit, + TP_ARGS( + int, retval, + const char*, entry, + char const* const*, key, + char const* const*, nspace), + TP_FIELDS( + ctf_integer(int, retval, retval) + ceph_ctf_string(entry, entry) + ceph_ctf_stringp(key, key) + ceph_ctf_stringp(nspace, nspace) + ) +) + TRACEPOINT_EVENT(librados, rados_objects_list_open_enter, TP_ARGS( rados_ioctx_t, ioctx), |