diff options
799 files changed, 35381 insertions, 8752 deletions
diff --git a/.github/labeler.yml b/.github/labeler.yml index ffc4247b171..77aecc436be 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -11,18 +11,36 @@ documentation: - man/** - "**/*.+(rst|md)" +mon: + - doc/man/8/ceph-mon.rst + - doc/man/8/monmaptool.rst + - doc/mon/** + - qa/workunits/mon/** + - src/mon/** + - src/test/mon/** + +mgr: + - doc/mgr/** + - src/mgr/** + - src/pybind/mgr/ceph_module.pyi + - src/pybind/mgr/mgr_module.py + - src/pybind/mgr/mgr_util.py + - src/pybind/mgr/requirements.txt + - src/pybind/mgr/tox.ini + - src/test/mgr/** + pybind: - - src/pybind/cephfs/* - - src/pybind/mgr/* - - src/pybind/rados/* - - src/pybind/rbd/* - - src/pybind/rgw/* - - src/pybind/* + - src/pybind/cephfs/** + - src/pybind/mgr/** + - src/pybind/rados/** + - src/pybind/rbd/** + - src/pybind/rgw/** + - src/pybind/** common: - - src/common - - src/global - - src/log + - src/common/** + - src/global/** + - src/log/** cephadm: - src/cephadm/** @@ -52,6 +70,9 @@ core: - qa/workunits/mon/** - qa/workunits/objectstore/** - qa/workunits/rados/** + - src/ceph.in + - src/ceph_osd.cc + - src/ceph_mon.cc - src/blk/** - src/crush/* - src/erasure-code/** @@ -63,6 +84,7 @@ core: - src/os/** - src/osd/** - src/tools/rados/** + - src/test/osd/** crimson: - doc/dev/crimson/** @@ -132,8 +154,28 @@ rbd: - src/test/librbd/** - src/test/rbd_mirror/** - src/tools/rbd/** + - src/tools/rbd_ggate/** - src/tools/rbd_mirror/** - src/tools/rbd_nbd/** + - src/tools/rbd_wnbd/** + +rgw: + - qa/suites/rgw/** + - qa/tasks/rgw* + - qa/tasks/s3* + - src/cls/cmpomap/** + - src/cls/fifo/** + - src/cls/otp/** + - src/cls/queue/** + - src/cls/rgw/** + - src/cls/rgw_gc/** + - src/cls/timeindex/** + - src/mrgw.sh + - src/rgw/** + - src/test/cls_rgw/** + - src/test/librgw_* + - src/test/rgw/** + - src/test/test_rgw* ceph-volume: - src/ceph-volume/** @@ -142,3 +184,4 @@ ceph-volume: tests: - qa/tasks/** - qa/workunits/** + - src/test/** diff --git a/.githubmap b/.githubmap index 
c5fd6048fd0..b73d998dc37 100644 --- a/.githubmap +++ b/.githubmap @@ -8,12 +8,14 @@ # # a2batic Kanika Murarka <kmurarka@redhat.com> +aaSharma14 Aashish Sharma <aasharma@redhat.com> aclamk Adam Kupczyk <akucpzyk@redhat.com> ajarr Ramana Raja <rraja@redhat.com> alfonsomthd Alfonso MartÃnez <almartin@redhat.com> alfredodeza Alfredo Deza <adeza@redhat.com> amitkumar50 Amit Kumar <amitkuma@redhat.com> athanatos Samuel Just <sjust@redhat.com> +avanthakkar Avan Thakkar <athakkar@redhat.com> badone Brad Hubbard <bhubbard@redhat.com> bassamtabbara Bassam Tabbara <bassam.tabbara@quantum.com> branch-predictor Piotr DaÅ‚ek <piotr.dalek@corp.ovh.com> @@ -22,6 +24,7 @@ capri1989 Kai Wagner <kwagner@suse.com> cbodley Casey Bodley <cbodley@redhat.com> chardan Jesse Williamson <jwilliamson@suse.de> chhabaramesh Ramesh Chander <Ramesh.Chander@sandisk.com> +CourtneyCCaldwell Courtney Caldwell <ccaldwel@redhat.com> Devp00l Stephan Müller <smueller@suse.com> ddiss David Disseldorp <ddiss@suse.de> dmick Dan Mick <dmick@redhat.com> @@ -83,6 +86,7 @@ yuyuyu101 Haomai Wang <haomai@xsky.com> wido Wido den Hollander <wido@42on.com> rishabh-d-dave Rishabh Dave <ridave@redhat.com> neha-ojha Neha Ojha <nojha@redhat.com> +nizamial09 Nizamudeen A <nia@redhat.com> vshankar Venky Shankar <vshankar@redhat.com> adamemerson Adam C. Emerson <aemerson@redhat.com> myoungwon Myoungwon Oh <myoungwon.oh@samsung.com> @@ -8,6 +8,7 @@ # # Aaron Bassett <abassett@gmail.com> +Aashish Sharma <aasharma@redhat.com> Abhishek Lekshmanan <abhishek@suse.com> <abhishek.lekshmanan@gmail.com> Abhishek Lekshmanan <abhishek@suse.com> <alekshmanan@suse.com> Adam C. Emerson <aemerson@redhat.com> @@ -105,6 +106,7 @@ Clement Lebrun <clement.lebrun.31@gmail.com> <clem_noob@hotmail.fr> Colin P. McCabe <colinm@hq.newdream.net> <cmccabe@alumni.cmu.edu> Colin P. 
McCabe <colinm@hq.newdream.net> <cmccabe@fatty.ops.newdream.net> Colin Walters <walters@redhat.com> <walters@verbum.org> +Courtney Caldwell <ccaldwel@redhat.com> Dai Zhi Wei <daizhiwei3@huawei.com> Dan Chai <tengweicai@gmail.com> <tengweicai@gmail.com> Dan Guo <guodan1@lenovo.com> @@ -437,6 +439,7 @@ Nick Fisk <nick@fisk.me.uk> Nicolas Yong <nicolas.yong93@gmail.com> Ning Yao <yaoning@ruijie.com.cn> <zay11022@gmail.com> Ning Yao <yaoning@unitedstack.com> +Nizamudeen A <nia@redhat.com> Noah Watkins <nwatkins@redhat.com> <jayhawk@cs.ucsc.edu> Noah Watkins <nwatkins@redhat.com> <noahwatkins@gmail.com> Or Friedmann <ofriedma@redhat.com> diff --git a/.organizationmap b/.organizationmap index 79ece9b3d60..4e500665c66 100644 --- a/.organizationmap +++ b/.organizationmap @@ -506,6 +506,7 @@ Qnap <contact@qnap.com> Tim Lin <timlin@qnap.com> Quadrature Capital Limited <info@quadraturecapital.com> Jim Wright <jim@quadraturecapital.com> Quantum Corporation <info@quantum.com> Bassam Tabbara <bassam.tabbara@quantum.com> Raidix International <contact@raidix.com> Marov Aleksey <Marov.A@raidix.com> +Red Hat <contact@redhat.com> Aashish Sharma <aasharma@redhat.com> Red Hat <contact@redhat.com> Adam C. 
Emerson <aemerson@redhat.com> Red Hat <contact@redhat.com> Adam King <adking@redhat.com> Red Hat <contact@redhat.com> Adam King <adking@redhat.com> @@ -533,6 +534,7 @@ Red Hat <contact@redhat.com> Brian Andrus <bandrus@redhat.com> Red Hat <contact@redhat.com> Casey Bodley <cbodley@redhat.com> Red Hat <contact@redhat.com> Cleber Rosa <crosa@redhat.com> Red Hat <contact@redhat.com> Colin Walters <walters@redhat.com> +Red Hat <contact@redhat.com> Courtney Caldwell <ccaldwel@redhat.com> Red Hat <contact@redhat.com> Dan Mick <dmick@redhat.com> Red Hat <contact@redhat.com> Daniel Gryniewicz <dang@redhat.com> Red Hat <contact@redhat.com> Daniel-Pivonka <dpivonka@redhat.com> @@ -593,6 +595,7 @@ Red Hat <contact@redhat.com> Milind Changire <mchangir@redhat.com> Red Hat <contact@redhat.com> Nathan Weinberg <nweinber@redhat.com> Red Hat <contact@redhat.com> Neha Ojha <nojha@redhat.com> Red Hat <contact@redhat.com> Neil Levine <nlevine@redhat.com> +Red Hat <contact@redhat.com> Nizamudeen A <nia@redhat.com> Red Hat <contact@redhat.com> Nilamdyuti Goswami <ngoswami@redhat.com> Red Hat <contact@redhat.com> Noah Watkins <nwatkins@redhat.com> Red Hat <contact@redhat.com> Or Friedmann <ofriedma@redhat.com> diff --git a/CMakeLists.txt b/CMakeLists.txt index a5c560fee6d..65a81c9e100 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,8 @@ foreach(policy CMP0056 CMP0065 CMP0074 - CMP0075) + CMP0075 + CMP0093) if(POLICY ${policy}) cmake_policy(SET ${policy} NEW) endif() @@ -193,6 +194,15 @@ if(WITH_BLUESTORE) endif() endif() +# libcryptsetup is only available on linux +if(WITH_RBD AND LINUX) + find_package(libcryptsetup REQUIRED) + set(HAVE_LIBCRYPTSETUP ${LIBCRYPTSETUP_FOUND}) + if(${LIBCRYPTSETUP_VERSION} VERSION_LESS 2.0.5) + set(LIBCRYPTSETUP_LEGACY_DATA_ALIGNMENT TRUE) + endif() +endif() + include(CMakeDependentOption) CMAKE_DEPENDENT_OPTION(WITH_ZBD "Enable libzbd bluestore backend" OFF @@ -616,7 +626,7 @@ set(WITH_MGR_ROOK_CLIENT WITH_MGR_DASHBOARD_FRONTEND) 
include_directories(SYSTEM ${PROJECT_BINARY_DIR}/include) find_package(Threads REQUIRED) -find_package(StdFilesystem) +find_package(StdFilesystem REQUIRED) option(WITH_SELINUX "build SELinux policy" OFF) if(WITH_SELINUX) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 6a89e73fbdd..fb832c0fb77 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -1,4 +1,4 @@ -For the general process of submitting patches to ceph, read the below +For the general process of submitting patches to Ceph, read the below `Submitting Patches`_ @@ -9,10 +9,10 @@ For documentation patches the following guide will help you get started Performance enhancements must come with test data and detailed explanations. -Code cleanup is appreciated along with a patch that fixes a bug or -implements a feature. Except on rare occasions, code cleanup that only -involve coding style or whitespace modifications are discouraged, -primarily because they cause problems when rebasing and backporting. +Code cleanup is appreciated, as are patches that fix bugs or +implement features. Except on rare occasions, code cleanup that only +relates to coding style or modifies whitespace is discouraged, +primarily because it can cause problems when rebasing and backporting. .. _Submitting Patches: SubmittingPatches.rst .. _Documenting Ceph: doc/start/documenting-ceph.rst diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 4684d9d03d3..3db77906e1f 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -1,5 +1,9 @@ >=16.0.0 -------- +* $pid expansion in config paths like `admin_socket` will now properly expand + to the daemon pid for commands like `ceph-mds` or `ceph-osd`. Previously only + `ceph-fuse`/`rbd-nbd` expanded `$pid` with the actual daemon pid. + * The allowable options for some "radosgw-admin" commands have been changed. 
* "mdlog-list", "datalog-list", "sync-error-list" no longer accepts @@ -33,6 +37,8 @@ * MGR: progress module can now be turned on/off, using the commands: ``ceph progress on`` and ``ceph progress off``. +* An AWS-compliant API: "GetTopicAttributes" was added to replace the existing "GetTopic" API. The new API + should be used to fetch information about topics used for bucket notifications. >=15.0.0 -------- diff --git a/admin/build-doc b/admin/build-doc index 979bba4b85e..98614b7db4e 100755 --- a/admin/build-doc +++ b/admin/build-doc @@ -84,37 +84,7 @@ $vdir/bin/python $TOPDIR/doc/scripts/gen_mon_command_api.py > $TOPDIR/doc/api/mo # --global-option=build_ext --global-option="--cython-include-dirs $TOPDIR/src/pybind/rados/" # but that doesn't work, so copying the file in the rbd module directly, that's ok for docs for bind in rados rbd cephfs rgw; do - if [ ${bind} != rados ]; then - cp -f $TOPDIR/src/pybind/rados/rados.pxd $TOPDIR/src/pybind/${bind}/ - fi - ln -sf lib${bind}.so.1 $vdir/lib/lib${bind}.so - gcc -shared -o $vdir/lib/lib${bind}.so.1 -xc /dev/null - ld_flags="-Wl,-rpath,$vdir/lib" - if [ $(uname) != Darwin ]; then - ld_flags="${ld_flags},--no-as-needed" - fi - BUILD_DOC=1 \ - CFLAGS="-iquote$TOPDIR/src/include" \ - CPPFLAGS="-iquote$TOPDIR/src/include" \ - LDFLAGS="-L$vdir/lib ${ld_flags}" \ - $vdir/bin/pip install --upgrade $TOPDIR/src/pybind/${bind} - # rgwfile_version(), librgw_create(), rgw_mount() - # since py3.5, distutils adds postfix in between ${bind} and so - lib_fn=$vdir/lib/python*/*-packages/${bind}.*.so - if [ ! 
-e $lib_fn ]; then - lib_fn=$vdir/lib/python*/*-packages/${bind}.so - fi - if [ ${bind} = "cephfs" ]; then - func_prefix="ceph" - else - func_prefix="(lib)?${bind}" - fi - nm $lib_fn | grep -E "U (_)?${func_prefix}" | \ - awk '{ gsub(/^_/,"",$2); print "void "$2"(void) {}" }' | \ - gcc -shared -o $vdir/lib/lib${bind}.so.1 -xc - - if [ ${bind} != rados ]; then - rm -f $TOPDIR/src/pybind/${bind}/rados.pxd - fi + BUILD_DOC=1 $vdir/bin/pip install --upgrade $TOPDIR/src/pybind/${bind} done if [ -z "$@" ]; then @@ -139,24 +109,6 @@ for target in $sphinx_targets; do done -# build the releases.json. this reads in the yaml version and dumps -# out the json representation of the same file. the resulting releases.json -# should be served from the root of hosted site. -$vdir/bin/python << EOF > $TOPDIR/build-doc/output/html/releases.json -from __future__ import print_function -import datetime -import json -import yaml - -def json_serialize(obj): - if isinstance(obj, datetime.date): - return obj.isoformat() - -with open("$TOPDIR/doc/releases/releases.yml", 'r') as fp: - releases = yaml.safe_load(fp) - print(json.dumps(releases, indent=2, default=json_serialize)) -EOF - # # Build and install JavaDocs # diff --git a/admin/doc-read-the-docs.txt b/admin/doc-read-the-docs.txt index b65cc463849..c66a08d9a87 100644 --- a/admin/doc-read-the-docs.txt +++ b/admin/doc-read-the-docs.txt @@ -1,2 +1,7 @@ plantweb git+https://github.com/readthedocs/readthedocs-sphinx-search@master +Cython +src/pybind/rados +src/pybind/cephfs +src/pybind/rbd +src/pybind/rgw diff --git a/ceph.spec.in b/ceph.spec.in index 81258139d1a..9ac67112f48 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -152,6 +152,7 @@ BuildRequires: gperftools-devel >= 2.4 BuildRequires: leveldb-devel > 1.2 BuildRequires: libaio-devel BuildRequires: libblkid-devel >= 2.17 +BuildRequires: cryptsetup-devel BuildRequires: libcurl-devel BuildRequires: libcap-ng-devel BuildRequires: fmt-devel >= 5.2.1 diff --git 
a/cmake/modules/Builduring.cmake b/cmake/modules/Builduring.cmake index 3cd64d7cbf7..328aa5ac79f 100644 --- a/cmake/modules/Builduring.cmake +++ b/cmake/modules/Builduring.cmake @@ -8,11 +8,10 @@ function(build_uring) else() set(source_dir_args SOURCE_DIR ${CMAKE_BINARY_DIR}/src/liburing - GIT_REPOSITORY https://git.kernel.dk/liburing + GIT_REPOSITORY https://github.com/axboe/liburing.git GIT_TAG "liburing-0.7" GIT_SHALLOW TRUE - GIT_CONFIG advice.detachedHead=false - UPDATE_DISCONNECTED TRUE) + GIT_CONFIG advice.detachedHead=false) endif() include(ExternalProject) @@ -22,7 +21,8 @@ function(build_uring) BUILD_COMMAND env CC=${CMAKE_C_COMPILER} "CFLAGS=${CMAKE_C_FLAGS} -fPIC" ${make_cmd} -C src -s BUILD_IN_SOURCE 1 BUILD_BYPRODUCTS "<SOURCE_DIR>/src/liburing.a" - INSTALL_COMMAND "") + INSTALL_COMMAND "" + UPDATE_COMMAND "") unset(make_cmd) ExternalProject_Get_Property(liburing_ext source_dir) diff --git a/cmake/modules/FindStdFilesystem.cmake b/cmake/modules/FindStdFilesystem.cmake index 8a1ec4264ae..5d3336571ce 100644 --- a/cmake/modules/FindStdFilesystem.cmake +++ b/cmake/modules/FindStdFilesystem.cmake @@ -1,43 +1,57 @@ set(_std_filesystem_test_src ${CMAKE_CURRENT_LIST_DIR}/FindStdFilesystem_test.cc) -macro(try_std_filesystem_library _library _result) +macro(try_std_filesystem_library _library _result _already_included) set(_std_filesystem_try_compile_arg CXX_STANDARD 17) + if(NOT _library STREQUAL "") + list(APPEND _std_filesystem_try_compile_arg + LINK_LIBRARIES ${_library}) + endif() try_compile(_std_filesystem_compiles ${CMAKE_CURRENT_BINARY_DIR} SOURCES ${_std_filesystem_test_src} - LINK_LIBRARIES ${_library} ${_std_filesystem_try_compile_arg}) unset(_std_filesystem_try_compile_arg) if(_std_filesystem_compiles) - set(${_result} ${_library}) + if(NOT _library STREQUAL "") + set(${_result} ${_library}) + else() + set(${_already_included} "included by standard library") + endif() endif() unset(_std_filesystem_compiles) endmacro() - -if(NOT 
StdFilesystem_LIBRARY) - try_std_filesystem_library("stdc++fs" StdFilesystem_LIBRARY) -endif() -if(NOT StdFilesystem_LIBRARY) - try_std_filesystem_library("c++experimental" StdFilesystem_LIBRARY) -endif() -if(NOT StdFilesystem_LIBRARY) - try_std_filesystem_library("c++fs" StdFilesystem_LIBRARY) -endif() +set(_std_filesystem_required_var "StdFilesystem_LIBRARY") +set(_std_filesystem_already_included FALSE) +foreach(library + "" + "stdc++fs" + "c++experimental" + "c++fs") + try_std_filesystem_library("${library}" StdFilesystem_LIBRARY _std_filesystem_already_included) + if(_std_filesystem_already_included) + set(_std_filesystem_required_var "_std_filesystem_already_included") + break() + elseif(StdFilesystem_LIBRARY) + break() + endif() +endforeach() unset(_std_filesystem_test_src) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(StdFilesystem FOUND_VAR StdFilesystem_FOUND - REQUIRED_VARS StdFilesystem_LIBRARY) + REQUIRED_VARS ${_std_filesystem_required_var}) mark_as_advanced(StdFilesystem_LIBRARY) if(StdFilesystem_FOUND AND NOT (TARGET StdFilesystem::filesystem)) add_library(StdFilesystem::filesystem INTERFACE IMPORTED) - set_target_properties(StdFilesystem::filesystem PROPERTIES + if(StdFilesystem_LIBRARY) + set_target_properties(StdFilesystem::filesystem PROPERTIES INTERFACE_LINK_LIBRARIES ${StdFilesystem_LIBRARY}) + endif() endif() diff --git a/cmake/modules/Findlibcryptsetup.cmake b/cmake/modules/Findlibcryptsetup.cmake new file mode 100644 index 00000000000..f0bdd864ef1 --- /dev/null +++ b/cmake/modules/Findlibcryptsetup.cmake @@ -0,0 +1,33 @@ +# - Find libcryptsetup +# Sets the following: +# +# LIBCRYPTSETUP_INCLUDE_DIR +# LIBCRYPTSETUP_LIBRARIES +# LIBCRYPTSETUP_VERSION +# LIBCRYPTSETUP_FOUND + +find_package(PkgConfig QUIET REQUIRED) +pkg_search_module(PC_libcryptsetup libcryptsetup) + +find_path(LIBCRYPTSETUP_INCLUDE_DIR + NAMES libcryptsetup.h + PATHS ${PC_libcryptsetup_INCLUDE_DIRS}) + +find_library(LIBCRYPTSETUP_LIBRARIES + NAMES 
libcryptsetup.so + PATHS ${PC_libcryptsetup_LIBRARY_DIRS}) + +set(LIBCRYPTSETUP_VERSION ${PC_libcryptsetup_VERSION}) + +include(FindPackageHandleStandardArgs) + +find_package_handle_standard_args(libcryptsetup + REQUIRED_VARS + LIBCRYPTSETUP_INCLUDE_DIR + LIBCRYPTSETUP_LIBRARIES + VERSION_VAR LIBCRYPTSETUP_VERSION) + +mark_as_advanced( + LIBCRYPTSETUP_LIBRARIES + LIBCRYPTSETUP_INCLUDE_DIR + LIBCRYPTSETUP_VERSION) diff --git a/debian/control b/debian/control index 13471e67c0e..4a89172aff9 100644 --- a/debian/control +++ b/debian/control @@ -29,10 +29,12 @@ Build-Depends: cmake (>= 3.10.2), libblkid-dev (>= 2.17), # Crimson libc-ares-dev, # Crimson libcrypto++-dev, + libcryptsetup-dev, libcap-ng-dev, libcunit1-dev, libcurl4-openssl-dev, libexpat1-dev, +# Make-Check libffi-dev [!amd64], libfuse-dev, libgoogle-perftools-dev [i386 amd64 arm64], # Crimson libgnutls28-dev, diff --git a/doc/_ext/edit_on_github.py b/doc/_ext/edit_on_github.py deleted file mode 100644 index 290f4b4244d..00000000000 --- a/doc/_ext/edit_on_github.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Adapted from https://gist.github.com/mgedmin/6052926 - -Sphinx extension to add ReadTheDocs-style "Edit on GitHub" links to the -sidebar. 
- -Loosely based on https://github.com/astropy/astropy/pull/347 -""" - -import os -import warnings - - -__licence__ = 'BSD (3 clause)' - - -def get_github_url(app, view, path): - return 'https://github.com/{project}/{view}/{branch}/doc/{path}'.format( - project=app.config.edit_on_github_project, - view=view, - branch=app.config.edit_on_github_branch, - path=path) - - -def html_page_context(app, pagename, templatename, context, doctree): - if templatename != 'page.html': - return - - if not app.config.edit_on_github_project: - warnings.warn("edit_on_github_project not specified") - return - - path = os.path.relpath(doctree.get('source'), app.builder.srcdir) - show_url = get_github_url(app, 'blob', path) - edit_url = get_github_url(app, 'edit', path) - - context['show_on_github_url'] = show_url - context['edit_on_github_url'] = edit_url - -def setup(app): - app.add_config_value('edit_on_github_project', '', True) - app.add_config_value('edit_on_github_branch', 'master', True) - app.connect('html-page-context', html_page_context) diff --git a/doc/_static/js/ceph.js b/doc/_static/js/ceph.js deleted file mode 100644 index 6a3412f16d5..00000000000 --- a/doc/_static/js/ceph.js +++ /dev/null @@ -1,43 +0,0 @@ -$(function() { - var releases_url = DOCUMENTATION_OPTIONS.URL_ROOT + 'releases.json'; - - function show_edit(branch, data) { - if (branch) { - if (branch === "master") { - $("#dev-warning").show(); - return true; - } - if (data && data.releases && branch in data.releases) { - var eol = ("actual_eol" in data.releases[branch]); - if (eol) { - $("#eol-warning").show(); - } - return !eol; - } - } - $("#dev-warning").show(); - return false; - } - - function get_branch() { - var url = window.location.href; - var res = url.match(/docs.ceph.com\/docs\/([a-z]+)\/?/i) - if (res) { - return res[1] - } - return null; - } - - $.getJSON(releases_url, function(data) { - var branch = get_branch(); - if (show_edit(branch, data)) { - // patch the edit-on-github URL for correct branch - 
var url = $("#edit-on-github").attr("href"); - if (url) { - url = url.replace("master", branch); - $("#edit-on-github").attr("href", url); - } - $("#docubetter").show(); - } - }); -}); diff --git a/doc/_templates/page.html b/doc/_templates/page.html index cfcf309b5d7..3923975d78f 100644 --- a/doc/_templates/page.html +++ b/doc/_templates/page.html @@ -1,19 +1,23 @@ {% extends "!page.html" %} {% block body %} -<div id="dev-warning" class="admonition note" style="display:none;"> +{%- if release == 'dev' %} +<div id="dev-warning" class="admonition note"> <p class="first admonition-title">Notice</p> <p class="last">This document is for a development version of Ceph.</p> </div> +{%- endif %} -<div id="eol-warning" class="admonition warning" style="display:none;"> +{%- if is_release_eol %} +<div id="eol-warning" class="admonition warning"> <p class="first admonition-title">Warning</p> <p class="last">This document is for an unsupported version of Ceph.</p> </div> +{%- endif %} -{%- if edit_on_github_url %} - <div id="docubetter" align="right" style="display:none; padding: 15px; font-weight: bold;"> - <a id="edit-on-github" href="{{ edit_on_github_url }}" rel="nofollow">{{ _('Edit on GitHub')}}</a> | <a href="https://pad.ceph.com/p/Report_Documentation_Bugs">Report a Documentation Bug</a> +{%- if not is_release_eol %} + <div id="docubetter" align="right" style="padding: 15px; font-weight: bold;"> + <a href="https://pad.ceph.com/p/Report_Documentation_Bugs">Report a Documentation Bug</a> </div> {%- endif %} diff --git a/doc/architecture.rst b/doc/architecture.rst index 805dd84b6aa..33558c0a877 100644 --- a/doc/architecture.rst +++ b/doc/architecture.rst @@ -22,16 +22,18 @@ Ceph provides an infinitely scalable :term:`Ceph Storage Cluster` based upon about in `RADOS - A Scalable, Reliable Storage Service for Petabyte-scale Storage Clusters`_. 
-A Ceph Storage Cluster consists of two types of daemons: +A Ceph Storage Cluster consists of multiple types of daemons: - :term:`Ceph Monitor` - :term:`Ceph OSD Daemon` +- :term:`Ceph Manager` +- :term:`Ceph Metadata Server` .. ditaa:: - +---------------+ +---------------+ - | OSDs | | Monitors | - +---------------+ +---------------+ + +---------------+ +---------------+ +---------------+ +---------------+ + | OSDs | | Monitors | | Managers | | MDS | + +---------------+ +---------------+ +---------------+ +---------------+ A Ceph Monitor maintains a master copy of the cluster map. A cluster of Ceph monitors ensures high availability should a monitor daemon fail. Storage cluster @@ -40,9 +42,15 @@ clients retrieve a copy of the cluster map from the Ceph Monitor. A Ceph OSD Daemon checks its own state and the state of other OSDs and reports back to monitors. +A Ceph Manager acts as an endpoint for monitoring, orchestration, and plug-in +modules. + +A Ceph Metadata Server (MDS) manages file metadata when CephFS is used to +provide file services. + Storage cluster clients and each :term:`Ceph OSD Daemon` use the CRUSH algorithm to efficiently compute information about data location, instead of having to -depend on a central lookup table. Ceph's high-level features include providing a +depend on a central lookup table. Ceph's high-level features include a native interface to the Ceph Storage Cluster via ``librados``, and a number of service interfaces built on top of ``librados``. @@ -54,9 +62,12 @@ Storing Data The Ceph Storage Cluster receives data from :term:`Ceph Clients`--whether it comes through a :term:`Ceph Block Device`, :term:`Ceph Object Storage`, the :term:`Ceph File System` or a custom implementation you create using -``librados``--and it stores the data as objects. Each object corresponds to a -file in a filesystem, which is stored on an :term:`Object Storage Device`. Ceph -OSD Daemons handle the read/write operations on the storage disks. 
+``librados``-- which is stored as RADOS objects. Each object is stored on an +:term:`Object Storage Device`. Ceph OSD Daemons handle read, write, and +replication operations on storage drives. With the older Filestore back end, +each RADOS object was stored as a separate file on a conventional filesystem +(usually XFS). With the new and default BlueStore back end, objects are +stored in a monolithic database-like fashion. .. ditaa:: @@ -64,9 +75,9 @@ OSD Daemons handle the read/write operations on the storage disks. | obj |------>| {d} |------>| {s} | \-----/ +-----+ +-----+ - Object File Disk + Object OSD Drive -Ceph OSD Daemons store all data as objects in a flat namespace (e.g., no +Ceph OSD Daemons store data as objects in a flat namespace (e.g., no hierarchy of directories). An object has an identifier, binary data, and metadata consisting of a set of name/value pairs. The semantics are completely up to :term:`Ceph Clients`. For example, CephFS uses metadata to store file @@ -657,13 +668,14 @@ new OSD after rebalancing is complete. Data Consistency ~~~~~~~~~~~~~~~~ -As part of maintaining data consistency and cleanliness, Ceph OSDs can also -scrub objects within placement groups. That is, Ceph OSDs can compare object -metadata in one placement group with its replicas in placement groups stored in -other OSDs. Scrubbing (usually performed daily) catches OSD bugs or filesystem -errors. OSDs can also perform deeper scrubbing by comparing data in objects -bit-for-bit. Deep scrubbing (usually performed weekly) finds bad sectors on a -disk that weren't apparent in a light scrub. +As part of maintaining data consistency and cleanliness, Ceph OSDs also scrub +objects within placement groups. That is, Ceph OSDs compare object metadata in +one placement group with its replicas in placement groups stored in other +OSDs. Scrubbing (usually performed daily) catches OSD bugs or filesystem +errors, often as a result of hardware issues. 
OSDs also perform deeper +scrubbing by comparing data in objects bit-for-bit. Deep scrubbing (by default +performed weekly) finds bad blocks on a drive that weren't apparent in a light +scrub. See `Data Scrubbing`_ for details on configuring scrubbing. @@ -681,7 +693,7 @@ An erasure coded pool stores each object as ``K+M`` chunks. It is divided into of ``K+M`` so that each chunk is stored in an OSD in the acting set. The rank of the chunk is stored as an attribute of the object. -For instance an erasure coded pool is created to use five OSDs (``K+M = 5``) and +For instance an erasure coded pool can be created to use five OSDs (``K+M = 5``) and sustain the loss of two of them (``M = 2``). Reading and Writing Encoded Chunks @@ -863,8 +875,8 @@ instructing it to write the chunk, it also creates a new entry in the placement group logs to reflect the change. For instance, as soon as **OSD 3** stores ``C1v2``, it adds the entry ``1,2`` ( i.e. epoch 1, version 2 ) to its logs. Because the OSDs work asynchronously, some chunks may still be in flight ( such -as ``D2v2`` ) while others are acknowledged and on disk ( such as ``C1v1`` and -``D1v1``). +as ``D2v2`` ) while others are acknowledged and persisted to storage drives +(such as ``C1v1`` and ``D1v1``). .. ditaa:: @@ -1117,7 +1129,8 @@ to Ceph clients. +---------------+ Slower I/O -See `Cache Tiering`_ for additional details. +See `Cache Tiering`_ for additional details. Note that Cache Tiers can be +tricky and their use is now discouraged. .. index:: Extensibility, Ceph Classes @@ -1333,7 +1346,7 @@ improvements by striping client data over multiple objects within an object set. Significant write performance occurs when the client writes the stripe units to their corresponding objects in parallel. Since objects get mapped to different placement groups and further mapped to different OSDs, each write occurs in -parallel at the maximum write speed. 
A write to a single disk would be limited +parallel at the maximum write speed. A write to a single drive would be limited by the head movement (e.g. 6ms per seek) and bandwidth of that one device (e.g. 100MB/s). By spreading that write over multiple objects (which map to different placement groups and OSDs) Ceph can reduce the number of seeks per drive and @@ -1437,7 +1450,7 @@ Three important variables determine how Ceph stripes data: Once the Ceph Client has striped data to stripe units and mapped the stripe units to objects, Ceph's CRUSH algorithm maps the objects to placement groups, and the placement groups to Ceph OSD Daemons before the objects are stored as -files on a storage disk. +files on a storage drive. .. note:: Since a client writes to a single pool, all data striped into objects get mapped to placement groups in the same pool. So they use the same CRUSH @@ -1606,7 +1619,6 @@ instance for high availability. - .. _RADOS - A Scalable, Reliable Storage Service for Petabyte-scale Storage Clusters: https://ceph.com/wp-content/uploads/2016/08/weil-rados-pdsw07.pdf .. _Paxos: https://en.wikipedia.org/wiki/Paxos_(computer_science) .. _Monitor Config Reference: ../rados/configuration/mon-config-ref diff --git a/doc/cephadm/monitoring.rst b/doc/cephadm/monitoring.rst index 6d4f21da1ae..de3dc39968a 100644 --- a/doc/cephadm/monitoring.rst +++ b/doc/cephadm/monitoring.rst @@ -15,7 +15,7 @@ metrics on cluster utilization and performance. Ceph users have three options: Ceph is running in Kubernetes with Rook). #. Skip the monitoring stack completely. Some Ceph dashboard graphs will not be available. 
- + The monitoring stack consists of `Prometheus <https://prometheus.io/>`_, Prometheus exporters (:ref:`mgr-prometheus`, `Node exporter <https://prometheus.io/docs/guides/node-exporter/>`_), `Prometheus Alert @@ -93,6 +93,37 @@ completed, you should see something like this from ``ceph orch ls`` node-exporter 2/2 6s ago docker.io/prom/node-exporter:latest e5a616e4b9cf present prometheus 1/1 6s ago docker.io/prom/prometheus:latest e935122ab143 present +Configuring SSL/TLS for Grafana +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``cephadm`` will deploy Grafana using the certificate defined in the ceph +key/value store. If a certificate is not specified, ``cephadm`` will generate a +self-signed certificate during deployment of the Grafana service. + +A custom certificate can be configured using the following commands. + +.. code-block:: bash + + ceph config-key set mgr/cephadm/grafana_key -i $PWD/key.pem + ceph config-key set mgr/cephadm/grafana_crt -i $PWD/certificate.pem + +The ``cephadm`` manager module needs to be restarted to be able to read updates +to these keys. + +.. code-block:: bash + + ceph orch restart mgr + +If you already deployed Grafana, you need to redeploy the service for the +configuration to be updated. + +.. code-block:: bash + + ceph orch redeploy grafana + +The ``redeploy`` command also takes care of setting the right URL for Ceph +Dashboard. + Using custom images ~~~~~~~~~~~~~~~~~~~ @@ -125,7 +156,7 @@ For example you have set the custom image for automatically. You will need to manually update the configuration (image name and tag) to be able to install updates. - + If you choose to go with the recommendations instead, you can reset the custom image you have set before. After that, the default value will be used again. 
Use ``ceph config rm`` to reset the configuration option @@ -140,6 +171,86 @@ For example ceph config rm mgr mgr/cephadm/container_image_prometheus +Using custom configuration files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By overriding cephadm templates, it is possible to completely customize the +configuration files for monitoring services. + +Internally, cephadm already uses `Jinja2 +<https://jinja.palletsprojects.com/en/2.11.x/>`_ templates to generate the +configuration files for all monitoring components. To be able to customize the +configuration of Prometheus, Grafana or the Alertmanager it is possible to store +a Jinja2 template for each service that will be used for configuration +generation instead. This template will be evaluated every time a service of that +kind is deployed or reconfigured. That way, the custom configuration is +preserved and automatically applied on future deployments of these services. + +.. note:: + + The configuration of the custom template is also preserved when the default + configuration of cephadm changes. If the updated configuration is to be used, + the custom template needs to be migrated *manually*. + +Option names +"""""""""""" + +The following templates for files that will be generated by cephadm can be +overridden. These are the names to be used when storing with ``ceph config-key +set``: + +- ``alertmanager_alertmanager.yml`` +- ``grafana_ceph-dashboard.yml`` +- ``grafana_grafana.ini`` +- ``prometheus_prometheus.yml`` + +You can look up the file templates that are currently used by cephadm in +``src/pybind/mgr/cephadm/templates``: + +- ``services/alertmanager/alertmanager.yml.j2`` +- ``services/grafana/ceph-dashboard.yml.j2`` +- ``services/grafana/grafana.ini.j2`` +- ``services/prometheus/prometheus.yml.j2`` + +Usage +""""" + +The following command applies a single line value: + +.. 
code-block:: bash + + ceph config-key set mgr/cephadm/<option_name> <value> + +To set contents of files as template use the ``-i`` argument: + +.. code-block:: bash + + ceph config-key set mgr/cephadm/<option_name> -i $PWD/<filename> + +.. note:: + + When using files as input to ``config-key`` an absolute path to the file must + be used. + +It is required to restart the cephadm mgr module after a configuration option +has been set. Then the configuration file for the service needs to be recreated. +This is done using `redeploy`. For more details see the following example. + +Example +""""""" + +.. code-block:: bash + + # set the contents of ./prometheus.yml.j2 as template + ceph config-key set mgr/cephadm/services_prometheus_prometheus.yml \ + -i $PWD/prometheus.yml.j2 + + # restart cephadm mgr module + ceph orch restart mgr + + # redeploy the prometheus service + ceph orch redeploy prometheus + Disabling monitoring -------------------- diff --git a/doc/cephfs/mds-config-ref.rst b/doc/cephfs/mds-config-ref.rst index 0df15275bfc..36b3e42c2e0 100644 --- a/doc/cephfs/mds-config-ref.rst +++ b/doc/cephfs/mds-config-ref.rst @@ -39,10 +39,10 @@ ``mds dir max commit size`` :Description: The maximum size of a directory update before Ceph breaks it into - smaller transactions) (MB). + smaller transactions (MB). :Type: 32-bit Integer -:Default: ``90`` +:Default: ``10`` ``mds decay halflife`` diff --git a/doc/conf.py b/doc/conf.py index 74ee9656233..cbc05b1ba16 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,20 +1,75 @@ import fileinput +import os import shutil import sys -import os -project = u'Ceph' -copyright = u'2016, Ceph authors and contributors. 
Licensed under Creative Commons Attribution Share Alike 3.0 (CC-BY-SA-3.0)' -version = 'dev' -release = 'dev' +import yaml + + +top_level = \ + os.path.dirname( + os.path.dirname( + os.path.abspath(__file__))) + + +def parse_ceph_release(): + with open(os.path.join(top_level, 'src/ceph_release')) as f: + lines = f.readlines() + assert(len(lines) == 3) + # 16, pacific, dev + version, codename, status = [line.strip() for line in lines] + return version, codename, status + +def latest_stable_release(): + with open(os.path.join(top_level, 'doc/releases/releases.yml')) as input: + releases = yaml.safe_load(input)['releases'] + # get the first release + return next(iter(releases.keys())) + + +def is_release_eol(codename): + with open(os.path.join(top_level, 'doc/releases/releases.yml')) as input: + releases = yaml.safe_load(input)['releases'] + return 'actual_eol' in releases.get(codename, {}) + + +# project information +project = 'Ceph' +copyright = ('2016, Ceph authors and contributors. ' + 'Licensed under Creative Commons Attribution Share Alike 3.0 ' + '(CC-BY-SA-3.0)') +version, codename, release = parse_ceph_release() +pygments_style = 'sphinx' + +# HTML output options +html_theme = 'ceph' +html_theme_path = ['_themes'] +html_title = "Ceph Documentation" +html_logo = 'logo.png' +html_context = {'is_release_eol': is_release_eol(codename)} +html_favicon = 'favicon.ico' +html_show_sphinx = False +html_static_path = ["_static"] +html_sidebars = { + '**': ['smarttoc.html', 'searchbox.html'] + } + +html_css_files = ['css/custom.css'] + +# general configuration templates_path = ['_templates'] source_suffix = '.rst' -master_doc = 'index' -exclude_patterns = ['**/.#*', '**/*~', 'start/quick-common.rst', '**/*.inc.rst'] -if tags.has('man'): +exclude_patterns = ['**/.#*', + '**/*~', + 'start/quick-common.rst', + '**/*.inc.rst'] +if tags.has('man'): # noqa: F821 master_doc = 'man_index' - exclude_patterns += ['index.rst', 'architecture.rst', 'glossary.rst', 'release*.rst', + 
exclude_patterns += ['index.rst', + 'architecture.rst', + 'glossary.rst', + 'release*.rst', 'api/*', 'cephadm/*', 'cephfs/*', @@ -31,23 +86,11 @@ if tags.has('man'): 'start/*', 'releases/*'] else: + master_doc = 'index' exclude_patterns += ['man_index.rst'] -pygments_style = 'sphinx' +build_with_rtd = os.environ.get('READTHEDOCS') == 'True' -html_theme = 'ceph' -html_theme_path = ['_themes'] -html_title = "Ceph Documentation" -html_logo = 'logo.png' -html_favicon = 'favicon.ico' -html_show_sphinx = False -html_static_path = ["_static"] -html_sidebars = { - '**': ['smarttoc.html', 'searchbox.html'], - } -html_css_files = [ - 'css/custom.css', -] sys.path.insert(0, os.path.abspath('_ext')) extensions = [ @@ -59,7 +102,6 @@ extensions = [ 'sphinx_autodoc_typehints', 'sphinx_substitution_extensions', 'breathe', - 'edit_on_github', 'ceph_releases', 'sphinxcontrib.openapi' ] @@ -73,25 +115,18 @@ else: 'engine': 'ditaa' } -build_with_rtd = os.environ.get('READTHEDOCS') == 'True' if build_with_rtd: extensions += ['sphinx_search.extension'] -# sphinx.ext.todo +# sphinx.ext.todo options todo_include_todos = True -# sphinx_substitution_extensions -# TODO: read from doc/releases/releases.yml -rst_prolog = """ -.. |stable-release| replace:: octopus +# sphinx_substitution_extensions options +rst_prolog = f""" +.. 
|stable-release| replace:: {latest_stable_release()} """ -top_level = os.path.dirname( - os.path.dirname( - os.path.abspath(__file__) - ) -) - +# breath options breathe_default_project = "Ceph" # see $(top_srcdir)/Doxyfile @@ -101,13 +136,16 @@ breathe_projects_source = { "Ceph": (os.path.join(top_level, "src/include/rados"), ["rados_types.h", "librados.h"]) } -breathe_domain_by_extension = {'py': 'py', 'c': 'c', 'h': 'c', 'cc': 'cxx', 'hpp': 'cxx'} +breathe_domain_by_extension = {'py': 'py', + 'c': 'c', 'h': 'c', + 'cc': 'cxx', 'hpp': 'cxx'} breathe_doxygen_config_options = { 'EXPAND_ONLY_PREDEF': 'YES', 'MACRO_EXPANSION': 'YES', 'PREDEFINED': 'CEPH_RADOS_API= ' } +# edit_on_github options # the docs are rendered with github links pointing to master. the javascript # snippet in _static/ceph.js rewrites the edit links when a page is loaded, to # point to the correct branch. @@ -133,16 +171,6 @@ def generate_state_diagram(input_paths, output_path): return process -# handles edit-on-github and old version warning display -def setup(app): - app.add_js_file('js/ceph.js') - if ditaa is None: - # add "ditaa" as an alias of "diagram" - from plantweb.directive import DiagramDirective - app.add_directive('ditaa', DiagramDirective) - app.connect('builder-inited', generate_state_diagram(['src/osd/PeeringState.h', - 'src/osd/PeeringState.cc'], - 'doc/dev/peering_graph.generated.dot')) # mocking ceph_module offered by ceph-mgr. 
`ceph_module` is required by # mgr.mgr_module @@ -150,8 +178,10 @@ class Dummy(object): def __getattr__(self, _): return lambda *args, **kwargs: None + class Mock(object): __all__ = [] + def __init__(self, *args, **kwargs): pass @@ -164,15 +194,12 @@ class Mock(object): mock.__module__ = __name__ return mock + +# autodoc options sys.modules['ceph_module'] = Mock() if build_with_rtd: - exclude_patterns += ['**/api/*', - '**/api.rst'] - autodoc_mock_imports = ['cephfs', - 'rados', - 'rbd', - 'ceph'] + autodoc_mock_imports = ['ceph'] pybinds = ['pybind/mgr', 'python-common'] else: @@ -184,3 +211,16 @@ for c in pybinds: pybind = os.path.join(top_level, 'src', c) if pybind not in sys.path: sys.path.insert(0, pybind) + + +# handles edit-on-github and old version warning display +def setup(app): + app.add_js_file('js/ceph.js') + if ditaa is None: + # add "ditaa" as an alias of "diagram" + from plantweb.directive import DiagramDirective + app.add_directive('ditaa', DiagramDirective) + app.connect('builder-inited', + generate_state_diagram(['src/osd/PeeringState.h', + 'src/osd/PeeringState.cc'], + 'doc/dev/peering_graph.generated.dot')) diff --git a/doc/dev/cephadm/cephadm-exporter.rst b/doc/dev/cephadm/cephadm-exporter.rst new file mode 100644 index 00000000000..bc41fcaeb10 --- /dev/null +++ b/doc/dev/cephadm/cephadm-exporter.rst @@ -0,0 +1,306 @@ +================ +cephadm Exporter +================ + +There are a number of long running tasks that the cephadm 'binary' runs which can take several seconds +to run. This latency represents a scalability challenge to the Ceph orchestrator management plane. + +To address this, cephadm needs to be able to run some of these longer running tasks asynchronously - this +frees up processing on the mgr by offloading tasks to each host, reduces latency and improves scalability. 
+ +This document describes the implementation requirements and design for an 'exporter' feature + + +Requirements +============ +The exporter should address these functional and non-functional requirements; + +* run as a normal systemd unit +* utilise the same filesystem schema as other services deployed with cephadm +* require only python3 standard library modules (no external dependencies) +* use encryption to protect the data flowing from a host to Ceph mgr +* execute data gathering tasks as background threads +* be easily extended to include more data gathering tasks +* monitor itself for the health of the data gathering threads +* cache metadata to respond to queries quickly +* respond to a metadata query in <30ms to support large Ceph clusters (1000s of nodes) +* provide CLI interaction to enable the exporter to be deployed either at bootstrap time, or once the + cluster has been deployed. +* be deployed as a normal orchestrator service (similar to the node-exporter) + +High Level Design +================= + +This section will focus on the exporter logic **only**. + +.. code:: + + Establish a metadata cache object (tasks will be represented by separate attributes) + Create a thread for each data gathering task; host, ceph-volume and list_daemons + each thread updates its own attribute within the cache object + Start a server instance passing requests to a specific request handler + the request handler only interacts with the cache object + the request handler passes metadata back to the caller + Main Loop + Leave the loop if a 'stop' request is received + check thread health + if a thread that was active, is now inactive + update the cache marking the task as inactive + update the cache with an error message for that task + wait for n secs + + +In the initial exporter implementation, the exporter has been implemented as a RESTful API.
+ + +Security +======== + +The cephadm 'binary' only supports standard python3 features, which has meant the RESTful API has been +developed using the http module, which itself is not intended for production use. However, the implementation +is not complex (based only on HTTPServer and BaseHTTPRequestHandler) and only supports the GET method - so the +security risk is perceived as low. + +Current mgr to host interactions occur within an ssh connection, so the goal of the exporter is to adopt a similar +security model. + +The initial REST API is implemented with the following features; + +* generic self-signed, or user provided SSL crt/key to encrypt traffic between the mgr and the host +* 'token' based authentication of the request + +All exporter instances will use the **same** crt/key to secure the link from the mgr to the host(s), in the same way +that the ssh access uses the same public key and port for each host connection. + +.. note:: Since the same SSL configuration is used on every exporter, when you supply your own settings you must + ensure that the CN or SAN components of the distinguished name are either **not** used or created using wildcard naming. + +The crt, key and token files are all defined with restrictive permissions (600), to help mitigate against the risk of exposure +to any other user on the Ceph cluster node(s). + +Administrator Interaction +========================= +Several new commands are required to configure the exporter, and additional parameters should be added to the bootstrap +process to allow the exporter to be deployed automatically for new clusters. + + +Enhancements to the 'bootstrap' process +--------------------------------------- +bootstrap should support additional parameters to automatically configure exporter daemons across hosts + +``--with-exporter`` + +By using this flag, you're telling the bootstrap process to include the cephadm-exporter service within the +cluster.
If you do not provide a specific configuration (SSL, token, port) to use, defaults would be applied. + +``--exporter-config`` + +With the --exporter-config option, you may pass your own SSL, token and port information. The file must be in +JSON format and contain the following fields; crt, key, token and port. The JSON content should be validated, and any +errors detected passed back to the user during the argument parsing phase (before any changes are done). + + +Additional ceph commands +------------------------ +:: + +# ceph cephadm generate-exporter-config + +This command will generate a default configuration consisting of; a self signed certificate, a randomly generated +32 character token and the default port of 9443 for the REST API. +:: + +# ceph cephadm set-exporter-config -i <config.json> + +Use a JSON file to define the crt, key, token and port for the REST API. The crt, key and token are validated by +the mgr/cephadm module prior to storing the values in the KV store. Invalid or missing entries should be reported to the +user. +:: + +# ceph cephadm clear-exporter-config + +Clear the current configuration (removes the associated keys from the KV store) +:: + +# ceph cephadm get-exporter-config + +Show the current exporter configuration, in JSON format + + +.. note:: If the service is already deployed any attempt to change or clear the configuration will + be denied. In order to change settings you must remove the service, apply the required configuration + and re-apply (``ceph orch apply cephadm-exporter``) + + + +New Ceph Configuration Keys +=========================== +The exporter configuration is persisted to the monitor's KV store, with the following keys: + +| mgr/cephadm/exporter_config +| mgr/cephadm/exporter_enabled + + + +RESTful API +=========== +The primary goal of the exporter is the provision of metadata from the host to the mgr. This interaction takes +place over a simple GET interface.
Although only the GET method is supported, the API provides multiple URLs to +provide different views on the metadata that has been gathered. + +.. csv-table:: Supported URL endpoints + :header: "URL", "Purpose" + + "/v1/metadata", "show all metadata including health of all threads" + "/v1/metadata/health", "only report on the health of the data gathering threads" + "/v1/metadata/disks", "show the disk output (ceph-volume inventory data)" + "/v1/metadata/host", "show host related metadata from the gather-facts command" + "/v1/metadata/daemons", "show the status of all ceph cluster related daemons on the host" + +Return Codes +------------ +The following HTTP return codes are generated by the API + +.. csv-table:: Supported HTTP Responses + :header: "Status Code", "Meaning" + + "200", "OK" + "204", "the thread associated with this request is no longer active, no data is returned" + "206", "some threads have stopped, so some content is missing" + "401", "request is not authorised - check your token is correct" + "404", "URL is malformed, not found" + "500", "all threads have stopped - unable to provide any metadata for the host" + + +Deployment +========== +During the initial phases of the exporter implementation, deployment is regarded as optional but is available +to new clusters and existing clusters that have the feature (Pacific and above). + +* new clusters : use the ``--with-exporter`` option +* existing clusters : you'll need to set the configuration and deploy the service manually + +.. code:: + + # ceph cephadm generate-exporter-config + # ceph orch apply cephadm-exporter + +If you choose to remove the cephadm-exporter service, you may simply + +.. code:: + + # ceph orch rm cephadm-exporter + +This will remove the daemons, and the exporter related settings stored in the KV store. + + +Management +========== +Once the exporter is deployed, you can use the following snippet to extract the host's metadata. + +..
code-block:: python + + import ssl + import json + import sys + import tempfile + import time + from urllib.request import Request, urlopen + + # CHANGE THIS V + hostname = "rh8-1.storage.lab" + + print("Reading config.json") + try: + with open('./config.json', 'r') as f: + raw=f.read() + except FileNotFoundError as e: + print("You must first create a config.json file using the cephadm get-exporter-config command") + sys.exit(1) + + cfg = json.loads(raw) + with tempfile.NamedTemporaryFile(buffering=0) as t: + print("creating a temporary local crt file from the json") + t.write(cfg['crt'].encode('utf-8')) + + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.load_verify_locations(t.name) + hdrs={"Authorization":f"Bearer {cfg['token']}"} + print("Issuing call to gather metadata") + req=Request(f"https://{hostname}:9443/v1/metadata",headers=hdrs) + s_time = time.time() + r = urlopen(req,context=ctx) + print(r.status) + print("call complete") + # assert r.status == 200 + if r.status in [200, 206]: + + raw=r.read() # bytes string + js=json.loads(raw.decode()) + print(json.dumps(js, indent=2)) + elapsed = time.time() - s_time + print(f"Elapsed secs : {elapsed}") + + +.. note:: the above example uses python3, and assumes that you've extracted the config using the ``get-exporter-config`` command. + + +Implementation Specific Details +=============================== + +In the same way as a typical container based deployment, the exporter is deployed to a directory under ``/var/lib/ceph/<fsid>``. The +cephadm binary is stored in this cluster folder, and the daemon's configuration and systemd settings are stored +under ``/var/lib/ceph/<fsid>/cephadm-exporter.<id>/``. + +.. code:: + + [root@rh8-1 cephadm-exporter.rh8-1]# pwd + /var/lib/ceph/cb576f70-2f72-11eb-b141-525400da3eb7/cephadm-exporter.rh8-1 + [root@rh8-1 cephadm-exporter.rh8-1]# ls -al + total 24 + drwx------. 2 root root 100 Nov 25 18:10 . + drwx------. 8 root root 160 Nov 25 23:19 .. 
+ -rw-------. 1 root root 1046 Nov 25 18:10 crt + -rw-------. 1 root root 1704 Nov 25 18:10 key + -rw-------. 1 root root 64 Nov 25 18:10 token + -rw-------. 1 root root 38 Nov 25 18:10 unit.configured + -rw-------. 1 root root 48 Nov 25 18:10 unit.created + -rw-r--r--. 1 root root 157 Nov 25 18:10 unit.run + + +In order to respond to requests quickly, the CephadmDaemon uses a cache object (CephadmCache) to hold the results +of the cephadm commands. + +The exporter doesn't introduce any new data gathering capability - instead it merely calls the existing cephadm commands. + +The CephadmDaemon class creates a local HTTP server (uses ThreadingMixIn), secured with TLS and uses the CephadmDaemonHandler +to handle the requests. The request handler inspects the request header and looks for a valid Bearer token - if this is invalid +or missing the caller receives a 401 Unauthorized error. + +The 'run' method of the CephadmDaemon class, places the scrape_* methods into different threads with each thread supporting +a different refresh interval. Each thread then periodically issues its cephadm command, and places the output +in the cache object. + +In addition to the command output, each thread also maintains its own timestamp record in the cache so the caller can +very easily determine the age of the data it's received. + +If the underlying cephadm command execution hits an exception, the thread passes control to a _handle_thread_exception method. +Here the exception is logged to the daemon's log file and the exception details are added to the cache, providing visibility +of the problem to the caller. + +Although each thread is effectively given its own URL endpoint (host, disks, daemons), the recommended way to gather data from +the host is to simply use the ``/v1/metadata`` endpoint. This will provide all of the data, and indicate whether any of the +threads have failed.
+ +The run method uses "signal" to establish a reload hook, but in the initial implementation this doesn't take any action and simply +logs that a reload was received. + + +Future Work +=========== + +#. Consider the potential of adding a restart policy for threads +#. Once the exporter is fully integrated into mgr/cephadm, the goal would be to make the exporter the + default means of data gathering. However, until then the exporter will remain as an opt-in 'feature + preview'. diff --git a/doc/dev/cephadm/compliance-check.rst b/doc/dev/cephadm/compliance-check.rst new file mode 100644 index 00000000000..eea462445ad --- /dev/null +++ b/doc/dev/cephadm/compliance-check.rst @@ -0,0 +1,121 @@ +================ +Compliance Check +================ + +The stability and reliability of a Ceph cluster is dependent not just upon the Ceph daemons, but +also the OS and hardware that Ceph is installed on. This document is intended to promote a design +discussion for providing a "compliance" feature within mgr/cephadm, which would be responsible for +identifying common platform-related issues that could impact Ceph stability and operation. + +The ultimate goal of these checks is to identify issues early and raise a healthcheck WARN +event, to alert the Administrator to the issue. + +Prerequisites +============= +In order to effectively analyse the hosts that Ceph is deployed to, this feature requires a cache +of host-related metadata. The metadata is already available from cephadm's HostFacts class and the +``gather-facts`` cephadm command. For the purposes of this document, we will assume that this +data is available within the mgr/cephadm "cache" structure. + +Some checks will require that the host status is also populated e.g. ONLINE, OFFLINE, MAINTENANCE + +Administrator Interaction +========================= +Not all users will require this feature, and must be able to 'opt out'. For this reason, +mgr/cephadm must provide controls, such as the following; + +.. 
code-block:: + + ceph cephadm compliance enable | disable | status [--format json] + ceph cephadm compliance ls [--format json] + ceph cephadm compliance enable-check <name> + ceph cephadm compliance disable-check <name> + ceph cephadm compliance set-check-interval <int> + ceph cephadm compliance get-check-interval + +The status option would show the enabled/disabled state of the feature, along with the +check-interval. + +The ``ls`` subcommand would show all checks in the following format; + +``check-name status description`` + +Proposed Integration +==================== +The compliance checks are not required to run all the time, but instead should run at discrete +intervals. The interval would be configurable via the :code:`set-check-interval` +subcommand (default would be every 12 hours) + + +mgr/cephadm currently executes an event driven (time based) serve loop to act on deploy/remove and +reconcile activity. In order to execute the compliance checks, the compliance check code would be +called from this main serve loop - when the :code:`set-check-interval` is met. + + +Proposed Checks +=============== +All checks would push any errors to a list, so multiple issues can be escalated to the Admin at +the same time.
The list below provides a description of each check, with the text following the +name indicating a shortname version *(the shortname is the reference for command interaction +when enabling or disabling a check)* + + +OS Consistency (OS) +___________________ +* all hosts must use the same vendor +* all hosts must be on the same major release (this check would only be applicable to distributions that + offer a long-term-support strategy (RHEL, CentOS, SLES, Ubuntu etc)) + + +*src: gather-facts output* + +Linux Kernel Security Mode (LSM) +________________________________ +* All hosts should have a consistent SELINUX/AppArmor configuration + +*src: gather-facts output* + +Services Check (SERVICES) +_________________________ +Hosts that are in an ONLINE state should adhere to the following; + +* all daemons (systemd units) should be enabled +* all daemons should be running (not dead) + +*src: list_daemons output* + +Support Status (SUPPORT) +________________________ +If support status has been detected, it should be consistent across all hosts. At this point +support status is available only for Red Hat machines. + +*src: gather-facts output* + +Network : MTU (MTU) +________________________________ +All network interfaces on the same Ceph network (public/cluster) should have the same MTU + +*src: gather-facts output* + +Network : LinkSpeed (LINKSPEED) +____________________________________________ +All network interfaces on the same Ceph network (public/cluster) should have the same Linkspeed + +*src: gather-facts output* + +Network : Consistency (INTERFACE) +______________________________________________ +All hosts with OSDs should have consistent network configuration - e.g. if some hosts do +not separate cluster/public traffic but others do, that is an anomaly that would generate a +compliance check warning.
+ +*src: gather-facts output* + +Notification Strategy +===================== +If any of the checks fail, mgr/cephadm would raise a WARN level alert + +Futures +======= +The checks highlighted here serve only as a starting point, and we should expect to expand +on the checks over time. diff --git a/doc/dev/cephadm/index.rst b/doc/dev/cephadm/index.rst index e5567f3f6f0..c1aff75ee31 100644 --- a/doc/dev/cephadm/index.rst +++ b/doc/dev/cephadm/index.rst @@ -9,3 +9,5 @@ CEPHADM Developer Documentation host-maintenance + compliance-check + cephadm-exporter diff --git a/doc/dev/developer_guide/basic-workflow.rst b/doc/dev/developer_guide/basic-workflow.rst index 4a6913fb89a..1dfb0029d3d 100644 --- a/doc/dev/developer_guide/basic-workflow.rst +++ b/doc/dev/developer_guide/basic-workflow.rst @@ -1,7 +1,7 @@ Basic Workflow ============== -The following chart illustrates basic development workflow: +The following chart illustrates the basic Ceph development workflow: .. ditaa:: @@ -28,60 +28,62 @@ The following chart illustrates basic development workflow: | pull request | git push \-------------/ \--------------/ -Below we present an explanation of this chart. The explanation is written -with the assumption that you, the reader, are a beginning developer who -has an idea for a bugfix, but do not know exactly how to proceed. Watch -the `Getting Started with Ceph Development -<https://www.youtube.com/watch?v=t5UIehZ1oLs>`_ video for -a practical summary of the same. +The below explanation is written with the assumption that you, the reader, are +a new contributor who has an idea for a bugfix or enhancement, but do not know +exactly how to proceed. Watch the `Getting Started with Ceph Development +<https://www.youtube.com/watch?v=t5UIehZ1oLs>`_ video for a practical summary +of this workflow. Update the tracker ------------------ -Before you start, you should know the :ref:`issue-tracker` number of the bug -you intend to fix. 
If there is no tracker issue, now is the time to create -one. +Before you start, you should know the :ref:`issue-tracker` (Redmine) number +of the bug you intend to fix. If there is no tracker issue, now is the time to +create one for code changes. Straightforward documentation cleanup does +not necessarily require a corresponding tracker issue. However, an issue +(ticket) should be created if one is adding new documentation chapters or +files, or for other substantial changes. -The tracker is there to explain the issue (bug) to your fellow Ceph -developers and keep them informed as you make progress toward resolution. -To this end, then, provide a descriptive title as well as sufficient -information and details in the description. +The tracker ticket serves to explain the issue (bug) to your fellow Ceph +developers and keep them informed as you make progress toward resolution. To +this end, please provide a descriptive title and write appropriate information +and details into the description. When composing the ticket's title, consider "If I +want to search for this ticket two years from now, what keywords will I search +for?" If you have sufficient tracker permissions, assign the bug to yourself by -changing the ``Assignee`` field. If your tracker permissions have not yet -been elevated, simply add a comment to the issue with a short message like -"I am working on this issue". +setting the ``Assignee`` field. If your tracker permissions have not been +elevated, simply add a comment with a short message like "I am working on this +issue". Upstream code ------------- -This section, and the ones that follow, correspond to the nodes in the -above chart. +This section, and the ones that follow, correspond to nodes in the above chart. -The upstream code lives in https://github.com/ceph/ceph.git, which is -sometimes referred to as the "upstream repo", or simply "upstream". 
As the -chart illustrates, we will make a local copy of this code, modify it, test -our modifications, and submit the modifications back to the upstream repo -for review. +The upstream code is found at https://github.com/ceph/ceph.git, which is known +as the "upstream repo", or simply "upstream". As the chart shows, we will make +a local copy of this repository, modify it, test our modifications, then submit +the modifications for review and merging. A local copy of the upstream code is made by -1. forking the upstream repo on GitHub, and -2. cloning your fork to make a local working copy +1. Forking the upstream repo on GitHub, and +2. Cloning your fork to make a local working copy -See the `the GitHub documentation +See the `GitHub documentation <https://help.github.com/articles/fork-a-repo/#platform-linux>`_ for detailed instructions on forking. In short, if your GitHub username is -"mygithubaccount", your fork of the upstream repo will show up at +"mygithubaccount", your fork of the upstream repo will appear at https://github.com/mygithubaccount/ceph. Once you have created your fork, -you clone it by doing: +clone it by running: .. prompt:: bash $ git clone https://github.com/mygithubaccount/ceph -While it is possible to clone the upstream repo directly, in this case you -must fork it first. Forking is what enables us to open a `GitHub pull +While it is possible to clone the upstream repo directly, for the Ceph workflow +you must fork it first. Forking is what enables us to open a `GitHub pull request`_. For more information on using GitHub, refer to `GitHub Help @@ -90,13 +92,25 @@ For more information on using GitHub, refer to `GitHub Help Local environment ----------------- -In the local environment created in the previous step, you now have a -copy of the ``master`` branch in ``remotes/origin/master``. Since the fork +In the local environment created in the previous step, you now have a copy of +the ``master`` branch in ``remotes/origin/master``. 
This fork (https://github.com/mygithubaccount/ceph.git) is frozen in time and the upstream repo (https://github.com/ceph/ceph.git, typically abbreviated to -``ceph/ceph.git``) is updated frequently by other developers, you will need -to sync your fork periodically. To do this, first add the upstream repo as -a "remote" and fetch it +``ceph/ceph.git``) is updated frequently by other contributors, you must sync +your fork periodically. Failure to do so may result in your commits and pull +requests failing to merge because they refer to file contents that have since +changed. + +First, ensure that you have properly configured your local git environment with +your name and email address. Skip this step if you have already configured this +information. + +.. prompt:: bash $ + + git config user.name "FIRST_NAME LAST_NAME" + git config user.email "MY_NAME@example.com" + +Now add the upstream repo as a "remote" and fetch it: .. prompt:: bash $ @@ -107,10 +121,10 @@ Fetching downloads all objects (commits, branches) that were added since the last sync. After running these commands, all the branches from ``ceph/ceph.git`` are downloaded to the local git repo as ``remotes/ceph/$BRANCH_NAME`` and can be referenced as -``ceph/$BRANCH_NAME`` in certain git commands. +``ceph/$BRANCH_NAME`` in local git commands. For example, your local ``master`` branch can be reset to the upstream Ceph -``master`` branch by doing +``master`` branch by running .. prompt:: bash $ @@ -118,7 +132,7 @@ For example, your local ``master`` branch can be reset to the upstream Ceph git checkout master git reset --hard ceph/master -Finally, the ``master`` branch of your fork can then be synced to upstream +Finally, the ``master`` branch of your fork is synced to the upstream master by .. prompt:: bash $ @@ -128,7 +142,7 @@ master by Bugfix branch ------------- -Next, create a branch for the bugfix: +Next, create a branch for your bugfix: .. 
prompt:: bash $ @@ -136,29 +150,31 @@ Next, create a branch for the bugfix: git checkout -b fix_1 git push -u origin fix_1 -This creates a ``fix_1`` branch locally and in our GitHub fork. At this -point, the ``fix_1`` branch is identical to the ``master`` branch, but not -for long! You are now ready to modify the code. +This creates a ``fix_1`` branch locally and in our GitHub fork. At this point, +the ``fix_1`` branch is identical to the ``master`` branch, but not for long! +You are now ready to modify the code. Be careful to always run ``git checkout +master`` first; otherwise you may find commits from an unrelated branch mixed +with your new work. Fix bug locally --------------- -At this point, change the status of the tracker issue to "In progress" to -communicate to the other Ceph developers that you have begun working on a -fix. If you don't have permission to change that field, your comment that -you are working on the issue is sufficient. +Now change the status of the tracker issue to "In progress" to communicate to +other Ceph contributors that you have begun working on a fix. This helps avoid +duplication of effort. If you don't have permission to change that field, your +previous comment that you are working on the issue is sufficient. -Possibly, your fix is very simple and requires only minimal testing. -More likely, it will be an iterative process involving trial and error, not -to mention skill. An explanation of how to fix bugs is beyond the -scope of this document. Instead, we focus on the mechanics of the process -in the context of the Ceph project. +Your fix may be very simple and require only minimal testing. More likely, +this will be an iterative process involving trial and error, not to mention +skill. An explanation of how to fix bugs is beyond the scope of this +document. Instead, we focus on the mechanics of the process in the context of +the Ceph project.
-A detailed discussion of the tools available for validating your bugfixes, +For a detailed discussion of the tools available for validating bugfixes, see the chapters on testing. -For now, let us just assume that you have finished work on the bugfix and -that you have tested it and believe it works. Commit the changes to your local +For now, let us just assume that you have finished work on the bugfix, that +you have tested it, and that you believe it works. Commit the changes to your local branch using the ``--signoff`` option .. prompt:: bash $ @@ -174,44 +190,48 @@ and push the changes to your fork GitHub pull request ------------------- -The next step is to open a GitHub pull request. The purpose of this step is -to make your bugfix available to the community of Ceph developers. They -will review it and may do additional testing on it. +The next step is to open a GitHub pull request (PR). This makes your bugfix +visible to the community of Ceph contributors. They will review it and may +perform additional testing and/or request changes. -In short, this is the point where you "go public" with your modifications. -Psychologically, you should be prepared to receive suggestions and -constructive criticism. Don't worry! In our experience, the Ceph project is -a friendly place! +This is the point where you "go public" with your modifications. Be prepared +to receive suggestions and constructive criticism in the form of comments +within the PR. Don't worry! The Ceph project is a friendly place! -If you are uncertain how to use pull requests, you may read +If you are uncertain how to create and manage pull requests, you may read `this GitHub pull request tutorial`_. .. _`this GitHub pull request tutorial`: https://help.github.com/articles/using-pull-requests/ -For some ideas on what constitutes a "good" pull request, see +For ideas on what constitutes a "good" pull request, see the `Git Commit Good Practice`_ article at the `OpenStack Project Wiki`_. ..
_`Git Commit Good Practice`: https://wiki.openstack.org/wiki/GitCommitMessages .. _`OpenStack Project Wiki`: https://wiki.openstack.org/wiki/Main_Page +and our own `Submitting Patches <https://github.com/ceph/ceph/blob/master/SubmittingPatches.rst>`_ document. + Once your pull request (PR) is opened, update the :ref:`issue-tracker` by -adding a comment to the bug pointing the other developers to your PR. The -update can be as simple as:: +adding a comment directing other contributors to your PR. The comment can be +as simple as:: *PR*: https://github.com/ceph/ceph/pull/$NUMBER_OF_YOUR_PULL_REQUEST Automated PR validation ----------------------- -When your PR hits GitHub, the Ceph project's `Continuous Integration (CI) -<https://en.wikipedia.org/wiki/Continuous_integration>`_ -infrastructure will test it automatically. At the time of this writing -(September 2020), the automated CI testing included five tests to check that the -commits in the PR are properly signed (see :ref:`submitting-patches`), to check that the documentation builds, to check that the submodules are unmodified, to check that the API is in order, and a :ref:`make-check` test. +When your PR is created or updated, the Ceph project's `Continuous Integration +(CI) <https://en.wikipedia.org/wiki/Continuous_integration>`_ infrastructure +will test it automatically. At the time of this writing (September 2020), the +automated CI testing included five tests to check that the commits in the PR +are properly signed (see :ref:`submitting-patches`), to check that the +documentation builds, to check that the submodules are unmodified, to check +that the API is in order, and a :ref:`make-check` test. Additional tests may +be performed depending on which files are modified by your PR. The :ref:`make-check`, builds the PR and runs it through a battery of -tests. These tests run on machines operated by the Ceph Continuous +tests. These tests run on servers operated by the Ceph Continuous Integration (CI) team. 
When the tests complete, the result will be shown on GitHub in the pull request itself. @@ -224,25 +244,32 @@ Notes on PR make check test The GitHub :ref:`make check<make-check>` test is driven by a Jenkins instance. Jenkins merges your PR branch into the latest version of the base branch before -starting the build. This means that you don't have to rebase the PR to pick up any fixes. +starting tests. This means that you don't have to rebase the PR to pick up any fixes. -You can trigger the PR tests at any time by adding a comment to the PR - the +You can trigger PR tests at any time by adding a comment to the PR - the comment should contain the string "test this please". Since a human subscribed -to the PR might interpret that as a request for him or her to test the PR, -we recommend that you address Jenkins directly in the request; for example, write "jenkins retest this please". - -If there is a build failure and you aren't sure what caused it, check the :ref:`make check<make-check>` log. To access it, first click on "details" (next -to the :ref:`make check<make-check>` test in the PR) to enter the Jenkins -web GUI. Then click on "Console Output" (on the left). - -Jenkins is set up to grep the log for strings known to have been associated -with :ref:`make check<make-check>` failures in the past. However, there is no guarantee that the known strings are associated with any given :ref:`make check<make-check>` failure. You'll have to dig into the log to determine the cause of the failure. +to the PR might interpret that as a request for him or her to test the PR, we +recommend that you address Jenkins directly. For example, write "jenkins retest +this please". For efficiency a single re-test can also be requested with +e.g. "jenkins test signed". For reference, a list of these requests is +automatically added to the end of each new PR's description. + +If there is a build failure and you aren't sure what caused it, check the +:ref:`make check<make-check>` log. 
To access it, click on the "details" (next +to the :ref:`make check<make-check>` test in the PR) link to enter the Jenkins web +GUI. Then click on "Console Output" (on the left). + +Jenkins is configured to search logs for strings known to have been associated +with :ref:`make check<make-check>` failures in the past. However, there is no +guarantee that these known strings are associated with any given +:ref:`make check<make-check>` failure. You'll have to read through the log to determine the +cause of your specific failure. Integration tests AKA ceph-qa-suite ----------------------------------- -Since Ceph is a complex beast, it may also be necessary to test your fix to -see how it behaves on real clusters running either on real or virtual +Since Ceph is complex, it may be necessary to test your fix to +see how it behaves on real clusters running on physical or virtual hardware. Tests designed for this purpose live in the `ceph/qa sub-directory`_ and are run via the `teuthology framework`_. @@ -252,7 +279,7 @@ sub-directory`_ and are run via the `teuthology framework`_. The Ceph community has access to the `Sepia lab <https://wiki.sepia.ceph.com/doku.php>`_ where :ref:`testing-integration-tests` can be -run on real hardware. Other developers may add tags like "needs-qa" to your +run on physical hardware. Other developers may add tags like "needs-qa" to your PR. This allows PRs that need testing to be merged into a single branch and tested all at the same time. Since teuthology suites can take hours (even days in some cases) to run, this can save a lot of time. @@ -267,7 +294,7 @@ Code review Once your bugfix has been thoroughly tested, or even during this process, it will be subjected to code review by other developers. This typically -takes the form of correspondence in the PR itself, but can be supplemented +takes the form of comments in the PR itself, but can be supplemented by discussions on :ref:`irc` and the :ref:`mailing-list`. 
Amending your PR @@ -276,7 +303,7 @@ Amending your PR While your PR is going through testing and `Code Review`_, you can modify it at any time by editing files in your local branch. -After the changes are committed locally (to the ``fix_1`` branch in our +After updates are committed locally (to the ``fix_1`` branch in our example), they need to be pushed to GitHub so they appear in the PR. Modifying the PR is done by adding commits to the ``fix_1`` branch upon @@ -290,11 +317,16 @@ will need to force push your branch with: git push --force origin fix_1 +Why do we take these extra steps instead of simply adding additional commits +to the PR? It is best practice for a PR to consist of a single commit; this +makes for clean history, eases peer review of your changes, and facilitates +merges. In rare circumstances it also makes it easier to cleanly revert +changes. + Merge ----- -The bugfixing process culminates when one of the project leads decides to -merge your PR. +The bugfix process completes when a project lead merges your PR. When this happens, it is a signal for you (or the lead who merged the PR) to change the :ref:`issue-tracker` status to "Resolved". Some issues may be @@ -314,22 +346,22 @@ This is the most basic form of a merge commit:: This consists of two parts: -#. The title of the commit of the pull request to be merged. +#. The title of the commit / PR to be merged. #. The name and email address of the reviewer. Enclose the reviewer's email address in angle brackets. Using .githubmap to Find a Reviewer's Email Address ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If you cannot find the email address of the reviewer on his or her github +If you cannot find the email address of the reviewer on his or her GitHub page, you can look it up in the **.githubmap** file, which can be found in the repository at **/ceph/.githubmap**.
Using "git log" to find a Reviewer's Email Address ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If you cannot find a reviewer's email address by using the above methods, -you can search the git log for their email address. Reviewers are likely -to have committed something before, and as long as they have committed -something, the git log will probably contain their email address. +If you cannot find a reviewer's email address by using the above methods, you +can search the git log for their email address. Reviewers are likely to have +committed something before. If they have made previous contributions, the git +log will probably contain their email address. Use the following command @@ -350,3 +382,4 @@ the **ptl-tool** have the following form:: client: move client_lock to _unmount() client: add timer_lock support Reviewed-by: Patrick Donnelly <pdonnell@redhat.com> + diff --git a/doc/dev/developer_guide/merging.rst b/doc/dev/developer_guide/merging.rst index 6a8507bbc42..076e5b62a7e 100644 --- a/doc/dev/developer_guide/merging.rst +++ b/doc/dev/developer_guide/merging.rst @@ -1,11 +1,10 @@ .. _merging: -What is Merged Where and When? -=============================== +Commit merging: scope and cadence +================================== -Commits are merged into branches according to criteria that change -during the lifecycle of a Ceph release. This chapter is the inventory -of what can be merged in which branch at a given point in time. +Commits are merged into branches according to criteria specific to each phase +of the Ceph release lifecycle. This chapter codifies these criteria. Development releases (i.e. x.0.z) --------------------------------- @@ -13,38 +12,37 @@ Development releases (i.e. x.0.z) What ? ^^^^^^ -* features -* bug fixes +* Features +* Bug fixes Where ? ^^^^^^^ -Features are merged to the master branch. Bug fixes should be merged -to the corresponding named branch (e.g. "jewel" for 10.0.z, "kraken" -for 11.0.z, etc.). 
However, this is not mandatory - bug fixes can be -merged to the master branch as well, since the master branch is -periodically merged to the named branch during the development -releases phase. In either case, if the bugfix is important it can also -be flagged for backport to one or more previous stable releases. +Features are merged to the *master* branch. Bug fixes should be merged to the +corresponding named branch (e.g. *nautilus* for 14.0.z, *pacific* for 16.0.z, +etc.). However, this is not mandatory - bug fixes and documentation +enhancements can be merged to the *master* branch as well, since the *master* +branch is itself occasionally merged to the named branch during the development +releases phase. In either case, if a bug fix is important it can also be +flagged for backport to one or more previous stable releases. When ? ^^^^^^ -After the stable release candidates of the previous release enters -phase 2 (see below). For example: the "jewel" named branch was -created when the infernalis release candidates entered phase 2. From -this point on, master was no longer associated with infernalis. As -soon as the named branch of the next stable release is created, master -starts getting periodically merged into it. +After each stable release, candidate branches for previous releases enter +phase 2 (see below). For example: the *jewel* named branch was created when +the *infernalis* release candidates entered phase 2. From this point on, +*master* was no longer associated with *infernalis*. After the named branch of +the next stable release is created, *master* will be occasionally merged into +it. Branch merges ^^^^^^^^^^^^^ -* The branch of the stable release is merged periodically into master. -* The master branch is merged periodically into the branch of the - stable release. -* The master is merged into the branch of the stable release - immediately after each development x.0.z release.
+* The latest stable release branch is merged periodically into master. +* The master branch is merged periodically into the branch of the stable release. +* The master is merged into the stable release branch + immediately after each development (x.0.z) release. Stable release candidates (i.e. x.1.z) phase 1 ---------------------------------------------- @@ -52,18 +50,18 @@ Stable release candidates (i.e. x.1.z) phase 1 What ? ^^^^^^ -* bug fixes only +* Bug fixes only Where ? ^^^^^^^ -The branch of the stable release (e.g. "jewel" for 10.0.z, "kraken" -for 11.0.z, etc.) or master. Bug fixes should be merged to the named -branch corresponding to the stable release candidate (e.g. "jewel" for -10.1.z) or to master. During this phase, all commits to master will be +The stable release branch (e.g. *jewel* for 10.0.z, *luminous* +for 12.0.z, etc.) or *master*. Bug fixes should be merged to the named +branch corresponding to the stable release candidate (e.g. *jewel* for +10.1.z) or to *master*. During this phase, all commits to *master* will be merged to the named branch, and vice versa. In other words, it makes no difference whether a commit is merged to the named branch or to -master - it will make it into the next release candidate either way. +*master* - it will make it into the next release candidate either way. When ? ^^^^^^ @@ -74,10 +72,9 @@ x.1.0 tag is set in the release branch. Branch merges ^^^^^^^^^^^^^ -* The branch of the stable release is merged periodically into master. -* The master branch is merged periodically into the branch of the - stable release. -* The master is merged into the branch of the stable release +* The stable release branch is merged periodically into *master*. +* The *master* branch is merged periodically into the stable release branch. +* The *master* branch is merged into the stable release branch immediately after each x.1.z release candidate. Stable release candidates (i.e. 
x.1.z) phase 2 @@ -86,27 +83,26 @@ Stable release candidates (i.e. x.1.z) phase 2 What ? ^^^^^^ -* bug fixes only +* Bug fixes only Where ? ^^^^^^^ -The branch of the stable release (e.g. "jewel" for 10.0.z, "kraken" -for 11.0.z, etc.). During this phase, all commits to the named branch -will be merged into master. Cherry-picking to the named branch during -release candidate phase 2 is done manually since the official -backporting process only begins when the release is pronounced -"stable". +The stable release branch (e.g. *mimic* for 13.0.z, *octopus* for 15.0.z, +etc.). During this phase, all commits to the named branch will be merged into +*master*. Cherry-picking to the named branch during release candidate phase 2 +is performed manually since the official backporting process begins only when +the release is pronounced "stable". When ? ^^^^^^ -After Sage Weil decides it is time for phase 2 to happen. +After Sage Weil announces that it is time for phase 2 to happen. Branch merges ^^^^^^^^^^^^^ -* The branch of the stable release is merged periodically into master. +* The stable release branch is occasionally merged into master. Stable releases (i.e. x.2.z) ---------------------------- @@ -114,21 +110,20 @@ Stable releases (i.e. x.2.z) What ? ^^^^^^ -* bug fixes -* features are sometime accepted -* commits should be cherry-picked from master when possible - -* commits that are not cherry-picked from master must be about a bug unique to +* Bug fixes +* Features are sometimes accepted +* Commits should be cherry-picked from *master* when possible +* Commits that are not cherry-picked from *master* must pertain to a bug unique to the stable release -* see also `the backport HOWTO`_ +* See also the `backport HOWTO`_ document -.. _`the backport HOWTO`: +.. _`backport HOWTO`: http://tracker.ceph.com/projects/ceph-releases/wiki/HOWTO#HOWTO Where ?
^^^^^^^ -The branch of the stable release (hammer for 0.94.x, infernalis for 9.2.x, +The stable release branch (*hammer* for 0.94.x, *infernalis* for 9.2.x, etc.) When ? diff --git a/doc/dev/developer_guide/tests-unit-tests.rst b/doc/dev/developer_guide/tests-unit-tests.rst index 8c7cff8f92a..6327b969674 100644 --- a/doc/dev/developer_guide/tests-unit-tests.rst +++ b/doc/dev/developer_guide/tests-unit-tests.rst @@ -1,28 +1,24 @@ Testing - unit tests ==================== -Ceph has two types of tests: - -#. unit tests (also called ``make check`` tests) -#. integration tests. - -What are here called ``make check`` tests are not, strictly speaking, "unit -tests". They are tests that can be easily run on a single-build machine -after Ceph has been compiled from source. Such ``make check`` tests do -not require packages or a multi-machine cluster. - -Integration tests, however, require packages and multi-machine clusters. +The Ceph GitHub repository has two types of tests: unit tests (also called +``make check`` tests) and integration tests. Strictly speaking, the +``make check`` tests are not "unit tests", but rather tests that can be run +easily on a single build machine after compiling Ceph from source, whereas +integration tests require package installation and multi-machine clusters to +run. .. _make-check: What does "make check" mean? ---------------------------- -After Ceph has been compiled, its code can be run through a battery of -tests that cover various aspects of Ceph. For historical reasons, this -battery of tests is often referred to as ``make check`` even though the -actual command used to run the tests is now ``ctest``. In order to be -included in this battery of tests, a test must: +After compiling Ceph, the code can be run through a battery of tests. +For historical reasons, this is +often referred to as ``make check`` even though the actual command used to run +the tests is now ``ctest``.
For inclusion in this group of tests, a test +must: + * bind ports that do not conflict with other tests * not require root access @@ -34,20 +30,18 @@ check tests" or "unit tests". This is meant to distinguish these tests from the more complex "integration tests" that are run via the `teuthology framework`_. -While it is possible to run ``ctest`` directly, it can be tricky to -correctly set up your environment for it. Fortunately, a script is provided -to make it easier to run the unit tests on your code. This script can be -run from the top-level directory of the Ceph source tree by running the -following command: +While it is possible to run ``ctest`` directly, it can be tricky to correctly +set up your environment. Fortunately, a script is provided to make it easier +to run the unit tests on your code. It can be run from the top-level directory of +the Ceph source tree by invoking: .. prompt:: bash $ - ./run-make-check.sh +You will need a minimum of 8GB of RAM and 32GB of free drive space for this +command to complete successfully on x86_64; other architectures may have +different requirements. Depending on your hardware, it can take from twenty +minutes to three hours to complete, but it's worth the wait. -You will need a minimum of 8GB of RAM and 32GB of free disk space for this -command to complete successfully on x86_64 (other architectures may have -different constraints). Depending on your hardware, it can take from 20 -minutes to three hours to complete. How unit tests are declared --------------------------- @@ -84,10 +78,10 @@ Most python modules can be found under ``./src/pybind/``. Many modules use **tox** to run their unit tests. **tox** itself is a generic virtualenv management and test command line tool. -To find out quickly if tox can be run you can either just try to run ``tox`` -or find out if a ``tox.ini`` exists. +To find out quickly if **tox** can be run you can either just try to run ``tox`` +or check for the existence of a ``tox.ini`` file.
-Currently the following modules use tox: +Currently the following modules use **tox**: - Cephadm (``./src/pybind/mgr/cephadm``) - Insights (``./src/pybind/mgr/insights``) @@ -96,15 +90,14 @@ Currently the following modules use tox: - Python common (``./src/python-common/tox.ini``) -Most tox configuration support multiple environments and tasks. You can see -which environments and tasks are supported by looking into the ``tox.ini`` -file to see what ``envlist`` is assigned. -To run **tox**, just execute ``tox`` in the directory where ``tox.ini`` lies. -Without any specified environments ``-e $env1,$env2``, all environments will -be run. Jenkins will run ``tox`` by executing ``run_tox.sh`` which lies under +Most **tox** configurations support multiple environments and tasks. You can see +which are supported by examining the ``envlist`` assignment within ``tox.ini``. +To run **tox**, just execute ``tox`` in the directory where ``tox.ini`` is found. +If no environments are specified with e.g. ``-e $env1,$env2``, all environments +will be run. Jenkins will run ``tox`` by executing ``run_tox.sh`` which is under ``./src/script``. -Here some examples from ceph dashboard on how to specify different +Here are some examples from the Ceph Dashboard on how to specify environments and run options:: ## Run Python 2+3 tests+lint commands: @@ -122,17 +115,17 @@ Manager core unit tests Currently only doctests_ inside ``mgr_util.py`` are run. -To add more files that should be tested inside the core of the manager add -them at the end of the line that includes ``mgr_util.py`` inside ``tox.ini``. +To test additional files inside the core of the manager, add +them at the end of the line that includes ``mgr_util.py`` within ``tox.ini``. .. _doctests: https://docs.python.org/3/library/doctest.html Unit test caveats ----------------- -1.
Unlike the various Ceph daemons and ``ceph-fuse``, unit tests are linked against the default memory allocator (glibc) unless explicitly - linked against something else. This enables tools like valgrind to be used + linked against something else. This enables tools like **valgrind** to be used in the tests. .. _make check: diff --git a/doc/dev/freebsd.rst b/doc/dev/freebsd.rst index 71568ef388d..b1645b873b3 100644 --- a/doc/dev/freebsd.rst +++ b/doc/dev/freebsd.rst @@ -44,7 +44,7 @@ MON creation Monitors are created by following the manual creation steps on:: - http://docs.ceph.com/docs/master/install/manual-deployment/ + https://docs.ceph.com/en/latest/install/manual-freebsd-deployment/ OSD creation diff --git a/doc/dev/osd_internals/backfill_reservation.rst b/doc/dev/osd_internals/backfill_reservation.rst index 95526236ab3..3c380dcf602 100644 --- a/doc/dev/osd_internals/backfill_reservation.rst +++ b/doc/dev/osd_internals/backfill_reservation.rst @@ -39,7 +39,7 @@ which PGs are recovered. Admins can override the default order by using ``force-recovery`` or ``force-backfill``. A ``force-recovery`` with op priority ``255`` will start before a ``force-backfill`` op at priority ``254``. -If a recovery is needed because a PG is below ``min_size`` a base priority of +If recovery is needed because a PG is below ``min_size`` a base priority of ``220`` is used. This is incremented by the number of OSDs short of the pool's ``min_size`` as well as a value relative to the pool's ``recovery_priority``. The resultant priority is capped at ``253`` so that it does not confound forced @@ -47,11 +47,12 @@ ops as described above. Under ordinary circumstances a recovery op is prioritized at ``180`` plus a value relative to the pool's ``recovery_priority``. The resultant priority is capped at ``219``. 
-If a backfill op is needed because the number of acting OSDs is less than +If backfill is needed because the number of acting OSDs is less than the pool's ``min_size``, a priority of ``220`` is used. The number of OSDs -short of the pool's `` min_size`` is added as well as a value relative to +short of the pool's ``min_size`` is added as well as a value relative to the pool's ``recovery_priority``. The total priority is limited to ``253``. -If a backfill op is needed because a PG is undersized, + +If backfill is needed because a PG is undersized, a priority of ``140`` is used. The number of OSDs below the size of the pool is added as well as a value relative to the pool's ``recovery_priority``. The resultant priority is capped at ``179``. If a backfill op is diff --git a/doc/foundation.rst b/doc/foundation.rst index 65b191934f8..3d0e318d8b8 100644 --- a/doc/foundation.rst +++ b/doc/foundation.rst @@ -112,7 +112,9 @@ Members * Phil Straw (SoftIron) * Robin Johnson (DigitalOcean) * Sage Weil (Red Hat) - Ceph project leader +* Winston Damarillio (Amihan) * Xie Xingguo (ZTE) +* Zhang Shaowen (China Mobile) Joining ======= diff --git a/doc/governance.rst b/doc/governance.rst index e3e53073655..0796fad9198 100644 --- a/doc/governance.rst +++ b/doc/governance.rst @@ -53,15 +53,14 @@ the CLT itself. 
Current CLT members are: * Abhishek Lekshmanan <abhishek@suse.com> - * Alfredo Deza <adeza@redhat.com> * Casey Bodley <cbodley@redhat.com> + * Ernesto Puerta <epuerta@redhat.com> * Gregory Farnum <gfarnum@redhat.com> * Haomai Wang <haomai@xsky.com> * Jason Dillaman <dillaman@redhat.com> * Josh Durgin <jdurgin@redhat.com> * João Eduardo Luis <joao@suse.de> * Ken Dreyer <kdreyer@redhat.com> - * Lenz Grimmer <lgrimmer@suse.com> * Matt Benjamin <mbenjami@redhat.com> * Myoungwon Oh <omwmw@sk.com> * Neha Ojha <nojha@redhat.com> @@ -70,7 +69,6 @@ Current CLT members are: * Sebastian Wagner <swagner@suse.com> * Xie Xingguo <xie.xingguo@zte.com.cn> * Yehuda Sadeh <yehuda@redhat.com> - * Zack Cerza <zcerza@redhat.com> Component Leads --------------- diff --git a/doc/install/manual-deployment.rst b/doc/install/manual-deployment.rst index 1f9f7b6b164..7c3c502cc1f 100644 --- a/doc/install/manual-deployment.rst +++ b/doc/install/manual-deployment.rst @@ -244,15 +244,10 @@ The procedure is as follows: #. Start the monitor(s). - For most distributions, services are started via systemd now:: + Start the service with systemd:: sudo systemctl start ceph-mon@node1 - For older Debian/CentOS/RHEL, use sysvinit:: - - sudo /etc/init.d/ceph start mon.node1 - - #. Verify that the monitor is running. :: sudo ceph -s diff --git a/doc/man/8/crushtool.rst b/doc/man/8/crushtool.rst index d48a75ee2e6..82cbfce84b9 100644 --- a/doc/man/8/crushtool.rst +++ b/doc/man/8/crushtool.rst @@ -264,7 +264,7 @@ Reclassify The *reclassify* function allows users to transition from older maps that maintain parallel hierarchies for OSDs of different types to a modern CRUSH map that makes use of the *device class* feature. For more information, -see http://docs.ceph.com/docs/master/rados/operations/crush-map-edits/#migrating-from-a-legacy-ssd-rule-to-device-classes. +see https://docs.ceph.com/en/latest/rados/operations/crush-map-edits/#migrating-from-a-legacy-ssd-rule-to-device-classes. 
Example output from --test ========================== diff --git a/doc/mgr/dashboard.rst b/doc/mgr/dashboard.rst index f6169bdff75..206ea92b3cf 100644 --- a/doc/mgr/dashboard.rst +++ b/doc/mgr/dashboard.rst @@ -7,32 +7,32 @@ Overview -------- The Ceph Dashboard is a built-in web-based Ceph management and monitoring -application to administer various aspects and objects of the cluster. It is -implemented as a :ref:`ceph-manager-daemon` module. +application through which you can inspect and administer various aspects +and resources within the cluster. It is implemented as a :ref:`ceph-manager-daemon` module. The original Ceph Dashboard that was shipped with Ceph Luminous started -out as a simple read-only view into various run-time information and performance -data of a Ceph cluster. It used a very simple architecture to achieve the -original goal. However, there was a growing demand for adding more web-based +out as a simple read-only view into run-time information and performance +data of Ceph clusters. It used a very simple architecture to achieve the +original goal. However, there was growing demand for richer web-based management capabilities, to make it easier to administer Ceph for users that -prefer a WebUI over using the command line. +prefer a WebUI over the CLI. -The new :term:`Ceph Dashboard` module is a replacement of the previous one and -adds a built-in web based monitoring and administration application to the Ceph -Manager. The architecture and functionality of this new module is derived from +The new :term:`Ceph Dashboard` module adds web-based monitoring and +administration to the Ceph Manager. The architecture and functionality of this new +module are derived from and inspired by the `openATTIC Ceph management and monitoring tool -<https://openattic.org/>`_. 
The development is actively driven by the team -behind openATTIC at `SUSE <https://www.suse.com/>`_, with a lot of support from -companies like `Red Hat <https://redhat.com/>`_ and other members of the Ceph +<https://openattic.org/>`_. Development is actively driven by the +openATTIC team at `SUSE <https://www.suse.com/>`_, with support from +companies including `Red Hat <https://redhat.com/>`_ and members of the Ceph community. -The dashboard module's backend code uses the CherryPy framework and a custom -REST API implementation. The WebUI implementation is based on -Angular/TypeScript, merging both functionality from the original dashboard as -well as adding new functionality originally developed for the standalone version -of openATTIC. The Ceph Dashboard module is implemented as a web -application that visualizes information and statistics about the Ceph cluster -using a web server hosted by ``ceph-mgr``. +The dashboard module's backend code uses the CherryPy framework and implements +a custom REST API. The WebUI implementation is based on +Angular/TypeScript and includes both functionality from the original dashboard +and new features originally developed for the standalone version +of openATTIC. The Ceph Dashboard module is implemented as an +application that provides a graphical representation of information and statistics +through a web server hosted by ``ceph-mgr``. Feature Overview ^^^^^^^^^^^^^^^^ @@ -40,65 +40,65 @@ Feature Overview The dashboard provides the following features: * **Multi-User and Role Management**: The dashboard supports multiple user - accounts with different permissions (roles). The user accounts and roles - can be modified on both the command line and via the WebUI. The dashboard - supports various methods to enhance password security, e.g. by enforcing - configurable password complexity rules, forcing users to change their password + accounts with different permissions (roles). 
User accounts and roles + can be managed via both the command line and the WebUI. The dashboard + supports various methods to enhance password security. Password + complexity rules may be configured, requiring users to change their password after the first login or after a configurable time period. See :ref:`dashboard-user-role-management` for details. -* **Single Sign-On (SSO)**: the dashboard supports authentication +* **Single Sign-On (SSO)**: The dashboard supports authentication via an external identity provider using the SAML 2.0 protocol. See :ref:`dashboard-sso-support` for details. * **SSL/TLS support**: All HTTP communication between the web browser and the dashboard is secured via SSL. A self-signed certificate can be created with a built-in command, but it's also possible to import custom certificates signed and issued by a CA. See :ref:`dashboard-ssl-tls-support` for details. -* **Auditing**: the dashboard backend can be configured to log all PUT, POST - and DELETE API requests in the Ceph audit log. See :ref:`dashboard-auditing` +* **Auditing**: The dashboard backend can be configured to log all ``PUT``, ``POST`` + and ``DELETE`` API requests in the Ceph audit log. See :ref:`dashboard-auditing` for instructions on how to enable this feature. -* **Internationalization (I18N)**: the dashboard can be used in different - languages that can be selected at run-time. +* **Internationalization (I18N)**: The language used for dashboard text can be + selected at run-time. -Currently, Ceph Dashboard is capable of monitoring and managing the following -aspects of your Ceph cluster: +The Ceph Dashboard offers the following monitoring and management capabilities: -* **Overall cluster health**: Display overall cluster status, performance - and capacity metrics. 
-* **Embedded Grafana Dashboards**: Ceph Dashboard is capable of embedding - `Grafana`_ dashboards in many locations, to display additional information - and performance metrics gathered by the :ref:`mgr-prometheus`. See +* **Overall cluster health**: Display performance and capacity metrics as well + as cluster status. +* **Embedded Grafana Dashboards**: Ceph Dashboard + `Grafana`_ dashboards may be embedded in external applications and web pages + to surface information and performance metrics gathered by + the :ref:`mgr-prometheus` module. See :ref:`dashboard-grafana` for details on how to configure this functionality. * **Cluster logs**: Display the latest updates to the cluster's event and audit log files. Log entries can be filtered by priority, date or keyword. -* **Hosts**: Display a list of all hosts associated to the cluster, which - disks are attached, which services are running and which version of Ceph is +* **Hosts**: Display a list of all cluster hosts along with their + storage drives, which services are running, and which version of Ceph is installed. * **Performance counters**: Display detailed service-specific statistics for each running service. -* **Monitors**: List all MONs, their quorum status, open sessions. -* **Monitoring**: Enable creation, re-creation, editing and expiration of - Prometheus' silences, list the alerting configuration of Prometheus and all +* **Monitors**: List all Mons, their quorum status, and open sessions. +* **Monitoring**: Enable creation, re-creation, editing, and expiration of + Prometheus' silences, list the alerting configuration and all configured and firing alerts. Show notifications for firing alerts. * **Configuration Editor**: Display all available configuration options, - their description, type and default values and edit the current values. -* **Pools**: List all Ceph pools and their details (e.g. applications, + their descriptions, types, default and currently set values. These may be edited as well. 
+* **Pools**: List Ceph pools and their details (e.g. applications, pg-autoscaling, placement groups, replication size, EC profile, CRUSH rulesets, quotas etc.) -* **OSDs**: List all OSDs, their status and usage statistics as well as +* **OSDs**: List OSDs, their status and usage statistics as well as detailed information like attributes (OSD map), metadata, performance counters and usage histograms for read/write operations. Mark OSDs up/down/out, purge and reweight OSDs, perform scrub operations, modify - various scrub-related configuration options, select different profiles to - adjust the level of backfilling activity. List all disks associated with an + various scrub-related configuration options, select profiles to + adjust the level of backfilling activity. List all drives associated with an OSD. Set and change the device class of an OSD, display and sort OSDs by - device class. Deploy new OSDs on new disks/hosts. + device class. Deploy OSDs on new drives and hosts. * **Device management**: List all hosts known by the orchestrator. List all - disks and their properties attached to a node. Display disk health information - (health prediction and SMART data). Blink enclosure LEDs. + drives attached to a host and their properties. Display drive + health predictions and SMART data. Blink enclosure LEDs. * **iSCSI**: List all hosts that run the TCMU runner service, display all images and their performance characteristics (read/write ops, traffic). - Create, modify and delete iSCSI targets (via ``ceph-iscsi``). Display the - iSCSI gateway status on the landing page and info about active initiators. + Create, modify, and delete iSCSI targets (via ``ceph-iscsi``). Display the + iSCSI gateway status and info about active initiators. See :ref:`dashboard-iscsi-management` for instructions on how to configure this feature. * **RBD**: List all RBD images and their properties (size, objects, features). 
@@ -108,10 +108,10 @@ aspects of your Ceph cluster: images, protect/unprotect these snapshots against modification. Copy or clone snapshots, flatten cloned images. * **RBD mirroring**: Enable and configure RBD mirroring to a remote Ceph server. - Lists all active sync daemons and their status, pools and RBD images including - their synchronization state. -* **CephFS**: List all active file system clients and associated pools, - including their usage statistics. Evict active CephFS clients. Manage CephFS + List active daemons and their status, pools and RBD images including + sync progress. +* **CephFS**: List active file system clients and associated pools, + including usage statistics. Evict active CephFS clients. Manage CephFS quotas and snapshots. Browse a CephFS directory structure. * **Object Gateway**: List all active object gateways and their performance counters. Display and manage (add/edit/delete) object gateway users and their @@ -121,14 +121,14 @@ aspects of your Ceph cluster: * **NFS**: Manage NFS exports of CephFS file systems and RGW S3 buckets via NFS Ganesha. See :ref:`dashboard-nfs-ganesha-management` for details on how to enable this functionality. -* **Ceph Manager Modules**: Enable and disable all Ceph Manager modules, change - the module-specific configuration settings. +* **Ceph Manager Modules**: Enable and disable Ceph Manager modules, manage + module-specific configuration settings. Overview of the Dashboard Landing Page ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Displays overall cluster status, performance and capacity metrics. Gives instant -feedback to changes in the cluster and provides easy access to subpages of the +Displays overall cluster status, performance, and capacity metrics. Shows instant +feedback for changes in the cluster and provides easy access to subpages of the dashboard. .. _dashboard-landing-page-status: @@ -139,56 +139,50 @@ Status * **Cluster Status**: Displays overall cluster health. 
In case of any error it displays a short description of the error and provides a link to the logs. * **Hosts**: Displays the total number of hosts associated to the cluster and - links to the subpage displaying the list and descriptions of all hosts. -* **Monitors**: Displays the total number of all MONs and their quorum status, - open sessions and links to the subpage providing a list and descriptions of all - the MONs. -* **OSDs**: Displays total number of object storage daemons for Ceph (ceph-osds) - and the total number of OSDs running (up), total number of OSDs in the cluster - (in) and total number of OSDs out of the cluster (out). Provides link to the - subpage containing a list of all OSDs and related management actions. -* **Managers**: Displays the total number of active and standby Ceph Manager - daemons (ceph-mgr) running alongside monitor daemons. -* **Object Gateway**: Displays the total number of active object gateways and - provides a link to the subpage displaying a list of all object gateway daemons. -* **Metadata Servers**: Displays total number of active and standby metadata - servers daemons for CephFS (ceph-mds). -* **iSCSI Gateways**: Display the total number of iSCSI gateways available, total - number of active iSCSI gateways (up) and total number of inactive iSCSI - Gateways (down). Provides link to the subpage providing a list of all iSCSI - Gateways. + links to a subpage that lists and describes each. +* **Monitors**: Displays mons and their quorum status and + open sessions. Links to a subpage that lists and describes each. +* **OSDs**: Displays object storage daemons (ceph-osds) and + the numbers of OSDs running (up), in service + (in), and out of the cluster (out). Provides links to + subpages providing a list of all OSDs and related management actions. +* **Managers**: Displays active and standby Ceph Manager + daemons (ceph-mgr). 
+* **Object Gateway**: Displays active object gateways (RGWs) and + provides links to subpages that list all object gateway daemons. +* **Metadata Servers**: Displays active and standby CephFS metadata + service daemons (ceph-mds). +* **iSCSI Gateways**: Display iSCSI gateways available, + active (up), and inactive (down). Provides a link to a subpage + showing a list of all iSCSI Gateways. .. _dashboard-landing-page-capacity: Capacity """""""" -* **Raw Capacity**: Displays the total physical capacity used out of the total - physical capacity provided by the ceph storage nodes (OSDs). -* **Objects**: Displays the number of objects in use and the status of objects - including the percentage of healthy, misplaced, degraded and unfound status of - objects. An object is the smallest unit of data storage in Ceph cluster. -* **PG Status**: Displays the total number of placement groups in use and the - status of placement groups, including the percentage of clean, working, - warning and unknown status of placement groups. -* **Pools**: Displays the total number of pools and links to the subpage that - lists all Ceph pools and their details. -* **PGs per OSD**: Displays the number of placement groups per object storage - daemons. +* **Raw Capacity**: Displays the capacity used out of the total + physical capacity provided by storage nodes (OSDs). +* **Objects**: Displays the number and status of RADOS objects + including the percentages of healthy, misplaced, degraded, and unfound + objects. +* **PG Status**: Displays the total number of placement groups and + their status, including the percentage clean, working, + warning, and unknown. +* **Pools**: Displays pools and links to a subpage listing details. +* **PGs per OSD**: Displays the number of placement groups assigned to + object storage daemons. .. 
_dashboard-landing-page-performance: Performance """"""""""" -* **Client READ/Write**: Displays an overview of rate the read/write operations, - the total number of input and output operations performed by the cluster on - storage devices on the client side. -* **Client Throughput**: Displays the data transfer rate to and from Ceph clients. -* **Recovery throughput**: Displays rate of moving the data back to the cluster - if the cluster is recovering to parity when the hard drive is lost. -* **Scrubbing**: Displays whether Ceph is comparing the data to other pieces of - data to ensure there is no data corruption on the cluster's storage devices. +* **Client READ/Write**: Displays an overview of + client input and output operations. +* **Client Throughput**: Displays the data transfer rates to and from Ceph clients. +* **Recovery throughput**: Displays rate of cluster healing and balancing operations. +* **Scrubbing**: Displays light and deep scrub status. Supported Browsers ^^^^^^^^^^^^^^^^^^ @@ -207,19 +201,19 @@ browsers: | `Firefox ESR <https://www.mozilla.org/firefox/enterprise/>`_ | latest major version | +---------------------------------------------------------------+---------------------------------------+ -While Ceph Dashboard might work in older browsers, we cannot guarantee it and -recommend you to update your browser to the latest version. +While Ceph Dashboard might work in older browsers, we cannot guarantee compatibility and +recommend keeping your browser up to date. Enabling -------- If you have installed ``ceph-mgr-dashboard`` from distribution packages, the -package management system should have taken care of installing all the required +package management system should take care of installing all required dependencies. 
-If you're installing Ceph from source and want to start the dashboard from your +If you're building Ceph from source and want to start the dashboard from your development environment, please see the files ``README.rst`` and ``HACKING.rst`` -in directory ``src/pybind/mgr/dashboard`` of the source code. +in the source directory ``src/pybind/mgr/dashboard``. Within a running Ceph cluster, the Ceph Dashboard is enabled with:: @@ -236,15 +230,15 @@ SSL/TLS Support All HTTP connections to the dashboard are secured with SSL/TLS by default. To get the dashboard up and running quickly, you can generate and install a -self-signed certificate using the following built-in command:: +self-signed certificate:: $ ceph dashboard create-self-signed-cert -Note that most web browsers will complain about such self-signed certificates +Note that most web browsers will complain about self-signed certificates and require explicit confirmation before establishing a secure connection to the dashboard. -To properly secure a deployment and to remove the certificate warning, a +To properly secure a deployment and to remove the warning, a certificate that is issued by a certificate authority (CA) should be used. For example, a key pair can be generated with a command similar to:: @@ -254,12 +248,12 @@ For example, a key pair can be generated with a command similar to:: -keyout dashboard.key -out dashboard.crt -extensions v3_ca The ``dashboard.crt`` file should then be signed by a CA. 
Once that is done, you -can enable it for all Ceph manager instances by running the following commands:: +can enable it for Ceph manager instances by running the following commands:: $ ceph dashboard set-ssl-certificate -i dashboard.crt $ ceph dashboard set-ssl-certificate-key -i dashboard.key -If different certificates are desired for each manager instance for some reason, +If unique certificates are desired for each manager instance, the name of the instance can be included as follows (where ``$name`` is the name of the ``ceph-mgr`` instance, usually the hostname):: @@ -282,7 +276,7 @@ wanted or required. See :ref:`dashboard-proxy-configuration` for more details. .. note:: - You need to restart the Ceph manager processes manually after changing the SSL + You must restart Ceph manager processes after changing the SSL certificate and key. This can be accomplished by either running ``ceph mgr fail mgr`` or by disabling and re-enabling the dashboard module (which also triggers the manager to respawn itself):: @@ -295,7 +289,7 @@ wanted or required. See :ref:`dashboard-proxy-configuration` for more details. Host Name and Port ^^^^^^^^^^^^^^^^^^ -Like most web applications, dashboard binds to a TCP/IP address and TCP port. +Like most web applications, the dashboard binds to a TCP/IP address and TCP port. By default, the ``ceph-mgr`` daemon hosting the dashboard (i.e., the currently active manager) will bind to TCP port 8443 or 8080 when SSL is disabled. @@ -310,7 +304,7 @@ cluster-wide level (so they apply to all manager instances) as follows:: $ ceph config set mgr mgr/dashboard/server_port $PORT $ ceph config set mgr mgr/dashboard/ssl_server_port $PORT -Since each ``ceph-mgr`` hosts its own instance of dashboard, it may also be +Since each ``ceph-mgr`` hosts its own instance of the dashboard, it may be necessary to configure them separately. 
The IP address and port for a specific manager instance can be changed with the following commands:: @@ -318,8 +312,7 @@ manager instance can be changed with the following commands:: $ ceph config set mgr mgr/dashboard/$name/server_port $PORT $ ceph config set mgr mgr/dashboard/$name/ssl_server_port $PORT -Replace ``$name`` with the ID of the ceph-mgr instance hosting the dashboard web -app. +Replace ``$name`` with the ID of the ceph-mgr instance hosting the dashboard. .. note:: @@ -347,8 +340,8 @@ You can now access the dashboard using your (JavaScript-enabled) web browser, by pointing it to any of the host names or IP addresses and the selected TCP port where a manager instance is running: e.g., ``http(s)://<$IP>:<$PORT>/``. -You should then be greeted by the dashboard login page, requesting your -previously defined username and password. +The dashboard page displays and requests a previously defined username and +password. .. _dashboard-enabling-object-gateway: @@ -357,19 +350,14 @@ Enabling the Object Gateway Management Frontend To use the Object Gateway management functionality of the dashboard, you will need to provide the login credentials of a user with the ``system`` flag -enabled. - -If you do not have a user which shall be used for providing those credentials, -you will also need to create one:: +enabled. If you do not have a ``system`` user already, you must create one:: $ radosgw-admin user create --uid=<user_id> --display-name=<display_name> \ --system -Take note of the keys ``access_key`` and ``secret_key`` in the output of this -command. +Take note of the keys ``access_key`` and ``secret_key`` in the output. 
-The credentials of an existing user can also be obtained by using -`radosgw-admin`:: +To obtain the credentials of an existing user via `radosgw-admin`:: $ radosgw-admin user info --uid=<user_id> @@ -378,10 +366,10 @@ Finally, provide the credentials to the dashboard:: $ ceph dashboard set-rgw-api-access-key <access_key> $ ceph dashboard set-rgw-api-secret-key <secret_key> -In a typical default configuration with a single RGW endpoint, this is all you +In a simple configuration with a single RGW endpoint, this is all you have to do to get the Object Gateway management functionality working. The -dashboard will try to automatically determine the host and port of the Object -Gateway by obtaining this information from the Ceph Manager's service map. +dashboard will try to automatically determine the host and port +from the Ceph Manager's service map. If multiple zones are used, it will automatically determine the host within the master zone group and master zone. This should be sufficient for most setups, @@ -397,7 +385,7 @@ exist and you may find yourself in the situation that you have to use them:: $ ceph dashboard set-rgw-api-admin-resource <admin_resource> $ ceph dashboard set-rgw-api-user-id <user_id> -If you are using a self-signed certificate in your Object Gateway setup, then +If you are using a self-signed certificate in your Object Gateway setup, you should disable certificate verification in the dashboard to avoid refused connections, e.g. 
caused by certificates signed by unknown CA or not matching the host name:: @@ -405,7 +393,7 @@ the host name:: $ ceph dashboard set-rgw-api-ssl-verify False If the Object Gateway takes too long to process requests and the dashboard runs -into timeouts, then you can set the timeout value to your needs:: +into timeouts, you can set the timeout value to your needs:: $ ceph dashboard set-rest-requests-timeout <seconds> @@ -417,7 +405,7 @@ Enabling iSCSI Management ^^^^^^^^^^^^^^^^^^^^^^^^^ The Ceph Dashboard can manage iSCSI targets using the REST API provided by the -`rbd-target-api` service of the :ref:`ceph-iscsi`. Please make sure that it's +``rbd-target-api`` service of the :ref:`ceph-iscsi`. Please make sure that it is installed and enabled on the iSCSI gateways. .. note:: @@ -425,10 +413,10 @@ installed and enabled on the iSCSI gateways. The iSCSI management functionality of Ceph Dashboard depends on the latest version 3 of the `ceph-iscsi <https://github.com/ceph/ceph-iscsi>`_ project. Make sure that your operating system provides the correct version, otherwise - the dashboard won't enable the management features. + the dashboard will not enable the management features. -If ceph-iscsi REST API is configured in HTTPS mode and its using a self-signed -certificate, then you need to configure the dashboard to avoid SSL certificate +If the ``ceph-iscsi`` REST API is configured in HTTPS mode and it is using a self-signed +certificate, you need to configure the dashboard to avoid SSL certificate verification when accessing ceph-iscsi API. To disable API SSL verification run the following command:: @@ -447,12 +435,12 @@ The available iSCSI gateways must be defined using the following commands:: Enabling the Embedding of Grafana Dashboards ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -`Grafana`_ requires data from `Prometheus <https://prometheus.io/>`_. Although +`Grafana`_ pulls data from `Prometheus <https://prometheus.io/>`_.
Although Grafana can use other data sources, the Grafana dashboards we provide contain queries that are specific to Prometheus. Our Grafana dashboards therefore -require Prometheus as the data source. The Ceph :ref:`mgr-prometheus` also only -exports its data in the Prometheus' common format. The Grafana dashboards rely -on metric names from the Prometheus module and `Node exporter +require Prometheus as the data source. The Ceph :ref:`mgr-prometheus` +module exports its data in the Prometheus exposition format. These Grafana +dashboards rely on metric names from the Prometheus module and `Node exporter <https://prometheus.io/docs/guides/node-exporter/>`_. The Node exporter is a separate application that provides machine metrics. @@ -476,16 +464,16 @@ Installation and Configuration using cephadm """""""""""""""""""""""""""""""""""""""""""" Grafana and Prometheus can be installed using :ref:`cephadm`. They will -automatically be configured by `cephadm`. Please see +automatically be configured by ``cephadm``. Please see :ref:`mgr-cephadm-monitoring` documentation for more details on how to use -cephadm for installing and configuring Prometheus and Grafana. +``cephadm`` for installing and configuring Prometheus and Grafana. Manual Installation and Configuration """"""""""""""""""""""""""""""""""""" The following process describes how to configure Grafana and Prometheus -manually. After you have installed Prometheus, Grafana and the Node exporter -on your preferred hosts, proceed with the following steps. +manually. After you have installed Prometheus, Grafana, and the Node exporter +on appropriate hosts, proceed with the following steps. #. Enable the Ceph Exporter which comes as Ceph Manager module by running:: @@ -512,21 +500,21 @@ on your preferred hosts, proceed with the following steps. .. 
note:: - Please note that in the aforementioned example, Prometheus is configured + Please note that in the above example, Prometheus is configured to scrape data from itself (port 9090), the Ceph manager module - `prometheus` (port 9283), which exports Ceph internal data and the Node - exporter (port 9100), which provides metrics of a machine. + `prometheus` (port 9283), which exports Ceph internal data, and the Node + Exporter (port 9100), which provides OS and hardware metrics for each host. Depending on your configuration, you may need to change the hostname in - this configuration or add additional configuration entries for the Node - exporter. It is unlikely that you will need to change the provided ports. + this configuration or add additional configuration entries for the Node + Exporter. It is unlikely that you will need to change the default TCP ports. Moreover, you don't *need* to have more than one target for Ceph specific data, provided by the `prometheus` mgr module. But it is recommended to configure Prometheus to scrape Ceph specific data from all existing Ceph - managers. This enables a built-in high availability mechanism, where - services run on a manager will be restarted automatically on a second - manager instance if one Ceph Manager goes down. + managers. This enables a built-in high availability mechanism, so that + services run on a manager host will be restarted automatically on a different + manager host if one Ceph Manager goes down. #. Add Prometheus as data source to Grafana `using the Grafana Web UI <https://grafana.com/docs/grafana/latest/features/datasources/add-a-data-source/>`_. @@ -536,21 +524,23 @@ on your preferred hosts, proceed with the following steps. grafana-cli plugins install vonage-status-panel grafana-cli plugins install grafana-piechart-panel -#. Add the Dashboards to Grafana: +#. Add Dashboards to Grafana: Dashboards can be added to Grafana by importing dashboard JSON files.
Use the following command to download the JSON files:: wget https://raw.githubusercontent.com/ceph/ceph/master/monitoring/grafana/dashboards/<Dashboard-name>.json - You can find all the dashboard JSON files `here <https://github.com/ceph/ceph/tree/ + You can find various dashboard JSON files `here <https://github.com/ceph/ceph/tree/ master/monitoring/grafana/dashboards>`_ . For Example, for ceph-cluster overview you can use:: wget https://raw.githubusercontent.com/ceph/ceph/master/monitoring/grafana/dashboards/ceph-cluster.json -#. Configure Grafana in ``/etc/grafana/grafana.ini`` to enable anonymous mode:: + You may also author your own dashboards. + +#. Configure anonymous mode in ``/etc/grafana/grafana.ini``:: [auth.anonymous] enabled = true @@ -558,8 +548,8 @@ on your preferred hosts, proceed with the following steps. org_role = Viewer In newer versions of Grafana (starting with 6.2.0-beta1) a new setting named - ``allow_embedding`` has been introduced. This setting needs to be explicitly - set to ``true`` for the Grafana integration in Ceph Dashboard to work, as its + ``allow_embedding`` has been introduced. This setting must be explicitly + set to ``true`` for the Grafana integration in Ceph Dashboard to work, as the default is ``false``. :: @@ -570,10 +560,10 @@ on your preferred hosts, proceed with the following steps. Enabling RBD-Image monitoring """"""""""""""""""""""""""""" -Due to performance reasons, monitoring of RBD images is disabled by default. For -more information please see :ref:`prometheus-rbd-io-statistics`. If disabled, -the overview and details dashboards will stay empty in Grafana and the metrics -will not be visible in Prometheus. +Monitoring of RBD images is disabled by default, as it can significantly impact +performance. For more information please see :ref:`prometheus-rbd-io-statistics`. +When disabled, the overview and details dashboards will be empty in Grafana and +metrics will not be visible in Prometheus. 
Configuring Dashboard """"""""""""""""""""" @@ -581,7 +571,7 @@ Configuring Dashboard After you have set up Grafana and Prometheus, you will need to configure the connection information that the Ceph Dashboard will use to access Grafana. -You need to tell the dashboard on which url Grafana instance is running/deployed:: +Tell the dashboard the URL for the deployed Grafana instance:: $ ceph dashboard set-grafana-api-url <grafana-server-url> # default: '' @@ -589,21 +579,29 @@ The format of url is : `<protocol>:<IP-address>:<port>` .. note:: - Ceph Dashboard embeds the Grafana dashboards via ``iframe`` HTML elements. + The Ceph Dashboard embeds Grafana dashboards via ``iframe`` HTML elements. If Grafana is configured without SSL/TLS support, most browsers will block the - embedding of insecure content into a secured web page, if the SSL support in - the dashboard has been enabled (which is the default configuration). If you + embedding of insecure content if SSL support is + enabled for the dashboard (which is the default). If you can't see the embedded Grafana dashboards after enabling them as outlined above, check your browser's documentation on how to unblock mixed content. Alternatively, consider enabling SSL/TLS support in Grafana. -If you are using a self-signed certificate in your Grafana setup, then you should +If you are using a self-signed certificate for Grafana, disable certificate verification in the dashboard to avoid refused connections, -e.g. caused by certificates signed by unknown CA or not matching the host name:: +which can be a result of certificates signed by an unknown CA or that do not +match the host name:: $ ceph dashboard set-grafana-api-ssl-verify False -You can directly access Grafana Instance as well to monitor your cluster. +You can also access Grafana directly to monitor your cluster. + +.. note:: + + Ceph Dashboard configuration information can also be unset.
For example, to + clear the Grafana API URL we configured above:: + + $ ceph dashboard reset-grafana-api-url .. _dashboard-sso-support: @@ -611,10 +609,10 @@ Enabling Single Sign-On (SSO) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The Ceph Dashboard supports external authentication of users via the -`SAML 2.0 <https://en.wikipedia.org/wiki/SAML_2.0>`_ protocol. You need to create -the user accounts and associate them with the desired roles first, as authorization -is still performed by the Dashboard. However, the authentication process can be -performed by an existing Identity Provider (IdP). +`SAML 2.0 <https://en.wikipedia.org/wiki/SAML_2.0>`_ protocol. You need to +first create user accounts and associate them with desired roles, as +authorization is performed by the Dashboard. However, the authentication +process can be performed by an existing Identity Provider (IdP). .. note:: @@ -664,15 +662,15 @@ To enable SSO:: Enabling Prometheus Alerting ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Using Prometheus for monitoring, you have to define `alerting rules +To use Prometheus for alerting you must define `alerting rules <https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules>`_. -To manage them you need to use the `Alertmanager +These are managed by the `Alertmanager <https://prometheus.io/docs/alerting/alertmanager>`_. -If you are not using the Alertmanager yet, please `install it -<https://github.com/prometheus/alertmanager#install>`_ as it's mandatory in -order to receive and manage alerts from Prometheus. +If you are not yet using the Alertmanager, `install it +<https://github.com/prometheus/alertmanager#install>`_ as it receives +and manages alerts from Prometheus. -The Alertmanager capabilities can be consumed by the dashboard in three different +Alertmanager capabilities can be consumed by the dashboard in three different ways: #. Use the notification receiver of the dashboard. @@ -681,7 +679,7 @@ ways: #. Use both sources simultaneously. 
-All three methods are going to notify you about alerts. You won't be notified +All three methods notify you about alerts. You won't be notified twice if you use both sources, but you need to consume at least the Alertmanager API in order to manage silences. @@ -704,14 +702,14 @@ in order to manage silences. - url: '<url-to-dashboard>/api/prometheus_receiver' - Please make sure that the Alertmanager considers your SSL certificate in terms + Ensure that the Alertmanager considers your SSL certificate in terms of the dashboard as valid. For more information about the correct configuration checkout the `<http_config> documentation <https://prometheus.io/docs/alerting/configuration/#%3Chttp_config%3E>`_. 2. Use the API of Prometheus and the Alertmanager - This allows you to manage alerts and silences. This will enable the "Active + This allows you to manage alerts and silences and will enable the "Active Alerts", "All Alerts" as well as the "Silences" tabs in the "Monitoring" section of the "Cluster" menu entry. @@ -752,18 +750,18 @@ in order to manage silences. $ ceph dashboard set-prometheus-api-host 'http://localhost:9090' - After setting up the hosts, you have to refresh the dashboard in your browser window. + After setting up the hosts, refresh your browser's dashboard window or tab. 3. Use both methods - The different behaviors of both methods are configured in a way that they - should not disturb each other through annoying duplicated notifications - popping up. + The behaviors of both methods are configured in a way that they + should not disturb each other through annoying duplicated notifications + popping up. If you are using a self-signed certificate in your Prometheus or your Alertmanager setup, you should disable certificate verification in the -dashboard to avoid refused connections, e.g. caused by certificates signed by -unknown CA or not matching the host name.
+dashboard to avoid refused connections caused by certificates signed by +an unknown CA or that do not match the host name. - For Prometheus:: @@ -781,8 +779,8 @@ User and Role Management Password Policy ^^^^^^^^^^^^^^^ -By default the password policy feature is enabled including the following -checks: +By default the password policy feature is enabled, which includes the +following checks: - Is the password longer than N characters? - Are the old and new password the same? @@ -791,7 +789,7 @@ The password policy feature can be switched on or off completely:: $ ceph dashboard set-pwd-policy-enabled <true|false> -The following individual checks can be switched on or off:: +The following individual checks can also be switched on or off:: $ ceph dashboard set-pwd-policy-check-length-enabled <true|false> $ ceph dashboard set-pwd-policy-check-oldpwd-enabled <true|false> @@ -801,18 +799,18 @@ The following individual checks can be switched on or off:: $ ceph dashboard set-pwd-policy-check-sequential-chars-enabled <true|false> $ ceph dashboard set-pwd-policy-check-repetitive-chars-enabled <true|false> -Additionally the following options are available to configure the password -policy behaviour. +Additionally the following options are available to configure password +policy. -- The minimum password length (defaults to 8):: +- Minimum password length (defaults to 8):: $ ceph dashboard set-pwd-policy-min-length <N> -- The minimum password complexity (defaults to 10):: +- Minimum password complexity (defaults to 10):: $ ceph dashboard set-pwd-policy-min-complexity <N> - The password complexity is calculated by classifying each character in + Password complexity is calculated by classifying each character in the password. The complexity count starts by 0. A character is rated by the following rules in the given order. @@ -831,15 +829,15 @@ policy behaviour. User Accounts ^^^^^^^^^^^^^ -Ceph Dashboard supports managing multiple user accounts. 
Each user account +The Ceph Dashboard supports multiple user accounts. Each user account consists of a username, a password (stored in encrypted form using ``bcrypt``), an optional name, and an optional email address. -If a new user is created via Web UI, it is possible to set an option that this +If a new user is created via the Web UI, it is possible to set an option that the user must assign a new password when they log in for the first time. -User accounts are stored in MON's configuration database, and are globally -shared across all ceph-mgr instances. +User accounts are stored in the monitors' configuration database, and are +available to all ``ceph-mgr`` instances. We provide a set of CLI commands to manage user accounts: @@ -851,8 +849,8 @@ We provide a set of CLI commands to manage user accounts: $ ceph dashboard ac-user-create [--enabled] [--force-password] [--pwd_update_required] <username> [<password>] [<rolename>] [<name>] [<email>] [<pwd_expiration_date>] - To bypass the password policy checks use the `force-password` option. - Use the option `pwd_update_required` so that a newly created user has + To bypass password policy checks use the `force-password` option. + Add the option `pwd_update_required` so that a newly created user has to change their password after the first login. - *Delete User*:: @@ -885,8 +883,8 @@ We provide a set of CLI commands to manage user accounts: User Roles and Permissions ^^^^^^^^^^^^^^^^^^^^^^^^^^ -User accounts are also associated with a set of roles that define which -dashboard functionality can be accessed by the user. +User accounts are associated with a set of roles that define which +dashboard functionality can be accessed. The Dashboard functionality/modules are grouped within a *security scope*. Security scopes are predefined and static. The current available security @@ -898,13 +896,13 @@ scopes are: configuration options. - **pool**: includes all features related to pool management. 
- **osd**: includes all features related to OSD management. -- **monitor**: includes all features related to Monitor management. +- **monitor**: includes all features related to monitor management. - **rbd-image**: includes all features related to RBD image management. -- **rbd-mirroring**: includes all features related to RBD-Mirroring +- **rbd-mirroring**: includes all features related to RBD mirroring management. - **iscsi**: includes all features related to iSCSI management. -- **rgw**: includes all features related to Rados Gateway management. +- **rgw**: includes all features related to RADOS Gateway (RGW) management. - **cephfs**: includes all features related to CephFS management. - **manager**: include all features related to Ceph Manager management. @@ -921,7 +919,7 @@ A *role* specifies a set of mappings between a *security scope* and a set of - **update** - **delete** -See below for an example of a role specification based on a Python dictionary:: +See below for an example of a role specification, in the form of a Python dictionary:: # example of a role { @@ -937,30 +935,29 @@ The above role dictates that a user has *read* and *create* permissions for features related to pool management, and has full permissions for features related to RBD image management. -The Dashboard already provides a set of predefined roles that we call -*system roles*, and can be used right away in a fresh Ceph Dashboard +The Dashboard provides a set of predefined roles that we call +*system roles*, which can be used right away by a fresh Ceph Dashboard installation. The list of system roles are: -- **administrator**: provides full permissions for all security scopes. -- **read-only**: provides *read* permission for all security scopes except - the dashboard settings. -- **block-manager**: provides full permissions for *rbd-image*, +- **administrator**: allows full permissions for all security scopes. 
+- **read-only**: allows *read* permission for all security scopes except + dashboard settings. +- **block-manager**: allows full permissions for *rbd-image*, *rbd-mirroring*, and *iscsi* scopes. -- **rgw-manager**: provides full permissions for the *rgw* scope -- **cluster-manager**: provides full permissions for the *hosts*, *osd*, +- **rgw-manager**: allows full permissions for the *rgw* scope +- **cluster-manager**: allows full permissions for the *hosts*, *osd*, *monitor*, *manager*, and *config-opt* scopes. -- **pool-manager**: provides full permissions for the *pool* scope. -- **cephfs-manager**: provides full permissions for the *cephfs* scope. +- **pool-manager**: allows full permissions for the *pool* scope. +- **cephfs-manager**: allows full permissions for the *cephfs* scope. -The list of currently available roles can be retrieved by the following -command:: +The list of available roles can be retrieved with the following command:: $ ceph dashboard ac-role-show [<rolename>] -It is also possible to create new roles using CLI commands. The available -commands to manage roles are the following: +You can also use the CLI to create new roles. The available commands are the +following: - *Create Role*:: @@ -978,7 +975,7 @@ commands to manage roles are the following: $ ceph dashboard ac-role-del-scope-perms <rolename> <scopename> -To associate roles to users, the following CLI commands are available: +To assign roles to users, the following commands are available: - *Set User Roles*:: @@ -996,9 +993,9 @@ To associate roles to users, the following CLI commands are available: Example of User and Custom Role Creation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In this section we show a full example of the commands that need to be used -in order to create a user account, that should be able to manage RBD images, -view and create Ceph pools, and have read-only access to any other scopes. 
+In this section we show a complete example of the commands that +create a user account that can manage RBD images, view and create Ceph pools, +and has read-only access to other scopes. 1. *Create the user*:: @@ -1019,22 +1016,22 @@ view and create Ceph pools, and have read-only access to any other scopes. Proxy Configuration ------------------- -In a Ceph cluster with multiple ceph-mgr instances, only the dashboard running -on the currently active ceph-mgr daemon will serve incoming requests. Accessing -the dashboard's TCP port on any of the other ceph-mgr instances that are -currently on standby will perform a HTTP redirect (303) to the currently active -manager's dashboard URL. This way, you can point your browser to any of the -ceph-mgr instances in order to access the dashboard. +In a Ceph cluster with multiple ``ceph-mgr`` instances, only the dashboard +running on the currently active ``ceph-mgr`` daemon will serve incoming requests. +Connections to the dashboard's TCP port on standby ``ceph-mgr`` instances +will receive an HTTP redirect (303) to the active manager's dashboard URL. +This enables you to point your browser to any ``ceph-mgr`` instance in +order to access the dashboard. If you want to establish a fixed URL to reach the dashboard or if you don't want to allow direct connections to the manager nodes, you could set up a proxy that -automatically forwards incoming requests to the currently active ceph-mgr +automatically forwards incoming requests to the active ``ceph-mgr`` instance. Configuring a URL Prefix ^^^^^^^^^^^^^^^^^^^^^^^^ -If you are accessing the dashboard via a reverse proxy configuration, +If you are accessing the dashboard via a reverse proxy, you may wish to service it under a URL prefix. 
To get the dashboard to use hyperlinks that include your prefix, you can set the ``url_prefix`` setting: @@ -1049,21 +1046,21 @@ Disable the redirection ^^^^^^^^^^^^^^^^^^^^^^^ If the dashboard is behind a load-balancing proxy like `HAProxy <https://www.haproxy.org/>`_ -you might want to disable the redirection behaviour to prevent situations that -internal (unresolvable) URL's are published to the frontend client. Use the -following command to get the dashboard to respond with a HTTP error (500 by default) +you might want to disable redirection to prevent situations in which +internal (unresolvable) URLs are published to the frontend client. Use the +following command to get the dashboard to respond with an HTTP error (500 by default) instead of redirecting to the active dashboard:: $ ceph config set mgr mgr/dashboard/standby_behaviour "error" -To reset the setting to the default redirection behaviour, use the following command:: +To reset the setting to default redirection, use the following command:: $ ceph config set mgr mgr/dashboard/standby_behaviour "redirect" Configure the error status code ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -When the redirection behaviour is disabled, then you want to customize the HTTP status +When redirection is disabled, you may want to customize the HTTP status code of standby dashboards. To do so you need to run the command:: $ ceph config set mgr mgr/dashboard/standby_error_status_code 503 @@ -1071,16 +1068,16 @@ code of standby dashboards. To do so you need to run the command:: HAProxy example configuration ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Below you will find an example configuration for SSL/TLS pass through using +Below you will find an example configuration for SSL/TLS passthrough using `HAProxy <https://www.haproxy.org/>`_. -Please note that the configuration works under the following conditions. +Please note that this configuration works under the following conditions. 
If the dashboard fails over, the front-end client might receive a HTTP redirect (303) response and will be redirected to an unresolvable host. This happens when -the failover occurs during two HAProxy health checks. In this situation the +failover occurs between two HAProxy health checks. In this situation the previously active dashboard node will now respond with a 303 which points to -the new active node. To prevent that situation you should consider to disable -the redirection behaviour on standby nodes. +the new active node. To prevent that situation you should consider disabling +redirection on standby nodes. :: @@ -1116,7 +1113,7 @@ the redirection behaviour on standby nodes. Auditing API Requests --------------------- -The REST API is capable of logging PUT, POST and DELETE requests to the Ceph +The REST API can log PUT, POST and DELETE requests to the Ceph audit log. This feature is disabled by default, but can be enabled with the following command:: @@ -1143,8 +1140,8 @@ A log entry may look like this:: NFS-Ganesha Management ---------------------- -Ceph Dashboard can manage `NFS Ganesha <http://nfs-ganesha.github.io/>`_ exports that use -CephFS or RadosGW as their backstore. +The Ceph Dashboard can manage `NFS Ganesha <http://nfs-ganesha.github.io/>`_ exports that use +CephFS or RGW as their backstore. To enable this feature in Ceph Dashboard there are some assumptions that need to be met regarding the way NFS-Ganesha services are configured. @@ -1152,7 +1149,7 @@ to be met regarding the way NFS-Ganesha services are configured. The dashboard manages NFS-Ganesha config files stored in RADOS objects on the Ceph Cluster. NFS-Ganesha must store part of their configuration in the Ceph cluster. -These configuration files must follow some conventions. +These configuration files follow the below conventions. 
Each export block must be stored in its own RADOS object named ``export-<id>``, where ``<id>`` must match the ``Export_ID`` attribute of the export configuration. Then, for each NFS-Ganesha service daemon there should @@ -1171,9 +1168,9 @@ same RADOS pool/namespace. Configuring NFS-Ganesha in the Dashboard ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To enable the management of NFS-Ganesha exports in Ceph Dashboard, we only -need to tell the Dashboard, in which RADOS pool and namespace the -configuration objects are stored. Then, Ceph Dashboard can access the objects +To enable management of NFS-Ganesha exports in the Ceph Dashboard, we +need to tell the Dashboard the RADOS pool and namespace in which +configuration objects are stored. The Ceph Dashboard can then access them by following the naming convention described above. The Dashboard command to configure the NFS-Ganesha configuration objects @@ -1181,12 +1178,12 @@ location is:: $ ceph dashboard set-ganesha-clusters-rados-pool-namespace <pool_name>[/<namespace>] -After running the above command, Ceph Dashboard is able to find the NFS-Ganesha -configuration objects and we can start manage the exports through the Web UI. +After running the above command, the Ceph Dashboard is able to find the NFS-Ganesha +configuration objects and we can manage exports through the Web UI. .. note:: - A separate pool for the NFS shares should be used. Otherwise it can cause the + A dedicated pool for the NFS shares should be used. Otherwise it can cause the `known issue <https://tracker.ceph.com/issues/46176>`_ with listing of shares if the NFS objects are stored together with a lot of other objects in a single pool. @@ -1195,16 +1192,16 @@ configuration objects and we can start manage the exports through the Web UI. Support for Multiple NFS-Ganesha Clusters ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Ceph Dashboard also supports the management of NFS-Ganesha exports belonging -to different NFS-Ganesha clusters. 
An NFS-Ganesha cluster is a group of -NFS-Ganesha service daemons sharing the same exports. Different NFS-Ganesha -clusters are independent and don't share the exports configuration between each +The Ceph Dashboard also supports management of NFS-Ganesha exports belonging +to other NFS-Ganesha clusters. An NFS-Ganesha cluster is a group of +NFS-Ganesha service daemons sharing the same exports. NFS-Ganesha +clusters are independent and don't share the exports configuration among each other. Each NFS-Ganesha cluster should store its configuration objects in a -different RADOS pool/namespace to isolate the configuration from each other. +unique RADOS pool/namespace to isolate the configuration. -To specify the locations of the configuration of each NFS-Ganesha cluster we +To specify the configuration location of each NFS-Ganesha cluster we can use the same command as above but with a different value pattern:: $ ceph dashboard set-ganesha-clusters-rados-pool-namespace <cluster_id>:<pool_name>[/<namespace>](,<cluster_id>:<pool_name>[/<namespace>])* @@ -1213,14 +1210,14 @@ The ``<cluster_id>`` is an arbitrary string that should uniquely identify the NFS-Ganesha cluster. When configuring the Ceph Dashboard with multiple NFS-Ganesha clusters, the -Web UI will automatically allow to choose to which cluster an export belongs. +Web UI will allow you to choose to which cluster an export belongs. Support for NFS-Ganesha Clusters Deployed by the Orchestrator ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Ceph Dashboard can be used to manage NFS-Ganesha clusters deployed by the -Orchestrator. It can detect the clusters automatically. For more details +The Ceph Dashboard can be used to manage NFS-Ganesha clusters deployed by the +Orchestrator and will detect them automatically. For more details on deploying NFS-Ganesha clusters with the Orchestrator, please see :ref:`orchestrator-cli-stateless-services`.
Or particularly, see :ref:`deploy-cephadm-nfs-ganesha` for how to deploy NFS-Ganesha clusters with the Cephadm backend. @@ -1229,7 +1226,7 @@ NFS-Ganesha clusters with the Cephadm backend. Plug-ins -------- -Dashboard Plug-ins extend the functionality of the dashboard in a modular +Plug-ins extend the functionality of the Ceph Dashboard in a modular and loosely coupled fashion. .. _Grafana: https://grafana.com/ @@ -1253,15 +1250,15 @@ The command returns the URL where the Ceph Dashboard is located: ``https://<host .. note:: - Many Ceph command line tools return results in JSON format. You may have to install - the `jq <https://stedolan.github.io/jq>`_ command-line JSON processor utility on - your operating system beforehand. + Many Ceph tools return results in JSON format. We suggest that + you install the `jq <https://stedolan.github.io/jq>`_ command-line + utility to facilitate working with JSON data. Accessing the Dashboard ^^^^^^^^^^^^^^^^^^^^^^^ -If you are unable to access the Ceph Dashboard, run through the following +If you are unable to access the Ceph Dashboard, run the following commands: #. Verify the Ceph Dashboard module is enabled:: @@ -1281,10 +1278,9 @@ commands: $ ceph mgr module enable dashboard -#. Check the Ceph Dashboard and/or mgr log file for any errors. The exact - location of the log files depends on the Ceph configuration. +#. Check the Ceph Dashboard and/or ``ceph-mgr`` log files for any errors. - * Check if mgr log messages are written to a file by:: + * Check if ``ceph-mgr`` log messages are written to a file by:: $ ceph config get mgr log_to_file true @@ -1309,7 +1305,7 @@ commands: $ ceph config-key get mgr/dashboard/key - * If it doesn't, run the following command to generate a self-signed + * If it doesn't return ``true``, run the following command to generate a self-signed certificate or follow the instructions outlined in :ref:`dashboard-ssl-tls-support`:: @@ -1328,7 +1324,7 @@ error, run through the procedural checks below: #.
Check that your user credentials are correct. If you are seeing the notification message above when trying to log into the Ceph Dashboard, it is likely you are using the wrong credentials. Double check your username - and password, and ensure the caps lock key is not enabled by accident. + and password, and ensure that your keyboard's caps lock is not enabled by accident. #. If your user credentials are correct, but you are experiencing the same error, check that the user account exists:: @@ -1359,8 +1355,8 @@ A Dashboard Feature is Not Working When an error occurs on the backend, you will usually receive an error notification on the frontend. Run through the following scenarios to debug. -#. Check the Ceph Dashboard/mgr logfile(s) for any errors. These can be - identified by searching for keywords, such as *500 Internal Server Error*, +#. Check the Ceph Dashboard and ``ceph-mgr`` logfile(s) for any errors. These can be + found by searching for keywords, such as *500 Internal Server Error*, followed by ``traceback``. The end of a traceback contains more details about what exact error occurred. #. Check your web browser's Javascript Console for any errors. @@ -1372,7 +1368,7 @@ Ceph Dashboard Logs Dashboard Debug Flag """""""""""""""""""" -With this flag enabled, traceback of errors are included in backend responses. +With this flag enabled, error traceback is included in backend responses. To enable this flag via the Ceph Dashboard, navigate from *Cluster* to *Manager modules*. Select *Dashboard module* and click the edit button. Click the @@ -1401,3 +1397,15 @@ debugging. * To adjust it via the CLI, run the following command:: $ bin/ceph config set mgr mgr/dashboard/log_level debug + +#. High log levels can result in considerable log volume, which can +easily fill up your filesystem. Set a calendar reminder for an hour, a day, +or a week in the future to revert this temporary logging increase. This looks +something like this:: + + $ ceph config log + ...
+ --- 11 --- 2020-11-07 11:11:11.960659 --- mgr.x/dashboard/log_level = debug --- + ... + $ ceph config reset 11 + diff --git a/doc/mgr/orchestrator.rst b/doc/mgr/orchestrator.rst index bb0a6bc68ef..102f65c15a2 100644 --- a/doc/mgr/orchestrator.rst +++ b/doc/mgr/orchestrator.rst @@ -6,9 +6,9 @@ Orchestrator CLI ================ This module provides a command line interface (CLI) to orchestrator -modules (ceph-mgr modules which interface with external orchestration services). +modules (``ceph-mgr`` modules which interface with external orchestration services). -As the orchestrator CLI unifies different external orchestrators, a common nomenclature +As the orchestrator CLI unifies multiple external orchestrators, a common nomenclature for the orchestrator module is needed. +--------------------------------------+---------------------------------------+ @@ -603,9 +603,9 @@ Or in YAML: MONs and other services may require some enhanced network specifications:: - orch daemon add mon --placement="myhost:[v2:1.2.3.4:3000,v1:1.2.3.4:6789]=name" + orch daemon add mon --placement="myhost:[v2:1.2.3.4:3300,v1:1.2.3.4:6789]=name" -where ``[v2:1.2.3.4:3000,v1:1.2.3.4:6789]`` is the network address of the monitor +where ``[v2:1.2.3.4:3300,v1:1.2.3.4:6789]`` is the network address of the monitor and ``=name`` specifies the name of the new monitor. Placement by labels diff --git a/doc/mgr/prometheus.rst b/doc/mgr/prometheus.rst index 6fcb292f98f..2c0fff5dcf7 100644 --- a/doc/mgr/prometheus.rst +++ b/doc/mgr/prometheus.rst @@ -31,7 +31,7 @@ Configuration By default the module will accept HTTP requests on port ``9283`` on all IPv4 and IPv6 addresses on the host. The port and listen address are both -configurable with ``ceph config-key set``, with keys +configurable with ``ceph config set``, with keys ``mgr/prometheus/server_addr`` and ``mgr/prometheus/server_port``. 
This port is registered with Prometheus's `registry <https://github.com/prometheus/prometheus/wiki/Default-port-allocations>`_. diff --git a/doc/rados/configuration/common.rst b/doc/rados/configuration/common.rst index fe19f6c6f0a..8a65b83ecd9 100644 --- a/doc/rados/configuration/common.rst +++ b/doc/rados/configuration/common.rst @@ -186,15 +186,32 @@ Example ceph.conf Running Multiple Clusters (DEPRECATED) ====================================== -Some Ceph CLI commands take a ``-c`` (cluster name) option. This option is -present purely for backward compatibility. You should not attempt to deploy -or run multiple clusters on the same hardware, and it is recommended to always -leave the cluster name as the default ("ceph"). - -If you need to allow multiple clusters to exist on the same host, please use +Each Ceph cluster has an internal name that is used as part of configuration +and log file names as well as directory and mountpoint names. This name +defaults to "ceph". Previous releases of Ceph allowed one to specify a custom +name instead, for example "ceph2". This was intended to facilitate running +multiple logical clusters on the same physical hardware, but in practice this +was rarely exploited and should no longer be attempted. Prior documentation +could also be misinterpreted as requiring unique cluster names in order to +use ``rbd-mirror``. + +Custom cluster names are now considered deprecated and the ability to deploy +them has already been removed from some tools, though existing custom name +deployments continue to operate. The ability to run and manage clusters with +custom names may be progressively removed by future Ceph releases, so it is +strongly recommended to deploy all new clusters with the default name "ceph". + +Some Ceph CLI commands accept an optional ``--cluster`` (cluster name) option. This +option is present purely for backward compatibility and need not be accommodated +by new tools and deployments.
+ +If you do need to allow multiple clusters to exist on the same host, please use :ref:`cephadm`, which uses containers to fully isolate each cluster. + + + .. _Hardware Recommendations: ../../../start/hardware-recommendations .. _Network Configuration Reference: ../network-config-ref .. _OSD Config Reference: ../osd-config-ref diff --git a/doc/rados/configuration/mon-config-ref.rst b/doc/rados/configuration/mon-config-ref.rst index ee0e872a29b..2edbdd0ba2e 100644 --- a/doc/rados/configuration/mon-config-ref.rst +++ b/doc/rados/configuration/mon-config-ref.rst @@ -467,6 +467,8 @@ by setting it in the ``[mon]`` section of the configuration file. .. index:: Ceph Storage Cluster; capacity planning, Ceph Monitor; capacity planning +.. _storage-capacity: + Storage Capacity ---------------- @@ -952,6 +954,7 @@ Client :Type: 64-bit Integer Unsigned :Default: ``100ul << 20`` +.. _pool-settings: Pool settings ============= @@ -1116,12 +1119,12 @@ Miscellaneous ``mon scrub interval`` -:Description: How often (in seconds) the monitor scrub its store by comparing +:Description: How often the monitor scrub its store by comparing the stored checksums with the computed ones of all the stored - keys. + keys. (0 disables it. dangerous, use with care) -:Type: Integer -:Default: ``3600*24`` +:Type: Seconds +:Default: ``1 day`` ``mon scrub max keys`` diff --git a/doc/rados/configuration/osd-config-ref.rst b/doc/rados/configuration/osd-config-ref.rst index e185139ced6..f17921462f1 100644 --- a/doc/rados/configuration/osd-config-ref.rst +++ b/doc/rados/configuration/osd-config-ref.rst @@ -433,7 +433,9 @@ Operations ``osd client op priority`` -:Description: The priority set for client operations. +:Description: The priority set for client operations. This value is relative + to that of ``osd recovery op priority`` below. The default + strongly favors client ops over recovery. 
:Type: 32-bit Integer :Default: ``63`` @@ -442,7 +444,12 @@ Operations ``osd recovery op priority`` -:Description: The priority set for recovery operations, if not specified by the pool's ``recovery_op_priority``. +:Description: The priority of recovery operations vs client operations, if not specified by the + pool's ``recovery_op_priority``. The default value prioritizes client + ops (see above) over recovery ops. You may adjust the tradeoff of client + impact against the time to restore cluster health by lowering this value + for increased prioritization of client ops, or by increasing it to favor + recovery. :Type: 32-bit Integer :Default: ``3`` @@ -1084,7 +1091,7 @@ Miscellaneous when osd data is on HDD and osd journal is on SSD. :Type: Float -:Default: ``2`` +:Default: ``1`` ``osd command max records`` diff --git a/doc/rados/operations/crush-map.rst b/doc/rados/operations/crush-map.rst index 2558a42fee3..68c12d1d5f7 100644 --- a/doc/rados/operations/crush-map.rst +++ b/doc/rados/operations/crush-map.rst @@ -959,39 +959,99 @@ Primary Affinity ================ When a Ceph Client reads or writes data, it first contacts the primary OSD in -each affected PG's acting set. In the acting set ``[2, 3, 4]``, ``osd.2`` is -listed first and thus is the primary (lead). Sometimes an +each affected PG's acting set. By default, the first OSD in the acting set is +the primary. For example, in the acting set ``[2, 3, 4]``, ``osd.2`` is +listed first and thus is the primary (aka lead) OSD. Sometimes we know that an OSD is less well suited to act as the lead than are other OSDs (e.g., it has -a slow disk or a slow controller). To prevent performance bottlenecks +a slow drive or a slow controller). To prevent performance bottlenecks (especially on read operations) while maximizing utilization of your hardware, -you can set a Ceph OSD's primary affinity so that CRUSH is less likely to use -the OSD as a primary in an acting set. 
:: +you can influence the selection of primary OSDs by adjusting primary affinity +values, or by crafting a CRUSH rule that selects preferred OSDs first. + +Tuning primary OSD selection is mainly useful for replicated pools, because +by default read operations are served from the primary OSD for each PG. +For erasure coded (EC) pools, a way to speed up read operations is to enable +**fast read** as described in :ref:`pool-settings`. + +A common scenario for primary affinity is when a cluster contains +a mix of drive sizes, for example older racks with 1.9 TB SATA SSDs and newer racks with +3.84 TB SATA SSDs. On average the latter will be assigned double the number of +PGs and thus will serve double the number of write and read operations, thus +they'll be busier than the former. A rough assignment of primary affinity +inversely proportional to OSD size won't be 100% optimal, but it can readily +achieve a 15% improvement in overall read throughput by utilizing SATA +interface bandwidth and CPU cycles more evenly. + +By default, all Ceph OSDs have primary affinity of ``1``, which indicates that +any OSD may act as a primary with equal probability. + +You can reduce a Ceph OSD's primary affinity so that CRUSH is less likely to choose +the OSD as primary in a PG's acting set:: ceph osd primary-affinity <osd-id> <weight> -Primary affinity is ``1`` by default (*i.e.,* an OSD may act as a primary). You -may set the OSD primary range as a real number in the range ``[0-1]``, where ``0`` -indicates that the OSD may **NOT** be used as a primary and ``1`` means that an -OSD may be used as a primary. When the weight is ``< 1``, it is less likely that -CRUSH will select the Ceph OSD Daemon to act as a primary. The process for +You may set an OSD's primary affinity to a real number in the range +``[0-1]``, where ``0`` indicates that the OSD may **NOT** be used as a primary +and ``1`` indicates that an OSD may be used as a primary.
When the weight is +between these extremes, it is less likely that +CRUSH will select that OSD as a primary. The process for selecting the lead OSD is more nuanced than a simple probability based on relative affinity values, but measurable results can be achieved even with first-order approximations of desirable values. -There are occasional clusters -that balance cost and performance by mixing SSDs and HDDs in the same pool. -Careful application of CRUSH rules can direct each PG's acting set to contain -exactly one SSD OSD with the balance HDDs. By using primary affinity one can -direct most or all read operations to the SSD in the acting set. This is -a tricky setup to maintain and it is discouraged, but it's a useful example. - -Another, more common scenario for primary affinity is when a cluster contains -a mix of drive sizes, for example older racks with 1.9 TB SATA SSDS and newer racks with -3.84TB SATA SSDs. On average the latter will be assigned double the number of -PGs and thus will serve double the number of write and read operations, thus -they'll be busier than the former. A rough application of primary affinity in -proportion to OSD size won't be 100% optimal, but it can readily achieve a 15% -improvement in overall read throughput by utilizing SATA interface bandwidth -and CPU cycles more evenly. - +Custom CRUSH Rules +------------------ + +There are occasional clusters that balance cost and performance by mixing SSDs +and HDDs in the same replicated pool. By setting the primary affinity of HDD +OSDs to ``0`` one can direct operations to the SSD in each acting set. An +alternative is to define a CRUSH rule that always selects an SSD OSD as the +first OSD, then selects HDDs for the remaining OSDs. Thus, each PG's acting +set will contain exactly one SSD OSD as the primary with the balance on HDDs. 
+ +For example, the CRUSH rule below:: + + rule mixed_replicated_rule { + id 11 + type replicated + min_size 1 + max_size 10 + step take default class ssd + step chooseleaf firstn 1 type host + step emit + step take default class hdd + step chooseleaf firstn 0 type host + step emit + } + +chooses an SSD as the first OSD. Note that for an ``N``-times replicated pool +this rule selects ``N+1`` OSDs to guarantee that ``N`` copies are on different +hosts, because the first SSD OSD might be co-located with any of the ``N`` HDD +OSDs. + +This extra storage requirement can be avoided by placing SSDs and HDDs in +different hosts with the tradeoff that hosts with SSDs will receive all client +requests. You may thus consider faster CPU(s) for SSD hosts and more modest +ones for HDD nodes, since the latter will normally only service recovery +operations. Here the CRUSH roots ``ssd_hosts`` and ``hdd_hosts`` strictly +must not contain the same servers:: + + rule mixed_replicated_rule_two { + id 1 + type replicated + min_size 1 + max_size 10 + step take ssd_hosts class ssd + step chooseleaf firstn 1 type host + step emit + step take hdd_hosts class hdd + step chooseleaf firstn -1 type host + step emit + } + + + +Note also that on failure of an SSD, requests to a PG will be served temporarily +from a (slower) HDD OSD until the PG's data has been replicated onto the replacement +primary SSD OSD. diff --git a/doc/rados/operations/health-checks.rst b/doc/rados/operations/health-checks.rst index 4f503d65b7e..a8085e92f79 100644 --- a/doc/rados/operations/health-checks.rst +++ b/doc/rados/operations/health-checks.rst @@ -981,8 +981,9 @@ Setting the quota value to 0 will disable the quota. POOL_NEAR_FULL ______________ -One or more pools is approaching is quota. The threshold to trigger -this warning condition is controlled by the +One or more pools is approaching a configured fullness threshold. 
+ +One threshold that can trigger this warning condition is the ``mon_pool_quota_warn_threshold`` configuration option. Pool quotas can be adjusted up or down (or removed) with:: @@ -992,6 +993,11 @@ Pool quotas can be adjusted up or down (or removed) with:: Setting the quota value to 0 will disable the quota. +Other thresholds that can trigger the above two warning conditions are +``mon_osd_nearfull_ratio`` and ``mon_osd_full_ratio``. Visit the +:ref:`storage-capacity` and :ref:`no-free-drive-space` documents for details +and resolution. + OBJECT_MISPLACED ________________ @@ -1100,8 +1106,6 @@ also indicate some other performance issue with the OSDs. The exact size of the snapshot trim queue is reported by the ``snaptrimq_len`` field of ``ceph pg ls -f json-detail``. - - Miscellaneous ------------- @@ -1195,7 +1199,6 @@ Alternatively, the capabilities for the user can be updated with:: For more information about auth capabilities, see :ref:`user-management`. - OSD_NO_DOWN_OUT_INTERVAL ________________________ @@ -1212,3 +1215,4 @@ This warning can silenced by setting the ``mon_warn_on_osd_down_out_interval_zero`` to false:: ceph config global mon mon_warn_on_osd_down_out_interval_zero false + diff --git a/doc/rados/operations/pools.rst b/doc/rados/operations/pools.rst index 18beffbfc7a..1a12fc16a72 100644 --- a/doc/rados/operations/pools.rst +++ b/doc/rados/operations/pools.rst @@ -276,21 +276,21 @@ You may set values for the following keys: ``compression_algorithm`` -:Description: Sets inline compression algorithm to use for underlying BlueStore. This setting overrides the `global setting <http://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression>`_ of ``bluestore compression algorithm``. +:Description: Sets inline compression algorithm to use for underlying BlueStore. 
This setting overrides the `global setting <https://docs.ceph.com/en/latest/rados/configuration/bluestore-config-ref/#inline-compression>`__ of ``bluestore compression algorithm``. :Type: String :Valid Settings: ``lz4``, ``snappy``, ``zlib``, ``zstd`` ``compression_mode`` -:Description: Sets the policy for the inline compression algorithm for underlying BlueStore. This setting overrides the `global setting <http://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression>`_ of ``bluestore compression mode``. +:Description: Sets the policy for the inline compression algorithm for underlying BlueStore. This setting overrides the `global setting <http://docs.ceph.com/en/latest/rados/configuration/bluestore-config-ref/#inline-compression>`__ of ``bluestore compression mode``. :Type: String :Valid Settings: ``none``, ``passive``, ``aggressive``, ``force`` ``compression_min_blob_size`` -:Description: Chunks smaller than this are never compressed. This setting overrides the `global setting <http://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#inline-compression>`_ of ``bluestore compression min blob *``. +:Description: Chunks smaller than this are never compressed. This setting overrides the `global setting <http://docs.ceph.com/en/latest/rados/configuration/bluestore-config-ref/#inline-compression>`__ of ``bluestore compression min blob *``. :Type: Unsigned Integer @@ -837,4 +837,3 @@ a size of 3). .. _setting the number of placement groups: ../placement-groups#set-the-number-of-placement-groups .. _Erasure Coding with Overwrites: ../erasure-code#erasure-coding-with-overwrites .. 
_Block Device Commands: ../../../rbd/rados-rbd-cmds/#create-a-block-device-pool - diff --git a/doc/rados/operations/stretch-mode.rst b/doc/rados/operations/stretch-mode.rst index 98902a95be0..5c120703578 100644 --- a/doc/rados/operations/stretch-mode.rst +++ b/doc/rados/operations/stretch-mode.rst @@ -23,16 +23,15 @@ two or three data centers (or, in clouds, availability zones). With two zones, we expect each site to hold a copy of the data, and for a third site to have a tiebreaker monitor (this can be a VM or high-latency compared to the main sites) to pick a winner if the network connection fails and both -DCs remain alive. For three sites, we expect a a copy of the data and an equal +DCs remain alive. For three sites, we expect a copy of the data and an equal number of monitors in each site. -Note, the standard Ceph configuration will survive MANY failures of -the network or Data Centers, if you have configured it correctly, and it will -never compromise data consistency -- if you bring back enough of the Ceph servers -following a failure, it will recover. If you lose -a data center and can still form a quorum of monitors and have all the data -available (with enough copies to satisfy min_size, or CRUSH rules that will -re-replicate to meet it), Ceph will maintain availability. +Note that the standard Ceph configuration will survive MANY failures of the +network or data centers and it will never compromise data consistency. If you +bring back enough Ceph servers following a failure, it will recover. If you +lose a data center, but can still form a quorum of monitors and have all the data +available (with enough copies to satisfy pools' ``min_size``, or CRUSH rules +that will re-replicate to meet it), Ceph will maintain availability. What can't it handle? @@ -54,32 +53,32 @@ guarantees. The second important category of failures is when you think you have data replicated across data centers, but the constraints aren't sufficient to guarantee this. 
For instance, you might have data centers A and B, and your CRUSH rule targets 3 copies -and places a copy in each data center with a min_size of 2. The PG may go active with +and places a copy in each data center with a ``min_size`` of 2. The PG may go active with 2 copies in site A and no copies in site B, which means that if you then lose site A you have lost data and Ceph can't operate on it. This situation is surprisingly difficult to avoid with standard CRUSH rules. Stretch Mode ============ -The new stretch mode is designed to handle the 2-site case. (3 sites are -just as susceptible to netsplit issues, but much more resilient to surprising -data availability ones than 2-site clusters are.) +The new stretch mode is designed to handle the 2-site case. Three sites are +just as susceptible to netsplit issues, but are much more tolerant of +component availability outages than 2-site clusters are. To enter stretch mode, you must set the location of each monitor, matching -your CRUSH map. For instance, to place mon.a in your first data center :: +your CRUSH map. For instance, to place ``mon.a`` in your first data center :: $ ceph mon set_location a datacenter=site1 Next, generate a CRUSH rule which will place 2 copies in each data center. This -will require editing the crush map directly:: +will require editing the CRUSH map directly:: $ ceph osd getcrushmap > crush.map.bin $ crushtool -d crush.map.bin -o crush.map.txt -Then edit the crush.map.txt file to add a new rule. Here -there is only one other rule, so this is id 1, but you may need -to use a different rule id. We also have two data center buckets -named site1 and site2:: +Now edit the ``crush.map.txt`` file to add a new rule. Here +there is only one other rule, so this is ID 1, but you may need +to use a different rule ID. 
We also have two datacenter buckets +named ``site1`` and ``site2``:: rule stretch_rule { id 1 @@ -94,7 +93,7 @@ named site1 and site2:: step emit } -Finally, inject the crushmap to make the rule available to the cluster:: +Finally, inject the CRUSH map to make the rule available to the cluster:: $ crushtool -c crush.map.txt -o crush2.map.bin $ ceph osd setcrushmap -i crush2.map.bin @@ -104,14 +103,13 @@ the instructions in `Changing Monitor Elections`_. .. _Changing Monitor elections: ../change-mon-elections +And lastly, tell the cluster to enter stretch mode. Here, ``mon.e`` is the +tiebreaker and we are splitting across data centers :: - -And last, tell the cluster to enter stretch mode. Here, mon.e is the -tiebreaker and we are splitting across datacenters :: - - $ ceph mon enable_stretch_mode e stretch_rule datacenter + $ ceph mon enable_stretch_mode e stretch_rule datacenter When stretch mode is enabled, the OSDs will only take PGs active when -they peer across datacenters (or whatever other CRUSH bucket type +they peer across data centers (or whatever other CRUSH bucket type you specified), assuming both are alive. Pools will increase in size from the default 3 to 4, expecting 2 copies in each site. OSDs will only be allowed to connect to monitors in the same data center. @@ -149,10 +147,11 @@ refuse, and it will not allow you to create EC pools once in stretch mode. You must create your own CRUSH rule which provides 2 copies in each site, and you must use 4 total copies with 2 in each site. If you have existing pools with non-default size/min_size, Ceph will object when you attempt to -enable_stretch_mode. +enable stretch mode. -Because it runs with min_size 1 when degraded, you should only use stretch mode -with all-flash OSDs. +Because it runs with ``min_size 1`` when degraded, you should only use stretch +mode with all-flash OSDs. This minimizes the time needed to recover once +connectivity is restored, and thus minimizes the potential for data loss. 
Hopefully, future development will extend this feature to support EC pools and running with more than 2 full sites. @@ -178,4 +177,4 @@ recovered), you can invoke :: This command should not be necessary; it is included to deal with unanticipated situations. But you might wish to invoke it to remove -the HEALTH_WARN state which recovery mode generates. +the ``HEALTH_WARN`` state which recovery mode generates. diff --git a/doc/rados/operations/user-management.rst b/doc/rados/operations/user-management.rst index 739ddf4f71d..d8695effbfb 100644 --- a/doc/rados/operations/user-management.rst +++ b/doc/rados/operations/user-management.rst @@ -45,7 +45,6 @@ For details on configuring the Ceph Storage Cluster to use authentication, see `Cephx Config Reference`_. For details on the architecture of Cephx, see `Architecture - High Availability Authentication`_. - Background ========== @@ -56,7 +55,6 @@ Additionally, Ceph users must have execute permissions to use Ceph's administrative commands. The following concepts will help you understand Ceph user management. - User ---- @@ -292,6 +290,29 @@ The following entries describe valid capability profiles: cap supports optional ``pool`` and ``namespace`` keyword arguments. +``profile simple-rados-client`` (Monitor only) + +:Description: Gives a user read-only permissions for monitor, OSD, and PG data. + Intended for use by direct librados client applications. + +``profile fs-client`` (Monitor only) + +:Description: Gives a user read-only permissions for monitor, OSD, PG, and MDS + data. Intended for CephFS clients. + +``profile role-definer`` (Monitor and Auth) + +:Description: Gives a user **all** permissions for the auth subsystem, read-only + access to monitors, and nothing else. Useful for automation + tools. Do not assign this unless you really, **really** know what + you're doing as the security ramifications are substantial and + pervasive. 
+ +``profile crash`` (Monitor only) + +:Description: Gives a user read-only access to monitors, used in conjunction + with the manager ``crash`` module when collecting daemon crash + dumps for later analysis. Pool ---- @@ -339,7 +360,6 @@ capability. Limited globbing of namespaces is supported; if the last character of the specified namespace is ``*``, then access is granted to any namespace starting with the provided argument. - Managing Users ============== @@ -351,7 +371,6 @@ When you create or delete users in the Ceph Storage Cluster, you may need to distribute keys to clients so that they can be added to keyrings. See `Keyring Management`_ for details. - List Users ---------- @@ -415,8 +434,6 @@ save the output to a file. Developers may also execute the following:: The ``auth export`` command is identical to ``auth get``. - - Add a User ---------- @@ -486,7 +503,6 @@ For example:: See `Authorization (Capabilities)`_ for additional details on capabilities. - Delete a User ------------- @@ -513,7 +529,6 @@ software with a user's key (e.g., libvirt). :: mount -t ceph serverhost:/ mountpoint -o name=client.user,secret=`ceph auth print-key client.user` - Import a User(s) ---------------- @@ -527,11 +542,10 @@ For example:: sudo ceph auth import -i /etc/ceph/ceph.keyring -.. note:: The ceph storage cluster will add new users, their keys and their +.. note:: The Ceph storage cluster will add new users, their keys and their capabilities and will update existing users, their keys and their capabilities. - Keyring Management ================== @@ -561,7 +575,6 @@ The `User Management`_ section details how to list, get, add, modify and delete users directly in the Ceph Storage Cluster. However, Ceph also provides the ``ceph-authtool`` utility to allow you to manage keyrings from a Ceph client. 
- Create a Keyring ---------------- @@ -596,7 +609,6 @@ intend to use the keyring for a particular user or group of users, ensure that you execute ``chown`` or ``chmod`` to establish appropriate keyring ownership and access. - Add a User to a Keyring ----------------------- @@ -618,7 +630,6 @@ For example:: sudo ceph-authtool /etc/ceph/ceph.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring - Create a User ------------- @@ -642,7 +653,6 @@ the new user to the Ceph Storage Cluster. :: sudo ceph auth add client.ringo -i /etc/ceph/ceph.keyring - Modify a User ------------- @@ -663,7 +673,6 @@ You may also `Modify User Capabilities`_ directly in the cluster, store the results to a keyring file; then, import the keyring into your main ``ceph.keyring`` file. - Command Line Usage ================== @@ -709,7 +718,6 @@ Ceph supports the following usage for user name and secret: .. _pools: ../pools - Limitations =========== diff --git a/doc/rados/troubleshooting/log-and-debug.rst b/doc/rados/troubleshooting/log-and-debug.rst index 5cf9e15b350..71170149bca 100644 --- a/doc/rados/troubleshooting/log-and-debug.rst +++ b/doc/rados/troubleshooting/log-and-debug.rst @@ -152,7 +152,7 @@ verbose [#]_ . In general, the logs in-memory are not sent to the output log unl - a fatal signal is raised or - an ``assert`` in source code is triggered or -- upon requested. Please consult `document on admin socket <http://docs.ceph.com/docs/master/man/8/ceph/#daemon>`_ for more details. +- upon requested. Please consult `document on admin socket <http://docs.ceph.com/en/latest/man/8/ceph/#daemon>`_ for more details. A debug logging setting can take a single value for the log level and the memory level, which sets them both as the same value. 
For example, if you diff --git a/doc/rados/troubleshooting/troubleshooting-osd.rst b/doc/rados/troubleshooting/troubleshooting-osd.rst index 9347210eca9..cc852d73db9 100644 --- a/doc/rados/troubleshooting/troubleshooting-osd.rst +++ b/doc/rados/troubleshooting/troubleshooting-osd.rst @@ -12,8 +12,6 @@ are running properly, because networks may have a significant impact on OSD operation and performance. Look for dropped packets on the host side and CRC errors on the switch side. - - Obtaining Data About OSDs ========================= @@ -60,7 +58,6 @@ The admin socket, among other things, allows you to: - Dump operations in flight - Dump perfcounters - Display Freespace ----------------- @@ -71,7 +68,6 @@ Filesystem issues may arise. To display your file system's free space, execute Execute ``df --help`` for additional usage. - I/O Statistics -------------- @@ -79,7 +75,6 @@ Use `iostat`_ to identify I/O-related issues. :: iostat -x - Diagnostic Messages ------------------- @@ -88,7 +83,6 @@ or ``tail``. For example:: dmesg | grep scsi - Stopping w/out Rebalancing ========================== @@ -110,24 +104,28 @@ Or an entire CRUSH bucket at a time. Say you're going to take down ceph osd set-group noout prod-ceph-data1701 -Once the flag is set you can begin stopping the OSDs within the -failure domain that requires maintenance work. :: +Once the flag is set you can stop the OSDs and any other colocated Ceph +services within the failure domain that requires maintenance work. :: - stop ceph-osd id={num} + systemctl stop ceph\*.service ceph\*.target .. note:: Placement groups within the OSDs you stop will become ``degraded`` while you are addressing issues within the failure domain. -Once you have completed your maintenance, restart the OSDs. :: +Once you have completed your maintenance, restart the OSDs and any other +daemons. If you rebooted the host as part of the maintenance, these should +come back on their own without intervention. 
:: - start ceph-osd id={num} + sudo systemctl start ceph.target -Finally, you must unset the cluster from ``noout``. :: +Finally, you must unset the cluster-wide ``noout`` flag:: ceph osd unset noout ceph osd unset-group noout prod-ceph-data1701 - +Note that most Linux distributions that Ceph supports today employ ``systemd`` +for service management. For other or older operating systems you may need +to issue equivalent ``service`` or ``start``/``stop`` commands. .. _osd-not-running: @@ -184,7 +182,6 @@ If you start your cluster and an OSD won't start, check the following: may activate connection tracking anyway, so a "set and forget" strategy for the tunables is advised. On modern systems this will not consume appreciable resources. - - **Kernel Version:** Identify the kernel version and distribution you are using. Ceph uses some third party tools by default, which may be @@ -202,7 +199,6 @@ If you start your cluster and an OSD won't start, check the following: release being run, ``ceph.conf`` (with secrets XXX'd out), your monitor status output and excerpts from your log file(s). - An OSD Failed ------------- @@ -224,7 +220,6 @@ or :: ceph osd tree down - If there is a drive failure or other fault preventing ``ceph-osd`` from functioning or restarting, an error message should be present in its log file under @@ -241,6 +236,7 @@ unexpected error), search the archives and tracker as above, and report it to the `ceph-devel`_ email list if there's no clear fix or existing bug. +.. _no-free-drive-space: No Free Drive Space ------------------- @@ -317,11 +313,10 @@ some space deleting a few placement group directories in the full OSD. See `Monitor Config Reference`_ for additional details. - OSDs are Slow/Unresponsive ========================== -A commonly recurring issue involves slow or unresponsive OSDs. Ensure that you +A common issue involves slow or unresponsive OSDs. 
Ensure that you have eliminated other troubleshooting possibilities before delving into OSD performance issues. For example, ensure that your network(s) is working properly and your OSDs are running. Check to see if OSDs are throttling recovery traffic. @@ -330,7 +325,6 @@ and your OSDs are running. Check to see if OSDs are throttling recovery traffic. recovering OSDs from using up system resources so that ``up`` and ``in`` OSDs are not available or are otherwise slow. - Networking Issues ----------------- @@ -350,7 +344,6 @@ Check network statistics. :: netstat -s - Drive Configuration ------------------- @@ -370,7 +363,6 @@ we recommend against using ``Btrfs`` for production deployments.) sequential read/write limits. Running a journal in a separate partition may help, but you should prefer a separate physical drive. - Bad Sectors / Fragmented Disk ----------------------------- @@ -378,7 +370,6 @@ Check your drives for bad blocks, fragmentation, and other errors that can cause performance to drop substantially. Invaluable tools include ``dmesg``, ``syslog`` logs, and ``smartctl`` (from the ``smartmontools`` package). - Co-resident Monitors/OSDs ------------------------- @@ -394,7 +385,6 @@ OSDs, you may incur performance issues related to: In these cases, multiple OSDs running on the same host can drag each other down by doing lots of commits. That often leads to the bursty writes. - Co-resident Processes --------------------- @@ -405,7 +395,6 @@ recommend optimizing hosts for use with Ceph and using other hosts for other processes. The practice of separating Ceph operations from other applications may help improve performance and may streamline troubleshooting and maintenance. - Logging Levels -------------- @@ -414,7 +403,6 @@ logging levels back down, the OSD may be putting a lot of logs onto the disk. If you intend to keep logging levels high, you may consider mounting a drive to the default path for logging (i.e., ``/var/log/ceph/$cluster-$name.log``). 
- Recovery Throttling ------------------- @@ -422,21 +410,18 @@ Depending upon your configuration, Ceph may reduce recovery rates to maintain performance or it may increase recovery rates to the point that recovery impacts OSD performance. Check to see if the OSD is recovering. - Kernel Version -------------- Check the kernel version you are running. Older kernels may not receive new backports that Ceph depends upon for better performance. - Kernel Issues with SyncFS ------------------------- Try running one OSD per host to see if performance improves. Old kernels might not have a recent enough version of ``glibc`` to support ``syncfs(2)``. - Filesystem Issues ----------------- @@ -454,7 +439,6 @@ For more information, see `Filesystem Recommendations`_. .. _Filesystem Recommendations: ../configuration/filesystem-recommendations - Insufficient RAM ---------------- @@ -467,7 +451,6 @@ when OSDs experience recovery their memory utilization spikes. If there is insufficient RAM available, OSD performance will slow considerably and the daemons may even crash or be killed by the Linux ``OOM Killer``. 
- Blocked Requests or Slow Requests --------------------------------- @@ -485,7 +468,6 @@ New versions of Ceph complain about ``slow requests``:: {date} {osd.num} [WRN] 1 slow requests, 1 included below; oldest blocked for > 30.005692 secs {date} {osd.num} [WRN] slow request 30.005692 seconds old, received at {date-time}: osd_op(client.4240.0:8 benchmark_data_ceph-1_39426_object7 [write 0~4194304] 0.69848840) v4 currently waiting for subops from [610] - Possible causes include: - A failing drive (check ``dmesg`` output) diff --git a/doc/radosgw/bucketpolicy.rst b/doc/radosgw/bucketpolicy.rst index ba14e097ece..5d6dddc744f 100644 --- a/doc/radosgw/bucketpolicy.rst +++ b/doc/radosgw/bucketpolicy.rst @@ -65,6 +65,8 @@ Currently, we support only the following actions: - s3:GetObjectVersion - s3:GetObjectVersionTorrent - s3:GetReplicationConfiguration +- s3:IPAddress +- s3:NotIpAddress - s3:ListAllMyBuckets - s3:ListBucketMultipartUploads - s3:ListBucket diff --git a/doc/radosgw/elastic-sync-module.rst b/doc/radosgw/elastic-sync-module.rst index bb2f65d7722..4105a723b8c 100644 --- a/doc/radosgw/elastic-sync-module.rst +++ b/doc/radosgw/elastic-sync-module.rst @@ -125,9 +125,6 @@ For example :: Will return all the indexed keys that user has read permission to, and are named 'foo'. -Will return all the indexed keys that user has read permission to, and -are named 'foo'. - The output will be a list of keys in XML that is similar to the S3 list buckets response. diff --git a/doc/radosgw/index.rst b/doc/radosgw/index.rst index 098d50119a6..5d4ab692df1 100644 --- a/doc/radosgw/index.rst +++ b/doc/radosgw/index.rst @@ -71,7 +71,7 @@ you may write data with one API and retrieve it with the other. 
STS Lite <STSLite> Keycloak <keycloak> Role <role> - Orphan List and Associated Tooliing <orphans> + Orphan List and Associated Tooling <orphans> OpenID Connect Provider <oidc> troubleshooting Manpage radosgw <../../man/8/radosgw> diff --git a/doc/radosgw/multisite.rst b/doc/radosgw/multisite.rst index d3963234169..fc859594568 100644 --- a/doc/radosgw/multisite.rst +++ b/doc/radosgw/multisite.rst @@ -1387,7 +1387,7 @@ Set a Zone Configuring a zone involves specifying a series of Ceph Object Gateway pools. For consistency, we recommend using a pool prefix that is the same as the zone name. See -`Pools <http://docs.ceph.com/docs/master/rados/operations/pools/#pools>`__ +`Pools <http://docs.ceph.com/en/latest/rados/operations/pools/#pools>`__ for details of configuring pools. To set a zone, create a JSON object consisting of the pools, save the diff --git a/doc/radosgw/notifications.rst b/doc/radosgw/notifications.rst index 7a4b9d07066..8e81fd47898 100644 --- a/doc/radosgw/notifications.rst +++ b/doc/radosgw/notifications.rst @@ -12,8 +12,8 @@ Currently, notifications could be sent to: HTTP, AMQP0.9.1 and Kafka endpoints. Note, that if the events should be stored in Ceph, in addition, or instead of being pushed to an endpoint, the `PubSub Module`_ should be used instead of the bucket notification mechanism. -A user can create different topics. A topic entity is defined by its user and its name. A -user can only manage its own topics, and can only associate them with buckets it owns. +A user can create different topics. A topic entity is defined by its name and is per tenant. A +user can only associate its topics (via notification configuration) with buckets it owns. In order to send notifications for events for a specific bucket, a notification entity needs to be created. A notification can be created on a subset of event types, or for all event types (default). 
@@ -50,25 +50,25 @@ In this case, the only latency added to the original operation is of committing Topic Management via CLI ------------------------ -Configuration of all topics of a user could be fetched using the following command: +Configuration of all topics, associated with a tenant, could be fetched using the following command: :: - # radosgw-admin topic list --uid={user-id} + # radosgw-admin topic list [--tenant={tenant}] Configuration of a specific topic could be fetched using: :: - # radosgw-admin topic get --uid={user-id} --topic={topic-name} + # radosgw-admin topic get --topic={topic-name} [--tenant={tenant}] And removed using: :: - # radosgw-admin topic rm --uid={user-id} --topic={topic-name} + # radosgw-admin topic rm --topic={topic-name} [--tenant={tenant}] Notification Performance Stats @@ -188,10 +188,68 @@ The topic ARN in the response will have the following format: arn:aws:sns:<zone-group>:<tenant>:<topic> +Get Topic Attributes +```````````````````` + +Returns information about a specific topic. This includes push-endpoint information, if provided. 
+ +:: + + POST + + Action=GetTopicAttributes + &TopicArn=<topic-arn> + +Response will have the following format: + +:: + + <GetTopicAttributesResponse> + <GetTopicAttributesResult> + <Attributes> + <entry> + <key>User</key> + <value></value> + </entry> + <entry> + <key>Name</key> + <value></value> + </entry> + <entry> + <key>EndPoint</key> + <value></value> + </entry> + <entry> + <key>TopicArn</key> + <value></value> + </entry> + <entry> + <key>OpaqueData</key> + <value></value> + </entry> + </Attributes> + </GetTopicAttributesResult> + <ResponseMetadata> + <RequestId></RequestId> + </ResponseMetadata> + </GetTopicAttributesResponse> + +- User: name of the user that created the topic +- Name: name of the topic +- EndPoint: JSON formatted endpoint parameters, including: + - EndpointAddress: the push-endpoint URL + - EndpointArgs: the push-endpoint args + - EndpointTopic: the topic name that should be sent to the endpoint (may be different than the above topic name) + - HasStoredSecret: "true" if the endpoint URL contains user/password information. In this case request must be made over HTTPS. If not, topic get request will be rejected + - Persistent: "true" if the topic is persistent +- TopicArn: topic ARN +- OpaqueData: the opaque data set on the topic + Get Topic Information ````````````````````` Returns information about specific topic. This includes push-endpoint information, if provided. +Note that this API is now deprecated in favor of the AWS compliant `GetTopicAttributes` API.
:: @@ -213,6 +271,8 @@ Response will have the following format: <EndpointAddress></EndpointAddress> <EndpointArgs></EndpointArgs> <EndpointTopic></EndpointTopic> + <HasStoredSecret></HasStoredSecret> + <Persistent></Persistent> </EndPoint> <TopicArn></TopicArn> <OpaqueData></OpaqueData> @@ -226,10 +286,12 @@ Response will have the following format: - User: name of the user that created the topic - Name: name of the topic - EndpointAddress: the push-endpoint URL -- if endpoint URL contain user/password information, request must be made over HTTPS. If not, topic get request will be rejected. - EndpointArgs: the push-endpoint args -- EndpointTopic: the topic name that should be sent to the endpoint (mat be different than the above topic name) +- EndpointTopic: the topic name that should be sent to the endpoint (may be different than the above topic name) +- HasStoredSecret: "true" if the endpoint URL contains user/password information. In this case request must be made over HTTPS. If not, topic get request will be rejected +- Persistent: "true" if the topic is persistent - TopicArn: topic ARN +- OpaqueData: the opaque data set on the topic Delete Topic ```````````` @@ -256,7 +318,7 @@ The response will have the following format: List Topics ``````````` -List all topics that user defined. +List all topics associated with a tenant. :: diff --git a/doc/radosgw/pools.rst b/doc/radosgw/pools.rst index a904883b36c..a9b00eac1d6 100644 --- a/doc/radosgw/pools.rst +++ b/doc/radosgw/pools.rst @@ -19,7 +19,7 @@ are sufficient for some pools, but others (especially those listed in tuning. We recommend using the `Ceph Placement Group’s per Pool Calculator <http://ceph.com/pgcalc/>`__ to calculate a suitable number of placement groups for these pools. See -`Pools <http://docs.ceph.com/docs/master/rados/operations/pools/#pools>`__ +`Pools <http://docs.ceph.com/en/latest/rados/operations/pools/#pools>`__ for details on pool creation. ..
_radosgw-pool-namespaces: diff --git a/doc/radosgw/pubsub-module.rst b/doc/radosgw/pubsub-module.rst index d39ab3e84a8..e2eaac7c8e2 100644 --- a/doc/radosgw/pubsub-module.rst +++ b/doc/radosgw/pubsub-module.rst @@ -15,9 +15,8 @@ A push notification mechanism exists too, currently supporting HTTP, AMQP0.9.1 and Kafka endpoints. In this case, the events are pushed to an endpoint on top of storing them in Ceph. If events should only be pushed to an endpoint and do not need to be stored in Ceph, the `Bucket Notification`_ mechanism should be used instead of pubsub sync module. -A user can create different topics. A topic entity is defined by its user and its name. A -user can only manage its own topics, and can only subscribe to events published by buckets -it owns. +A user can create different topics. A topic entity is defined by its name and is per tenant. A +user can only associate its topics (via notification configuration) with buckets it owns. In order to publish events for specific bucket a notification entity needs to be created. A notification can be created on a subset of event types, or for all event types (default). @@ -31,7 +30,7 @@ mechanisms. This API has two flavors, one is S3-compatible and one is not. The t together, although it is recommended to use the S3-compatible one. The S3-compatible API is similar to the one used in the bucket notification mechanism. -Events are stored as RGW objects in a special bucket, under a special user. Events cannot +Events are stored as RGW objects in a special bucket, under a special user (pubsub control user). Events cannot be accessed directly, but need to be pulled and acked using the new REST API. .. toctree:: @@ -116,52 +115,52 @@ A configuration field can be removed by using ``--tier-config-rm={key}``. 
Topic and Subscription Management via CLI ----------------------------------------- -Configuration of all topics of a user could be fetched using the following command: +Configuration of all topics, associated with a tenant, could be fetched using the following command: :: - # radosgw-admin topic list --uid={user-id} + # radosgw-admin topic list [--tenant={tenant}] Configuration of a specific topic could be fetched using: :: - # radosgw-admin topic get --uid={user-id} --topic={topic-name} + # radosgw-admin topic get --topic={topic-name} [--tenant={tenant}] And removed using: :: - # radosgw-admin topic rm --uid={user-id} --topic={topic-name} + # radosgw-admin topic rm --topic={topic-name} [--tenant={tenant}] Configuration of a subscription could be fetched using: :: - # radosgw-admin subscription get --uid={user-id} --subscription={topic-name} + # radosgw-admin subscription get --subscription={topic-name} [--tenant={tenant}] And removed using: :: - # radosgw-admin subscription rm --uid={user-id} --subscription={topic-name} + # radosgw-admin subscription rm --subscription={topic-name} [--tenant={tenant}] To fetch all of the events stored in a subcription, use: :: - # radosgw-admin subscription pull --uid={user-id} --subscription={topic-name} [--marker={last-marker}] + # radosgw-admin subscription pull --subscription={topic-name} [--marker={last-marker}] [--tenant={tenant}] To ack (and remove) an event from a subscription, use: :: - # radosgw-admin subscription ack --uid={user-id} --subscription={topic-name} --event-id={event-id} + # radosgw-admin subscription ack --subscription={topic-name} --event-id={event-id} [--tenant={tenant}] PubSub Performance Stats @@ -276,7 +275,9 @@ Response will have the following format (JSON): "oid_prefix":"", "push_endpoint":"", "push_endpoint_args":"", - "push_endpoint_topic":"" + "push_endpoint_topic":"", + "stored_secret":"", + "persistent":"" }, "arn":"" "opaqueData":"" @@ -307,7 +308,7 @@ Delete the specified topic. 
List Topics ``````````` -List all topics that user defined. +List all topics associated with a tenant. :: diff --git a/doc/radosgw/s3.rst b/doc/radosgw/s3.rst index 6ede95f8c14..9d23bfcb198 100644 --- a/doc/radosgw/s3.rst +++ b/doc/radosgw/s3.rst @@ -87,8 +87,6 @@ The following common request header fields are not supported: +----------------------------+------------+ | Name | Type | +============================+============+ -| **x-amz-security-token** | Request | -+----------------------------+------------+ | **Server** | Response | +----------------------------+------------+ | **x-amz-delete-marker** | Response | diff --git a/doc/rbd/iscsi-initiator-esx.rst b/doc/rbd/iscsi-initiator-esx.rst index 41c144dd55b..8bed6f2a2d6 100644 --- a/doc/rbd/iscsi-initiator-esx.rst +++ b/doc/rbd/iscsi-initiator-esx.rst @@ -16,7 +16,7 @@ The following instructions will use the default vSphere web client and esxcli. :align: center Click on "Storage" from "Navigator", and select the "Adapters" tab. - From there right click "Confgure iSCSI". + From there right click "Configure iSCSI". #. Set Initiator Name diff --git a/doc/rbd/iscsi-requirements.rst b/doc/rbd/iscsi-requirements.rst index 794a9884045..50dfc2a2740 100644 --- a/doc/rbd/iscsi-requirements.rst +++ b/doc/rbd/iscsi-requirements.rst @@ -9,7 +9,7 @@ For hardware recommendations, see :ref:`hardware-recommendations` . .. note:: On iSCSI gateway nodes the memory footprint is a function of - of the RBD images mapped and can grow to be largee. Plan memory + of the RBD images mapped and can grow to be large. Plan memory requirements accordingly based on the number RBD images to be mapped. 
There are no specific iSCSI gateway options for the Ceph Monitors or diff --git a/doc/rbd/iscsi-target-cli.rst b/doc/rbd/iscsi-target-cli.rst index 7a668382854..d888a34b09d 100644 --- a/doc/rbd/iscsi-target-cli.rst +++ b/doc/rbd/iscsi-target-cli.rst @@ -90,7 +90,7 @@ For rpm based instructions execute the following commands: If it does not exist instructions for creating pools can be found on the `RADOS pool operations page - <http://docs.ceph.com/docs/master/rados/operations/pools/>`_. + <http://docs.ceph.com/en/latest/rados/operations/pools/>`_. #. As ``root``, on a iSCSI gateway node, create a file named ``iscsi-gateway.cfg`` in the ``/etc/ceph/`` directory: diff --git a/doc/releases/general.rst b/doc/releases/general.rst index 7bee3c01da4..be1ee9b4e91 100644 --- a/doc/releases/general.rst +++ b/doc/releases/general.rst @@ -122,6 +122,8 @@ Release timeline .. ceph_timeline:: releases.yml development octopus nautilus mimic luminous kraken jewel infernalis hammer giant firefly .. _Octopus: ../octopus +.. _15.2.7: ../octopus#v15-2-7-octopus +.. _15.2.6: ../octopus#v15-2-6-octopus .. _15.2.5: ../octopus#v15-2-5-octopus .. _15.2.4: ../octopus#v15-2-4-octopus .. _15.2.3: ../octopus#v15-2-3-octopus @@ -130,6 +132,7 @@ Release timeline .. _15.2.0: ../octopus#v15-2-0-octopus .. _Nautilus: ../nautilus +.. _14.2.14: ../nautilus#v14-2-14-nautilus .. _14.2.13: ../nautilus#v14-2-13-nautilus .. _14.2.12: ../nautilus#v14-2-12-nautilus .. _14.2.11: ../nautilus#v14-2-11-nautilus diff --git a/doc/releases/nautilus.rst b/doc/releases/nautilus.rst index f388c12c2c9..be096999089 100644 --- a/doc/releases/nautilus.rst +++ b/doc/releases/nautilus.rst @@ -1,3 +1,67 @@ +v14.2.15 Nautilus +================= + +This is the 15th backport release in the Nautilus series. This release fixes a +ceph-volume regression introduced in v14.2.13 and includes few other fixes. We +recommend users to update to this release. 
+ +Notable Changes +--------------- + +* ceph-volume: Fixes lvm batch --auto, which breaks backward compatibility + when using non rotational devices only (SSD and/or NVMe). +* BlueStore: Fixes a bug in collection_list_legacy which makes pgs inconsistent + during scrub when running mixed versions of osds, prior to 14.2.12 with newer. +* MGR: progress module can now be turned on/off, using the commands: + ``ceph progress on`` and ``ceph progress off``. + +Changelog +--------- + +* ceph-volume: fix filestore/dmcrypt activate (`pr#38198 <https://github.com/ceph/ceph/pull/38198>`_, Guillaume Abrioux) +* ceph-volume: fix lvm batch auto with full SSDs (`pr#38046 <https://github.com/ceph/ceph/pull/38046>`_, Dimitri Savineau, Guillaume Abrioux) +* os/bluestore: fix "end reached" check in collection_list_legacy (`pr#38100 <https://github.com/ceph/ceph/pull/38100>`_, Mykola Golub) +* mgr/progress: introduce turn off/on feature (`pr#38173 <https://github.com/ceph/ceph/pull/38173>`_, kamoltat) + + +v14.2.14 Nautilus +================= + +This is the 14th backport release in the Nautilus series. This release fixes +a security flaw affecting Messenger v2, among other fixes across components. +We recommend users to update to this release. 
+ +Notable Changes +--------------- + +* CVE 2020-25660: CEPHX_V2 replay attack protection lost, for Messenger v2 (Ilya Dryomov) + +Changelog +--------- + +* mgr/dashboard: Strange iSCSI discovery auth behavior (`pr#37333 <https://github.com/ceph/ceph/pull/37333>`_, Volker Theile) +* mgr/dashboard: redirect to original URL after successful login (`pr#36834 <https://github.com/ceph/ceph/pull/36834>`_, Avan Thakkar) +* mgr/prometheus: add pool compression stats (`pr#37563 <https://github.com/ceph/ceph/pull/37563>`_, Paul Cuzner) +* bluestore: test/objectstore/store_test: kill ExcessiveFragmentation test case (`pr#37824 <https://github.com/ceph/ceph/pull/37824>`_, Igor Fedotov) +* bluestore: BlockDevice.cc: use pending_aios instead of iovec size as ios num (`pr#37823 <https://github.com/ceph/ceph/pull/37823>`_, weixinwei) +* bluestore: Support flock retry (`pr#37842 <https://github.com/ceph/ceph/pull/37842>`_, Kefu Chai, wanghongxu) +* bluestore: attach csum for compressed blobs (`pr#37843 <https://github.com/ceph/ceph/pull/37843>`_, Igor Fedotov) +* osdc/ObjectCacher: overwrite might cause stray read request callbacks (`pr#37813 <https://github.com/ceph/ceph/pull/37813>`_, Jason Dillaman) +* mgr: avoid false alarm of MGR_MODULE_ERROR (`pr#38069 <https://github.com/ceph/ceph/pull/38069>`_, Kefu Chai, Sage Weil) +* mgr: fix race between module load and notify (`pr#37844 <https://github.com/ceph/ceph/pull/37844>`_, Mykola Golub, Patrick Donnelly) +* mon: set session_timeout when adding to session_map (`pr#37554 <https://github.com/ceph/ceph/pull/37554>`_, Ilya Dryomov) +* mon/MonClient: bring back CEPHX_V2 authorizer challenges (Ilya Dryomov) +* osd/osd-rep-recov-eio.sh: TEST_rados_repair_warning: return 1 (`pr#37815 <https://github.com/ceph/ceph/pull/37815>`_, David Zafman) +* rbd: librbd: ignore -ENOENT error when disabling object-map (`pr#37814 <https://github.com/ceph/ceph/pull/37814>`_, Jason Dillaman) +* rbd: rbd-nbd: don't ignore namespace when unmapping by image 
spec (`pr#37811 <https://github.com/ceph/ceph/pull/37811>`_, Mykola Golub) +* rgw/rgw_file: Fix the incorrect lru object eviction (`pr#37804 <https://github.com/ceph/ceph/pull/37804>`_, luo rixin) +* rgw: fix expiration header returned even if there is only one tag in the object the same as the rule (`pr#37806 <https://github.com/ceph/ceph/pull/37806>`_, Or Friedmann) +* rgw: fix: S3 API KeyCount incorrect return (`pr#37810 <https://github.com/ceph/ceph/pull/37810>`_, 胡玮文) +* rgw: radosgw-admin should paginate internally when listing bucket (`pr#37802 <https://github.com/ceph/ceph/pull/37802>`_, J. Eric Ivancich) +* rgw: rgw_file: avoid long-ish delay on shutdown (`pr#37552 <https://github.com/ceph/ceph/pull/37552>`_, Matt Benjamin) +* rgw: use yum rather than dnf for teuthology testing of rgw-orphan-list (`pr#37805 <https://github.com/ceph/ceph/pull/37805>`_, J. Eric Ivancich) + + v14.2.13 Nautilus ================= diff --git a/doc/releases/octopus.rst b/doc/releases/octopus.rst index 182acdbcf6f..b470fabdc7a 100644 --- a/doc/releases/octopus.rst +++ b/doc/releases/octopus.rst @@ -1,3 +1,38 @@ +v15.2.7 Octopus +=============== + +This is the 7th backport release in the Octopus series. This release fixes +a serious bug in RGW that has been shown to cause data loss when a read of +a large RGW object (i.e., one with at least one tail segment) takes longer than +one half the time specified in the configuration option ``rgw_gc_obj_min_wait``. +The bug causes the tail segments of that read object to be added to the RGW +garbage collection queue, which will in turn cause them to be deleted after +a period of time. + +Changelog +--------- + +* rgw: during GC defer, prevent new GC enqueue (`issue#47866 <https://tracker.ceph.com/issues/47866>`_, `pr#38249 <https://github.com/ceph/ceph/pull/38249>`_, Eric Ivancich, Casey Bodley) + + +v15.2.6 Octopus +=============== + +This is the 6th backport release in the Octopus series. 
This release fixes +a security flaw affecting Messenger v1 & v2. We recommend users to update to +this release. + +Notable Changes +--------------- + +* CVE 2020-25660: CEPHX_V2 replay attack protection lost, for Messenger v1 & v2 (Ilya Dryomov) + +Changelog +--------- + +* mon/MonClient: bring back CEPHX_V2 authorizer challenges (Ilya Dryomov) + + v15.2.5 Octopus =============== @@ -5,7 +40,6 @@ This is the fifth release of the Ceph Octopus stable release series. This release brings a range of fixes across all components. We recommend that all Octopus users upgrade to this release. - Notable Changes --------------- @@ -882,7 +916,7 @@ Upgrade compatibility notes * The RGW "num_rados_handles" has been removed. If you were using a value of "num_rados_handles" greater than 1 multiply your current "objecter_inflight_ops" and - "objecter_inflight_op_bytes" paramaeters by the old + "objecter_inflight_op_bytes" parameters by the old "num_rados_handles" to get the same throttle behavior. * Ceph now packages python bindings for python3.6 instead of diff --git a/doc/releases/releases.yml b/doc/releases/releases.yml index da5b7b3dcf4..44cbfdcec93 100644 --- a/doc/releases/releases.yml +++ b/doc/releases/releases.yml @@ -15,6 +15,10 @@ releases: octopus: target_eol: 2022-06-01 releases: + - version: 15.2.7 + released: 2020-11-30 + - version: 15.2.6 + released: 2020-11-18 - version: 15.2.5 released: 2020-09-16 - version: 15.2.4 @@ -31,6 +35,8 @@ releases: nautilus: target_eol: 2021-06-01 releases: + - version: 14.2.14 + released: 2020-11-18 - version: 14.2.13 released: 2020-11-02 - version: 14.2.12 diff --git a/doc/start/documenting-ceph.rst b/doc/start/documenting-ceph.rst index f345302f8af..fa528a64737 100644 --- a/doc/start/documenting-ceph.rst +++ b/doc/start/documenting-ceph.rst @@ -43,25 +43,35 @@ see :ref:`Get Involved`. The most common way to make contributions is to use the `Fork and Pull`_ approach. You must: -#. Install git locally. For Debian/Ubuntu, execute:: +#. 
Install git locally. For Debian/Ubuntu, execute: + + .. prompt:: bash $ sudo apt-get install git - For Fedora, execute:: + For Fedora, execute: + + .. prompt:: bash $ sudo yum install git - For CentOS/RHEL, execute:: + For CentOS/RHEL, execute: + + .. prompt:: bash $ sudo yum install git -#. Ensure your ``.gitconfig`` file has your name and email address. :: +#. Ensure your ``.gitconfig`` file has your name and email address. : + + .. code-block:: ini [user] email = {your-email-address} name = {your-name} - For example:: + For example: + + .. prompt:: bash $ git config --global user.name "John Doe" git config --global user.email johndoe@example.com @@ -110,12 +120,16 @@ Select a Branch When you make small changes to the documentation, such as fixing typographical errors or clarifying explanations, use the ``master`` branch (default). You should also use the ``master`` branch when making contributions to features that -are in the current release. ``master`` is the most commonly used branch. :: +are in the current release. ``master`` is the most commonly used branch. : + +.. prompt:: bash $ git checkout master When you make changes to documentation that affect an upcoming release, use -the ``next`` branch. ``next`` is the second most commonly used branch. :: +the ``next`` branch. ``next`` is the second most commonly used branch. : + +.. prompt:: bash $ git checkout next @@ -139,11 +153,15 @@ http://tracker.ceph.com/issues/4000. describing the relevant changes/options. Before you create your branch name, ensure that it doesn't already exist in the -local or remote repository. :: +local or remote repository. : + +.. prompt:: bash $ git branch -a | grep wip-doc-{your-branch-name} -If it doesn't exist, create your branch:: +If it doesn't exist, create your branch: + +.. prompt:: bash $ git checkout -b wip-doc-{your-branch-name} @@ -166,12 +184,16 @@ Your new document doesn't get tracked by ``git`` automatically. 
When you want to add the document to the repository, you must use ``git add {path-to-filename}``. For example, from the top level directory of the repository, adding an ``example.rst`` file to the ``rados`` subdirectory would -look like this:: +look like this: + +.. prompt:: bash $ git add doc/rados/example.rst Deleting a document involves removing it from the repository with ``git rm -{path-to-filename}``. For example:: +{path-to-filename}``. For example: + +.. prompt:: bash $ git rm doc/rados/example.rst @@ -181,33 +203,49 @@ You must also remove any reference to a deleted document from other documents. Build the Source ---------------- -To build the documentation, navigate to the ``ceph`` repository directory:: +To build the documentation, navigate to the ``ceph`` repository directory: + + +.. prompt:: bash $ cd ceph -To build the documentation on Debian/Ubuntu, Fedora, or CentOS/RHEL, execute:: +.. note:: + The directory that contains ``build-doc`` and ``serve-doc`` must be included + in the ``PATH`` environment variable in order for these commands to work. + + +To build the documentation on Debian/Ubuntu, Fedora, or CentOS/RHEL, execute: + +.. prompt:: bash $ admin/build-doc -To scan for the reachability of external links, execute:: +To scan for the reachability of external links, execute: + +.. prompt:: bash $ admin/build-doc linkcheck -Executing ``admin/build-doc`` will create a ``build-doc`` directory under ``ceph``. -You may need to create a directory under ``ceph/build-doc`` for output of Javadoc -files. :: +Executing ``admin/build-doc`` will create a ``build-doc`` directory under +``ceph``. You may need to create a directory under ``ceph/build-doc`` for +output of Javadoc files: + +.. prompt:: bash $ mkdir -p output/html/api/libcephfs-java/javadoc The build script ``build-doc`` will produce an output of errors and warnings. 
-You MUST fix errors in documents you modified before committing a change, and you -SHOULD fix warnings that are related to syntax you modified. +You MUST fix errors in documents you modified before committing a change, and +you SHOULD fix warnings that are related to syntax you modified. .. important:: You must validate ALL HYPERLINKS. If a hyperlink is broken, it automatically breaks the build! Once you build the documentation set, you may start an HTTP server at -``http://localhost:8080/`` to view it:: +``http://localhost:8080/`` to view it: + +.. prompt:: bash $ admin/serve-doc @@ -287,12 +325,16 @@ the following packages are required: Install each dependency that is not installed on your host. For Debian/Ubuntu -distributions, execute the following:: +distributions, execute the following: + +.. prompt:: bash $ sudo apt-get install gcc python-dev python-pip python-virtualenv libxml2-dev libxslt-dev doxygen graphviz ant ditaa sudo apt-get install python-sphinx -For Fedora distributions, execute the following:: +For Fedora distributions, execute the following: + +.. prompt:: bash $ sudo yum install gcc python-devel python-pip python-virtualenv libxml2-devel libxslt-devel doxygen graphviz ant sudo pip install html2text @@ -302,18 +344,24 @@ For Fedora distributions, execute the following:: For CentOS/RHEL distributions, it is recommended to have ``epel`` (Extra Packages for Enterprise Linux) repository as it provides some extra packages which are not available in the default repository. To install ``epel``, execute -the following:: +the following: + +.. prompt:: bash $ sudo yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm -For CentOS/RHEL distributions, execute the following:: +For CentOS/RHEL distributions, execute the following: + +.. 
prompt:: bash $ sudo yum install gcc python-devel python-pip python-virtualenv libxml2-devel libxslt-devel doxygen graphviz ant sudo pip install html2text -For CentOS/RHEL distributions, the remaining python packages are not available in -the default and ``epel`` repositories. So, use http://rpmfind.net/ to find the -packages. Then, download them from a mirror and install them. For example:: +For CentOS/RHEL distributions, the remaining python packages are not available +in the default and ``epel`` repositories. So, use http://rpmfind.net/ to find +the packages. Then, download them from a mirror and install them. For example: + +.. prompt:: bash $ wget http://rpmfind.net/linux/centos/7/os/x86_64/Packages/python-jinja2-2.7.2-2.el7.noarch.rpm sudo yum install python-jinja2-2.7.2-2.el7.noarch.rpm @@ -328,15 +376,17 @@ Ceph documentation makes extensive use of `ditaa`_, which is not presently built for CentOS/RHEL7. You must install ``ditaa`` if you are making changes to ``ditaa`` diagrams so that you can verify that they render properly before you commit new or modified ``ditaa`` diagrams. You may retrieve compatible required -packages for CentOS/RHEL distributions and install them manually. To run ``ditaa`` -on CentOS/RHEL7, following dependencies are required: +packages for CentOS/RHEL distributions and install them manually. To run +``ditaa`` on CentOS/RHEL7, following dependencies are required: - jericho-html - jai-imageio-core - batik Use http://rpmfind.net/ to find compatible ``ditaa`` and the dependencies. -Then, download them from a mirror and install them. For example:: +Then, download them from a mirror and install them. For example: + +.. 
prompt:: bash $ wget http://rpmfind.net/linux/fedora/linux/releases/22/Everything/x86_64/os/Packages/j/jericho-html-3.3-4.fc22.noarch.rpm sudo yum install jericho-html-3.3-4.fc22.noarch.rpm @@ -398,7 +448,9 @@ There is a carriage return between the summary line and the description:: Signed-off-by: John Doe <john.doe@gmail.com> -To commit changes, execute the following:: +To commit changes, execute the following: + +.. prompt:: bash $ git commit -a @@ -410,15 +462,21 @@ your uncommitted changes, staging them for commit, committing the changes and pushing them to your forked Ceph repository. -For Debian/Ubuntu, execute:: +For Debian/Ubuntu, execute: + +.. prompt:: bash $ sudo apt-get install gitk git-gui -For Fedora/CentOS/RHEL, execute:: +For Fedora/CentOS/RHEL, execute: + +.. prompt:: bash $ sudo yum install gitk git-gui -Then, execute:: +Then, execute: + +.. prompt:: bash $ cd {git-ceph-repo-path} gitk @@ -431,11 +489,15 @@ Push the Change Once you have one or more commits, you must push them from the local copy of the repository to ``github``. A graphical tool like ``git-gui`` provides a user -interface for pushing to the repository. If you created a branch previously:: +interface for pushing to the repository. If you created a branch previously: + +.. prompt:: bash $ git push origin wip-doc-{your-branch-name} -Otherwise:: +Otherwise: + +.. prompt:: bash $ git push @@ -463,7 +525,9 @@ the documentation in both native restructuredText format and its rendered formats such as HTML. Navigate to your Ceph repository and view a document in its native format. You may notice that it is generally as legible in a terminal as it is in its rendered HTML format. Additionally, you may also notice that -diagrams in ``ditaa`` format also render reasonably well in text mode. :: +diagrams in ``ditaa`` format also render reasonably well in text mode. : + +.. 
prompt:: bash $ less doc/architecture.rst diff --git a/examples/boto3/README.md b/examples/boto3/README.md index 2abbd2812fe..be6799da9c2 100644 --- a/examples/boto3/README.md +++ b/examples/boto3/README.md @@ -13,16 +13,86 @@ The standard [AWS CLI](https://docs.aws.amazon.com/cli/latest/) may also be used ``` aws --endpoint-url http://localhost:8000 s3api list-objects --bucket=mybucket --allow-unordered ``` -- Bucket notifications with filtering extensions: + +- Use the following command to set SNS signature to s3v2: +``` +aws configure set default.sns.signature_version s3 +``` + +- Topic creation with endpoint: +``` +aws --endpoint-url http://localhost:8000 sns create-topic --name=mytopic --attributes='{"push-endpoint": "amqp://localhost:5672", "amqp-exchange": "ex1", "amqp-ack-level": "broker"}' +``` +Expected output: +``` +{ + "TopicArn": "arn:aws:sns:default::mytopic" +} +``` + +- Get topic attributes: ``` -aws --region=default --endpoint-url http://localhost:8000 s3api put-bucket-notification-configuration --bucket mybucket --notification-configuration='{"TopicConfigurations": [{"Id": "notif1", "TopicArn": "arn:aws:sns:default::mytopic", -"Events": ["s3:ObjectCreated:*", "s3:ObjectRemoved:*"], -"Filter": {"Metadata": {"FilterRules": [{"Name": "x-amz-meta-foo", "Value": "bar"}, {"Name": "x-amz-meta-hello", "Value": "world"}]}, "Key": {"FilterRules": [{"Name": "regex", "Value": "([a-z]+)"}]}}}]}' - ``` +aws --endpoint-url http://localhost:8000 sns get-topic-attributes --topic-arn="arn:aws:sns:default::mytopic" +``` +Expected output: +``` +{ + "Attributes": { + "User": "", + "Name": "mytopic", + "EndPoint": 
"{\"EndpointAddress\":\"amqp://localhost:5672\",\"EndpointArgs\":\"Attributes.entry.1.key=push-endpoint&Attributes.entry.1.value=amqp://localhost:5672&Attributes.entry.2.key=amqp-exchange&Attributes.entry.2.value=ex1&Attributes.entry.3.key=amqp-ack-level&Attributes.entry.3.value=broker&Version=2010-03-31&amqp-ack-level=broker&amqp-exchange=ex1&push-endpoint=amqp://localhost:5672\",\"EndpointTopic\":\"mytopic\",\"HasStoredSecret\":\"false\",\"Persistent\":\"false\"}", + "TopicArn": "arn:aws:sns:default::mytopic", + "OpaqueData": "" + } +} +``` + +- Bucket notifications with filtering extensions (bucket must exist before calling this command): +``` +aws --region=default --endpoint-url http://localhost:8000 s3api put-bucket-notification-configuration --bucket=mybucket --notification-configuration='{"TopicConfigurations": [{"Id": "notif1", "TopicArn": "arn:aws:sns:default::mytopic", "Events": ["s3:ObjectCreated:*", "s3:ObjectRemoved:*"], "Filter": {"Metadata": {"FilterRules": [{"Name": "x-amz-meta-foo", "Value": "bar"}, {"Name": "x-amz-meta-hello", "Value": "world"}]}, "Key": {"FilterRules": [{"Name": "regex", "Value": "([a-z]+)"}]}}}]}' +``` + - Get configuration of a specific notification of a bucket: ``` aws --endpoint-url http://localhost:8000 s3api get-bucket-notification-configuration --bucket=mybucket --notification=notif1 ``` +Expected output: +``` +{ + "TopicConfigurations": [ + { + "Id": "notif1", + "TopicArn": "arn:aws:sns:default::mytopic", + "Events": [ + "s3:ObjectCreated:*", + "s3:ObjectRemoved:*" + ], + "Filter": { + "Key": { + "FilterRules": [ + { + "Name": "regex", + "Value": "([a-z]+)" + } + ] + }, + "Metadata": { + "FilterRules": [ + { + "Name": "x-amz-meta-foo", + "Value": "bar" + }, + { + "Name": "x-amz-meta-hello", + "Value": "world" + } + ] + } + } + } + ] +} +``` # Developers Anyone developing an extension to the S3 API supported by AWS, please modify ``service-2.sdk-extras.json`` (all extensions should go into the same file), so that boto3 
could be used to test the new API. diff --git a/examples/boto3/delete_notification.py b/examples/boto3/delete_notification.py index 8e4d3d7b719..ca5958e52ec 100755 --- a/examples/boto3/delete_notification.py +++ b/examples/boto3/delete_notification.py @@ -13,7 +13,7 @@ elif len(sys.argv) == 2: bucketname = sys.argv[1] notification_name = "" else: - print 'Usage: ' + sys.argv[0] + ' <bucket> [notification]' + print('Usage: ' + sys.argv[0] + ' <bucket> [notification]') sys.exit(1) # endpoint and keys from vstart @@ -30,7 +30,7 @@ client = boto3.client('s3', # deleting all notification configurations on a bucket (without deleting the bucket itself) are extension to AWS S3 API if notification_name == "": - print client.delete_bucket_notification_configuration(Bucket=bucketname) + print(client.delete_bucket_notification_configuration(Bucket=bucketname)) else: - print client.delete_bucket_notification_configuration(Bucket=bucketname, - Notification=notification_name) + print(client.delete_bucket_notification_configuration(Bucket=bucketname, + Notification=notification_name)) diff --git a/examples/boto3/get_notification.py b/examples/boto3/get_notification.py index 6e32198950c..490c018d4ca 100755 --- a/examples/boto3/get_notification.py +++ b/examples/boto3/get_notification.py @@ -4,7 +4,7 @@ import boto3 import sys if len(sys.argv) != 3: - print 'Usage: ' + sys.argv[0] + ' <bucket> <notification>' + print('Usage: ' + sys.argv[0] + ' <bucket> <notification>') sys.exit(1) # bucket name as first argument @@ -24,5 +24,5 @@ client = boto3.client('s3', # getting a specific notification configuration is an extension to AWS S3 API -print client.get_bucket_notification_configuration(Bucket=bucketname, - Notification=notification_name) +print(client.get_bucket_notification_configuration(Bucket=bucketname, + Notification=notification_name)) diff --git a/examples/boto3/list_unordered.py b/examples/boto3/list_unordered.py index b2339eaa636..2aa5a8e0608 100755 --- 
a/examples/boto3/list_unordered.py +++ b/examples/boto3/list_unordered.py @@ -4,7 +4,7 @@ import boto3 import sys if len(sys.argv) != 2: - print 'Usage: ' + sys.argv[0] + ' <bucket>' + print('Usage: ' + sys.argv[0] + ' <bucket>') sys.exit(1) # bucket name as first argument @@ -22,4 +22,4 @@ client = boto3.client('s3', # geting an unordered list of objets is an extension to AWS S3 API -print client.list_objects(Bucket=bucketname, AllowUnordered=True) +print(client.list_objects(Bucket=bucketname, AllowUnordered=True)) diff --git a/examples/boto3/notification_filters.py b/examples/boto3/notification_filters.py index a45393c74f9..2687c8b3aad 100755 --- a/examples/boto3/notification_filters.py +++ b/examples/boto3/notification_filters.py @@ -4,7 +4,7 @@ import boto3 import sys if len(sys.argv) != 4: - print 'Usage: ' + sys.argv[0] + ' <bucket> <topic ARN> <notification Id>' + print('Usage: ' + sys.argv[0] + ' <bucket> <topic ARN> <notification Id>') sys.exit(1) # bucket name as first argument @@ -44,5 +44,5 @@ topic_conf_list = [{'Id': notification_id, } }}] -print client.put_bucket_notification_configuration(Bucket=bucketname, - NotificationConfiguration={'TopicConfigurations': topic_conf_list}) +print(client.put_bucket_notification_configuration(Bucket=bucketname, + NotificationConfiguration={'TopicConfigurations': topic_conf_list})) diff --git a/examples/boto3/service-2.sdk-extras.json b/examples/boto3/service-2.sdk-extras.json index 65883226366..9ee66730e85 100644 --- a/examples/boto3/service-2.sdk-extras.json +++ b/examples/boto3/service-2.sdk-extras.json @@ -191,12 +191,22 @@ "UsageStatsSummary": { "type": "structure", "members": { - "TotalBytes":{"shape":"TotalBytes"}, + "QuotaMaxBytes":{"shape":"QuotaMaxBytes"}, + "QuotaMaxBuckets":{"shape": "QuotaMaxBuckets"}, + "QuotaMaxObjCount":{"shape":"QuotaMaxObjCount"}, + "QuotaMaxBytesPerBucket":{"shape":"QuotaMaxBytesPerBucket"}, + "QuotaMaxObjCountPerBucket":{"shape":"QuotaMaxObjCountPerBucket"}, + 
"TotalBytes":{"shape":"TotalBytes"}, "TotalBytesRounded":{"shape":"TotalBytesRounded"}, "TotalEntries":{"shape":"TotalEntries"} } }, - "TotalBytesRounded":{"type":"integer"}, + "QuotaMaxBytes":{"type":"integer"}, + "QuotaMaxBuckets":{"type": "integer"}, + "QuotaMaxObjCount":{"type":"integer"}, + "QuotaMaxBytesPerBucket":{"type":"integer"}, + "QuotaMaxObjCountPerBucket":{"type":"integer"}, + "TotalBytesRounded":{"type":"integer"}, "TotalBytes":{"type":"integer"}, "TotalEntries":{"type":"integer"} }, diff --git a/examples/boto3/topic_attributes.py b/examples/boto3/topic_attributes.py new file mode 100644 index 00000000000..3caeb1fec26 --- /dev/null +++ b/examples/boto3/topic_attributes.py @@ -0,0 +1,46 @@ +import sys +import urllib +import hmac +import hashlib +import base64 +import xmltodict +import http.client +from urllib import parse as urlparse +from time import gmtime, strftime + +if len(sys.argv) == 2: + # topic arn as first argument + topic_arn = sys.argv[1] +else: + print ('Usage: ' + sys.argv[0] + ' <topic arn> [region name]') + sys.exit(1) + +# endpoint and keys from vstart +endpoint = '127.0.0.1:8000' +access_key='0555b35654ad1656d804' +secret_key='h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q==' + + +parameters = {'Action': 'GetTopic', 'TopicArn': topic_arn} +body = urlparse.urlencode(parameters) +string_date = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) +content_type = 'application/x-www-form-urlencoded; charset=utf-8' +resource = '/' +method = 'POST' +string_to_sign = method + '\n\n' + content_type + '\n' + string_date + '\n' + resource +signature = base64.b64encode(hmac.new(secret_key.encode('utf-8'), string_to_sign.encode('utf-8'), hashlib.sha1).digest()).decode('ascii') +headers = {'Authorization': 'AWS '+access_key+':'+signature, + 'Date': string_date, + 'Host': endpoint, + 'Content-Type': content_type} +http_conn = http.client.HTTPConnection(endpoint) +http_conn.request(method, resource, body, headers) +response = 
http_conn.getresponse() +data = response.read() +status = response.status +http_conn.close() +dict_response = xmltodict.parse(data) + +# getting attributes of a specific topic is an extension to AWS sns + +print(dict_response, status) diff --git a/examples/boto3/topic_with_endpoint.py b/examples/boto3/topic_with_endpoint.py index b6e626e0200..3137cee7d40 100755 --- a/examples/boto3/topic_with_endpoint.py +++ b/examples/boto3/topic_with_endpoint.py @@ -15,7 +15,7 @@ elif len(sys.argv) == 2: topic_name = sys.argv[1] region_name = "" else: - print 'Usage: ' + sys.argv[0] + ' <topic name> [region name]' + print('Usage: ' + sys.argv[0] + ' <topic name> [region name]') sys.exit(1) # endpoint and keys from vstart @@ -38,4 +38,4 @@ client = boto3.client('sns', endpoint_args = 'push-endpoint=amqp://127.0.0.1:5672&amqp-exchange=ex1&amqp-ack-level=broker' attributes = {nvp[0] : nvp[1] for nvp in urlparse.parse_qsl(endpoint_args, keep_blank_values=True)} -print client.create_topic(Name=topic_name, Attributes=attributes) +print(client.create_topic(Name=topic_name, Attributes=attributes)) diff --git a/monitoring/grafana/README.md b/monitoring/grafana/README.md index 4054e985384..b4bf4ec3273 100644 --- a/monitoring/grafana/README.md +++ b/monitoring/grafana/README.md @@ -3,7 +3,7 @@ Here you can find a collection of [Grafana](https://grafana.com/grafana) dashboards for Ceph Monitoring. These dashboards are based on metrics collected from [prometheus](https://prometheus.io/) scraping the [prometheus mgr -plugin](http://docs.ceph.com/docs/master/mgr/prometheus/) and the +plugin](http://docs.ceph.com/en/latest/mgr/prometheus/) and the [node_exporter](https://github.com/prometheus/node_exporter). 
### Other requirements diff --git a/qa/config/rados.yaml b/qa/config/rados.yaml index eb24e5e04d6..e468e126a4e 100644 --- a/qa/config/rados.yaml +++ b/qa/config/rados.yaml @@ -6,3 +6,5 @@ overrides: osd op queue cut off: debug_random osd debug verify missing on start: true osd debug verify cached snaps: true + mon: + mon scrub interval: 300 diff --git a/qa/distros/all/centos_8.2.yaml b/qa/distros/all/centos_8.2.yaml new file mode 100644 index 00000000000..a2a899d716a --- /dev/null +++ b/qa/distros/all/centos_8.2.yaml @@ -0,0 +1,6 @@ +os_type: centos +os_version: "8.2" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 diff --git a/qa/distros/all/centos_8.yaml b/qa/distros/all/centos_8.yaml index 8abfcfd1247..713f312fad5 120000 --- a/qa/distros/all/centos_8.yaml +++ b/qa/distros/all/centos_8.yaml @@ -1 +1 @@ -centos_8.1.yaml
\ No newline at end of file +centos_8.2.yaml
\ No newline at end of file diff --git a/qa/qa_scripts/openstack/connectceph.sh b/qa/qa_scripts/openstack/connectceph.sh index 2d70df7ffb3..d975daada0e 100755 --- a/qa/qa_scripts/openstack/connectceph.sh +++ b/qa/qa_scripts/openstack/connectceph.sh @@ -4,7 +4,7 @@ # # Essentially implements: # -# http://docs.ceph.com/docs/master/rbd/rbd-openstack/ +# http://docs.ceph.com/en/latest/rbd/rbd-openstack/ # # The directory named files contains templates for the /etc/glance/glance-api.conf, # /etc/cinder/cinder.conf, /etc/nova/nova.conf Openstack files diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index 51439aa3f4e..1461d0d6c61 100755 --- a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -20,7 +20,7 @@ source $CEPH_ROOT/qa/standalone/ceph-helpers.sh if [ `uname` = FreeBSD ]; then # erasure coding overwrites are only tested on Bluestore # erasure coding on filestore is unsafe - # http://docs.ceph.com/docs/master/rados/operations/erasure-code/#erasure-coding-with-overwrites + # http://docs.ceph.com/en/latest/rados/operations/erasure-code/#erasure-coding-with-overwrites use_ec_overwrite=false else use_ec_overwrite=true diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh index 624c1dd217d..5dd029c356f 100755 --- a/qa/standalone/scrub/osd-scrub-test.sh +++ b/qa/standalone/scrub/osd-scrub-test.sh @@ -351,6 +351,10 @@ function _scrub_abort() { fi ceph osd set $stopscrub + if [ "$type" = "deep_scrub" ]; + then + ceph osd set noscrub + fi # Wait for scrubbing to end set -o pipefail @@ -375,7 +379,13 @@ function _scrub_abort() { fi local last_scrub=$(get_last_scrub_stamp $pgid) - ceph osd unset noscrub + ceph config set osd "osd_scrub_sleep" "0.1" + + ceph osd unset $stopscrub + if [ "$type" = "deep_scrub" ]; + then + ceph osd unset noscrub + fi TIMEOUT=$(($objects / 2)) wait_for_scrub $pgid "$last_scrub" || return 1 diff --git 
a/qa/suites/fs/thrash/mount/fuse.yaml b/qa/suites/fs/thrash/mount/fuse.yaml deleted file mode 120000 index 0e55da9fb7a..00000000000 --- a/qa/suites/fs/thrash/mount/fuse.yaml +++ /dev/null @@ -1 +0,0 @@ -.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/% b/qa/suites/fs/thrash/multifs/% index e69de29bb2d..e69de29bb2d 100644 --- a/qa/suites/fs/thrash/% +++ b/qa/suites/fs/thrash/multifs/% diff --git a/qa/suites/fs/thrash/clusters/.qa b/qa/suites/fs/thrash/multifs/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/fs/thrash/clusters/.qa +++ b/qa/suites/fs/thrash/multifs/.qa diff --git a/qa/suites/fs/thrash/begin.yaml b/qa/suites/fs/thrash/multifs/begin.yaml index 311d404f7c2..311d404f7c2 120000 --- a/qa/suites/fs/thrash/begin.yaml +++ b/qa/suites/fs/thrash/multifs/begin.yaml diff --git a/qa/suites/fs/thrash/mount/.qa b/qa/suites/fs/thrash/multifs/clusters/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/fs/thrash/mount/.qa +++ b/qa/suites/fs/thrash/multifs/clusters/.qa diff --git a/qa/suites/fs/thrash/multifs/clusters/1a3s-mds-2c-client.yaml b/qa/suites/fs/thrash/multifs/clusters/1a3s-mds-2c-client.yaml new file mode 120000 index 00000000000..c190ea92ff5 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/clusters/1a3s-mds-2c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-2c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/conf b/qa/suites/fs/thrash/multifs/conf index 16e8cc44b7d..16e8cc44b7d 120000 --- a/qa/suites/fs/thrash/conf +++ b/qa/suites/fs/thrash/multifs/conf diff --git a/qa/suites/fs/thrash/distro b/qa/suites/fs/thrash/multifs/distro index 0862b4457b3..0862b4457b3 120000 --- a/qa/suites/fs/thrash/distro +++ b/qa/suites/fs/thrash/multifs/distro diff --git a/qa/suites/fs/thrash/multifs/mount b/qa/suites/fs/thrash/multifs/mount new file mode 120000 index 00000000000..e3600f453f2 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/msgr-failures/.qa b/qa/suites/fs/thrash/multifs/msgr-failures/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/fs/thrash/msgr-failures/.qa +++ b/qa/suites/fs/thrash/multifs/msgr-failures/.qa diff --git a/qa/suites/fs/thrash/msgr-failures/none.yaml b/qa/suites/fs/thrash/multifs/msgr-failures/none.yaml index e69de29bb2d..e69de29bb2d 100644 --- a/qa/suites/fs/thrash/msgr-failures/none.yaml +++ b/qa/suites/fs/thrash/multifs/msgr-failures/none.yaml diff --git a/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml b/qa/suites/fs/thrash/multifs/msgr-failures/osd-mds-delay.yaml index b4ca87f5127..b4ca87f5127 100644 --- a/qa/suites/fs/thrash/msgr-failures/osd-mds-delay.yaml +++ b/qa/suites/fs/thrash/multifs/msgr-failures/osd-mds-delay.yaml diff --git a/qa/suites/fs/thrash/overrides/.qa b/qa/suites/fs/thrash/multifs/objectstore/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/fs/thrash/overrides/.qa +++ b/qa/suites/fs/thrash/multifs/objectstore/.qa diff --git a/qa/suites/fs/thrash/multifs/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/thrash/multifs/objectstore/bluestore-bitmap.yaml new file mode 120000 index 00000000000..a59cf517506 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/overrides/+ b/qa/suites/fs/thrash/multifs/overrides/+ index e69de29bb2d..e69de29bb2d 100644 --- a/qa/suites/fs/thrash/overrides/+ +++ b/qa/suites/fs/thrash/multifs/overrides/+ diff --git a/qa/suites/fs/thrash/tasks/.qa b/qa/suites/fs/thrash/multifs/overrides/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/fs/thrash/tasks/.qa +++ b/qa/suites/fs/thrash/multifs/overrides/.qa diff --git a/qa/suites/fs/thrash/overrides/frag_enable.yaml b/qa/suites/fs/thrash/multifs/overrides/frag_enable.yaml index 34a39a368cf..34a39a368cf 120000 --- a/qa/suites/fs/thrash/overrides/frag_enable.yaml +++ b/qa/suites/fs/thrash/multifs/overrides/frag_enable.yaml diff --git a/qa/suites/fs/thrash/multifs/overrides/multifs.yaml b/qa/suites/fs/thrash/multifs/overrides/multifs.yaml new file mode 100644 index 00000000000..faf7838c2a6 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/multifs.yaml @@ -0,0 +1,16 @@ +overrides: + ceph: + cephfs: + fs: + - name: a + - name: b + ceph-fuse: + client.0: + cephfs_name: a + client.1: + cephfs_name: b + kclient: + client.0: + cephfs_name: a + client.1: + cephfs_name: b diff --git a/qa/suites/fs/thrash/overrides/session_timeout.yaml b/qa/suites/fs/thrash/multifs/overrides/session_timeout.yaml index fce0318c589..fce0318c589 120000 --- a/qa/suites/fs/thrash/overrides/session_timeout.yaml +++ b/qa/suites/fs/thrash/multifs/overrides/session_timeout.yaml diff --git a/qa/suites/fs/thrash/overrides/thrashosds-health.yaml b/qa/suites/fs/thrash/multifs/overrides/thrashosds-health.yaml index 9124eb1aa29..9124eb1aa29 120000 --- a/qa/suites/fs/thrash/overrides/thrashosds-health.yaml +++ b/qa/suites/fs/thrash/multifs/overrides/thrashosds-health.yaml diff --git a/qa/suites/fs/thrash/overrides/whitelist_health.yaml b/qa/suites/fs/thrash/multifs/overrides/whitelist_health.yaml index 74f39a49b27..74f39a49b27 120000 --- a/qa/suites/fs/thrash/overrides/whitelist_health.yaml +++ 
b/qa/suites/fs/thrash/multifs/overrides/whitelist_health.yaml diff --git a/qa/suites/fs/thrash/overrides/whitelist_wrongly_marked_down.yaml b/qa/suites/fs/thrash/multifs/overrides/whitelist_wrongly_marked_down.yaml index b4528c0f8c0..b4528c0f8c0 120000 --- a/qa/suites/fs/thrash/overrides/whitelist_wrongly_marked_down.yaml +++ b/qa/suites/fs/thrash/multifs/overrides/whitelist_wrongly_marked_down.yaml diff --git a/qa/suites/fs/thrash/tasks/% b/qa/suites/fs/thrash/multifs/tasks/% index e69de29bb2d..e69de29bb2d 100644 --- a/qa/suites/fs/thrash/tasks/% +++ b/qa/suites/fs/thrash/multifs/tasks/% diff --git a/qa/suites/fs/thrash/tasks/1-thrash/.qa b/qa/suites/fs/thrash/multifs/tasks/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/fs/thrash/tasks/1-thrash/.qa +++ b/qa/suites/fs/thrash/multifs/tasks/.qa diff --git a/qa/suites/fs/thrash/tasks/2-workunit/.qa b/qa/suites/fs/thrash/multifs/tasks/1-thrash/.qa index a602a0353e7..a602a0353e7 120000 --- a/qa/suites/fs/thrash/tasks/2-workunit/.qa +++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/.qa diff --git a/qa/suites/fs/thrash/tasks/1-thrash/mds.yaml b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml index 33748cea5cd..33748cea5cd 100644 --- a/qa/suites/fs/thrash/tasks/1-thrash/mds.yaml +++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml diff --git a/qa/suites/fs/thrash/tasks/1-thrash/mon.yaml b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml index fbbe16151ce..fbbe16151ce 100644 --- a/qa/suites/fs/thrash/tasks/1-thrash/mon.yaml +++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/.qa b/qa/suites/fs/thrash/multifs/tasks/2-workunit/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/tasks/2-workunit/cfuse_workunit_snaptests.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_snaptests.yaml index 790c93c2b17..790c93c2b17 100644 --- a/qa/suites/fs/thrash/tasks/2-workunit/cfuse_workunit_snaptests.yaml +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_snaptests.yaml diff --git a/qa/suites/fs/thrash/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml index c2e859fffbc..c2e859fffbc 120000 --- a/qa/suites/fs/thrash/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml diff --git a/qa/suites/fs/thrash/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml index f7784383be2..f7784383be2 100644 --- a/qa/suites/fs/thrash/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml diff --git a/qa/suites/fs/thrash/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml index a1df032772f..a1df032772f 120000 --- a/qa/suites/fs/thrash/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml diff --git a/qa/suites/fs/thrash/tasks/2-workunit/ffsb.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/ffsb.yaml index 7e4f711a237..7e4f711a237 100644 --- a/qa/suites/fs/thrash/tasks/2-workunit/ffsb.yaml +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/ffsb.yaml diff --git a/qa/suites/fs/thrash/tasks/2-workunit/iozone.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/iozone.yaml index 9270f3c51e2..9270f3c51e2 100644 --- a/qa/suites/fs/thrash/tasks/2-workunit/iozone.yaml +++ 
b/qa/suites/fs/thrash/multifs/tasks/2-workunit/iozone.yaml diff --git a/qa/suites/fs/thrash/workloads/% b/qa/suites/fs/thrash/workloads/% new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/qa/suites/fs/thrash/workloads/% diff --git a/qa/suites/fs/thrash/workloads/.qa b/qa/suites/fs/thrash/workloads/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/begin.yaml b/qa/suites/fs/thrash/workloads/begin.yaml new file mode 120000 index 00000000000..311d404f7c2 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/begin.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/clusters/.qa b/qa/suites/fs/thrash/workloads/clusters/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/clusters/1-mds-1-client-coloc.yaml b/qa/suites/fs/thrash/workloads/clusters/1-mds-1-client-coloc.yaml index d15ecfda012..d15ecfda012 120000 --- a/qa/suites/fs/thrash/clusters/1-mds-1-client-coloc.yaml +++ b/qa/suites/fs/thrash/workloads/clusters/1-mds-1-client-coloc.yaml diff --git a/qa/suites/fs/thrash/workloads/conf b/qa/suites/fs/thrash/workloads/conf new file mode 120000 index 00000000000..16e8cc44b7d --- /dev/null +++ b/qa/suites/fs/thrash/workloads/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/distro b/qa/suites/fs/thrash/workloads/distro new file mode 120000 index 00000000000..0862b4457b3 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/mount b/qa/suites/fs/thrash/workloads/mount new file mode 120000 index 00000000000..e3600f453f2 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/msgr-failures/.qa b/qa/suites/fs/thrash/workloads/msgr-failures/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/msgr-failures/none.yaml b/qa/suites/fs/thrash/workloads/msgr-failures/none.yaml new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/qa/suites/fs/thrash/workloads/msgr-failures/none.yaml diff --git a/qa/suites/fs/thrash/workloads/msgr-failures/osd-mds-delay.yaml b/qa/suites/fs/thrash/workloads/msgr-failures/osd-mds-delay.yaml new file mode 100644 index 00000000000..b4ca87f5127 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/msgr-failures/osd-mds-delay.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms inject delay type: osd mds + ms inject delay probability: .005 + ms inject delay max: 1 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/fs/thrash/objectstore-ec b/qa/suites/fs/thrash/workloads/objectstore-ec index affe294932e..affe294932e 120000 --- a/qa/suites/fs/thrash/objectstore-ec +++ b/qa/suites/fs/thrash/workloads/objectstore-ec diff --git a/qa/suites/fs/thrash/workloads/overrides/+ b/qa/suites/fs/thrash/workloads/overrides/+ new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/+ diff --git a/qa/suites/fs/thrash/workloads/overrides/.qa b/qa/suites/fs/thrash/workloads/overrides/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/frag_enable.yaml b/qa/suites/fs/thrash/workloads/overrides/frag_enable.yaml new file mode 120000 index 00000000000..34a39a368cf --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/frag_enable.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag_enable.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/session_timeout.yaml b/qa/suites/fs/thrash/workloads/overrides/session_timeout.yaml new file mode 120000 index 00000000000..fce0318c589 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/session_timeout.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/session_timeout.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/thrashosds-health.yaml b/qa/suites/fs/thrash/workloads/overrides/thrashosds-health.yaml new file mode 120000 index 00000000000..9124eb1aa29 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/whitelist_health.yaml b/qa/suites/fs/thrash/workloads/overrides/whitelist_health.yaml new file mode 120000 index 00000000000..74f39a49b27 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/whitelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/whitelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/whitelist_wrongly_marked_down.yaml b/qa/suites/fs/thrash/workloads/overrides/whitelist_wrongly_marked_down.yaml new file mode 120000 index 00000000000..b4528c0f8c0 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/whitelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/whitelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/% b/qa/suites/fs/thrash/workloads/tasks/% new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/% diff --git a/qa/suites/fs/thrash/workloads/tasks/.qa b/qa/suites/fs/thrash/workloads/tasks/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/.qa b/qa/suites/fs/thrash/workloads/tasks/1-thrash/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml new file mode 100644 index 00000000000..33748cea5cd --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml @@ -0,0 +1,7 @@ +tasks: +- mds_thrash: + +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml new file mode 100644 index 00000000000..fbbe16151ce --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) +tasks: +- mon_thrash: + check_mds_failover: True + revive_delay: 20 + thrash_delay: 10 diff --git a/qa/suites/fs/thrash/tasks/1-thrash/osd.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml index d69fb1402f0..d69fb1402f0 100644 --- a/qa/suites/fs/thrash/tasks/1-thrash/osd.yaml +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/.qa b/qa/suites/fs/thrash/workloads/tasks/2-workunit/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/cfuse_workunit_snaptests.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/cfuse_workunit_snaptests.yaml new file mode 100644 index 00000000000..790c93c2b17 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/cfuse_workunit_snaptests.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - fs/snaps diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml new file mode 120000 index 00000000000..c2e859fffbc --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 00000000000..f7784383be2 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml new file mode 120000 index 00000000000..a1df032772f --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_trivial_sync.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/ffsb.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/ffsb.yaml new file mode 100644 index 00000000000..7e4f711a237 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/ffsb.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - SLOW_OPS + - slow request + conf: + osd: + filestore flush min: 0 +tasks: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/iozone.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/iozone.yaml new file mode 100644 index 00000000000..9270f3c51e2 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/iozone.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/rados/monthrash/ceph.yaml b/qa/suites/rados/monthrash/ceph.yaml index dbca827d610..124ac3850c3 100644 --- a/qa/suites/rados/monthrash/ceph.yaml +++ b/qa/suites/rados/monthrash/ceph.yaml @@ -8,8 +8,12 @@ overrides: mon osdmap full prune min: 15 mon osdmap full prune interval: 2 mon osdmap full prune txsize: 2 + mon scrub inject crc mismatch: 0.01 + mon scrub inject missing keys: 0.05 # thrashing monitors may make mgr have trouble w/ its keepalive log-ignorelist: + - ScrubResult + - scrub mismatch - overall HEALTH_ - \(MGR_DOWN\) # slow mons -> slow peering -> PG_AVAILABILITY diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_migration.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_migration.yaml new file mode 100644 index 00000000000..b04ac08f7b4 --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_migration.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/cli_migration.sh diff --git a/qa/suites/rgw/sts/tasks/ststests.yaml b/qa/suites/rgw/sts/tasks/ststests.yaml index e5c52737e3a..10c2f2aed78 100644 --- a/qa/suites/rgw/sts/tasks/ststests.yaml +++ b/qa/suites/rgw/sts/tasks/ststests.yaml @@ -1,8 +1,9 @@ tasks: - s3tests: client.0: 
- extra_attrs: ['sts_test'] - force-branch: master + sts_tests: True + extra_attrs: ["test_of_sts"] + force-branch: ceph-master rgw_server: client.0 overrides: ceph: diff --git a/qa/suites/rgw/sts/tasks/webidentity.yaml b/qa/suites/rgw/sts/tasks/webidentity.yaml new file mode 100644 index 00000000000..61427f359a5 --- /dev/null +++ b/qa/suites/rgw/sts/tasks/webidentity.yaml @@ -0,0 +1,17 @@ +tasks: +- tox: [ client.0 ] +- keycloak: + client.0: + keycloak_version: 11.0.0 +- s3tests: + client.0: + sts_tests: True + force-branch: ceph-master + rgw_server: client.0 + extra_attrs: ['webidentity_test'] +overrides: + ceph: + conf: + client: + rgw sts key: abcdefghijklmnop + rgw s3 auth use sts: true diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py index 50173b4bb8e..1d5dcc24759 100644 --- a/qa/tasks/ceph.py +++ b/qa/tasks/ceph.py @@ -3,6 +3,7 @@ Ceph cluster task. Handle the setup, starting, and clean-up of a Ceph cluster. """ +from copy import deepcopy from io import BytesIO from io import StringIO @@ -255,10 +256,27 @@ def ceph_log(ctx, config): not (ctx.config.get('archive-on-error') and ctx.summary['success']): # and logs log.info('Compressing logs...') - ctx.cluster.sh( - 'sudo find /var/log/ceph -name *.log -print0 | ' - 'sudo xargs -0 --no-run-if-empty -- gzip --', - wait=False) + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'find', + '/var/log/ceph', + '-name', + '*.log', + '-print0', + run.Raw('|'), + 'sudo', + 'xargs', + '-0', + '--no-run-if-empty', + '--', + 'gzip', + '--', + ], + wait=False, + ), + ) log.info('Archiving logs...') path = os.path.join(ctx.archive, 'remote') @@ -391,10 +409,20 @@ def cephfs_setup(ctx, config): # If there are any MDSs, then create a filesystem for them to use # Do this last because requires mon cluster to be up and running if mdss.remotes: - log.info('Setting up CephFS filesystem...') - - Filesystem(ctx, fs_config=config.get('cephfs', None), name='cephfs', - create=True, ec_profile=config.get('cephfs_ec_profile', None)) + 
log.info('Setting up CephFS filesystem(s)...') + cephfs_config = config.get('cephfs', {}) + fs_configs = cephfs_config.pop('fs', [{'name': 'cephfs'}]) + set_allow_multifs = len(fs_configs) > 1 + + for fs_config in fs_configs: + assert isinstance(fs_config, dict) + name = fs_config.pop('name') + temp = deepcopy(cephfs_config) + teuthology.deep_merge(temp, fs_config) + fs = Filesystem(ctx, fs_config=temp, name=name, create=True) + if set_allow_multifs: + fs.set_allow_multifs() + set_allow_multifs = False yield @@ -1678,6 +1706,20 @@ def task(ctx, config): cephfs: max_mds: 2 + To change the max_mds of a specific filesystem, use:: + + tasks: + - ceph: + cephfs: + max_mds: 2 + fs: + - name: a + max_mds: 3 + - name: b + + In the above example, filesystem 'a' will have 'max_mds' 3, + and filesystem 'b' will have 'max_mds' 2. + To change the mdsmap's default session_timeout (60 seconds), use:: tasks: diff --git a/qa/tasks/ceph_fuse.py b/qa/tasks/ceph_fuse.py index 0e178d3d449..67432ead41d 100644 --- a/qa/tasks/ceph_fuse.py +++ b/qa/tasks/ceph_fuse.py @@ -11,27 +11,6 @@ from tasks.cephfs.fuse_mount import FuseMount log = logging.getLogger(__name__) -def get_client_configs(ctx, config): - """ - Get a map of the configuration for each FUSE client in the configuration by - combining the configuration of the current task with any global overrides. 
- - :param ctx: Context instance - :param config: configuration for this task - :return: dict of client name to config or to None - """ - if config is None: - config = dict(('client.{id}'.format(id=id_), None) - for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')) - elif isinstance(config, list): - config = dict((name, None) for name in config) - - overrides = ctx.config.get('overrides', {}) - teuthology.deep_merge(config, overrides.get('ceph-fuse', {})) - - return config - - @contextlib.contextmanager def task(ctx, config): """ @@ -98,14 +77,21 @@ def task(ctx, config): """ log.info('Running ceph_fuse task...') + if config is None: + ids = teuthology.all_roles_of_type(ctx.cluster, 'client') + client_roles = [f'client.{id_}' for id_ in ids] + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, list): + client_roles = config + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, dict): + client_roles = filter(lambda x: 'client.' in x, config.keys()) + else: + raise ValueError(f"Invalid config object: {config} ({config.__class__})") + log.info(f"config is {config}") + + clients = list(teuthology.get_clients(ctx=ctx, roles=client_roles)) testdir = teuthology.get_testdir(ctx) - log.info("config is {}".format(str(config))) - config = get_client_configs(ctx, config) - log.info("new config is {}".format(str(config))) - - # List clients we will configure mounts for, default is all clients - clients = list(teuthology.get_clients(ctx=ctx, roles=filter(lambda x: 'client.' in x, config.keys()))) - all_mounts = getattr(ctx, 'mounts', {}) mounted_by_me = {} skipped = {} @@ -114,13 +100,25 @@ def task(ctx, config): brxnet = config.get("brxnet", None) # Construct any new FuseMount instances + overrides = ctx.config.get('overrides', {}).get('ceph-fuse', {}) + top_overrides = dict(filter(lambda x: 'client.' 
not in x[0], overrides.items())) for id_, remote in clients: - remotes.add(remote) - client_config = config.get("client.%s" % id_) + entity = f"client.{id_}" + client_config = config.get(entity) if client_config is None: client_config = {} + # top level overrides + for k, v in top_overrides.items(): + if v is not None: + client_config[k] = v + # mount specific overrides + client_config_overrides = overrides.get(entity) + teuthology.deep_merge(client_config, client_config_overrides) + log.info(f"{entity} config is {client_config}") + remotes.add(remote) auth_id = client_config.get("auth_id", id_) + cephfs_name = client_config.get("cephfs_name") skip = client_config.get("skip", False) if skip: @@ -130,7 +128,8 @@ def task(ctx, config): if id_ not in all_mounts: fuse_mount = FuseMount(ctx=ctx, client_config=client_config, test_dir=testdir, client_id=auth_id, - client_remote=remote, brxnet=brxnet) + client_remote=remote, brxnet=brxnet, + cephfs_name=cephfs_name) all_mounts[id_] = fuse_mount else: # Catch bad configs where someone has e.g. tried to use ceph-fuse and kcephfs for the same client diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py index 4fbf5743e75..9705dbe3cf7 100644 --- a/qa/tasks/cephfs/filesystem.py +++ b/qa/tasks/cephfs/filesystem.py @@ -452,18 +452,17 @@ class Filesystem(MDSCluster): This object is for driving a CephFS filesystem. The MDS daemons driven by MDSCluster may be shared with other Filesystems. 
""" - def __init__(self, ctx, fs_config=None, fscid=None, name=None, create=False, - ec_profile=None): + def __init__(self, ctx, fs_config={}, fscid=None, name=None, create=False): super(Filesystem, self).__init__(ctx) self.name = name - self.ec_profile = ec_profile self.id = None self.metadata_pool_name = None self.metadata_overlay = False self.data_pool_name = None self.data_pools = None self.fs_config = fs_config + self.ec_profile = fs_config.get('cephfs_ec_profile') client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client')) self.client_id = client_list[0] diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py index 7259082ac9f..03ab3ddaaa8 100644 --- a/qa/tasks/cephfs/kernel_mount.py +++ b/qa/tasks/cephfs/kernel_mount.py @@ -1,5 +1,6 @@ import json import logging +import re from io import StringIO from textwrap import dedent @@ -26,6 +27,8 @@ class KernelMount(CephFSMount): cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet) self.rbytes = config.get('rbytes', False) + self.inst = None + self.addr = None def mount(self, mntopts=[], createfs=True, check_status=True, **kwargs): self.update_attrs(**kwargs) @@ -196,7 +199,10 @@ class KernelMount(CephFSMount): )) raise - def _read_debug_file(self, filename): + def read_debug_file(self, filename): + """ + Read the debug file "filename", return None if the file doesn't exist. 
+ """ debug_dir = self._find_debug_dir() pyscript = dedent(""" @@ -205,10 +211,17 @@ class KernelMount(CephFSMount): print(open(os.path.join("{debug_dir}", "{filename}")).read()) """).format(debug_dir=debug_dir, filename=filename) - output = self.client_remote.sh([ - 'sudo', 'python3', '-c', pyscript - ], timeout=(5*60)) - return output + stderr = StringIO() + try: + output = self.client_remote.sh([ + 'sudo', 'python3', '-c', pyscript + ], stderr=stderr, timeout=(5*60)) + + return output + except CommandFailedError: + if 'no such file or directory' in stderr.getvalue().lower(): + return None + raise def get_global_id(self): """ @@ -217,15 +230,57 @@ class KernelMount(CephFSMount): assert self.mounted - mds_sessions = self._read_debug_file("mds_sessions") + mds_sessions = self.read_debug_file("mds_sessions") + assert mds_sessions + lines = mds_sessions.split("\n") return int(lines[0].split()[1]) + @property + def _global_addr(self): + if self.addr is not None: + return self.addr + + # The first line of the "status" file's output will be something + # like: + # "instance: client.4297 (0)10.72.47.117:0/1148470933" + # What we need here is only the string "10.72.47.117:0/1148470933" + status = self.read_debug_file("status") + if status is None: + return None + + instance = re.findall(r'instance:.*', status)[0] + self.addr = instance.split()[2].split(')')[1] + return self.addr; + + @property + def _global_inst(self): + if self.inst is not None: + return self.inst + + client_gid = "client%d" % self.get_global_id() + self.inst = " ".join([client_gid, self._global_addr]) + return self.inst + + def get_global_inst(self): + """ + Look up the CephFS client instance for this mount + """ + return self._global_inst + + def get_global_addr(self): + """ + Look up the CephFS client addr for this mount + """ + return self._global_addr + def get_osd_epoch(self): """ Return 2-tuple of osd_epoch, osd_epoch_barrier """ - osd_map = self._read_debug_file("osdmap") + osd_map = 
self.read_debug_file("osdmap") + assert osd_map + lines = osd_map.split("\n") first_line_tokens = lines[0].split() epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3]) diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py index 0cc86f906a0..61fd1574e40 100644 --- a/qa/tasks/cephfs/mount.py +++ b/qa/tasks/cephfs/mount.py @@ -563,6 +563,10 @@ class CephFSMount(object): def is_blocklisted(self): addr = self.get_global_addr() + if addr is None: + log.warn("Couldn't get the client address, so the blocklisted status undetermined") + return False + blocklist = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "blocklist", "ls", "--format=json")) for b in blocklist: if addr == b["addr"]: diff --git a/qa/tasks/cephfs/test_snapshots.py b/qa/tasks/cephfs/test_snapshots.py index a036ec4a63f..fa4d9431fef 100644 --- a/qa/tasks/cephfs/test_snapshots.py +++ b/qa/tasks/cephfs/test_snapshots.py @@ -1,3 +1,4 @@ +import errno import logging import signal from textwrap import dedent @@ -38,6 +39,25 @@ class TestSnapshots(CephFSTestCase): def _get_pending_snap_destroy(self, rank=0, status=None): return self._get_snapserver_dump(rank,status=status)["pending_destroy"] + def test_allow_new_snaps_config(self): + """ + Check whether 'allow_new_snaps' setting works + """ + self.mount_a.run_shell(["mkdir", "test-allow-snaps"]) + + self.fs.set_allow_new_snaps(False); + try: + self.mount_a.run_shell(["mkdir", "test-allow-snaps/.snap/snap00"]) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, "expected EPERM") + else: + self.fail("expected snap creatiion to fail") + + self.fs.set_allow_new_snaps(True); + self.mount_a.run_shell(["mkdir", "test-allow-snaps/.snap/snap00"]) + self.mount_a.run_shell(["rmdir", "test-allow-snaps/.snap/snap00"]) + self.mount_a.run_shell(["rmdir", "test-allow-snaps"]) + def test_kill_mdstable(self): """ check snaptable transcation diff --git a/qa/tasks/devstack.py b/qa/tasks/devstack.py index 
954a6fa885a..2499e9e538d 100644 --- a/qa/tasks/devstack.py +++ b/qa/tasks/devstack.py @@ -51,7 +51,7 @@ def install(ctx, config): This was created using documentation found here: https://github.com/openstack-dev/devstack/blob/master/README.md - http://docs.ceph.com/docs/master/rbd/rbd-openstack/ + http://docs.ceph.com/en/latest/rbd/rbd-openstack/ """ if config is None: config = {} diff --git a/qa/tasks/kclient.py b/qa/tasks/kclient.py index 5378a6c0b95..be75286bd0d 100644 --- a/qa/tasks/kclient.py +++ b/qa/tasks/kclient.py @@ -48,25 +48,25 @@ def task(ctx, config): -kclient: client.0: debug: true + mntopts: ["nowsync"] :param ctx: Context :param config: Configuration """ log.info('Mounting kernel clients...') - assert config is None or isinstance(config, list) or isinstance(config, dict), \ - "task kclient got invalid config" if config is None: - config = ['client.{id}'.format(id=id_) - for id_ in misc.all_roles_of_type(ctx.cluster, 'client')] - - if isinstance(config, list): + ids = misc.all_roles_of_type(ctx.cluster, 'client') + client_roles = [f'client.{id_}' for id_ in ids] + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, list): client_roles = config config = dict([r, dict()] for r in client_roles) elif isinstance(config, dict): client_roles = filter(lambda x: 'client.' in x, config.keys()) else: - raise ValueError("Invalid config object: {0} ({1})".format(config, config.__class__)) + raise ValueError(f"Invalid config object: {config} ({config.__class__})") + log.info(f"config is {config}") clients = list(misc.get_clients(ctx=ctx, roles=client_roles)) @@ -77,13 +77,22 @@ def task(ctx, config): mounts = {} overrides = ctx.config.get('overrides', {}).get('kclient', {}) + top_overrides = dict(filter(lambda x: 'client.' 
not in x[0], overrides.items())) for id_, remote in clients: - client_config = config.get("client.%s" % id_) + entity = f"client.{id_}" + client_config = config.get(entity) if client_config is None: client_config = {} - - deep_merge(client_config, overrides) - + # top level overrides + for k, v in top_overrides.items(): + if v is not None: + client_config[k] = v + # mount specific overrides + client_config_overrides = overrides.get(entity) + deep_merge(client_config, client_config_overrides) + log.info(f"{entity} config is {client_config}") + + cephfs_name = client_config.get("cephfs_name") if config.get("disabled", False) or not client_config.get('mounted', True): continue @@ -93,7 +102,8 @@ def task(ctx, config): client_id=id_, client_remote=remote, brxnet=ctx.teuthology_config.get('brxnet', None), - config=client_config) + config=client_config, + cephfs_name=cephfs_name) mounts[id_] = kernel_mount @@ -101,8 +111,7 @@ def task(ctx, config): remote.run(args=["sudo", "bash", "-c", "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"]) remote.run(args=["sudo", "bash", "-c", "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"]) - kernel_mount.mount() - + kernel_mount.mount(mntopts=client_config.get('mntopts', [])) def umount_all(): log.info('Unmounting kernel clients...') diff --git a/qa/tasks/keycloak.py b/qa/tasks/keycloak.py new file mode 100644 index 00000000000..0f1fc9d7a38 --- /dev/null +++ b/qa/tasks/keycloak.py @@ -0,0 +1,335 @@ +""" +Deploy and configure Keycloak for Teuthology +""" +import contextlib +import logging +import os + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError + +log = logging.getLogger(__name__) + +def get_keycloak_version(config): + for client, client_config in config.items(): + if 'keycloak_version' in client_config: + keycloak_version = client_config.get('keycloak_version') + return 
keycloak_version + +def get_keycloak_dir(ctx, config): + keycloak_version = get_keycloak_version(config) + current_version = 'keycloak-'+keycloak_version + return '{tdir}/{ver}'.format(tdir=teuthology.get_testdir(ctx),ver=current_version) + +def run_in_keycloak_dir(ctx, client, config, args, **kwargs): + return ctx.cluster.only(client).run( + args=[ 'cd', get_keycloak_dir(ctx,config), run.Raw('&&'), ] + args, + **kwargs + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +def toxvenv_sh(ctx, remote, args, **kwargs): + activate = get_toxvenv_dir(ctx) + '/bin/activate' + return remote.sh(['source', activate, run.Raw('&&')] + args, **kwargs) + +@contextlib.contextmanager +def install_packages(ctx, config): + """ + Downloading the two required tar files + 1. Keycloak + 2. Wildfly (Application Server) + """ + assert isinstance(config, dict) + log.info('Installing packages for Keycloak...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + test_dir=teuthology.get_testdir(ctx) + current_version = get_keycloak_version(config) + link1 = 'https://downloads.jboss.org/keycloak/'+current_version+'/keycloak-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['wget', link1]) + + file1 = 'keycloak-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['tar', '-C', test_dir, '-xvzf', file1]) + + link2 ='https://downloads.jboss.org/keycloak/'+current_version+'/adapters/keycloak-oidc/keycloak-wildfly-adapter-dist-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['cd', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), 'wget', link2]) + + file2 = 'keycloak-wildfly-adapter-dist-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['tar', '-C', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config)), '-xvzf', '{tdr}/{file}'.format(tdr=get_keycloak_dir(ctx,config),file=file2)]) + + try: + yield + finally: + log.info('Removing packaged dependencies of Keycloak...') + for client in config: + 
ctx.cluster.only(client).run( + args=['rm', '-rf', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config))], + ) + +@contextlib.contextmanager +def build(ctx,config): + """ + Build process which needs to be done before starting a server. + """ + assert isinstance(config, dict) + log.info('Building Keycloak...') + for (client,_) in config.items(): + run_in_keycloak_dir(ctx, client, config,['cd', 'bin', run.Raw('&&'), './jboss-cli.sh', '--file=adapter-elytron-install-offline.cli']) + try: + yield + finally: + pass + +@contextlib.contextmanager +def run_keycloak(ctx,config): + """ + This includes two parts: + 1. Adding a user to keycloak which is actually used to log in when we start the server and check in browser. + 2. Starting the server. + """ + assert isinstance(config, dict) + log.info('Bringing up Keycloak...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=[ + '{tdir}/bin/add-user-keycloak.sh'.format(tdir=get_keycloak_dir(ctx,config)), + '-r', 'master', + '-u', 'admin', + '-p', 'admin', + ], + ) + + toxvenv_sh(ctx, remote, ['cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), './standalone.sh', run.Raw('&'), 'exit']) + try: + yield + finally: + log.info('Stopping Keycloak Server...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + toxvenv_sh(ctx, remote, ['cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), './jboss-cli.sh', '--connect', 'command=:shutdown']) + +@contextlib.contextmanager +def run_admin_cmds(ctx,config): + """ + Running Keycloak Admin commands(kcadm commands) in order to get the token, aud value, thumbprint and realm name. 
+ """ + assert isinstance(config, dict) + log.info('Running admin commands...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'config', 'credentials', + '--server', 'http://localhost:8080/auth', + '--realm', 'master', + '--user', 'admin', + '--password', 'admin', + '--client', 'admin-cli', + ], + ) + + realm_name='demorealm' + realm='realm={}'.format(realm_name) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', 'realms', + '-s', realm, + '-s', 'enabled=true', + '-s', 'accessTokenLifespan=1800', + '-o', + ], + ) + + client_name='my_client' + client='clientId={}'.format(client_name) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', 'clients', + '-r', realm_name, + '-s', client, + '-s', 'redirectUris=["http://localhost:8080/myapp/*"]', + ], + ) + + ans1= toxvenv_sh(ctx, remote, + [ + 'cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), + './kcadm.sh', 'get', 'clients', + '-r', realm_name, + '-F', 'id,clientId', run.Raw('|'), + 'jq', '-r', '.[] | select (.clientId == "my_client") | .id' + ]) + + pre0=ans1.rstrip() + pre1="clients/{}".format(pre0) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'update', pre1, + '-r', realm_name, + '-s', 'enabled=true', + '-s', 'serviceAccountsEnabled=true', + '-s', 'redirectUris=["http://localhost:8080/myapp/*"]', + ], + ) + + ans2= pre1+'/client-secret' + + out2= toxvenv_sh(ctx, remote, + [ + 'cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), + './kcadm.sh', 'get', ans2, + '-r', realm_name, + '-F', 'value' + ]) + + ans0= '{client}:{secret}'.format(client=client_name,secret=out2[15:51]) + ans3= 'client_secret={}'.format(out2[15:51]) + clientid='client_id={}'.format(client_name) + + out3= 
toxvenv_sh(ctx, remote, + [ + 'curl', '-k', '-v', + '-X', 'POST', + '-H', 'Content-Type:application/x-www-form-urlencoded', + '-d', 'scope=openid', + '-d', 'grant_type=client_credentials', + '-d', clientid, + '-d', ans3, + 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/token', run.Raw('|'), + 'jq', '-r', '.access_token' + ]) + + pre2=out3.rstrip() + acc_token= 'token={}'.format(pre2) + ans4= '{}'.format(pre2) + + out4= toxvenv_sh(ctx, remote, + [ + 'curl', '-k', '-v', + '-X', 'GET', + '-H', 'Content-Type:application/x-www-form-urlencoded', + 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/certs', run.Raw('|'), + 'jq', '-r', '.keys[].x5c[]' + ]) + + pre3=out4.rstrip() + cert_value='{}'.format(pre3) + start_value= "-----BEGIN CERTIFICATE-----\n" + end_value= "\n-----END CERTIFICATE-----" + user_data="" + user_data+=start_value + user_data+=cert_value + user_data+=end_value + + remote.write_file( + path='{tdir}/bin/certificate.crt'.format(tdir=get_keycloak_dir(ctx,config)), + data=user_data + ) + + out5= toxvenv_sh(ctx, remote, + [ + 'openssl', 'x509', + '-in', '{tdir}/bin/certificate.crt'.format(tdir=get_keycloak_dir(ctx,config)), + '--fingerprint', '--noout', '-sha1' + ]) + + pre_ans= '{}'.format(out5[17:76]) + ans5="" + + for character in pre_ans: + if(character!=':'): + ans5+=character + + out6= toxvenv_sh(ctx, remote, + [ + 'curl', '-k', '-v', + '-X', 'POST', + '-u', ans0, + '-d', acc_token, + 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/token/introspect', run.Raw('|'), + 'jq', '-r', '.aud' + ]) + + ans6=out6.rstrip() + + os.environ['TOKEN']=ans4 + os.environ['THUMBPRINT']=ans5 + os.environ['AUD']=ans6 + os.environ['KC_REALM']=realm_name + + try: + yield + finally: + log.info('Removing certificate.crt file...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=['rm', '-f', + 
'{tdir}/bin/certificate.crt'.format(tdir=get_keycloak_dir(ctx,config)), + ], + ) + +@contextlib.contextmanager +def task(ctx,config): + """ + To run keycloak the prerequisite is to run the tox task. Following is the way how to run + tox and then keycloak:: + + tasks: + - tox: [ client.0 ] + - keycloak: + client.0: + keycloak_version: 11.0.0 + + To pass extra arguments to nose (e.g. to run a certain test):: + + tasks: + - tox: [ client.0 ] + - keycloak: + client.0: + keycloak_version: 11.0.0 + - s3tests: + client.0: + extra_attrs: ['webidentity_test'] + + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task keycloak only supports a list or dictionary for configuration" + + if not hasattr(ctx, 'tox'): + raise ConfigError('keycloak must run after the tox task') + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.debug('Keycloak config is %s', config) + + with contextutil.nested( + lambda: install_packages(ctx=ctx, config=config), + lambda: build(ctx=ctx, config=config), + lambda: run_keycloak(ctx=ctx, config=config), + lambda: run_admin_cmds(ctx=ctx, config=config), + ): + yield + diff --git a/qa/tasks/mds_thrash.py b/qa/tasks/mds_thrash.py index ebd9e81a30a..44dceae2287 100644 --- a/qa/tasks/mds_thrash.py +++ b/qa/tasks/mds_thrash.py @@ -416,7 +416,7 @@ def task(ctx, config): config['cluster'] = 'ceph' for fs in status.get_filesystems(): - thrasher = MDSThrasher(ctx, manager, config, Filesystem(ctx, fs['id']), fs['mdsmap']['max_mds']) + thrasher = MDSThrasher(ctx, manager, config, Filesystem(ctx, fscid=fs['id']), fs['mdsmap']['max_mds']) thrasher.start() ctx.ceph[config['cluster']].thrashers.append(thrasher) diff --git a/qa/tasks/mgr/dashboard/test_host.py b/qa/tasks/mgr/dashboard/test_host.py index 069f729579e..49bb33533cd 100644 --- 
a/qa/tasks/mgr/dashboard/test_host.py +++ b/qa/tasks/mgr/dashboard/test_host.py @@ -9,6 +9,7 @@ class HostControllerTest(DashboardTestCase): AUTH_ROLES = ['read-only'] URL_HOST = '/api/host' + URL_UI_HOST = '/ui-api/host' ORCHESTRATOR = True @@ -21,6 +22,14 @@ class HostControllerTest(DashboardTestCase): cmd = ['test_orchestrator', 'load_data', '-i', '-'] cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd, stdin='{}') + @property + def test_data_inventory(self): + return self.ORCHESTRATOR_TEST_DATA['inventory'] + + @property + def test_data_daemons(self): + return self.ORCHESTRATOR_TEST_DATA['daemons'] + @DashboardTestCase.RunAs('test', 'test', ['block-manager']) def test_access_permissions(self): self._get(self.URL_HOST) @@ -97,6 +106,43 @@ class HostControllerTest(DashboardTestCase): self._get('{}/smart'.format('{}/{}'.format(self.URL_HOST, hosts[0]))) self.assertStatus(200) + def _validate_inventory(self, data, resp_data): + self.assertEqual(data['name'], resp_data['name']) + self.assertEqual(len(data['devices']), len(resp_data['devices'])) + + if not data['devices']: + return + test_devices = sorted(data['devices'], key=lambda d: d['path']) + resp_devices = sorted(resp_data['devices'], key=lambda d: d['path']) + + for test, resp in zip(test_devices, resp_devices): + self._validate_device(test, resp) + + def _validate_device(self, data, resp_data): + for key, value in data.items(): + self.assertEqual(value, resp_data[key]) + + def test_inventory_get(self): + # get a inventory + node = self.test_data_inventory[0] + resp = self._get('{}/{}/inventory'.format(self.URL_HOST, node['name'])) + self.assertStatus(200) + self._validate_inventory(node, resp) + + def test_inventory_list(self): + # get all inventory + data = self._get('{}/inventory'.format(self.URL_UI_HOST)) + self.assertStatus(200) + + def sorting_key(node): + return node['name'] + + test_inventory = sorted(self.test_data_inventory, key=sorting_key) + resp_inventory = sorted(data, key=sorting_key) + 
self.assertEqual(len(test_inventory), len(resp_inventory)) + for test, resp in zip(test_inventory, resp_inventory): + self._validate_inventory(test, resp) + class HostControllerNoOrchestratorTest(DashboardTestCase): def test_host_create(self): diff --git a/qa/tasks/mgr/dashboard/test_orchestrator.py b/qa/tasks/mgr/dashboard/test_orchestrator.py index 4fc45744084..8395853e3d6 100644 --- a/qa/tasks/mgr/dashboard/test_orchestrator.py +++ b/qa/tasks/mgr/dashboard/test_orchestrator.py @@ -9,19 +9,9 @@ class OrchestratorControllerTest(DashboardTestCase): AUTH_ROLES = ['cluster-manager'] URL_STATUS = '/api/orchestrator/status' - URL_INVENTORY = '/api/orchestrator/inventory' - URL_OSD = '/api/orchestrator/osd' ORCHESTRATOR = True - @property - def test_data_inventory(self): - return self.ORCHESTRATOR_TEST_DATA['inventory'] - - @property - def test_data_daemons(self): - return self.ORCHESTRATOR_TEST_DATA['daemons'] - @classmethod def setUpClass(cls): super(OrchestratorControllerTest, cls).setUpClass() @@ -31,54 +21,7 @@ class OrchestratorControllerTest(DashboardTestCase): cmd = ['test_orchestrator', 'load_data', '-i', '-'] cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd, stdin='{}') - def _validate_inventory(self, data, resp_data): - self.assertEqual(data['name'], resp_data['name']) - self.assertEqual(len(data['devices']), len(resp_data['devices'])) - - if not data['devices']: - return - test_devices = sorted(data['devices'], key=lambda d: d['path']) - resp_devices = sorted(resp_data['devices'], key=lambda d: d['path']) - - for test, resp in zip(test_devices, resp_devices): - self._validate_device(test, resp) - - def _validate_device(self, data, resp_data): - for key, value in data.items(): - self.assertEqual(value, resp_data[key]) - - def _validate_daemon(self, data, resp_data): - for key, value in data.items(): - self.assertEqual(value, resp_data[key]) - - @DashboardTestCase.RunAs('test', 'test', ['block-manager']) - def test_access_permissions(self): - 
self._get(self.URL_STATUS) - self.assertStatus(200) - self._get(self.URL_INVENTORY) - self.assertStatus(403) - def test_status_get(self): data = self._get(self.URL_STATUS) - self.assertTrue(data['available']) - - def test_inventory_list(self): - # get all inventory - data = self._get(self.URL_INVENTORY) - self.assertStatus(200) - - def sorting_key(node): - return node['name'] - - test_inventory = sorted(self.test_data_inventory, key=sorting_key) - resp_inventory = sorted(data, key=sorting_key) - self.assertEqual(len(test_inventory), len(resp_inventory)) - for test, resp in zip(test_inventory, resp_inventory): - self._validate_inventory(test, resp) - - # get inventory by hostname - node = self.test_data_inventory[-1] - data = self._get('{}?hostname={}'.format(self.URL_INVENTORY, node['name'])) self.assertStatus(200) - self.assertEqual(len(data), 1) - self._validate_inventory(node, data[0]) + self.assertTrue(data['available']) diff --git a/qa/tasks/mgr/dashboard/test_user.py b/qa/tasks/mgr/dashboard/test_user.py index 30fe0928f8a..be2a57d563c 100644 --- a/qa/tasks/mgr/dashboard/test_user.py +++ b/qa/tasks/mgr/dashboard/test_user.py @@ -169,6 +169,16 @@ class UserTest(DashboardTestCase): self.assertError(code='role_does_not_exist', component='user') + def test_create_user_invalid_chars_in_name(self): + self._create_user(username='userö', + password='mypassword10#', + name='administrator', + email='my@email.com', + roles=['administrator']) + self.assertStatus(400) + self.assertError(code='ceph_type_not_valid', + component='user') + def test_delete_user_does_not_exist(self): self._delete('/api/user/user2') self.assertStatus(404) diff --git a/qa/tasks/mgr/test_dashboard.py b/qa/tasks/mgr/test_dashboard.py index 5e9b3829b2a..b30175f4f6f 100644 --- a/qa/tasks/mgr/test_dashboard.py +++ b/qa/tasks/mgr/test_dashboard.py @@ -1,9 +1,11 @@ import logging +import ssl + import requests +from requests.adapters import HTTPAdapter from .mgr_test_case import MgrTestCase - log = 
logging.getLogger(__name__) @@ -26,6 +28,16 @@ class TestDashboard(MgrTestCase): "mgr/dashboard/standby_error_status_code", "500") + def wait_until_webserver_available(self, url): + def _check_connection(): + try: + requests.get(url, allow_redirects=False, verify=False) + return True + except requests.ConnectionError: + pass + return False + self.wait_until_true(_check_connection, timeout=30) + def test_standby(self): original_active_id = self.mgr_cluster.get_active_id() original_uri = self._get_uri("dashboard") @@ -46,6 +58,9 @@ class TestDashboard(MgrTestCase): self.assertNotEqual(original_uri, failed_over_uri) + # Wait until web server of the standby node is settled. + self.wait_until_webserver_available(original_uri) + # The original active daemon should have come back up as a standby # and be doing redirects to the new active daemon. r = requests.get(original_uri, allow_redirects=False, verify=False) @@ -53,7 +68,7 @@ class TestDashboard(MgrTestCase): self.assertEqual(r.headers['Location'], failed_over_uri) # Ensure that every URL redirects to the active daemon. - r = requests.get("{}/runtime.js".format(original_uri), + r = requests.get("{}/runtime.js".format(original_uri.strip('/')), allow_redirects=False, verify=False) self.assertEqual(r.status_code, 303) @@ -83,6 +98,9 @@ class TestDashboard(MgrTestCase): self.assertNotEqual(original_uri, failed_over_uri) + # Wait until web server of the standby node is settled. + self.wait_until_webserver_available(original_uri) + # Redirection should be disabled now, instead a 500 must be returned. 
r = requests.get(original_uri, allow_redirects=False, verify=False) self.assertEqual(r.status_code, 500) @@ -122,3 +140,32 @@ class TestDashboard(MgrTestCase): )) self.assertListEqual(failures, []) + + def test_tls(self): + class CustomHTTPAdapter(HTTPAdapter): + def __init__(self, ssl_version): + self.ssl_version = ssl_version + super().__init__() + + def init_poolmanager(self, *args, **kwargs): + kwargs['ssl_version'] = self.ssl_version + return super().init_poolmanager(*args, **kwargs) + + uri = self._get_uri("dashboard") + + # TLSv1 + with self.assertRaises(requests.exceptions.SSLError): + session = requests.Session() + session.mount(uri, CustomHTTPAdapter(ssl.PROTOCOL_TLSv1)) + session.get(uri, allow_redirects=False, verify=False) + + # TLSv1.1 + with self.assertRaises(requests.exceptions.SSLError): + session = requests.Session() + session.mount(uri, CustomHTTPAdapter(ssl.PROTOCOL_TLSv1_1)) + session.get(uri, allow_redirects=False, verify=False) + + session = requests.Session() + session.mount(uri, CustomHTTPAdapter(ssl.PROTOCOL_TLS)) + r = session.get(uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 200) diff --git a/qa/tasks/mgr/test_module_selftest.py b/qa/tasks/mgr/test_module_selftest.py index 59f91d43c14..8abfe51abe8 100644 --- a/qa/tasks/mgr/test_module_selftest.py +++ b/qa/tasks/mgr/test_module_selftest.py @@ -113,88 +113,6 @@ class TestModuleSelftest(MgrTestCase): "bar") self.wait_until_equal(get_localized_value, "bar", timeout=10) - def test_selftest_config_upgrade(self): - """ - That pre-mimic config-key config settings are migrated into - mimic-style config settings and visible from mgr modules. 
- """ - self._load_module("selftest") - - def get_value(): - return self.mgr_cluster.mon_manager.raw_cluster_cmd( - "mgr", "self-test", "config", "get", "testkey").strip() - - def get_config(): - lines = self.mgr_cluster.mon_manager.raw_cluster_cmd( - "config", "dump")\ - .strip().split("\n") - result = [] - for line in lines[1:]: - tokens = line.strip().split() - log.info("tokens: {0}".format(tokens)) - subsys, key, value = tokens[0], tokens[2], tokens[3] - result.append((subsys, key, value)) - - return result - - # Stop ceph-mgr while we synthetically create a pre-mimic - # configuration scenario - for mgr_id in self.mgr_cluster.mgr_daemons.keys(): - self.mgr_cluster.mgr_stop(mgr_id) - self.mgr_cluster.mgr_fail(mgr_id) - - # Blow away any modern-style mgr module config options - # (the ceph-mgr implementation may only do the upgrade if - # it doesn't see new style options) - stash = [] - for subsys, key, value in get_config(): - if subsys == "mgr" and key.startswith("mgr/"): - log.info("Removing config key {0} ahead of upgrade".format( - key)) - self.mgr_cluster.mon_manager.raw_cluster_cmd( - "config", "rm", subsys, key) - stash.append((subsys, key, value)) - - # Inject an old-style configuration setting in config-key - self.mgr_cluster.mon_manager.raw_cluster_cmd( - "config-key", "set", "mgr/selftest/testkey", "testvalue") - - # Inject configuration settings that looks data-ish and should - # not be migrated to a config key - self.mgr_cluster.mon_manager.raw_cluster_cmd( - "config-key", "set", "mgr/selftest/testnewline", "foo\nbar") - - # Inject configuration setting that does not appear in the - # module's config schema - self.mgr_cluster.mon_manager.raw_cluster_cmd( - "config-key", "set", "mgr/selftest/kvitem", "foo\nbar") - - # Bring mgr daemons back online, the one that goes active - # should be doing the upgrade. 
- for mgr_id in self.mgr_cluster.mgr_daemons.keys(): - self.mgr_cluster.mgr_restart(mgr_id) - - # Wait for a new active - self.wait_until_true( - lambda: self.mgr_cluster.get_active_id() != "", timeout=30) - - # Check that the selftest module sees the upgraded value - self.assertEqual(get_value(), "testvalue") - - # Check that the upgraded value is visible in the configuration - seen_keys = [k for s,k,v in get_config()] - self.assertIn("mgr/selftest/testkey", seen_keys) - - # ...and that the non-config-looking one isn't - self.assertNotIn("mgr/selftest/testnewline", seen_keys) - - # ...and that the not-in-schema one isn't - self.assertNotIn("mgr/selftest/kvitem", seen_keys) - - # Restore previous configuration - for subsys, key, value in stash: - self.mgr_cluster.mon_manager.raw_cluster_cmd( - "config", "set", subsys, key, value) def test_selftest_command_spam(self): # Use the selftest module to stress the mgr daemon diff --git a/qa/tasks/s3tests.py b/qa/tasks/s3tests.py index c0d5af5f3f0..d87c2ab0a43 100644 --- a/qa/tasks/s3tests.py +++ b/qa/tasks/s3tests.py @@ -97,93 +97,163 @@ def create_users(ctx, config): assert isinstance(config, dict) log.info('Creating rgw users...') testdir = teuthology.get_testdir(ctx) - users = {'s3 main': 'foo', 's3 alt': 'bar', 's3 tenant': 'testx$tenanteduser', 'iam': 'foobar'} - for client in config['clients']: - s3tests_conf = config['s3tests_conf'][client] - s3tests_conf.setdefault('fixtures', {}) - s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-') - for section, user in users.items(): - _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) - log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client)) - cluster_name, daemon_type, client_id = teuthology.split_role(client) - client_with_id = daemon_type + '.' 
+ client_id - if section=='iam': - ctx.cluster.only(client).run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'radosgw-admin', - '-n', client_with_id, - 'user', 'create', - '--uid', s3tests_conf[section]['user_id'], - '--display-name', s3tests_conf[section]['display_name'], - '--access-key', s3tests_conf[section]['access_key'], - '--secret', s3tests_conf[section]['secret_key'], - '--cluster', cluster_name, - ], - ) - ctx.cluster.only(client).run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'radosgw-admin', - '-n', client_with_id, - 'caps', 'add', - '--uid', s3tests_conf[section]['user_id'], - '--caps', 'user-policy=*', - '--cluster', cluster_name, - ], - ) - ctx.cluster.only(client).run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'radosgw-admin', - '-n', client_with_id, - 'caps', 'add', - '--uid', s3tests_conf[section]['user_id'], - '--caps', 'roles=*', - '--cluster', cluster_name, - ], - ) - else: + + if ctx.sts_variable: + users = {'s3 main': 'foo', 's3 alt': 'bar', 's3 tenant': 'testx$tenanteduser', 'iam': 'foobar'} + for client in config['clients']: + s3tests_conf = config['s3tests_conf'][client] + s3tests_conf.setdefault('fixtures', {}) + s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-') + for section, user in users.items(): + _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) + log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client)) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' 
+ client_id + if section=='iam': + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', s3tests_conf[section]['display_name'], + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['secret_key'], + '--cluster', cluster_name, + ], + ) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'caps', 'add', + '--uid', s3tests_conf[section]['user_id'], + '--caps', 'user-policy=*', + '--cluster', cluster_name, + ], + ) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'caps', 'add', + '--uid', s3tests_conf[section]['user_id'], + '--caps', 'roles=*', + '--cluster', cluster_name, + ], + ) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'caps', 'add', + '--uid', s3tests_conf[section]['user_id'], + '--caps', 'oidc-provider=*', + '--cluster', cluster_name, + ], + ) + + else: + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', s3tests_conf[section]['display_name'], + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['secret_key'], + '--email', s3tests_conf[section]['email'], + '--caps', 'user-policy=*', + '--cluster', cluster_name, + ], + ) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + 
'{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'mfa', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--totp-serial', s3tests_conf[section]['totp_serial'], + '--totp-seed', s3tests_conf[section]['totp_seed'], + '--totp-seconds', s3tests_conf[section]['totp_seconds'], + '--totp-window', '8', + '--totp-seed-type', 'base32', + '--cluster', cluster_name, + ], + ) + + else: + users = {'s3 main': 'foo', 's3 alt': 'bar', 's3 tenant': 'testx$tenanteduser'} + for client in config['clients']: + s3tests_conf = config['s3tests_conf'][client] + s3tests_conf.setdefault('fixtures', {}) + s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-') + for section, user in users.items(): + _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) + log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client)) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' 
+ client_id ctx.cluster.only(client).run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'radosgw-admin', - '-n', client_with_id, - 'user', 'create', - '--uid', s3tests_conf[section]['user_id'], - '--display-name', s3tests_conf[section]['display_name'], - '--access-key', s3tests_conf[section]['access_key'], - '--secret', s3tests_conf[section]['secret_key'], - '--email', s3tests_conf[section]['email'], - '--caps', 'user-policy=*', - '--cluster', cluster_name, - ], - ) + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', s3tests_conf[section]['display_name'], + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['secret_key'], + '--email', s3tests_conf[section]['email'], + '--caps', 'user-policy=*', + '--cluster', cluster_name, + ], + ) ctx.cluster.only(client).run( - args=[ - 'adjust-ulimits', - 'ceph-coverage', - '{tdir}/archive/coverage'.format(tdir=testdir), - 'radosgw-admin', - '-n', client_with_id, - 'mfa', 'create', - '--uid', s3tests_conf[section]['user_id'], - '--totp-serial', s3tests_conf[section]['totp_serial'], - '--totp-seed', s3tests_conf[section]['totp_seed'], - '--totp-seconds', s3tests_conf[section]['totp_seconds'], - '--totp-window', '8', - '--totp-seed-type', 'base32', - '--cluster', cluster_name, - ], - ) + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'mfa', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--totp-serial', s3tests_conf[section]['totp_serial'], + '--totp-seed', s3tests_conf[section]['totp_seed'], + '--totp-seconds', s3tests_conf[section]['totp_seconds'], + '--totp-window', '8', + '--totp-seed-type', 'base32', + '--cluster', cluster_name, + ], + ) + + if "TOKEN" in 
os.environ: + s3tests_conf.setdefault('webidentity', {}) + s3tests_conf['webidentity'].setdefault('token',os.environ['TOKEN']) + s3tests_conf['webidentity'].setdefault('aud',os.environ['AUD']) + s3tests_conf['webidentity'].setdefault('thumbprint',os.environ['THUMBPRINT']) + s3tests_conf['webidentity'].setdefault('KC_REALM',os.environ['KC_REALM']) + try: yield finally: @@ -332,13 +402,12 @@ def run_tests(ctx, config): else: args += ['REQUESTS_CA_BUNDLE=/etc/pki/tls/certs/ca-bundle.crt'] # civetweb > 1.8 && beast parsers are strict on rfc2616 - attrs = ["!fails_on_rgw", "!lifecycle_expiration", "!fails_strict_rfc2616","!s3select","!sts_test"] + attrs = ["!fails_on_rgw", "!lifecycle_expiration", "!fails_strict_rfc2616","!s3select","!test_of_sts","!webidentity_test"] if client_config.get('calling-format') != 'ordinary': attrs += ['!fails_with_subdomain'] if 'extra_attrs' in client_config: - attrs = client_config.get('extra_attrs') - + attrs = client_config.get('extra_attrs') args += [ '{tdir}/s3-tests/virtualenv/bin/python'.format(tdir=testdir), '-m', 'nose', @@ -439,6 +508,17 @@ def task(ctx, config): extra_args: ['test_s3:test_object_acl_grand_public_read'] client.1: extra_args: ['--exclude', 'test_100_continue'] + + To run any sts-tests don't forget to set a config variable named 'sts_tests' to 'True' as follows:: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: + client.0: + sts_tests: True + rgw_server: client.0 + """ assert hasattr(ctx, 'rgw'), 's3tests must run after the rgw task' assert config is None or isinstance(config, list) \ @@ -462,26 +542,80 @@ def task(ctx, config): log.debug('s3tests config is %s', config) s3tests_conf = {} - for client in clients: - endpoint = ctx.rgw.role_endpoints.get(client) - assert endpoint, 's3tests: no rgw endpoint for {}'.format(client) - - s3tests_conf[client] = ConfigObj( - indent_type='', - infile={ - 'DEFAULT': - { - 'port' : endpoint.port, - 'is_secure' : endpoint.cert is not None, - 'api_name' : 'default', - }, - 
'fixtures' : {}, - 's3 main' : {}, - 's3 alt' : {}, - 's3 tenant': {}, - 'iam' : {}, - } - ) + + for client, client_config in config.items(): + if 'sts_tests' in client_config: + ctx.sts_variable = True + else: + ctx.sts_variable = False + #This will be the structure of config file when you want to run webidentity_test (sts-test) + if ctx.sts_variable and "TOKEN" in os.environ: + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + 'api_name' : 'default', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + 's3 tenant' : {}, + 'iam' : {}, + 'webidentity': {}, + } + ) + + elif ctx.sts_variable: + #This will be the structure of config file when you want to run assume_role_test and get_session_token_test (sts-test) + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + 'api_name' : 'default', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + 's3 tenant' : {}, + 'iam' : {}, + } + ) + + else: + #This will be the structure of config file when you want to run normal s3-tests + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + 'api_name' : 'default', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + 's3 tenant' : {}, + } + ) with contextutil.nested( lambda: download(ctx=ctx, config=config), diff --git a/qa/tasks/vstart_runner.py 
b/qa/tasks/vstart_runner.py index 86a5a89f3f8..2e9035c94df 100644 --- a/qa/tasks/vstart_runner.py +++ b/qa/tasks/vstart_runner.py @@ -1165,18 +1165,18 @@ class LocalMgrCluster(LocalCephCluster, MgrCluster): class LocalFilesystem(Filesystem, LocalMDSCluster): - def __init__(self, ctx, fscid=None, name=None, create=False, ec_profile=None): + def __init__(self, ctx, fs_config={}, fscid=None, name=None, create=False): # Deliberately skip calling parent constructor self._ctx = ctx self.id = None self.name = name - self.ec_profile = ec_profile self.metadata_pool_name = None self.metadata_overlay = False self.data_pool_name = None self.data_pools = None - self.fs_config = None + self.fs_config = fs_config + self.ec_profile = fs_config.get('cephfs_ec_profile') # Hack: cheeky inspection of ceph.conf to see what MDSs exist self.mds_ids = set() @@ -1373,7 +1373,8 @@ class LogStream(object): self._write() def _write(self): - self._del_result_lines() + if opt_rotate_logs: + self._del_result_lines() if self.buffer == '': return diff --git a/qa/workunits/fs/snaps/snap-rm-diff.sh b/qa/workunits/fs/snaps/snap-rm-diff.sh index 63f642878be..30ffa9113a5 100755 --- a/qa/workunits/fs/snaps/snap-rm-diff.sh +++ b/qa/workunits/fs/snaps/snap-rm-diff.sh @@ -1,6 +1,5 @@ #!/bin/sh -ex -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it wget -q http://download.ceph.com/qa/linux-2.6.33.tar.bz2 mkdir foo cp linux* foo diff --git a/qa/workunits/fs/snaps/snaptest-0.sh b/qa/workunits/fs/snaps/snaptest-0.sh deleted file mode 100755 index 791caf9ec19..00000000000 --- a/qa/workunits/fs/snaps/snaptest-0.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh -x - -expect_failure() { - if "$@"; then return 1; else return 0; fi -} -set -e - -ceph fs set cephfs allow_new_snaps false -expect_failure mkdir .snap/foo -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - -echo asdf > foo -mkdir .snap/foo -grep asdf .snap/foo/foo -rmdir .snap/foo - -echo asdf > bar -mkdir .snap/bar -rm bar -grep 
asdf .snap/bar/bar -rmdir .snap/bar -rm foo - -ceph fs set cephfs allow_new_snaps false -expect_failure mkdir .snap/baz - -echo OK diff --git a/qa/workunits/fs/snaps/snaptest-1.sh b/qa/workunits/fs/snaps/snaptest-1.sh index 476531fc4d8..431e8338789 100755 --- a/qa/workunits/fs/snaps/snaptest-1.sh +++ b/qa/workunits/fs/snaps/snaptest-1.sh @@ -2,8 +2,6 @@ set -ex -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - echo 1 > file1 echo 2 > file2 echo 3 > file3 diff --git a/qa/workunits/fs/snaps/snaptest-2.sh b/qa/workunits/fs/snaps/snaptest-2.sh index 6ded7b66990..11fe9316ae8 100755 --- a/qa/workunits/fs/snaps/snaptest-2.sh +++ b/qa/workunits/fs/snaps/snaptest-2.sh @@ -1,7 +1,5 @@ #!/usr/bin/env bash -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - echo "Create dir 100 to 199 ..." for i in $(seq 100 199); do echo " create dir $i" diff --git a/qa/workunits/fs/snaps/snaptest-authwb.sh b/qa/workunits/fs/snaps/snaptest-authwb.sh index 2c53e2a6117..965ee851273 100755 --- a/qa/workunits/fs/snaps/snaptest-authwb.sh +++ b/qa/workunits/fs/snaps/snaptest-authwb.sh @@ -2,8 +2,6 @@ set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - touch foo chmod +x foo mkdir .snap/s @@ -11,4 +9,4 @@ find .snap/s/foo -executable | grep foo rmdir .snap/s rm foo -echo OK
\ No newline at end of file +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-capwb.sh b/qa/workunits/fs/snaps/snaptest-capwb.sh index f36d38ab5c0..d26f324b614 100755 --- a/qa/workunits/fs/snaps/snaptest-capwb.sh +++ b/qa/workunits/fs/snaps/snaptest-capwb.sh @@ -4,8 +4,6 @@ set -e mkdir foo -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - # make sure mds handles it when the client does not send flushsnap echo x > foo/x sync @@ -32,4 +30,4 @@ rmdir foo/.snap/s rm -r foo -echo OK
\ No newline at end of file +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-dir-rename.sh b/qa/workunits/fs/snaps/snaptest-dir-rename.sh index 85b929a25b0..3bbd9a11ef8 100755 --- a/qa/workunits/fs/snaps/snaptest-dir-rename.sh +++ b/qa/workunits/fs/snaps/snaptest-dir-rename.sh @@ -2,8 +2,6 @@ set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - # # make sure we keep an existing dn's seq # @@ -16,4 +14,4 @@ rmdir a stat .snap/bar/a rmdir .snap/bar -echo OK
\ No newline at end of file +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-double-null.sh b/qa/workunits/fs/snaps/snaptest-double-null.sh index 49a1b271c50..cdf32e4f0ef 100755 --- a/qa/workunits/fs/snaps/snaptest-double-null.sh +++ b/qa/workunits/fs/snaps/snaptest-double-null.sh @@ -2,8 +2,6 @@ set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - # multiple intervening snapshots with no modifications, and thus no # snapflush client_caps messages. make sure the mds can handle this. diff --git a/qa/workunits/fs/snaps/snaptest-estale.sh b/qa/workunits/fs/snaps/snaptest-estale.sh index e005b9a820b..a4fb94368d4 100755 --- a/qa/workunits/fs/snaps/snaptest-estale.sh +++ b/qa/workunits/fs/snaps/snaptest-estale.sh @@ -1,7 +1,5 @@ #!/bin/sh -x -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - mkdir .snap/foo echo "We want ENOENT, not ESTALE, here." diff --git a/qa/workunits/fs/snaps/snaptest-git-ceph.sh b/qa/workunits/fs/snaps/snaptest-git-ceph.sh index 50b854a5583..0d11efedb73 100755 --- a/qa/workunits/fs/snaps/snaptest-git-ceph.sh +++ b/qa/workunits/fs/snaps/snaptest-git-ceph.sh @@ -2,8 +2,6 @@ set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - git clone git://git.ceph.com/ceph.git cd ceph diff --git a/qa/workunits/fs/snaps/snaptest-hardlink.sh b/qa/workunits/fs/snaps/snaptest-hardlink.sh index 9848a01981e..90f3583b19e 100755 --- a/qa/workunits/fs/snaps/snaptest-hardlink.sh +++ b/qa/workunits/fs/snaps/snaptest-hardlink.sh @@ -2,8 +2,6 @@ set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - mkdir 1 2 echo asdf >1/file1 echo asdf >1/file2 diff --git a/qa/workunits/fs/snaps/snaptest-intodir.sh b/qa/workunits/fs/snaps/snaptest-intodir.sh index 94af442278b..d6a220f73bf 100755 --- a/qa/workunits/fs/snaps/snaptest-intodir.sh +++ b/qa/workunits/fs/snaps/snaptest-intodir.sh @@ -1,7 +1,5 @@ #!/bin/sh -ex -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - # this tests fix for #1399 
mkdir foo mkdir foo/.snap/one @@ -21,4 +19,4 @@ rmdir foo/baz/.snap/two rmdir foo/.snap/one rm -r foo -echo OK
\ No newline at end of file +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-multiple-capsnaps.sh b/qa/workunits/fs/snaps/snaptest-multiple-capsnaps.sh index 56ceaa8a95e..5ebc852cf6c 100755 --- a/qa/workunits/fs/snaps/snaptest-multiple-capsnaps.sh +++ b/qa/workunits/fs/snaps/snaptest-multiple-capsnaps.sh @@ -2,8 +2,6 @@ set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - echo asdf > a mkdir .snap/1 chmod 777 a diff --git a/qa/workunits/fs/snaps/snaptest-parents.sh b/qa/workunits/fs/snaps/snaptest-parents.sh index a66a977fd57..7ab1ba7cf2d 100755 --- a/qa/workunits/fs/snaps/snaptest-parents.sh +++ b/qa/workunits/fs/snaps/snaptest-parents.sh @@ -2,8 +2,6 @@ set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - echo "making directory tree and files" mkdir -p 1/a/b/c/ echo "i'm file1" > 1/a/file1 diff --git a/qa/workunits/fs/snaps/snaptest-realm-split.sh b/qa/workunits/fs/snaps/snaptest-realm-split.sh index 3f01fd54dee..300cca21de5 100755 --- a/qa/workunits/fs/snaps/snaptest-realm-split.sh +++ b/qa/workunits/fs/snaps/snaptest-realm-split.sh @@ -2,8 +2,6 @@ set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - mkdir -p 1/a exec 3<> 1/a/file1 diff --git a/qa/workunits/fs/snaps/snaptest-snap-rename.sh b/qa/workunits/fs/snaps/snaptest-snap-rename.sh index 9301a296377..aa7325b92ea 100755 --- a/qa/workunits/fs/snaps/snaptest-snap-rename.sh +++ b/qa/workunits/fs/snaps/snaptest-snap-rename.sh @@ -5,8 +5,6 @@ expect_failure() { } set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - mkdir -p d1/d2 mkdir -p d1/d3 mkdir d1/.snap/foo diff --git a/qa/workunits/fs/snaps/snaptest-snap-rm-cmp.sh b/qa/workunits/fs/snaps/snaptest-snap-rm-cmp.sh index c5bd65e9a8f..88a0e8ae54a 100755 --- a/qa/workunits/fs/snaps/snaptest-snap-rm-cmp.sh +++ b/qa/workunits/fs/snaps/snaptest-snap-rm-cmp.sh @@ -2,8 +2,6 @@ set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - file=linux-2.6.33.tar.bz2 wget -q 
http://download.ceph.com/qa/$file diff --git a/qa/workunits/fs/snaps/snaptest-upchildrealms.sh b/qa/workunits/fs/snaps/snaptest-upchildrealms.sh index a4cc9ab304b..4e531a96652 100755 --- a/qa/workunits/fs/snaps/snaptest-upchildrealms.sh +++ b/qa/workunits/fs/snaps/snaptest-upchildrealms.sh @@ -2,8 +2,6 @@ set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - # # verify that a snap update on a parent realm will induce # snap cap writeback for inodes child realms @@ -27,4 +25,4 @@ rmdir a/.snap/a1 rmdir a/.snap/a2 rm -r a -echo "OK"
\ No newline at end of file +echo "OK" diff --git a/qa/workunits/fs/snaps/snaptest-xattrwb.sh b/qa/workunits/fs/snaps/snaptest-xattrwb.sh index 09398878092..e503aed77b4 100755 --- a/qa/workunits/fs/snaps/snaptest-xattrwb.sh +++ b/qa/workunits/fs/snaps/snaptest-xattrwb.sh @@ -2,8 +2,6 @@ set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - echo "testing simple xattr wb" touch x setfattr -n user.foo x @@ -28,4 +26,4 @@ getfattr -n user.foo a/.snap/s/b | grep user.foo # should be there, too! rmdir a/.snap/s rm -r a -echo OK
\ No newline at end of file +echo OK diff --git a/qa/workunits/fs/snaps/untar_snap_rm.sh b/qa/workunits/fs/snaps/untar_snap_rm.sh index 928e8911ba8..8a8412e6659 100755 --- a/qa/workunits/fs/snaps/untar_snap_rm.sh +++ b/qa/workunits/fs/snaps/untar_snap_rm.sh @@ -2,8 +2,6 @@ set -e -ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it - do_tarball() { wget http://download.ceph.com/qa/$1 tar xvf$2 $1 diff --git a/qa/workunits/rbd/cli_migration.sh b/qa/workunits/rbd/cli_migration.sh new file mode 100755 index 00000000000..e1a4d2df8ce --- /dev/null +++ b/qa/workunits/rbd/cli_migration.sh @@ -0,0 +1,219 @@ +#!/usr/bin/env bash +set -ex + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +TEMPDIR= +IMAGE1=image1 +IMAGE2=image2 +IMAGE3=image3 +IMAGES="${IMAGE1} ${IMAGE2} ${IMAGE3}" + +cleanup() { + cleanup_tempdir + remove_images +} + +setup_tempdir() { + TEMPDIR=`mktemp -d` +} + +cleanup_tempdir() { + rm -rf ${TEMPDIR} +} + +create_base_image() { + local image=$1 + + rbd create --size 1G ${image} + rbd bench --io-type write --io-pattern rand --io-size=4K --io-total 256M ${image} + rbd snap create ${image}@1 + rbd bench --io-type write --io-pattern rand --io-size=4K --io-total 64M ${image} + rbd snap create ${image}@2 + rbd bench --io-type write --io-pattern rand --io-size=4K --io-total 128M ${image} +} + +export_raw_image() { + local image=$1 + + rm -rf "${TEMPDIR}/${image}" + rbd export ${image} "${TEMPDIR}/${image}" +} + +export_base_image() { + local image=$1 + + export_raw_image "${image}" + export_raw_image "${image}@1" + export_raw_image "${image}@2" +} + +remove_image() { + local image=$1 + + (rbd migration abort $image || true) >/dev/null 2>&1 + (rbd snap purge $image || true) >/dev/null 2>&1 + (rbd rm $image || true) >/dev/null 2>&1 +} + +remove_images() { + for image in ${IMAGES} + do + remove_image ${image} + done +} + +compare_images() { + local src_image=$1 + local dst_image=$2 + + export_raw_image ${dst_image} + cmp "${TEMPDIR}/${src_image}" 
"${TEMPDIR}/${dst_image}" +} + +test_import_native_format() { + local base_image=$1 + local dest_image=$2 + + local pool_id=$(ceph osd pool ls detail --format xml | xmlstarlet sel -t -v "//pools/pool[pool_name='rbd']/pool_id") + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "native", + "pool_id": ${pool_id}, + "pool_namespace": "", + "image_name": "${base_image}" +} +EOF + cat ${TEMPDIR}/spec.json + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + + compare_images "${base_image}@1" "${dest_image}@1" + compare_images "${base_image}@2" "${dest_image}@2" + compare_images "${base_image}" "${dest_image}" + + rbd snap create ${dest_image}@head + rbd bench --io-type write --io-pattern rand --io-size=32K --io-total=32M ${dest_image} + compare_images "${base_image}" "${dest_image}@head" + + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + rbd migration execute ${dest_image} + + compare_images "${base_image}@1" "${dest_image}@1" + compare_images "${base_image}@2" "${dest_image}@2" + compare_images "${base_image}" "${dest_image}" + + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec "{\"type\": \"native\", \"pool_id\": "${pool_id}", \"image_name\": \"${base_image}\"}" \ + ${dest_image} + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec "{\"type\": \"native\", \"pool_name\": \"rbd\", \"image_name\": \"${base_image}\"}" \ + ${dest_image} + rbd migration execute ${dest_image} + rbd migration commit ${dest_image} + + compare_images "${base_image}@1" "${dest_image}@1" + compare_images "${base_image}@2" "${dest_image}@2" + compare_images "${base_image}" "${dest_image}" + + remove_image "${dest_image}" +} + +test_import_raw_format() { + local base_image=$1 + local dest_image=$2 + + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "raw", + "stream": { + "type": "file", + 
"file_path": "${TEMPDIR}/${base_image}" + } +} +EOF + cat ${TEMPDIR}/spec.json + + cat ${TEMPDIR}/spec.json | rbd migration prepare --import-only \ + --source-spec-path - ${dest_image} + compare_images ${base_image} ${dest_image} + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + rbd migration execute ${dest_image} + rbd migration commit ${dest_image} + + compare_images ${base_image} ${dest_image} + + remove_image "${dest_image}" + + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "raw", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}" + }, + "snapshots": [{ + "type": "raw", + "name": "snap1", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}@1" + } + }, { + "type": "raw", + "name": "snap2", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}@2" + } + }] +} +EOF + cat ${TEMPDIR}/spec.json + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + + rbd snap create ${dest_image}@head + rbd bench --io-type write --io-pattern rand --io-size=32K --io-total=32M ${dest_image} + + compare_images "${base_image}" "${dest_image}@head" + compare_images "${base_image}@1" "${dest_image}@snap1" + compare_images "${base_image}@2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}@head" + + rbd migration execute ${dest_image} + + compare_images "${base_image}@1" "${dest_image}@snap1" + compare_images "${base_image}@2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}@head" + + rbd migration commit ${dest_image} + + remove_image "${dest_image}" +} + +# make sure rbd pool is EMPTY.. this is a test script!! +rbd ls 2>&1 | wc -l | grep -v '^0$' && echo "nonempty rbd pool, aborting! run this script on an empty test cluster only." && exit 1 + +setup_tempdir +trap 'cleanup $?' 
INT TERM EXIT + +create_base_image ${IMAGE1} +export_base_image ${IMAGE1} + +test_import_native_format ${IMAGE1} ${IMAGE2} +test_import_raw_format ${IMAGE1} ${IMAGE2} + +echo OK diff --git a/run-make-check.sh b/run-make-check.sh index 6e032e9f1ef..3a7451c54e7 100755 --- a/run-make-check.sh +++ b/run-make-check.sh @@ -20,6 +20,8 @@ source src/script/run-make.sh +set -e + function run() { # to prevent OSD EMFILE death on tests, make sure ulimit >= 1024 $DRY_RUN ulimit -n $(ulimit -Hn) @@ -62,7 +64,8 @@ function main() { cmake_opts+=" -DWITH_ZBD=ON" fi configure $cmake_opts $@ - build tests && echo "make check: successful build on $(git rev-parse HEAD)" + build tests + echo "make check: successful build on $(git rev-parse HEAD)" run } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2af3f66e3d4..d5ce19c54b7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -28,7 +28,8 @@ add_definitions( -D_REENTRANT -D_THREAD_SAFE -D__STDC_FORMAT_MACROS - -D_FILE_OFFSET_BITS=64) + -D_FILE_OFFSET_BITS=64 + -DBOOST_ASIO_DISABLE_THREAD_KEYWORD_EXTENSION) if(LINUX) add_definitions("-D_GNU_SOURCE") endif() @@ -299,7 +300,7 @@ add_subdirectory(json_spirit) include_directories(SYSTEM "${CMAKE_SOURCE_DIR}/src/xxHash") include_directories(SYSTEM "${CMAKE_SOURCE_DIR}/src/rapidjson/include") -find_package(fmt 5.2.1 QUIET) +find_package(fmt 6.0.0 QUIET) if(fmt_FOUND) include_directories(SYSTEM "${fmt_INCLUDE_DIR}") else() @@ -511,7 +512,7 @@ endif(${WITH_LTTNG}) add_subdirectory(global) -find_package(Lua REQUIRED) +find_package(Lua 5.3 REQUIRED) # rados object classes add_subdirectory(cls) diff --git a/src/blk/BlockDevice.cc b/src/blk/BlockDevice.cc index 345d51fdb09..6804ee50cbc 100644 --- a/src/blk/BlockDevice.cc +++ b/src/blk/BlockDevice.cc @@ -196,3 +196,20 @@ void BlockDevice::reap_ioc() --ioc_reap_count; } } + +bool BlockDevice::is_valid_io(uint64_t off, uint64_t len) const { + bool ret = (off % block_size == 0 && + len % block_size == 0 && + len > 0 && + off < size && 
+ off + len <= size); + + if (!ret) { + derr << __func__ << " " << std::hex + << off << "~" << len + << " block_size " << block_size + << " size " << size + << std::dec << dendl; + } + return ret; +} diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h index 8ed53e69266..191eb8ec908 100644 --- a/src/blk/BlockDevice.h +++ b/src/blk/BlockDevice.h @@ -272,13 +272,7 @@ public: virtual void close() = 0; protected: - bool is_valid_io(uint64_t off, uint64_t len) const { - return (off % block_size == 0 && - len % block_size == 0 && - len > 0 && - off < size && - off + len <= size); - } + bool is_valid_io(uint64_t off, uint64_t len) const; }; #endif //CEPH_BLK_BLOCKDEVICE_H diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc index 58142b3c106..e3ef9f155d8 100644 --- a/src/blk/kernel/KernelDevice.cc +++ b/src/blk/kernel/KernelDevice.cc @@ -70,7 +70,9 @@ KernelDevice::KernelDevice(CephContext* cct, aio_callback_t cb, void *cbpriv, ai unsigned int iodepth = cct->_conf->bdev_aio_max_queue_depth; if (use_ioring && ioring_queue_t::supported()) { - io_queue = std::make_unique<ioring_queue_t>(iodepth); + bool use_ioring_hipri = cct->_conf.get_val<bool>("bdev_ioring_hipri"); + bool use_ioring_sqthread_poll = cct->_conf.get_val<bool>("bdev_ioring_sqthread_poll"); + io_queue = std::make_unique<ioring_queue_t>(iodepth, use_ioring_hipri, use_ioring_sqthread_poll); } else { static bool once; if (use_ioring && !once) { diff --git a/src/blk/kernel/io_uring.cc b/src/blk/kernel/io_uring.cc index f248d38197a..3eb7a2d831d 100644 --- a/src/blk/kernel/io_uring.cc +++ b/src/blk/kernel/io_uring.cc @@ -3,16 +3,11 @@ #include "io_uring.h" -#if defined(HAVE_LIBURING) && defined(__x86_64__) +#if defined(HAVE_LIBURING) #include "liburing.h" #include <sys/epoll.h> -/* Options */ - -static bool hipri = false; /* use IO polling */ -static bool sq_thread = false; /* use kernel submission/poller thread */ - struct ioring_data { struct io_uring io_uring; pthread_mutex_t 
cq_mutex; @@ -108,9 +103,11 @@ static void build_fixed_fds_map(struct ioring_data *d, } } -ioring_queue_t::ioring_queue_t(unsigned iodepth_) : +ioring_queue_t::ioring_queue_t(unsigned iodepth_, bool hipri_, bool sq_thread_) : d(make_unique<ioring_data>()), - iodepth(iodepth_) + iodepth(iodepth_), + hipri(hipri_), + sq_thread(sq_thread_) { } @@ -220,11 +217,11 @@ bool ioring_queue_t::supported() return true; } -#else // #if defined(HAVE_LIBURING) && defined(__x86_64__) +#else // #if defined(HAVE_LIBURING) struct ioring_data {}; -ioring_queue_t::ioring_queue_t(unsigned iodepth_) +ioring_queue_t::ioring_queue_t(unsigned iodepth_, bool hipri_, bool sq_thread_) { ceph_assert(0); } @@ -261,4 +258,4 @@ bool ioring_queue_t::supported() return false; } -#endif // #if defined(HAVE_LIBURING) && defined(__x86_64__) +#endif // #if defined(HAVE_LIBURING) diff --git a/src/blk/kernel/io_uring.h b/src/blk/kernel/io_uring.h index f4ac2f6e12d..e7d0acde013 100644 --- a/src/blk/kernel/io_uring.h +++ b/src/blk/kernel/io_uring.h @@ -13,13 +13,15 @@ struct ioring_data; struct ioring_queue_t final : public io_queue_t { std::unique_ptr<ioring_data> d; unsigned iodepth = 0; + bool hipri = false; + bool sq_thread = false; typedef std::list<aio_t>::iterator aio_iter; // Returns true if arch is x86-64 and kernel supports io_uring static bool supported(); - ioring_queue_t(unsigned iodepth_); + ioring_queue_t(unsigned iodepth_, bool hipri_, bool sq_thread_); ~ioring_queue_t() final; int init(std::vector<int> &fds) final; diff --git a/src/blk/zoned/HMSMRDevice.cc b/src/blk/zoned/HMSMRDevice.cc index 867c9df43b3..8a30be9b0a2 100644 --- a/src/blk/zoned/HMSMRDevice.cc +++ b/src/blk/zoned/HMSMRDevice.cc @@ -64,7 +64,9 @@ HMSMRDevice::HMSMRDevice(CephContext* cct, aio_callback_t cb, void *cbpriv, aio_ unsigned int iodepth = cct->_conf->bdev_aio_max_queue_depth; if (use_ioring && ioring_queue_t::supported()) { - io_queue = std::make_unique<ioring_queue_t>(iodepth); + bool use_ioring_hipri = 
cct->_conf.get_val<bool>("bdev_ioring_hipri"); + bool use_ioring_sqthread_poll = cct->_conf.get_val<bool>("bdev_ioring_sqthread_poll"); + io_queue = std::make_unique<ioring_queue_t>(iodepth, use_ioring_hipri, use_ioring_sqthread_poll); } else { static bool once; if (use_ioring && !once) { diff --git a/src/ceph-volume/ceph_volume/devices/lvm/batch.py b/src/ceph-volume/ceph_volume/devices/lvm/batch.py index 2dbe20f05e4..a6f7632b317 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/batch.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/batch.py @@ -175,28 +175,28 @@ class Batch(object): 'devices', metavar='DEVICES', nargs='*', - type=arg_validators.ValidDevice(), + type=arg_validators.ValidBatchDevice(), default=[], help='Devices to provision OSDs', ) parser.add_argument( '--db-devices', nargs='*', - type=arg_validators.ValidDevice(), + type=arg_validators.ValidBatchDevice(), default=[], help='Devices to provision OSDs db volumes', ) parser.add_argument( '--wal-devices', nargs='*', - type=arg_validators.ValidDevice(), + type=arg_validators.ValidBatchDevice(), default=[], help='Devices to provision OSDs wal volumes', ) parser.add_argument( '--journal-devices', nargs='*', - type=arg_validators.ValidDevice(), + type=arg_validators.ValidBatchDevice(), default=[], help='Devices to provision OSDs journal volumes', ) diff --git a/src/ceph-volume/ceph_volume/devices/lvm/create.py b/src/ceph-volume/ceph_volume/devices/lvm/create.py index 8380a18f427..af2cd96c084 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/create.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/create.py @@ -42,7 +42,7 @@ class Create(object): Create an OSD by assigning an ID and FSID, registering them with the cluster with an ID and FSID, formatting and mounting the volume, adding all the metadata to the logical volumes using LVM tags, and starting - the OSD daemon. This is a convinience command that combines the prepare + the OSD daemon. 
This is a convenience command that combines the prepare and activate steps. Encryption is supported via dmcrypt and the --dmcrypt flag. diff --git a/src/ceph-volume/ceph_volume/inventory/main.py b/src/ceph-volume/ceph_volume/inventory/main.py index 7053a3ebf8f..aa70e92f19d 100644 --- a/src/ceph-volume/ceph_volume/inventory/main.py +++ b/src/ceph-volume/ceph_volume/inventory/main.py @@ -38,17 +38,25 @@ class Inventory(object): 'no effect when <path> is passed'), default=False, ) + parser.add_argument( + '--with-lsm', + action='store_true', + help=('Attempt to retrieve additional health and metadata through ' + 'libstoragemgmt'), + default=False, + ) self.args = parser.parse_args(self.argv) if self.args.path: - self.format_report(Device(self.args.path)) + self.format_report(Device(self.args.path, with_lsm=self.args.with_lsm)) else: - self.format_report(Devices(filter_for_batch=self.args.filter_for_batch)) + self.format_report(Devices(filter_for_batch=self.args.filter_for_batch, + with_lsm=self.args.with_lsm)) def get_report(self): if self.args.path: - return Device(self.args.path).json_report() + return Device(self.args.path, with_lsm=self.args.with_lsm).json_report() else: - return Devices(filter_for_batch=self.args.filter_for_batch).json_report() + return Devices(filter_for_batch=self.args.filter_for_batch, with_lsm=self.args.with_lsm).json_report() def format_report(self, inventory): if self.args.format == 'json': diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py index 44a05bfa828..7c968ae81d5 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py @@ -1,7 +1,12 @@ import pytest import json import random + +from argparse import ArgumentError +from mock import MagicMock, patch + from ceph_volume.devices.lvm import batch +from ceph_volume.util import arg_validators class TestBatch(object): @@ -19,6 
+24,15 @@ class TestBatch(object): batch.ensure_disjoint_device_lists(devices, db_devices) assert 'Device lists are not disjoint' in str(disjoint_ex.value) + @patch('ceph_volume.util.arg_validators.Device') + def test_reject_partition(self, mocked_device): + mocked_device.return_value = MagicMock( + is_partition=True, + has_gpt_headers=False, + ) + with pytest.raises(ArgumentError): + arg_validators.ValidBatchDevice()('foo') + @pytest.mark.parametrize('format_', ['pretty', 'json', 'json-pretty']) def test_report(self, format_, factory, conf_ceph_stub, mock_device_generator): # just ensure reporting works diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py index 993e721f107..70915a0fe03 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py @@ -117,7 +117,7 @@ class TestPrepare(object): assert expected in str(error.value) def test_setup_device_device_name_is_none(self): - result = lvm.prepare.Prepare.setup_device(self=None, device_type='data', device_name=None, tags={'ceph.type': 'data'}, size=0, slots=None) + result = lvm.prepare.Prepare([]).setup_device(device_type='data', device_name=None, tags={'ceph.type': 'data'}, size=0, slots=None) assert result == ('', '', {'ceph.type': 'data'}) @patch('ceph_volume.api.lvm.Volume.set_tags') diff --git a/src/ceph-volume/ceph_volume/util/arg_validators.py b/src/ceph-volume/ceph_volume/util/arg_validators.py index a04c19924bb..94cb4f691db 100644 --- a/src/ceph-volume/ceph_volume/util/arg_validators.py +++ b/src/ceph-volume/ceph_volume/util/arg_validators.py @@ -12,11 +12,23 @@ class ValidDevice(object): self.as_string = as_string self.gpt_ok = gpt_ok - def __call__(self, string): - device = Device(string) + def __call__(self, dev_path): + device = self._is_valid_device(dev_path) + return self._format_device(device) + + def _format_device(self, device): + 
if self.as_string: + if device.is_lv: + # all codepaths expect an lv path to be returned in this format + return "{}/{}".format(device.vg_name, device.lv_name) + return device.path + return device + + def _is_valid_device(self, dev_path): + device = Device(dev_path) error = None if not device.exists: - error = "Unable to proceed with non-existing device: %s" % string + error = "Unable to proceed with non-existing device: %s" % dev_path # FIXME this is not a nice API, this validator was meant to catch any # non-existing devices upfront, not check for gpt headers. Now this # needs to optionally skip checking gpt headers which is beyond @@ -24,19 +36,26 @@ class ValidDevice(object): # configure this with a list of checks that can be excluded/included on # __init__ elif device.has_gpt_headers and not self.gpt_ok: - error = "GPT headers found, they must be removed on: %s" % string + error = "GPT headers found, they must be removed on: %s" % dev_path if error: raise argparse.ArgumentError(None, error) - if self.as_string: - if device.is_lv: - # all codepaths expect an lv path to be returned in this format - return "{}/{}".format(device.vg_name, device.lv_name) - return string return device +class ValidBatchDevice(ValidDevice): + + def __call__(self, dev_path): + dev = self._is_valid_device(dev_path) + if dev.is_partition: + raise argparse.ArgumentError( + None, + '{} is a partition, please pass ' + 'LVs or raw block devices'.format(dev_path)) + return self._format_device(dev) + + class OSDPath(object): """ Validate path exists and it looks like an OSD directory. 
diff --git a/src/ceph-volume/ceph_volume/util/device.py b/src/ceph-volume/ceph_volume/util/device.py index 708227e8314..193a72f3d00 100644 --- a/src/ceph-volume/ceph_volume/util/device.py +++ b/src/ceph-volume/ceph_volume/util/device.py @@ -27,10 +27,10 @@ class Devices(object): A container for Device instances with reporting """ - def __init__(self, filter_for_batch=False): + def __init__(self, filter_for_batch=False, with_lsm=False): if not sys_info.devices: sys_info.devices = disk.get_devices() - self.devices = [Device(k) for k in + self.devices = [Device(k, with_lsm) for k in sys_info.devices.keys()] if filter_for_batch: self.devices = [d for d in self.devices if d.available_lvm_batch] @@ -83,7 +83,7 @@ class Device(object): # unittests lvs = [] - def __init__(self, path): + def __init__(self, path, with_lsm=False): self.path = path # LVs can have a vg/lv path, while disks will have /dev/sda self.abspath = path @@ -98,7 +98,7 @@ class Device(object): self._exists = None self._is_lvm_member = None self._parse() - self.lsm_data = self.fetch_lsm() + self.lsm_data = self.fetch_lsm(with_lsm) self.available_lvm, self.rejected_reasons_lvm = self._check_lvm_reject_reasons() self.available_raw, self.rejected_reasons_raw = self._check_raw_reject_reasons() @@ -108,7 +108,7 @@ class Device(object): self.device_id = self._get_device_id() - def fetch_lsm(self): + def fetch_lsm(self, with_lsm): ''' Attempt to fetch libstoragemgmt (LSM) metadata, and return to the caller as a dict. An empty dict is passed back to the caller if the target path @@ -116,11 +116,11 @@ class Device(object): json returned will provide LSM attributes, and any associated errors that lsm encountered when probing the device. 
''' - if not self.exists or not self.is_device: + if not with_lsm or not self.exists or not self.is_device: return {} lsm_disk = LSMDisk(self.path) - + return lsm_disk.json_report() def __lt__(self, other): diff --git a/src/ceph_fuse.cc b/src/ceph_fuse.cc index 1529ec2322e..b7f8a4d4b90 100644 --- a/src/ceph_fuse.cc +++ b/src/ceph_fuse.cc @@ -179,9 +179,8 @@ int main(int argc, const char **argv, const char *envp[]) { } { - g_ceph_context->_conf.finalize_reexpand_meta(); common_init_finish(g_ceph_context); - + init_async_signal_handler(); register_async_signal_handler(SIGHUP, sighup_handler); diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index 8f7dde093c7..896b7a09361 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -59,6 +59,14 @@ import tempfile import time import errno import struct +from socketserver import ThreadingMixIn +from http.server import BaseHTTPRequestHandler, HTTPServer +import signal +import io +from contextlib import redirect_stdout +import ssl + + try: from typing import Dict, List, Tuple, Optional, Union, Any, NoReturn, Callable, IO except ImportError: @@ -69,7 +77,7 @@ import uuid from functools import wraps from glob import glob -from threading import Thread +from threading import Thread, RLock if sys.version_info >= (3, 0): from io import StringIO @@ -677,6 +685,7 @@ def get_supported_daemons(): supported_daemons.append(NFSGanesha.daemon_type) supported_daemons.append(CephIscsi.daemon_type) supported_daemons.append(CustomContainer.daemon_type) + supported_daemons.append(CephadmDaemon.daemon_type) assert len(supported_daemons) == len(set(supported_daemons)) return supported_daemons @@ -1409,13 +1418,16 @@ def get_last_local_ceph_image(): [container_path, 'images', '--filter', 'label=ceph=True', '--filter', 'dangling=false', - '--format', '{{.Repository}} {{.Tag}}']) - for line in out.splitlines(): - if len(line.split()) == 2: - repository, tag = line.split() - r = '{}:{}'.format(repository, tag) - logger.info('Using recent 
ceph image %s' % r) - return r + '--format', '{{.Repository}}@{{.Digest}}']) + return _filter_last_local_ceph_image(out) + + +def _filter_last_local_ceph_image(out): + # str -> Optional[str] + for image in out.splitlines(): + if image and not image.endswith('@'): + logger.info('Using recent ceph image %s' % image) + return image return None @@ -1604,7 +1616,9 @@ def find_program(filename): def get_unit_name(fsid, daemon_type, daemon_id=None): # type: (str, str, Optional[Union[int, str]]) -> str # accept either name or type + id - if daemon_id is not None: + if daemon_type == CephadmDaemon.daemon_type and daemon_id is not None: + return 'ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id) + elif daemon_id is not None: return 'ceph-%s@%s.%s' % (fsid, daemon_type, daemon_id) else: return 'ceph-%s@%s' % (fsid, daemon_type) @@ -2088,7 +2102,7 @@ def deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid, osd_fsid=None, reconfig=False, ports=None): - # type: (str, str, Union[int, str], CephContainer, int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None + # type: (str, str, Union[int, str], Optional[CephContainer], int, int, Optional[str], Optional[str], Optional[str], Optional[bool], Optional[List[int]]) -> None ports = ports or [] if any([port_in_use(port) for port in ports]): @@ -2140,8 +2154,23 @@ def deploy_daemon(fsid, daemon_type, daemon_id, c, uid, gid, config, keyring) if not reconfig: - deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c, - osd_fsid=osd_fsid) + if daemon_type == CephadmDaemon.daemon_type: + port = next(iter(ports), None) # get first tcp port provided or None + + if args.config_json == '-': + config_js = get_parm('-') + else: + config_js = get_parm(args.config_json) + assert isinstance(config_js, dict) + + cephadm_exporter = CephadmDaemon(fsid, daemon_id, port) + cephadm_exporter.deploy_daemon_unit(config_js) + else: + if c: + deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c, + 
osd_fsid=osd_fsid) + else: + raise RuntimeError("attempting to deploy a daemon without a container image") if not os.path.exists(data_dir + '/unit.created'): with open(data_dir + '/unit.created', 'w') as f: @@ -2274,11 +2303,12 @@ def deploy_daemon_units(fsid, uid, gid, daemon_type, daemon_id, c, os.rename(data_dir + '/unit.poststop.new', data_dir + '/unit.poststop') - with open(data_dir + '/unit.image.new', 'w') as f: - f.write(c.image + '\n') - os.fchmod(f.fileno(), 0o600) - os.rename(data_dir + '/unit.image.new', - data_dir + '/unit.image') + if c: + with open(data_dir + '/unit.image.new', 'w') as f: + f.write(c.image + '\n') + os.fchmod(f.fileno(), 0o600) + os.rename(data_dir + '/unit.image.new', + data_dir + '/unit.image') # systemd install_base_units(fsid) @@ -2367,6 +2397,26 @@ class Firewalld(object): else: logger.debug('firewalld port %s is enabled in current zone' % tcp_port) + def close_ports(self, fw_ports): + # type: (List[int]) -> None + if not self.available: + logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports) + return + + for port in fw_ports: + tcp_port = str(port) + '/tcp' + out, err, ret = call([self.cmd, '--permanent', '--query-port', tcp_port], verbose_on_failure=False) + if not ret: + logger.info('Disabling port %s in current zone...' 
% tcp_port) + out, err, ret = call([self.cmd, '--permanent', '--remove-port', tcp_port]) + if ret: + raise RuntimeError('unable to remove port %s from current zone: %s' % + (tcp_port, err)) + else: + logger.info(f"Port {tcp_port} disabled") + else: + logger.info(f"firewalld port {tcp_port} already closed") + def apply_rules(self): # type: () -> None if not self.available: @@ -3207,6 +3257,28 @@ def command_bootstrap(): if args.container_init: cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(args.container_init), '--force']) + if args.with_exporter: + cli(['config-key', 'set', 'mgr/cephadm/exporter_enabled', 'true']) + if args.exporter_config: + logger.info("Applying custom cephadm exporter settings") + # validated within the parser, so we can just apply to the store + with tempfile.NamedTemporaryFile(buffering=0) as tmp: + tmp.write(json.dumps(args.exporter_config).encode('utf-8')) + mounts = { + tmp.name: "/tmp/exporter-config.json:z" + } + cli(["cephadm", "set-exporter-config", "-i", "/tmp/exporter-config.json"], extra_mounts=mounts) + logger.info("-> Use ceph orch apply cephadm-exporter to deploy") + else: + # generate a default SSL configuration for the exporter(s) + logger.info("Generating a default cephadm exporter configuration (self-signed)") + cli(['cephadm', 'generate-exporter-config']) + # + # deploy the service (commented out until the cephadm changes are in the ceph container build) + # logger.info('Deploying cephadm exporter service with default placement...') + # cli(['orch', 'apply', 'cephadm-exporter']) + + if not args.skip_dashboard: # Configure SSL port (cephadm only allows to configure dashboard SSL port) # if the user does not want to use SSL he can change this setting once the cluster is up @@ -3445,6 +3517,20 @@ def command_deploy(): keyring=None, reconfig=args.reconfig, ports=daemon_ports) + elif daemon_type == CephadmDaemon.daemon_type: + # get current user gid and uid + uid = os.getuid() + gid = os.getgid() + config_js = 
get_parm(args.config_json) # type: Dict[str, str] + if not daemon_ports: + logger.info("cephadm-exporter will use default port ({})".format(CephadmDaemon.default_port)) + daemon_ports =[CephadmDaemon.default_port] + + CephadmDaemon.validate_config(config_js) + + deploy_daemon(args.fsid, daemon_type, daemon_id, None, + uid, gid, ports=daemon_ports) + else: raise Error('daemon type {} not implemented in command_deploy function' .format(daemon_type)) @@ -3608,7 +3694,7 @@ def command_ceph_volume(): privileged=True, volume_mounts=mounts, ) - out, err, code = call_throws(c.run_cmd(), verbose=True) + out, err, code = call_throws(c.run_cmd(), verbose=args.log_output) if not code: print(out) @@ -4296,10 +4382,9 @@ def command_rm_daemon(): l = FileLock(args.fsid) l.acquire() - + (daemon_type, daemon_id) = args.name.split('.', 1) unit_name = get_unit_name_by_daemon_name(args.fsid, args.name) - (daemon_type, daemon_id) = args.name.split('.', 1) if daemon_type in ['mon', 'osd'] and not args.force: raise Error('must pass --force to proceed: ' 'this command may destroy precious data!') @@ -4322,6 +4407,8 @@ def command_rm_daemon(): os.rename(data_dir, os.path.join(backup_dir, dirname)) else: + if daemon_type == CephadmDaemon.daemon_type: + CephadmDaemon.uninstall(args.fsid, daemon_type, daemon_id) call_throws(['rm', '-rf', data_dir]) ################################## @@ -4514,6 +4601,16 @@ class CustomValidation(argparse.Action): if self.dest == "name": self._check_name(values) setattr(namespace, self.dest, values) + elif self.dest == 'exporter_config': + cfg = get_parm(values) + # run the class' validate method, and convert to an argparse error + # if problems are found + try: + CephadmDaemon.validate_config(cfg) + except Error as e: + raise argparse.ArgumentError(self, + str(e)) + setattr(namespace, self.dest, cfg) ################################## @@ -5436,7 +5533,6 @@ class HostFacts(): up_secs, _ = raw_time.split() return float(up_secs) - @property def 
kernel_security(self): # type: () -> Dict[str, str] """Determine the security features enabled in the kernel - SELinux, AppArmor""" @@ -5501,6 +5597,23 @@ class HostFacts(): "description": "Linux Security Module framework is not available" } + @property + def kernel_parameters(self): + # type: () -> Dict[str, str] + """Get kernel parameters required/used in Ceph clusters""" + + k_param = {} + out, _, _ = call_throws(['sysctl', '-a']) + if out: + param_list = out.split('\n') + param_dict = { param.split(" = ")[0]:param.split(" = ")[-1] for param in param_list} + + # return only desired parameters + if 'net.ipv4.ip_nonlocal_bind' in param_dict: + k_param['net.ipv4.ip_nonlocal_bind'] = param_dict['net.ipv4.ip_nonlocal_bind'] + + return k_param + def dump(self): # type: () -> str """Return the attributes of this HostFacts object as json""" @@ -5522,6 +5635,668 @@ def command_gather_facts(): ################################## +class CephadmCache: + task_types = ['disks', 'daemons', 'host', 'http_server'] + + def __init__(self): + self.started_epoch_secs = time.time() + self.tasks = { + "daemons": "inactive", + "disks": "inactive", + "host": "inactive", + "http_server": "inactive", + } + self.errors = [] + self.disks = {} + self.daemons = {} + self.host = {} + self.lock = RLock() + + @property + def health(self): + return { + "started_epoch_secs": self.started_epoch_secs, + "tasks": self.tasks, + "errors": self.errors, + } + + def to_json(self): + return { + "health": self.health, + "host": self.host, + "daemons": self.daemons, + "disks": self.disks, + } + + def update_health(self, task_type, task_status, error_msg=None): + assert task_type in CephadmCache.task_types + with self.lock: + self.tasks[task_type] = task_status + if error_msg: + self.errors.append(error_msg) + + def update_task(self, task_type, content): + assert task_type in CephadmCache.task_types + assert isinstance(content, dict) + with self.lock: + current = getattr(self, task_type) + for k in content: + 
current[k] = content[k] + + setattr(self, task_type, current) + + +class CephadmHTTPServer(ThreadingMixIn, HTTPServer): + allow_reuse_address = True + daemon_threads = True + cephadm_cache: CephadmCache + token: str + +class CephadmDaemonHandler(BaseHTTPRequestHandler): + server: CephadmHTTPServer + api_version = 'v1' + valid_routes = [ + f'/{api_version}/metadata', + f'/{api_version}/metadata/health', + f'/{api_version}/metadata/disks', + f'/{api_version}/metadata/daemons', + f'/{api_version}/metadata/host', + ] + class Decorators: + @classmethod + def authorize(cls, f): + """Implement a basic token check. + + The token is installed at deployment time and must be provided to + ensure we only respond to callers who know our token i.e. mgr + """ + def wrapper(self, *args, **kwargs): + auth = self.headers.get("Authorization", None) + if auth != "Bearer " + self.server.token: + self.send_error(401) + return + f(self, *args, **kwargs) + return wrapper + + def _help_page(self): + return """<!DOCTYPE html> +<html> +<head><title>cephadm metadata exporter</title></head> +<style> +body {{ + font-family: sans-serif; + font-size: 0.8em; +}} +table {{ + border-width: 0px; + border-spacing: 0px; + margin-left:20px; +}} +tr:hover {{ + background: PowderBlue; +}} +td,th {{ + padding: 5px; +}} +</style> +<body> + <h1>cephadm metadata exporter {api_version}</h1> + <table> + <thead> + <tr><th>Endpoint</th><th>Methods</th><th>Response</th><th>Description</th></tr> + </thead> + <tr><td><a href='{api_version}/metadata'>{api_version}/metadata</a></td><td>GET</td><td>JSON</td><td>Return <b>all</b> metadata for the host</td></tr> + <tr><td><a href='{api_version}/metadata/daemons'>{api_version}/metadata/daemons</a></td><td>GET</td><td>JSON</td><td>Return daemon and systemd states for ceph daemons (ls)</td></tr> + <tr><td><a href='{api_version}/metadata/disks'>{api_version}/metadata/disks</a></td><td>GET</td><td>JSON</td><td>show disk inventory (ceph-volume)</td></tr> + <tr><td><a 
href='{api_version}/metadata/health'>{api_version}/metadata/health</a></td><td>GET</td><td>JSON</td><td>Show current health of the exporter sub-tasks</td></tr> + <tr><td><a href='{api_version}/metadata/host'>{api_version}/metadata/host</a></td><td>GET</td><td>JSON</td><td>Show host metadata (gather-facts)</td></tr> + </table> +</body> +</html>""".format(api_version=CephadmDaemonHandler.api_version) + + def _fetch_root(self): + self.send_response(200) + self.send_header('Content-type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(self._help_page().encode('utf-8')) + + @Decorators.authorize + def do_GET(self): + """Handle *all* GET requests""" + + if self.path == '/': + # provide a html response if someone hits the root url, to document the + # available api endpoints + return self._fetch_root() + elif self.path in CephadmDaemonHandler.valid_routes: + u = self.path.split('/')[-1] + data = json.dumps({}) + status_code = 200 + + tasks = self.server.cephadm_cache.health.get('tasks', {}) + assert tasks + + # We're using the http status code to help indicate thread health + # - 200 (OK): request successful + # - 204 (No Content): access to a cache relating to a dead thread + # - 206 (Partial content): one or more theads are inactive + # - 500 (Server Error): all threads inactive + if u == 'metadata': + data = json.dumps(self.server.cephadm_cache.to_json()) + if all([tasks[task_name] == 'inactive' for task_name in tasks if task_name != 'http_server']): + # All the subtasks are dead! 
+ status_code = 500 + elif any([tasks[task_name] == 'inactive' for task_name in tasks if task_name != 'http_server']): + status_code = 206 + + # Individual GETs against the a tasks endpoint will also return a 503 if the corresponding thread is inactive + elif u == 'daemons': + data = json.dumps(self.server.cephadm_cache.daemons) + if tasks['daemons'] == 'inactive': + status_code = 204 + elif u == 'disks': + data = json.dumps(self.server.cephadm_cache.disks) + if tasks['disks'] == 'inactive': + status_code = 204 + elif u == 'host': + data = json.dumps(self.server.cephadm_cache.host) + if tasks['host'] == 'inactive': + status_code = 204 + + # a GET against health will always return a 200, since the op is always successful + elif u == 'health': + data = json.dumps(self.server.cephadm_cache.health) + + self.send_response(status_code) + self.send_header('Content-type','application/json') + self.end_headers() + self.wfile.write(data.encode('utf-8')) + else: + # Invalid GET URL + bad_request_msg = "Valid URLs are: {}".format(', '.join(CephadmDaemonHandler.valid_routes)) + self.send_response(404, message=bad_request_msg) # reason + self.send_header('Content-type','application/json') + self.end_headers() + self.wfile.write(json.dumps({"message": bad_request_msg}).encode('utf-8')) + + def log_message(self, format, *args): + rqst = " ".join(str(a) for a in args) + logger.info(f"client:{self.address_string()} [{self.log_date_time_string()}] {rqst}") + + +class CephadmDaemon(): + + daemon_type = "cephadm-exporter" + default_port = 9443 + bin_name = 'cephadm' + key_name = "key" + crt_name = "crt" + token_name = "token" + config_requirements = [ + key_name, + crt_name, + token_name, + ] + loop_delay = 1 + thread_check_interval = 5 + + def __init__(self, fsid, daemon_id=None, port=None): + self.fsid = fsid + self.daemon_id = daemon_id + if not port: + self.port = CephadmDaemon.default_port + else: + self.port = port + self.workers = [] + self.http_server: CephadmHTTPServer + 
self.stop = False + self.cephadm_cache = CephadmCache() + self.errors = [] + self.token = read_file([os.path.join(self.daemon_path, CephadmDaemon.token_name)]) + + @classmethod + def validate_config(cls, config): + reqs = ", ".join(CephadmDaemon.config_requirements) + errors = [] + + if not config or not all([k_name in config for k_name in CephadmDaemon.config_requirements]): + raise Error(f"config must contain the following fields : {reqs}") + + if not all([isinstance(config[k_name], str) for k_name in CephadmDaemon.config_requirements]): + errors.append(f"the following fields must be strings: {reqs}") + + crt = config[CephadmDaemon.crt_name] + key = config[CephadmDaemon.key_name] + token = config[CephadmDaemon.token_name] + + if not crt.startswith('-----BEGIN CERTIFICATE-----') or not crt.endswith('-----END CERTIFICATE-----\n'): + errors.append("crt field is not a valid SSL certificate") + if not key.startswith('-----BEGIN PRIVATE KEY-----') or not key.endswith('-----END PRIVATE KEY-----\n'): + errors.append("key is not a valid SSL private key") + if len(token) < 8: + errors.append("'token' must be more than 8 characters long") + + if 'port' in config: + try: + p = int(config['port']) + if p <= 1024: + raise ValueError + except (TypeError, ValueError): + errors.append("port must be an integer > 1024") + + if errors: + raise Error("Parameter errors : {}".format(", ".join(errors))) + + @property + def port_active(self): + return port_in_use(self.port) + + @property + def can_run(self): + # if port is in use + if self.port_active: + self.errors.append(f"TCP port {self.port} already in use, unable to bind") + if not os.path.exists(os.path.join(self.daemon_path, CephadmDaemon.key_name)): + self.errors.append(f"Key file '{CephadmDaemon.key_name}' is missing from {self.daemon_path}") + if not os.path.exists(os.path.join(self.daemon_path, CephadmDaemon.crt_name)): + self.errors.append(f"Certificate file '{CephadmDaemon.crt_name}' is missing from {self.daemon_path}") + if 
self.token == "Unknown": + self.errors.append(f"Authentication token '{CephadmDaemon.token_name}' is missing from {self.daemon_path}") + return len(self.errors) == 0 + + @staticmethod + def _unit_name(fsid, daemon_id): + return "{}.service".format(get_unit_name(fsid, CephadmDaemon.daemon_type, daemon_id)) + + @property + def unit_name(self): + return CephadmDaemon._unit_name(self.fsid, self.daemon_id) + + @property + def daemon_path(self): + return os.path.join( + args.data_dir, + self.fsid, + f'{self.daemon_type}.{self.daemon_id}' + ) + + @property + def binary_path(self): + return os.path.join( + args.data_dir, + self.fsid, + CephadmDaemon.bin_name + ) + + def _handle_thread_exception(self, exc, thread_type): + e_msg = f"{exc.__class__.__name__} exception: {str(exc)}" + thread_info = getattr(self.cephadm_cache, thread_type) + errors = thread_info.get('scrape_errors', []) + errors.append(e_msg) + logger.error(e_msg) + logger.exception(exc) + self.cephadm_cache.update_task( + thread_type, + { + "scrape_errors": errors, + "data": None, + } + ) + + def _scrape_host_facts(self, refresh_interval=10): + ctr = 0 + exception_encountered = False + + while True: + + if self.stop or exception_encountered: + break + + if ctr >= refresh_interval: + ctr = 0 + logger.debug("executing host-facts scrape") + errors = [] + s_time = time.time() + + try: + facts = HostFacts() + except Exception as e: + self._handle_thread_exception(e, 'host') + exception_encountered = True + else: + elapsed = time.time() - s_time + try: + data = json.loads(facts.dump()) + except json.decoder.JSONDecodeError: + errors.append("host-facts provided invalid JSON") + logger.warning(errors[-1]) + data = {} + self.cephadm_cache.update_task( + 'host', + { + "scrape_timestamp": s_time, + "scrape_duration_secs": elapsed, + "scrape_errors": errors, + "data": data, + } + ) + logger.debug(f"completed host-facts scrape - {elapsed}s") + + time.sleep(CephadmDaemon.loop_delay) + ctr += CephadmDaemon.loop_delay + 
logger.info("host-facts thread stopped") + + def _scrape_ceph_volume(self, refresh_interval=15): + # we're invoking the ceph_volume command, so we need to set the args that it + # expects to use + args.command = "inventory --format=json".split() + args.fsid = self.fsid + args.log_output = False + + ctr = 0 + exception_encountered = False + + while True: + if self.stop or exception_encountered: + break + + if ctr >= refresh_interval: + ctr = 0 + logger.debug("executing ceph-volume scrape") + errors = [] + s_time = time.time() + stream = io.StringIO() + try: + with redirect_stdout(stream): + command_ceph_volume() + except Exception as e: + self._handle_thread_exception(e, 'disks') + exception_encountered = True + else: + elapsed = time.time() - s_time + + # if the call to ceph-volume returns junk with the + # json, it won't parse + stdout = stream.getvalue() + + if stdout: + try: + data = json.loads(stdout) + except json.decoder.JSONDecodeError: + errors.append("ceph-volume thread provided bad json data") + logger.warning(errors[-1]) + data = [] + else: + errors.append("ceph-volume didn't return any data") + logger.warning(errors[-1]) + + self.cephadm_cache.update_task( + 'disks', + { + "scrape_timestamp": s_time, + "scrape_duration_secs": elapsed, + "scrape_errors": errors, + "data": data, + } + ) + + logger.debug(f"completed ceph-volume scrape - {elapsed}s") + time.sleep(CephadmDaemon.loop_delay) + ctr += CephadmDaemon.loop_delay + + logger.info("ceph-volume thread stopped") + + def _scrape_list_daemons(self, refresh_interval=20): + ctr = 0 + exception_encountered = False + while True: + if self.stop or exception_encountered: + break + + if ctr >= refresh_interval: + ctr = 0 + logger.debug("executing list-daemons scrape") + errors = [] + s_time = time.time() + + try: + # list daemons should ideally be invoked with a fsid + data = list_daemons() + except Exception as e: + self._handle_thread_exception(e, 'daemons') + exception_encountered = True + else: + if not 
isinstance(data, list): + errors.append("list-daemons didn't supply a list?") + logger.warning(errors[-1]) + data = [] + elapsed = time.time() - s_time + self.cephadm_cache.update_task( + 'daemons', + { + "scrape_timestamp": s_time, + "scrape_duration_secs": elapsed, + "scrape_errors": errors, + "data": data, + } + ) + logger.debug(f"completed list-daemons scrape - {elapsed}s") + + time.sleep(CephadmDaemon.loop_delay) + ctr += CephadmDaemon.loop_delay + logger.info("list-daemons thread stopped") + + def _create_thread(self, target, name, refresh_interval=None): + if refresh_interval: + t = Thread(target=target, args=(refresh_interval,)) + else: + t = Thread(target=target) + t.daemon = True + t.name = name + self.cephadm_cache.update_health(name, "active") + t.start() + + start_msg = f"Started {name} thread" + if refresh_interval: + logger.info(f"{start_msg}, with a refresh interval of {refresh_interval}s") + else: + logger.info(f"{start_msg}") + return t + + def reload(self, *args): + """reload -HUP received + + This is a placeholder function only, and serves to provide the hook that could + be exploited later if the exporter evolves to incorporate a config file + """ + logger.info("Reload request received - ignoring, no action needed") + + def shutdown(self, *args): + logger.info("Shutdown request received") + self.stop = True + self.http_server.shutdown() + + def run(self): + logger.info(f"cephadm exporter starting for FSID '{self.fsid}'") + if not self.can_run: + logger.error("Unable to start the exporter daemon") + for e in self.errors: + logger.error(e) + return + + # register signal handlers for running under systemd control + signal.signal(signal.SIGTERM, self.shutdown) + signal.signal(signal.SIGINT, self.shutdown) + signal.signal(signal.SIGHUP, self.reload) + logger.debug("Signal handlers attached") + + host_facts = self._create_thread(self._scrape_host_facts, 'host', 5) + self.workers.append(host_facts) + + daemons = 
self._create_thread(self._scrape_list_daemons, 'daemons', 20) + self.workers.append(daemons) + + disks = self._create_thread(self._scrape_ceph_volume, 'disks', 20) + self.workers.append(disks) + + self.http_server = CephadmHTTPServer(('0.0.0.0', self.port), CephadmDaemonHandler) # IPv4 only + self.http_server.socket = ssl.wrap_socket(self.http_server.socket, + keyfile=os.path.join(self.daemon_path, CephadmDaemon.key_name), + certfile=os.path.join(self.daemon_path, CephadmDaemon.crt_name), + server_side=True) + + self.http_server.cephadm_cache = self.cephadm_cache + self.http_server.token = self.token + server_thread = self._create_thread(self.http_server.serve_forever, 'http_server') + logger.info(f"https server listening on {self.http_server.server_address[0]}:{self.http_server.server_port}") + + ctr = 0 + while server_thread.is_alive(): + if self.stop: + break + + if ctr >= CephadmDaemon.thread_check_interval: + ctr = 0 + for worker in self.workers: + if self.cephadm_cache.tasks[worker.name] == 'inactive': + continue + if not worker.is_alive(): + logger.warning(f"{worker.name} thread not running") + stop_time = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S") + self.cephadm_cache.update_health(worker.name, "inactive", f"{worker.name} stopped at {stop_time}") + + time.sleep(CephadmDaemon.loop_delay) + ctr += CephadmDaemon.loop_delay + + logger.info("Main http server thread stopped") + + @property + def unit_run(self): + + return """set -e +{py3} {bin_path} exporter --fsid {fsid} --id {daemon_id} --port {port} &""".format( + py3 = shutil.which('python3'), + bin_path=self.binary_path, + fsid=self.fsid, + daemon_id=self.daemon_id, + port=self.port + ) + + @property + def unit_file(self): + return """#generated by cephadm +[Unit] +Description=cephadm exporter service for cluster {fsid} +After=network-online.target +Wants=network-online.target + +PartOf=ceph-{fsid}.target +Before=ceph-{fsid}.target + +[Service] +Type=forking +ExecStart=/bin/bash 
{daemon_path}/unit.run +ExecReload=/bin/kill -HUP $MAINPID +Restart=on-failure +RestartSec=10s + +[Install] +WantedBy=ceph-{fsid}.target +""".format( + fsid=self.fsid, + daemon_path=self.daemon_path +) + + def deploy_daemon_unit(self, config=None): + """deploy a specific unit file for cephadm + + The normal deploy_daemon_units doesn't apply for this + daemon since it's not a container, so we just create a + simple service definition and add it to the fsid's target + """ + if not config: + raise Error("Attempting to deploy cephadm daemon without a config") + assert isinstance(config, dict) + + # Create the required config files in the daemons dir, with restricted permissions + for filename in config: + with open(os.open(os.path.join(self.daemon_path, filename), os.O_CREAT | os.O_WRONLY, mode=0o600), "w") as f: + f.write(config[filename]) + + # When __file__ is <stdin> we're being invoked over remoto via the orchestrator, so + # we pick up the file from where the orchestrator placed it - otherwise we'll + # copy it to the binary location for this cluster + if not __file__ == '<stdin>': + shutil.copy(__file__, + self.binary_path) + + with open(os.path.join(self.daemon_path, 'unit.run'), "w") as f: + f.write(self.unit_run) + + with open(os.path.join(args.unit_dir, f"{self.unit_name}.new"), "w") as f: + f.write(self.unit_file) + os.rename( + os.path.join(args.unit_dir, f"{self.unit_name}.new"), + os.path.join(args.unit_dir, self.unit_name)) + + call_throws(['systemctl', 'daemon-reload']) + call(['systemctl', 'stop', self.unit_name], + verbose_on_failure=False) + call(['systemctl', 'reset-failed', self.unit_name], + verbose_on_failure=False) + call_throws(['systemctl', 'enable', '--now', self.unit_name]) + + @classmethod + def uninstall(cls, fsid, daemon_type, daemon_id): + unit_name = CephadmDaemon._unit_name(fsid, daemon_id) + unit_path = os.path.join(args.unit_dir, unit_name) + unit_run = os.path.join(args.data_dir, fsid, f"{daemon_type}.{daemon_id}", "unit.run") + 
try: + with open(unit_run, "r") as u: + contents = u.read().strip(" &") + except OSError: + logger.warning(f"Unable to access the unit.run file @ {unit_run}") + return + + for line in contents.split('\n'): + if '--port ' in line: + try: + port = int(line.split('--port ')[-1]) + except ValueError: + logger.warning("Unexpected format in unit.run file: port is not numeric") + logger.warning("Unable to remove the systemd file and close the port") + return + break + + if port: + fw = Firewalld() + try: + fw.close_ports([port]) + except RuntimeError: + logger.error(f"Unable to close port {port}") + + stdout, stderr, rc = call(["rm", "-f", unit_path]) + if rc: + logger.error(f"Unable to remove the systemd file @ {unit_path}") + else: + logger.info(f"removed systemd unit file @ {unit_path}") + stdout, stderr, rc = call(["systemctl", "daemon-reload"]) + + +def command_exporter(): + + exporter = CephadmDaemon(args.fsid, daemon_id=args.id, port=args.port) + + if args.fsid not in os.listdir(args.data_dir): + raise Error(f"cluster fsid '{args.fsid}' not found in '{args.data_dir}'") + + exporter.run() + + + +################################## + + def _get_parser(): # type: () -> argparse.ArgumentParser parser = argparse.ArgumentParser( @@ -5746,6 +6521,11 @@ def _get_parser(): '--keyring', '-k', help='ceph.keyring to pass through to the container') parser_ceph_volume.add_argument( + '--log-output', + action='store_true', + default=True, + help='suppress ceph volume output from the log') + parser_ceph_volume.add_argument( 'command', nargs=argparse.REMAINDER, help='command') @@ -5928,6 +6708,14 @@ def _get_parser(): '--container-init', action='store_true', help='Run podman/docker with `--init`') + parser_bootstrap.add_argument( + '--with-exporter', + action='store_true', + help='Automatically deploy cephadm metadata exporter to each node') + parser_bootstrap.add_argument( + '--exporter-config', + action=CustomValidation, + help=f'Exporter configuration information in JSON format 
(providing: {", ".join(CephadmDaemon.config_requirements)}, port information)') parser_deploy = subparsers.add_parser( 'deploy', help='deploy a daemon') @@ -6049,6 +6837,25 @@ def _get_parser(): 'gather-facts', help='gather and return host related information (JSON format)') parser_gather_facts.set_defaults(func=command_gather_facts) + parser_exporter = subparsers.add_parser( + 'exporter', help='Start cephadm in exporter mode (web service), providing host/daemon/disk metadata') + parser_exporter.add_argument( + '--fsid', + required=True, + type=str, + help='fsid of the cephadm exporter to run against') + parser_exporter.add_argument( + '--port', + type=int, + default=int(CephadmDaemon.default_port), + help='port number for the cephadm exporter service') + parser_exporter.add_argument( + '--id', + type=str, + default=get_hostname().split('.')[0], + help='daemon identifer for the exporter') + parser_exporter.set_defaults(func=command_exporter) + return parser diff --git a/src/cephadm/tests/fixtures.py b/src/cephadm/tests/fixtures.py new file mode 100644 index 00000000000..71ccfb24227 --- /dev/null +++ b/src/cephadm/tests/fixtures.py @@ -0,0 +1,41 @@ + +import mock +from mock import patch +import pytest + +import os +import time + +with patch('builtins.open', create=True): + from importlib.machinery import SourceFileLoader + cd = SourceFileLoader('cephadm', 'cephadm').load_module() + + +def _daemon_path(): + return os.getcwd() + + +def _mock_scrape_host(obj, interval): + try: + raise ValueError("wah") + except Exception as e: + obj._handle_thread_exception(e, 'host') + + +def _mock_run(obj): + t = obj._create_thread(obj._scrape_host_facts, 'host', 5) + time.sleep(1) + if not t.is_alive(): + obj.cephadm_cache.update_health('host', "inactive", "host thread stopped") + + +@pytest.fixture +def exporter(): + with mock.patch('cephadm.CephadmDaemon.daemon_path', _daemon_path()), \ + mock.patch('cephadm.CephadmDaemon.can_run', return_value=True), \ + 
mock.patch('cephadm.CephadmDaemon.run', _mock_run), \ + mock.patch('cephadm.CephadmDaemon._scrape_host_facts', _mock_scrape_host): + + exporter = cd.CephadmDaemon(fsid='foobar', daemon_id='test') + assert exporter.token == 'MyAccessToken' + yield exporter diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py index c9e2769ed33..77364188672 100644 --- a/src/cephadm/tests/test_cephadm.py +++ b/src/cephadm/tests/test_cephadm.py @@ -4,9 +4,16 @@ from mock import patch import os import sys import unittest +import threading +import time +from http.server import HTTPServer +from urllib.request import Request, urlopen +from urllib.error import HTTPError import pytest +from .fixtures import exporter + with patch('builtins.open', create=True): from importlib.machinery import SourceFileLoader cd = SourceFileLoader('cephadm', 'cephadm').load_module() @@ -278,6 +285,15 @@ default via fe80::2480:28ec:5097:3fe2 dev wlp2s0 proto ra metric 20600 pref medi result = cd.dict_get_join({'a': 1}, 'a') assert result == 1 + def test_last_local_images(self): + out = ''' +docker.io/ceph/daemon-base@ +docker.io/ceph/ceph:v15.2.5 +docker.io/ceph/daemon-base:octopus + ''' + image = cd._filter_last_local_ceph_image(out) + assert image == 'docker.io/ceph/ceph:v15.2.5' + class TestCustomContainer(unittest.TestCase): cc: cd.CustomContainer @@ -361,3 +377,267 @@ class TestCustomContainer(unittest.TestCase): 'ro=true' ] ]) + + +class TestCephadmExporter(object): + exporter: cd.CephadmDaemon + files_created = [] + crt = """-----BEGIN CERTIFICATE----- +MIIC1zCCAb8CEFHoZE2MfUVzo53fzzBKAT0wDQYJKoZIhvcNAQENBQAwKjENMAsG +A1UECgwEQ2VwaDEZMBcGA1UECwwQY2VwaGFkbS1leHBvcnRlcjAeFw0yMDExMjUy +MzEwNTVaFw0zMDExMjMyMzEwNTVaMCoxDTALBgNVBAoMBENlcGgxGTAXBgNVBAsM +EGNlcGhhZG0tZXhwb3J0ZXIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIB +AQCsTfcJcXbREqfx1zTUuEmK+lJn9WWjk0URRF1Z+QgPkascNdkX16PnvhbGwXmF +BTdAcNl7V0U+z4EsGJ7hJsB7qTq6Rb6wNl7r0OxjeWOmB9xbF4Q/KR5yrbM1DA9A 
+B5fNswrUXViku5Y2jlOAz+ZMBhYxMx0edqhxSn297j04Z6RF4Mvkc43v0FH7Ju7k +O5+0VbdzcOdu37DFpoE4Ll2MZ/GuAHcJ8SD06sEdzFEjRCraav976743XcUlhZGX +ZTTG/Zf/a+wuCjtMG3od7vRFfuRrM5oTE133DuQ5deR7ybcZNDyopDjHF8xB1bAk +IOz4SbP6Q25K99Czm1K+3kMLAgMBAAEwDQYJKoZIhvcNAQENBQADggEBACmtvZb8 +dJGHx/WC0/JHxnEJCJM2qnn87ELzbbIQL1w1Yb/I6JQYPgq+WiQPaHaLL9eYsm0l +dFwvrh+WC0JpXDfADnUnkTSB/WpZ2nC+2JxBptrQEuIcqNXpcJd0bKDiHunv04JI +uEVpTAK05dBV38qNmIlu4HyB4OEnuQpyOr9xpIhdxuJ95O9K0j5BIw98ZaEwYNUP +Rm3YlQwfS6R5xaBvL9kyfxyAD2joNj44q6w/5zj4egXVIA5VpkQm8DmMtu0Pd2NG +dzfYRmqrDolh+rty8HiyIxzeDJQ5bj6LKbUkmABvX50nDySVyMfHmt461/n7W65R +CHFLoOmfJJik+Uc=\n-----END CERTIFICATE----- +""" + key = """-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCsTfcJcXbREqfx +1zTUuEmK+lJn9WWjk0URRF1Z+QgPkascNdkX16PnvhbGwXmFBTdAcNl7V0U+z4Es +GJ7hJsB7qTq6Rb6wNl7r0OxjeWOmB9xbF4Q/KR5yrbM1DA9AB5fNswrUXViku5Y2 +jlOAz+ZMBhYxMx0edqhxSn297j04Z6RF4Mvkc43v0FH7Ju7kO5+0VbdzcOdu37DF +poE4Ll2MZ/GuAHcJ8SD06sEdzFEjRCraav976743XcUlhZGXZTTG/Zf/a+wuCjtM +G3od7vRFfuRrM5oTE133DuQ5deR7ybcZNDyopDjHF8xB1bAkIOz4SbP6Q25K99Cz +m1K+3kMLAgMBAAECggEASnAwToMXWsGdjqxzpYasNv9oBIOO0nk4OHp5ffpJUjiT +XM+ip1tA80g7HMjPD/mt4gge3NtaDgWlf4Bve0O7mnEE7x5cgFIs9eG/jkYOF9eD +ilMBjivcfJywNDWujPH60iIMhqyBNEHaZl1ck+S9UJC8m6rCZLvMj40n/5riFfBy +1sjf2uOwcfWrjSj9Ju4wlMI6khSSz2aYC7glQQ/fo2+YArbEUcy60iloPQ6wEgZK +okoVWZA9AehwLcnRjkwd9EVmMMtRGPE/AcP4s/kKA0tRDRicPLN727Ke/yxv+Ppo +hbIZIcOn7soOFAENcodJ4YRSCd++QfCNaVAi7vwWWQKBgQDeBY4vvr+H0brbSjQg +O7Fpqub/fxZY3UoHWDqWs2X4o3qhDqaTQODpuYtCm8YQE//55JoLWKAD0evq5dLS +YLrtC1Vyxf+TA7opCUjWBe+liyndbJdB5q0zF7qdWUtQKGVSWyUWhK8gHa6M64fP +oi83DD7F0OGusTWGtfbceErk/wKBgQDGrJLRo/5xnAH5VmPfNu+S6h0M2qM6CYwe +Y5wHFG2uQQct73adf53SkhvZVmOzJsWQbVnlDOKMhqazcs+7VWRgO5X3naWVcctE +Hggw9MgpbXAWFOI5sNYsCYE58E+fTHjE6O4A3MhMCsze+CIC3sKuPQBBiL9bWSOX +8POswqfl9QKBgDe/nVxPwTgRaaH2l/AgDQRDbY1qE+psZlJBzTRaB5jPM9ONIjaH +a/JELLuk8a7H1tagmC2RK1zKMTriSnWY5FbxKZuQLAR2QyBavHdBNlOTBggbZD+f 
+9I2Hv8wSx95wxkBPsphc6Lxft5ya55czWjewU3LIaGK9DHuu5TWm3udxAoGBAJGP +PsJ59KIoOwoDUYjpJv3sqPwR9CVBeXeKY3aMcQ+KdUgiejVKmsb8ZYsG0GUhsv3u +ID7BAfsTbG9tXuVR2wjmnymcRwUHKnXtyvKTZVN06vpCsryx4zjAff2FI9ECpjke +r8HSAK41+4QhKEoSC3C9IMLi/dBfrsRTtTSOKZVBAoGBAI2dl5HEIFpufaI4toWM +LO5HFrlXgRDGoc/+Byr5/8ZZpYpU115Ol/q6M+l0koV2ygJ9jeJJEllFWykIDS6F +XxazFI74swAqobHb2ZS/SLhoVxE82DdSeXrjkTvUjNtrW5zs1gIMKBR4nD6H8AqL +iMN28C2bKGao5UHvdER1rGy7 +-----END PRIVATE KEY----- +""" + token = "MyAccessToken" + + @classmethod + def setup_class(cls): + # create the ssl files + fname = os.path.join(os.getcwd(), 'crt') + with open(fname, 'w') as crt: + crt.write(cls.crt) + cls.files_created.append(fname) + fname = os.path.join(os.getcwd(), 'key') + with open(fname, 'w') as crt: + crt.write(cls.key) + cls.files_created.append(fname) + fname = os.path.join(os.getcwd(), 'token') + with open(fname, 'w') as crt: + crt.write(cls.token) + cls.files_created.append(fname) + # start a simple http instance to test the requesthandler + cls.server = HTTPServer(('0.0.0.0', 9443), cd.CephadmDaemonHandler) + cls.server.cephadm_cache = cd.CephadmCache() + cls.server.token = cls.token + t = threading.Thread(target=cls.server.serve_forever) + t.daemon = True + t.start() + + @classmethod + def teardown_class(cls): + cls.server.shutdown() + assert len(cls.files_created) > 0 + for f in cls.files_created: + os.remove(f) + + def setup_method(self): + # re-init the cache for every test + TestCephadmExporter.server.cephadm_cache = cd.CephadmCache() + + def teardown_method(self): + pass + + def test_files_ready(self): + assert os.path.exists(os.path.join(os.getcwd(), 'crt')) + assert os.path.exists(os.path.join(os.getcwd(), 'key')) + assert os.path.exists(os.path.join(os.getcwd(), 'token')) + + def test_can_run(self, exporter): + assert exporter.can_run + + def test_token_valid(self, exporter): + assert exporter.token == self.token + + def test_unit_name(self,exporter): + assert exporter.unit_name + assert exporter.unit_name == 
"ceph-foobar-cephadm-exporter.test.service" + + def test_unit_run(self,exporter): + assert exporter.unit_run + lines = exporter.unit_run.split('\n') + assert len(lines) == 2 + assert "/var/lib/ceph/foobar/cephadm exporter --fsid foobar --id test --port 9443 &" in lines[1] + + def test_binary_path(self, exporter): + # fsid = foobar + args = cd._parse_args([]) + cd.args = args + assert exporter.binary_path == "/var/lib/ceph/foobar/cephadm" + + def test_systemd_unit(self, exporter): + assert exporter.unit_file + + def test_validate_passes(self, exporter): + config = { + "crt": self.crt, + "key": self.key, + "token": self.token, + } + cd.CephadmDaemon.validate_config(config) + + def test_validate_fails(self, exporter): + config = { + "key": self.key, + "token": self.token, + } + with pytest.raises(cd.Error): + cd.CephadmDaemon.validate_config(config) + + def test_port_active(self, exporter): + assert exporter.port_active == True + + def test_rqst_health_200(self): + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata/health",headers=hdrs) + r = urlopen(req) + assert r.status == 200 + + def test_rqst_all_inactive_500(self): + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata",headers=hdrs) + try: + r = urlopen(req) + except HTTPError as e: + assert e.code == 500 + + def test_rqst_no_auth_401(self): + req=Request("http://localhost:9443/v1/metadata") + try: + urlopen(req) + except HTTPError as e: + assert e.code == 401 + + def test_rqst_bad_auth_401(self): + hdrs={"Authorization":f"Bearer BogusAuthToken"} + req=Request("http://localhost:9443/v1/metadata",headers=hdrs) + try: + urlopen(req) + except HTTPError as e: + assert e.code == 401 + + def test_rqst_badURL_404(self): + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metazoic",headers=hdrs) + try: + urlopen(req) + except HTTPError as e: + assert 
e.code == 404 + + def test_rqst_inactive_task_204(self): + # all tasks initialise as inactive, and then 'go' active as their thread starts + # so we can pick any task to check for an inactive response (no content) + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata/disks",headers=hdrs) + r = urlopen(req) + assert r.status == 204 + + def test_rqst_active_task_200(self): + TestCephadmExporter.server.cephadm_cache.tasks['host'] = 'active' + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata/host",headers=hdrs) + r = urlopen(req) + assert r.status == 200 + + def test_rqst_all_206(self): + TestCephadmExporter.server.cephadm_cache.tasks['disks'] = 'active' + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata",headers=hdrs) + r = urlopen(req) + assert r.status == 206 + + def test_rqst_disks_200(self): + TestCephadmExporter.server.cephadm_cache.tasks['disks'] = 'active' + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata/disks",headers=hdrs) + r = urlopen(req) + assert r.status == 200 + + def test_thread_exception(self, exporter): + # run is patched to invoke a mocked scrape_host thread that will raise so + # we check here that the exception handler updates the cache object as we'd + # expect with the error + exporter.run() + assert exporter.cephadm_cache.host['scrape_errors'] + assert exporter.cephadm_cache.host['scrape_errors'] == ['ValueError exception: wah'] + assert exporter.cephadm_cache.errors == ['host thread stopped'] + + # Test the requesthandler does the right thing with invalid methods... + # ie. 
return a "501" - Not Implemented / Unsupported Method + def test_invalid_method_HEAD(self): + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata/health",headers=hdrs, method="HEAD") + with pytest.raises(HTTPError, match=r"HTTP Error 501: .*") as e: + urlopen(req) + + def test_invalid_method_DELETE(self): + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata/health",headers=hdrs, method="DELETE") + with pytest.raises(HTTPError, match=r"HTTP Error 501: .*") as e: + urlopen(req) + + def test_invalid_method_POST(self): + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata/health",headers=hdrs, method="POST") + with pytest.raises(HTTPError, match=r"HTTP Error 501: .*") as e: + urlopen(req) + + def test_invalid_method_PUT(self): + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata/health",headers=hdrs, method="PUT") + with pytest.raises(HTTPError, match=r"HTTP Error 501: .*") as e: + urlopen(req) + + def test_invalid_method_CONNECT(self): + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata/health",headers=hdrs, method="CONNECT") + with pytest.raises(HTTPError, match=r"HTTP Error 501: .*") as e: + urlopen(req) + + def test_invalid_method_TRACE(self): + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata/health",headers=hdrs, method="TRACE") + with pytest.raises(HTTPError, match=r"HTTP Error 501: .*") as e: + urlopen(req) + + def test_invalid_method_OPTIONS(self): + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata/health",headers=hdrs, method="OPTIONS") + with pytest.raises(HTTPError, match=r"HTTP Error 501: .*") as e: + urlopen(req) + + def 
test_invalid_method_PATCH(self): + hdrs={"Authorization":f"Bearer {TestCephadmExporter.token}"} + req=Request("http://localhost:9443/v1/metadata/health",headers=hdrs, method="PATCH") + with pytest.raises(HTTPError, match=r"HTTP Error 501: .*") as e: + urlopen(req) diff --git a/src/client/Client.cc b/src/client/Client.cc index aaf9db20c35..9f9296fd9e0 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -482,6 +482,7 @@ void Client::dump_status(Formatter *f) f->dump_int("osd_epoch", osd_epoch); f->dump_int("osd_epoch_barrier", cap_epoch_barrier); f->dump_bool("blocklisted", blocklisted); + f->dump_string("fs_name", mdsmap->get_fs_name()); } } @@ -2775,8 +2776,6 @@ void Client::cancel_commands(const MDSMap& newmap) void Client::handle_mds_map(const MConstRef<MMDSMap>& m) { - mds_gid_t old_inc, new_inc; - std::unique_lock cl(client_lock); if (m->get_epoch() <= mdsmap->get_epoch()) { ldout(cct, 1) << __func__ << " epoch " << m->get_epoch() @@ -2805,8 +2804,8 @@ void Client::handle_mds_map(const MConstRef<MMDSMap>& m) if (!mdsmap->is_up(mds)) { session->con->mark_down(); } else if (mdsmap->get_addrs(mds) != session->addrs) { - old_inc = _mdsmap->get_incarnation(mds); - new_inc = mdsmap->get_incarnation(mds); + auto old_inc = _mdsmap->get_incarnation(mds); + auto new_inc = mdsmap->get_incarnation(mds); if (old_inc != new_inc) { ldout(cct, 1) << "mds incarnation changed from " << old_inc << " to " << new_inc << dendl; @@ -4329,7 +4328,7 @@ void Client::remove_session_caps(MetaSession *s, int err) int Client::_do_remount(bool retry_on_error) { - uint64_t max_retries = g_conf().get_val<uint64_t>("mds_max_retries_on_remount_failure"); + uint64_t max_retries = cct->_conf.get_val<uint64_t>("mds_max_retries_on_remount_failure"); errno = 0; int r = remount_cb(callback_handle); @@ -6524,13 +6523,6 @@ void Client::start_tick_thread() auto t_interval = clock::duration(cct->_conf.get_val<sec>("client_tick_interval")); auto d_interval = 
clock::duration(cct->_conf.get_val<sec>("client_debug_inject_tick_delay")); - // Clear the debug inject tick delay - if (unlikely(d_interval.count() > 0)) { - ldout(cct, 20) << "clear debug inject tick delay: " << d_interval << dendl; - ceph_assert(0 == cct->_conf.set_val("client_debug_inject_tick_delay", "0")); - cct->_conf.apply_changes(nullptr); - } - auto interval = std::max(t_interval, d_interval); if (likely(since >= interval)) { tick(); @@ -10182,6 +10174,8 @@ int Client::ftruncate(int fd, loff_t length, const UserPerm& perms) if (f->flags & O_PATH) return -EBADF; #endif + if ((f->mode & CEPH_FILE_MODE_WR) == 0) + return -EBADF; struct stat attr; attr.st_size = length; return _setattr(f->inode, &attr, CEPH_SETATTR_SIZE, perms); @@ -10481,7 +10475,7 @@ int Client::statfs(const char *path, struct statvfs *stbuf, stbuf->f_frsize = 1 << CEPH_BLOCK_SHIFT; stbuf->f_bsize = 1 << CEPH_BLOCK_SHIFT; stbuf->f_files = total_files_on_fs; - stbuf->f_ffree = 0; + stbuf->f_ffree = -1; stbuf->f_favail = -1; stbuf->f_fsid = -1; // ?? stbuf->f_flag = 0; // ?? @@ -12376,6 +12370,17 @@ size_t Client::_vxattrcb_mirror_info(Inode *in, char *val, size_t size) in->xattrs["ceph.mirror.info.fs_id"].c_str()); } +size_t Client::_vxattrcb_cluster_fsid(Inode *in, char *val, size_t size) +{ + return snprintf(val, size, "%s", monclient->get_fsid().to_string().c_str()); +} + +size_t Client::_vxattrcb_client_id(Inode *in, char *val, size_t size) +{ + auto name = messenger->get_myname(); + return snprintf(val, size, "%s%ld", name.type_str(), name.num()); +} + #define CEPH_XATTR_NAME(_type, _name) "ceph." #_type "." #_name #define CEPH_XATTR_NAME2(_type, _name, _name2) "ceph." #_type "." #_name "." 
#_name2 @@ -12490,6 +12495,24 @@ const Client::VXattr Client::_file_vxattrs[] = { { name: "" } /* Required table terminator */ }; +const Client::VXattr Client::_common_vxattrs[] = { + { + name: "ceph.cluster_fsid", + getxattr_cb: &Client::_vxattrcb_cluster_fsid, + readonly: true, + exists_cb: nullptr, + flags: 0, + }, + { + name: "ceph.client_id", + getxattr_cb: &Client::_vxattrcb_client_id, + readonly: true, + exists_cb: nullptr, + flags: 0, + }, + { name: "" } /* Required table terminator */ +}; + const Client::VXattr *Client::_get_vxattrs(Inode *in) { if (in->is_dir()) @@ -12510,7 +12533,16 @@ const Client::VXattr *Client::_match_vxattr(Inode *in, const char *name) vxattr++; } } + + // for common vxattrs + vxattr = _common_vxattrs; + while (!vxattr->name.empty()) { + if (vxattr->name == name) + return vxattr; + vxattr++; + } } + return NULL; } @@ -13184,6 +13216,15 @@ int Client::_rename(Inode *fromdir, const char *fromname, Inode *todir, const ch else return -EROFS; } + if (fromdir != todir) { + Inode *fromdir_root = + fromdir->quota.is_enable() ? fromdir : get_quota_root(fromdir, perm); + Inode *todir_root = + todir->quota.is_enable() ? todir : get_quota_root(todir, perm); + if (fromdir_root != todir_root) { + return -EXDEV; + } + } InodeRef target; MetaRequest *req = new MetaRequest(op); @@ -13216,32 +13257,7 @@ int Client::_rename(Inode *fromdir, const char *fromname, Inode *todir, const ch req->dentry_unless = CEPH_CAP_FILE_EXCL; InodeRef oldin, otherin; - Inode *fromdir_root = nullptr; - Inode *todir_root = nullptr; - int mask = 0; - bool quota_check = false; - if (fromdir != todir) { - fromdir_root = - fromdir->quota.is_enable() ? fromdir : get_quota_root(fromdir, perm); - todir_root = - todir->quota.is_enable() ? 
todir : get_quota_root(todir, perm); - - if (todir_root->quota.is_enable() && fromdir_root != todir_root) { - // use CEPH_STAT_RSTAT mask to force send getattr or lookup request - // to auth MDS to get latest rstat for todir_root and source dir - // even if their dentry caches and inode caps are satisfied. - res = _getattr(todir_root, CEPH_STAT_RSTAT, perm, true); - if (res < 0) - goto fail; - - quota_check = true; - if (oldde->inode && oldde->inode->is_dir()) { - mask |= CEPH_STAT_RSTAT; - } - } - } - - res = _lookup(fromdir, fromname, mask, &oldin, perm); + res = _lookup(fromdir, fromname, 0, &oldin, perm); if (res < 0) goto fail; @@ -13250,39 +13266,6 @@ int Client::_rename(Inode *fromdir, const char *fromname, Inode *todir, const ch req->set_old_inode(oldinode); req->old_inode_drop = CEPH_CAP_LINK_SHARED; - if (quota_check) { - int64_t old_bytes, old_files; - if (oldinode->is_dir()) { - old_bytes = oldinode->rstat.rbytes; - old_files = oldinode->rstat.rsize(); - } else { - old_bytes = oldinode->size; - old_files = 1; - } - - bool quota_exceed = false; - if (todir_root && todir_root->quota.max_bytes && - (old_bytes + todir_root->rstat.rbytes) >= todir_root->quota.max_bytes) { - ldout(cct, 10) << "_rename (" << oldinode->ino << " bytes=" - << old_bytes << ") to (" << todir->ino - << ") will exceed quota on " << *todir_root << dendl; - quota_exceed = true; - } - - if (todir_root && todir_root->quota.max_files && - (old_files + todir_root->rstat.rsize()) >= todir_root->quota.max_files) { - ldout(cct, 10) << "_rename (" << oldinode->ino << " files=" - << old_files << ") to (" << todir->ino - << ") will exceed quota on " << *todir_root << dendl; - quota_exceed = true; - } - - if (quota_exceed) { - res = (oldinode->is_dir()) ? 
-EXDEV : -EDQUOT; - goto fail; - } - } - res = _lookup(todir, toname, 0, &otherin, perm); switch (res) { case 0: @@ -15153,7 +15136,7 @@ void intrusive_ptr_add_ref(Inode *in) { in->get(); } - + void intrusive_ptr_release(Inode *in) { in->client->put_inode(in); diff --git a/src/client/Client.h b/src/client/Client.h index 01f469ea1ba..c94fc6ea7fa 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -1167,6 +1167,7 @@ private: static const VXattr _dir_vxattrs[]; static const VXattr _file_vxattrs[]; + static const VXattr _common_vxattrs[]; @@ -1324,6 +1325,9 @@ private: bool _vxattrcb_mirror_info_exists(Inode *in); size_t _vxattrcb_mirror_info(Inode *in, char *val, size_t size); + size_t _vxattrcb_cluster_fsid(Inode *in, char *val, size_t size); + size_t _vxattrcb_client_id(Inode *in, char *val, size_t size); + static const VXattr *_get_vxattrs(Inode *in); static const VXattr *_match_vxattr(Inode *in, const char *name); diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc index 82c152ad56f..e43f1537975 100644 --- a/src/client/fuse_ll.cc +++ b/src/client/fuse_ll.cc @@ -136,7 +136,8 @@ static int getgroups(fuse_req_t req, gid_t **sgids) static void get_fuse_groups(UserPerm& perms, fuse_req_t req) { - if (g_conf().get_val<bool>("fuse_set_user_groups")) { + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req); + if (cfuse->client->cct->_conf.get_val<bool>("fuse_set_user_groups")) { gid_t *gids = NULL; int count = getgroups(req, &gids); diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc index 94218bb9bc8..57ecdc6929e 100644 --- a/src/cls/rgw/cls_rgw.cc +++ b/src/cls/rgw/cls_rgw.cc @@ -982,9 +982,7 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist entry.index_ver = header.ver; /* resetting entry flags, entry might have been previously a delete * marker */ - entry.flags = (entry.key.instance.empty() ? 
- 0 : - rgw_bucket_dir_entry::FLAG_VER); + entry.flags &= rgw_bucket_dir_entry::FLAG_VER; if (op.tag.size()) { auto pinter = entry.pending_map.find(op.tag); diff --git a/src/cls/rgw_gc/cls_rgw_gc.cc b/src/cls/rgw_gc/cls_rgw_gc.cc index c67b201a639..d6cd7767a13 100644 --- a/src/cls/rgw_gc/cls_rgw_gc.cc +++ b/src/cls/rgw_gc/cls_rgw_gc.cc @@ -502,6 +502,11 @@ static int cls_rgw_gc_queue_update_entry(cls_method_context_t hctx, bufferlist * return -ENOSPC; } + // Due to Tracker 47866 we are no longer executing this code, as it + // appears to possibly create a GC entry for an object that has not + // been deleted. Instead we will log at level 0 to perhaps confirm + // that when and how often this bug would otherwise be hit. +#if 0 cls_queue_enqueue_op enqueue_op; bufferlist bl_data; encode(op.info, bl_data); @@ -512,6 +517,16 @@ static int cls_rgw_gc_queue_update_entry(cls_method_context_t hctx, bufferlist * if (ret < 0) { return ret; } +#else + std::string first_chain = "<empty-chain>"; + if (! op.info.chain.objs.empty()) { + first_chain = op.info.chain.objs.cbegin()->key.name; + } + CLS_LOG(0, + "INFO: refrained from enqueueing GC entry during GC defer" + " tag=%s, first_chain=%s\n", + op.info.tag.c_str(), first_chain.c_str()); +#endif if (has_urgent_data) { head.bl_urgent_data.clear(); diff --git a/src/common/CDC.cc b/src/common/CDC.cc index 69cb978278f..e478ba46e51 100644 --- a/src/common/CDC.cc +++ b/src/common/CDC.cc @@ -20,3 +20,26 @@ std::unique_ptr<CDC> CDC::create( } return nullptr; } + +void generate_buffer(int size, bufferlist *outbl, int seed) +{ + std::mt19937_64 engine, engine2; + engine.seed(seed); + engine2.seed(seed); + + // assemble from randomly-sized segments! 
+ outbl->clear(); + auto left = size; + while (left) { + size_t l = std::min<size_t>((engine2() & 0xffff0) + 16, left); + left -= l; + bufferptr p(l); + p.set_length(l); + char *b = p.c_str(); + for (size_t i = 0; i < l / sizeof(uint64_t); ++i) { + ((ceph_le64 *)b)[i] = init_le64(engine()); + } + outbl->append(p); + } +} + diff --git a/src/common/CDC.h b/src/common/CDC.h index 8c564034f2c..5c4273a0874 100644 --- a/src/common/CDC.h +++ b/src/common/CDC.h @@ -6,6 +6,7 @@ #include <vector> #include <string> +#include "include/types.h" #include "include/buffer.h" class CDC { @@ -25,3 +26,5 @@ public: int bits, int windowbits = 0); }; + +void generate_buffer(int size, bufferlist *outbl, int seed = 0); diff --git a/src/common/Timer.cc b/src/common/Timer.cc index bf050e6395a..eab46661c99 100644 --- a/src/common/Timer.cc +++ b/src/common/Timer.cc @@ -117,9 +117,14 @@ void SafeTimer::timer_thread() Context* SafeTimer::add_event_after(double seconds, Context *callback) { + return add_event_after(ceph::make_timespan(seconds), callback); +} + +Context* SafeTimer::add_event_after(ceph::timespan duration, Context *callback) +{ ceph_assert(ceph_mutex_is_locked(lock)); - auto when = clock_t::now() + ceph::make_timespan(seconds); + auto when = clock_t::now() + duration; return add_event_at(when, callback); } diff --git a/src/common/Timer.h b/src/common/Timer.h index 61dad464b47..f543be68a76 100644 --- a/src/common/Timer.h +++ b/src/common/Timer.h @@ -74,6 +74,7 @@ public: /* Schedule an event in the future * Call with the event_lock LOCKED */ + Context* add_event_after(ceph::timespan duration, Context *callback); Context* add_event_after(double seconds, Context *callback); Context* add_event_at(clock_t::time_point when, Context *callback); diff --git a/src/common/config.cc b/src/common/config.cc index fcfa689a76d..9cd42c6a5d8 100644 --- a/src/common/config.cc +++ b/src/common/config.cc @@ -1124,17 +1124,19 @@ void md_config_t::early_expand_meta( bool 
md_config_t::finalize_reexpand_meta(ConfigValues& values, const ConfigTracker& tracker) { - for (auto& [name, value] : may_reexpand_meta) { - set_val(values, tracker, name, value); - } - - if (!may_reexpand_meta.empty()) { - // meta expands could have modified anything. Copy it all out again. - update_legacy_vals(values); - return true; - } else { - return false; + std::vector<std::string> reexpands; + reexpands.swap(may_reexpand_meta); + for (auto& name : reexpands) { + // always refresh the options if they are in the may_reexpand_meta + // map, because the options may have already been expanded with old + // meta. + const auto &opt_iter = schema.find(name); + ceph_assert(opt_iter != schema.end()); + const Option &opt = opt_iter->second; + _refresh(values, opt); } + + return !may_reexpand_meta.empty(); } Option::value_t md_config_t::_expand_meta( @@ -1218,7 +1220,7 @@ Option::value_t md_config_t::_expand_meta( } else if (var == "pid") { out += stringify(getpid()); if (o) { - may_reexpand_meta[o->name] = *str; + may_reexpand_meta.push_back(o->name); } } else if (var == "cctid") { out += stringify((unsigned long long)this); diff --git a/src/common/config.h b/src/common/config.h index a44fab0a4ee..bb3410e617d 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -100,7 +100,7 @@ public: std::map<std::string,std::string> ignored_mon_values; /// original raw values saved that may need to re-expand at certain time - mutable std::map<std::string, std::string> may_reexpand_meta; + mutable std::vector<std::string> may_reexpand_meta; /// encoded, cached copy of of values + ignored_mon_values ceph::bufferlist values_bl; diff --git a/src/common/config_proxy.h b/src/common/config_proxy.h index dda83181764..0cf53935241 100644 --- a/src/common/config_proxy.h +++ b/src/common/config_proxy.h @@ -199,7 +199,6 @@ public: rev_obs_map_t rev_obs; if (config.finalize_reexpand_meta(values, obs_mgr)) { _gather_changes(values.changed, &rev_obs, nullptr); - values.changed.clear(); } 
call_observers(locker, rev_obs); @@ -256,7 +255,6 @@ public: if (!values.cluster.empty()) { // meta expands could have modified anything. Copy it all out again. _gather_changes(values.changed, &rev_obs, oss); - values.changed.clear(); } call_observers(locker, rev_obs); @@ -268,6 +266,7 @@ public: [this, rev_obs](md_config_obs_t *obs, const std::string &key) { map_observer_changes(obs, key, rev_obs); }, oss); + changes.clear(); } int set_val(const std::string_view key, const std::string& s, std::stringstream* err_ss=nullptr) { @@ -290,7 +289,6 @@ public: rev_obs_map_t rev_obs; _gather_changes(values.changed, &rev_obs, nullptr); - values.changed.clear(); call_observers(locker, rev_obs); return ret; @@ -301,7 +299,6 @@ public: rev_obs_map_t rev_obs; _gather_changes(values.changed, &rev_obs, oss); - values.changed.clear(); call_observers(locker, rev_obs); return ret; diff --git a/src/common/hobject.h b/src/common/hobject.h index a3812b9b154..b7d4895c33e 100644 --- a/src/common/hobject.h +++ b/src/common/hobject.h @@ -397,6 +397,14 @@ public: shard_id(shard), max(false) {} + ghobject_t(shard_id_t shard, int64_t pool, uint32_t hash, + const std::string& nspace, const std::string& oid, + snapid_t snap, gen_t gen) + : hobj(object_t(oid), "", snap, hash, pool, nspace), + generation(gen), + shard_id(shard), + max(false) {} + static ghobject_t make_pgmeta(int64_t pool, uint32_t hash, shard_id_t shard) { hobject_t h(object_t(), std::string(), CEPH_NOSNAP, hash, pool, std::string()); return ghobject_t(h, NO_GEN, shard); diff --git a/src/common/legacy_config_opts.h b/src/common/legacy_config_opts.h index dd5b3e0af2f..162b324fdaa 100644 --- a/src/common/legacy_config_opts.h +++ b/src/common/legacy_config_opts.h @@ -269,7 +269,6 @@ OPTION(mon_data_avail_warn, OPT_INT) OPTION(mon_data_size_warn, OPT_U64) // issue a warning when the monitor's data store goes over 15GB (in bytes) OPTION(mon_warn_pg_not_scrubbed_ratio, OPT_FLOAT) OPTION(mon_warn_pg_not_deep_scrubbed_ratio, OPT_FLOAT) 
-OPTION(mon_scrub_interval, OPT_INT) // once a day OPTION(mon_scrub_timeout, OPT_INT) // let's give it 5 minutes; why not. OPTION(mon_scrub_max_keys, OPT_INT) // max number of keys to scrub each time OPTION(mon_scrub_inject_crc_mismatch, OPT_DOUBLE) // probability of injected crc mismatch [0.0, 1.0] @@ -1051,6 +1050,7 @@ OPTION(bluestore_debug_enforce_settings, OPT_STR) OPTION(bluestore_volume_selection_policy, OPT_STR) OPTION(bluestore_volume_selection_reserved_factor, OPT_DOUBLE) OPTION(bluestore_volume_selection_reserved, OPT_INT) +OPTION(bluestore_kv_sync_util_logging_s, OPT_DOUBLE) OPTION(kstore_max_ops, OPT_U64) OPTION(kstore_max_bytes, OPT_U64) diff --git a/src/common/options.cc b/src/common/options.cc index 9127a061958..0f495673302 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -2006,7 +2006,7 @@ std::vector<Option> get_global_options() { .set_long_description("") .add_see_also("osd_deep_scrub_interval"), - Option("mon_scrub_interval", Option::TYPE_INT, Option::LEVEL_ADVANCED) + Option("mon_scrub_interval", Option::TYPE_SECS, Option::LEVEL_ADVANCED) .set_default(1_day) .add_service("mon") .set_description("frequency for scrubbing mon database"), @@ -3658,11 +3658,11 @@ std::vector<Option> get_global_options() { .set_description("Time in seconds to sleep before next removal transaction for HDDs"), Option("osd_delete_sleep_ssd", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED) - .set_default(0) + .set_default(1) .set_description("Time in seconds to sleep before next removal transaction for SSDs"), Option("osd_delete_sleep_hybrid", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED) - .set_default(2) + .set_default(1) .set_description("Time in seconds to sleep before next removal transaction when data is on HDD and journal is on SSD"), Option("osd_failsafe_full_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED) @@ -4801,6 +4801,22 @@ std::vector<Option> get_global_options() { .set_default(false) .set_description("Enables Linux io_uring API instead of 
libaio"), + Option("bdev_ioring_hipri", Option::TYPE_BOOL, Option::LEVEL_ADVANCED) + .set_default(false) + .set_description("Enables Linux io_uring API Use polled IO completions"), + + Option("bdev_ioring_sqthread_poll", Option::TYPE_BOOL, Option::LEVEL_ADVANCED) + .set_default(false) + .set_description("Enables Linux io_uring API Offload submission/completion to kernel thread"), + + Option("bluestore_kv_sync_util_logging_s", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED) + .set_default(10.0) + .set_flag(Option::FLAG_RUNTIME) + .set_description("KV sync thread utilization logging period") + .set_long_description("How often (in seconds) to print KV sync thread utilization, " + "not logged when set to 0 or when utilization is 0%"), + + // ----------------------------------------- // kstore diff --git a/src/common/static_ptr.h b/src/common/static_ptr.h index 63fce434240..542f1e9a67a 100644 --- a/src/common/static_ptr.h +++ b/src/common/static_ptr.h @@ -33,7 +33,7 @@ namespace _mem { // of the same arguments (which is not true for function type erasure) // it's a pretty good one. enum class op { - copy, move, destroy, size + move, destroy, size }; template<typename T> static std::size_t op_fun(op oper, void* p1, void* p2) @@ -41,15 +41,6 @@ static std::size_t op_fun(op oper, void* p1, void* p2) auto me = static_cast<T*>(p1); switch (oper) { - case op::copy: - // One conspicuous downside is that immovable/uncopyable functions - // kill compilation right here, even if nobody ever calls the move - // or copy methods. Working around this is a pain, since we'd need - // four operator functions and a top-level class to - // provide/withhold copy/move operations as appropriate. - new (p2) T(*me); - break; - case op::move: new (p2) T(std::move(*me)); break; @@ -137,12 +128,6 @@ public: // Set from another static pointer. 
// // Since the templated versions don't count for overriding the defaults - static_ptr(const static_ptr& rhs) - noexcept(std::is_nothrow_copy_constructible_v<Base>) : operate(rhs.operate) { - if (operate) { - operate(_mem::op::copy, &rhs.buf, &buf); - } - } static_ptr(static_ptr&& rhs) noexcept(std::is_nothrow_move_constructible_v<Base>) : operate(rhs.operate) { if (operate) { @@ -151,14 +136,6 @@ public: } template<typename U, std::size_t S> - static_ptr(const static_ptr<U, S>& rhs) - noexcept(std::is_nothrow_copy_constructible_v<U>) : operate(rhs.operate) { - create_ward<U, S>(); - if (operate) { - operate(_mem::op::copy, &rhs.buf, &buf); - } - } - template<typename U, std::size_t S> static_ptr(static_ptr<U, S>&& rhs) noexcept(std::is_nothrow_move_constructible_v<U>) : operate(rhs.operate) { create_ward<U, S>(); @@ -167,16 +144,6 @@ public: } } - static_ptr& operator =(const static_ptr& rhs) - noexcept(std::is_nothrow_copy_constructible_v<Base>) { - reset(); - if (rhs) { - operate = rhs.operate; - operate(_mem::op::copy, - const_cast<void*>(static_cast<const void*>(&rhs.buf)), &buf); - } - return *this; - } static_ptr& operator =(static_ptr&& rhs) noexcept(std::is_nothrow_move_constructible_v<Base>) { reset(); @@ -188,18 +155,6 @@ public: } template<typename U, std::size_t S> - static_ptr& operator =(const static_ptr<U, S>& rhs) - noexcept(std::is_nothrow_copy_constructible_v<U>) { - create_ward<U, S>(); - reset(); - if (rhs) { - operate = rhs.operate; - operate(_mem::op::copy, - const_cast<void*>(static_cast<const void*>(&rhs.buf)), &buf); - } - return *this; - } - template<typename U, std::size_t S> static_ptr& operator =(static_ptr<U, S>&& rhs) noexcept(std::is_nothrow_move_constructible_v<U>) { create_ward<U, S>(); @@ -300,20 +255,6 @@ public: // nice idiom. Having to release and reconstruct is obnoxious. 
// template<typename U, std::size_t Z, typename T, std::size_t S> -static_ptr<U, Z> static_pointer_cast(const static_ptr<T, S>& p) { - static_assert(Z >= S, - "Value too large."); - static_ptr<U, Z> r; - // Really, this is always true because static_cast either succeeds - // or fails to compile, but it prevents an unused variable warning - // and should be optimized out. - if (static_cast<U*>(p.get())) { - p.operate(_mem::op::copy, &p.buf, &r.buf); - r.operate = p.operate; - } - return r; -} -template<typename U, std::size_t Z, typename T, std::size_t S> static_ptr<U, Z> static_pointer_cast(static_ptr<T, S>&& p) { static_assert(Z >= S, "Value too large."); @@ -329,17 +270,6 @@ static_ptr<U, Z> static_pointer_cast(static_ptr<T, S>&& p) { // same behavior as dynamic_cast. // template<typename U, std::size_t Z, typename T, std::size_t S> -static_ptr<U, Z> dynamic_pointer_cast(const static_ptr<T, S>& p) { - static_assert(Z >= S, - "Value too large."); - static_ptr<U, Z> r; - if (dynamic_cast<U*>(p.get())) { - p.operate(_mem::op::copy, &p.buf, &r.buf); - r.operate = p.operate; - } - return r; -} -template<typename U, std::size_t Z, typename T, std::size_t S> static_ptr<U, Z> dynamic_pointer_cast(static_ptr<T, S>&& p) { static_assert(Z >= S, "Value too large."); @@ -352,17 +282,6 @@ static_ptr<U, Z> dynamic_pointer_cast(static_ptr<T, S>&& p) { } template<typename U, std::size_t Z, typename T, std::size_t S> -static_ptr<U, Z> const_pointer_cast(const static_ptr<T, S>& p) { - static_assert(Z >= S, - "Value too large."); - static_ptr<U, Z> r; - if (const_cast<U*>(p.get())) { - p.operate(_mem::op::copy, &p.buf, &r.buf); - r.operate = p.operate; - } - return r; -} -template<typename U, std::size_t Z, typename T, std::size_t S> static_ptr<U, Z> const_pointer_cast(static_ptr<T, S>&& p) { static_assert(Z >= S, "Value too large."); @@ -378,15 +297,6 @@ static_ptr<U, Z> const_pointer_cast(static_ptr<T, S>&& p) { // where they might. It works, though! 
// template<typename U, std::size_t Z, typename T, std::size_t S> -static_ptr<U, Z> reinterpret_pointer_cast(const static_ptr<T, S>& p) { - static_assert(Z >= S, - "Value too large."); - static_ptr<U, Z> r; - p.operate(_mem::op::copy, &p.buf, &r.buf); - r.operate = p.operate; - return r; -} -template<typename U, std::size_t Z, typename T, std::size_t S> static_ptr<U, Z> reinterpret_pointer_cast(static_ptr<T, S>&& p) { static_assert(Z >= S, "Value too large."); @@ -404,17 +314,6 @@ static_ptr<U, Z> reinterpret_pointer_cast(static_ptr<T, S>&& p) { // I follow cast semantics. Since this is a pointer-like type, it // returns a null value rather than throwing. template<typename U, std::size_t Z, typename T, std::size_t S> -static_ptr<U, Z> resize_pointer_cast(const static_ptr<T, S>& p) { - static_assert(std::is_same_v<U, T>, - "resize_pointer_cast only changes size, not type."); - static_ptr<U, Z> r; - if (Z >= p.operate(_mem::op::size, &p.buf, nullptr)) { - p.operate(_mem::op::copy, &p.buf, &r.buf); - r.operate = p.operate; - } - return r; -} -template<typename U, std::size_t Z, typename T, std::size_t S> static_ptr<U, Z> resize_pointer_cast(static_ptr<T, S>&& p) { static_assert(std::is_same_v<U, T>, "resize_pointer_cast only changes size, not type."); @@ -427,11 +326,19 @@ static_ptr<U, Z> resize_pointer_cast(static_ptr<T, S>&& p) { } template<typename Base, std::size_t Size> -bool operator ==(static_ptr<Base, Size> s, std::nullptr_t) { +bool operator ==(const static_ptr<Base, Size>& s, std::nullptr_t) { + return !s; +} +template<typename Base, std::size_t Size> +bool operator ==(std::nullptr_t, const static_ptr<Base, Size>& s) { + return !s; +} +template<typename Base, std::size_t Size> +bool operator ==(static_ptr<Base, Size>& s, std::nullptr_t) { return !s; } template<typename Base, std::size_t Size> -bool operator ==(std::nullptr_t, static_ptr<Base, Size> s) { +bool operator ==(std::nullptr_t, static_ptr<Base, Size>& s) { return !s; } diff --git 
a/src/compressor/CompressionPlugin.h b/src/compressor/CompressionPlugin.h index 5e0ed777b85..2a21f2fef27 100644 --- a/src/compressor/CompressionPlugin.h +++ b/src/compressor/CompressionPlugin.h @@ -22,22 +22,23 @@ #include <iostream> #include "common/PluginRegistry.h" +#include "include/common_fwd.h" #include "Compressor.h" namespace ceph { class CompressionPlugin : public Plugin { public: - CompressorRef compressor; + TOPNSPC::CompressorRef compressor; - explicit CompressionPlugin(CephContext *cct) : Plugin(cct), - compressor(0) + explicit CompressionPlugin(CephContext *cct) + : Plugin(cct) {} ~CompressionPlugin() override {} - virtual int factory(CompressorRef *cs, - std::ostream *ss) = 0; + virtual int factory(TOPNSPC::CompressorRef *cs, + std::ostream *ss) = 0; virtual const char* name() {return "CompressionPlugin";} }; diff --git a/src/compressor/Compressor.cc b/src/compressor/Compressor.cc index e6faae164f1..fa0f052f69b 100644 --- a/src/compressor/Compressor.cc +++ b/src/compressor/Compressor.cc @@ -24,6 +24,8 @@ #include "common/debug.h" #include "common/dout.h" +namespace TOPNSPC { + const char* Compressor::get_comp_alg_name(int a) { auto p = std::find_if(std::cbegin(compression_algorithms), std::cend(compression_algorithms), @@ -35,7 +37,8 @@ const char* Compressor::get_comp_alg_name(int a) { return p->first; } -boost::optional<Compressor::CompressionAlgorithm> Compressor::get_comp_alg_type(const std::string &s) { +boost::optional<Compressor::CompressionAlgorithm> +Compressor::get_comp_alg_type(std::string_view s) { auto p = std::find_if(std::cbegin(compression_algorithms), std::cend(compression_algorithms), [&s](const auto& kv) { return kv.first == s; }); @@ -54,7 +57,8 @@ const char *Compressor::get_comp_mode_name(int m) { default: return "???"; } } -boost::optional<Compressor::CompressionMode> Compressor::get_comp_mode_type(const std::string &s) { +boost::optional<Compressor::CompressionMode> +Compressor::get_comp_mode_type(std::string_view s) { if (s 
== "force") return COMP_FORCE; if (s == "aggressive") @@ -100,3 +104,5 @@ CompressorRef Compressor::create(CephContext *cct, int alg) std::string type_name = get_comp_alg_name(alg); return create(cct, type_name); } + +} // namespace TOPNSPC diff --git a/src/compressor/Compressor.h b/src/compressor/Compressor.h index 6a4eb277668..0a45a990a87 100644 --- a/src/compressor/Compressor.h +++ b/src/compressor/Compressor.h @@ -27,6 +27,8 @@ #include "QatAccel.h" #endif +namespace TOPNSPC { + class Compressor; typedef std::shared_ptr<Compressor> CompressorRef; @@ -74,10 +76,10 @@ public: #endif static const char* get_comp_alg_name(int a); - static boost::optional<CompressionAlgorithm> get_comp_alg_type(const std::string &s); + static boost::optional<CompressionAlgorithm> get_comp_alg_type(std::string_view s); static const char *get_comp_mode_name(int m); - static boost::optional<CompressionMode> get_comp_mode_type(const std::string &s); + static boost::optional<CompressionMode> get_comp_mode_type(std::string_view s); Compressor(CompressionAlgorithm a, const char* t) : alg(a), type(t) { } @@ -103,4 +105,5 @@ protected: }; +} // namespace TOPNSPC #endif diff --git a/src/crimson/admin/admin_socket.cc b/src/crimson/admin/admin_socket.cc index 27cdcf132fc..75cb80a4660 100644 --- a/src/crimson/admin/admin_socket.cc +++ b/src/crimson/admin/admin_socket.cc @@ -7,6 +7,7 @@ #include <fmt/format.h> #include <seastar/net/api.hh> #include <seastar/net/inet_address.hh> +#include <seastar/core/future-util.hh> #include <seastar/core/reactor.hh> #include <seastar/core/sleep.hh> #include <seastar/core/thread.hh> @@ -122,7 +123,7 @@ seastar::future<> AdminSocket::finalize_response( } -seastar::future<> AdminSocket::handle_command(crimson::net::Connection* conn, +seastar::future<> AdminSocket::handle_command(crimson::net::ConnectionRef conn, boost::intrusive_ptr<MCommand> m) { return execute_command(m->cmd, std::move(m->get_data())).then( diff --git a/src/crimson/admin/admin_socket.h 
b/src/crimson/admin/admin_socket.h index 0beee751c0b..a842b62a2d1 100644 --- a/src/crimson/admin/admin_socket.h +++ b/src/crimson/admin/admin_socket.h @@ -126,7 +126,7 @@ class AdminSocket : public seastar::enable_lw_shared_from_this<AdminSocket> { * \param conn connection over which the incoming command message is received * \param m message carrying the command vector and optional input buffer */ - seastar::future<> handle_command(crimson::net::Connection* conn, + seastar::future<> handle_command(crimson::net::ConnectionRef conn, boost::intrusive_ptr<MCommand> m); private: diff --git a/src/crimson/common/errorator.h b/src/crimson/common/errorator.h index 4fdbb53b548..0733f68f8ab 100644 --- a/src/crimson/common/errorator.h +++ b/src/crimson/common/errorator.h @@ -596,14 +596,14 @@ private: } template <class FuncT> - auto finally(FuncT &&func) { + _future finally(FuncT &&func) { return this->then_wrapped( [func = std::forward<FuncT>(func)](auto &&result) mutable noexcept { if constexpr (seastar::is_future<std::invoke_result_t<FuncT>>::value) { return ::seastar::futurize_invoke(std::forward<FuncT>(func)).then_wrapped( [result = std::move(result)](auto&& f_res) mutable { // TODO: f_res.failed() - f_res.discard_result(); + (void)f_res.discard_result(); return std::move(result); }); } else { @@ -1017,6 +1017,7 @@ namespace ct_error { ct_error_code<std::errc::resource_unavailable_try_again>; using file_too_large = ct_error_code<std::errc::file_too_large>; + using address_in_use = ct_error_code<std::errc::address_in_use>; struct pass_further_all { template <class ErrorT> diff --git a/src/crimson/mgr/client.cc b/src/crimson/mgr/client.cc index fcc8f832607..5aa8a88ba21 100644 --- a/src/crimson/mgr/client.cc +++ b/src/crimson/mgr/client.cc @@ -47,19 +47,22 @@ seastar::future<> Client::stop() return fut; } -seastar::future<> Client::ms_dispatch(crimson::net::Connection* conn, - MessageRef m) +std::optional<seastar::future<>> +Client::ms_dispatch(crimson::net::ConnectionRef 
conn, MessageRef m) { - return gate.dispatch(__func__, *this, [this, conn, &m] { + bool dispatched = true; + gate.dispatch_in_background(__func__, *this, [this, conn, &m, &dispatched] { switch(m->get_type()) { case MSG_MGR_MAP: return handle_mgr_map(conn, boost::static_pointer_cast<MMgrMap>(m)); case MSG_MGR_CONFIGURE: return handle_mgr_conf(conn, boost::static_pointer_cast<MMgrConfigure>(m)); default: + dispatched = false; return seastar::now(); } }); + return (dispatched ? std::make_optional(seastar::now()) : std::nullopt); } void Client::ms_handle_connect(crimson::net::ConnectionRef c) @@ -114,7 +117,7 @@ seastar::future<> Client::reconnect() }); } -seastar::future<> Client::handle_mgr_map(crimson::net::Connection*, +seastar::future<> Client::handle_mgr_map(crimson::net::ConnectionRef, Ref<MMgrMap> m) { mgrmap = m->get_map(); @@ -128,7 +131,7 @@ seastar::future<> Client::handle_mgr_map(crimson::net::Connection*, } } -seastar::future<> Client::handle_mgr_conf(crimson::net::Connection* conn, +seastar::future<> Client::handle_mgr_conf(crimson::net::ConnectionRef, Ref<MMgrConfigure> m) { logger().info("{} {}", __func__, *m); diff --git a/src/crimson/mgr/client.h b/src/crimson/mgr/client.h index 19e4cd6ee25..ad7e1fde54e 100644 --- a/src/crimson/mgr/client.h +++ b/src/crimson/mgr/client.h @@ -3,9 +3,9 @@ #pragma once -#include <seastar/core/gate.hh> #include <seastar/core/timer.hh> +#include "crimson/common/gated.h" #include "crimson/net/Dispatcher.h" #include "crimson/net/Fwd.h" #include "mon/MgrMap.h" @@ -37,13 +37,13 @@ public: void report(); private: - seastar::future<> ms_dispatch(crimson::net::Connection* conn, - Ref<Message> m) override; + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef conn, Ref<Message> m) override; void ms_handle_reset(crimson::net::ConnectionRef conn, bool is_replace) final; void ms_handle_connect(crimson::net::ConnectionRef conn) final; - seastar::future<> handle_mgr_map(crimson::net::Connection* conn, + 
seastar::future<> handle_mgr_map(crimson::net::ConnectionRef conn, Ref<MMgrMap> m); - seastar::future<> handle_mgr_conf(crimson::net::Connection* conn, + seastar::future<> handle_mgr_conf(crimson::net::ConnectionRef conn, Ref<MMgrConfigure> m); seastar::future<> reconnect(); diff --git a/src/crimson/mon/MonClient.cc b/src/crimson/mon/MonClient.cc index 9be91ce8497..9dfbb103a38 100644 --- a/src/crimson/mon/MonClient.cc +++ b/src/crimson/mon/MonClient.cc @@ -518,10 +518,11 @@ bool Client::is_hunting() const { return !active_con; } -seastar::future<> -Client::ms_dispatch(crimson::net::Connection* conn, MessageRef m) +std::optional<seastar::future<>> +Client::ms_dispatch(crimson::net::ConnectionRef conn, MessageRef m) { - return gate.dispatch(__func__, *this, [this, conn, &m] { + bool dispatched = true; + gate.dispatch_in_background(__func__, *this, [this, conn, &m, &dispatched] { // we only care about these message types switch (m->get_type()) { case CEPH_MSG_MON_MAP: @@ -545,9 +546,11 @@ Client::ms_dispatch(crimson::net::Connection* conn, MessageRef m) return handle_config( boost::static_pointer_cast<MConfig>(m)); default: + dispatched = false; return seastar::now(); } }); + return (dispatched ? 
std::make_optional(seastar::now()) : std::nullopt); } void Client::ms_handle_reset(crimson::net::ConnectionRef conn, bool /* is_replace */) @@ -782,7 +785,7 @@ int Client::handle_auth_bad_method(crimson::net::ConnectionRef conn, } } -seastar::future<> Client::handle_monmap(crimson::net::Connection* conn, +seastar::future<> Client::handle_monmap(crimson::net::ConnectionRef conn, Ref<MMonMap> m) { monmap.decode(m->monmapbl); @@ -812,8 +815,8 @@ seastar::future<> Client::handle_monmap(crimson::net::Connection* conn, } } -seastar::future<> Client::handle_auth_reply(crimson::net::Connection* conn, - Ref<MAuthReply> m) +seastar::future<> Client::handle_auth_reply(crimson::net::ConnectionRef conn, + Ref<MAuthReply> m) { logger().info( "handle_auth_reply mon {} => {} returns {}: {}", diff --git a/src/crimson/mon/MonClient.h b/src/crimson/mon/MonClient.h index 0f651dd5d17..e7d2df86393 100644 --- a/src/crimson/mon/MonClient.h +++ b/src/crimson/mon/MonClient.h @@ -140,13 +140,13 @@ private: private: void tick(); - seastar::future<> ms_dispatch(crimson::net::Connection* conn, - MessageRef m) override; + std::optional<seastar::future<>> ms_dispatch(crimson::net::ConnectionRef conn, + MessageRef m) override; void ms_handle_reset(crimson::net::ConnectionRef conn, bool is_replace) override; - seastar::future<> handle_monmap(crimson::net::Connection* conn, + seastar::future<> handle_monmap(crimson::net::ConnectionRef conn, Ref<MMonMap> m); - seastar::future<> handle_auth_reply(crimson::net::Connection* conn, + seastar::future<> handle_auth_reply(crimson::net::ConnectionRef conn, Ref<MAuthReply> m); seastar::future<> handle_subscribe_ack(Ref<MMonSubscribeAck> m); seastar::future<> handle_get_version_reply(Ref<MMonGetVersionReply> m); diff --git a/src/crimson/net/Connection.h b/src/crimson/net/Connection.h index 25b3f5af562..6af12692e78 100644 --- a/src/crimson/net/Connection.h +++ b/src/crimson/net/Connection.h @@ -147,10 +147,6 @@ class Connection : public 
seastar::enable_shared_from_this<Connection> { auto get_last_keepalive() const { return last_keepalive; } auto get_last_keepalive_ack() const { return last_keepalive_ack; } - seastar::shared_ptr<Connection> get_shared() { - return shared_from_this(); - } - struct user_private_t { virtual ~user_private_t() = default; }; diff --git a/src/crimson/net/Dispatcher.h b/src/crimson/net/Dispatcher.h index fd26d146166..cc6fd4574c7 100644 --- a/src/crimson/net/Dispatcher.h +++ b/src/crimson/net/Dispatcher.h @@ -14,26 +14,22 @@ #pragma once -#include <seastar/core/future.hh> -#include <seastar/core/sharded.hh> -#include <boost/intrusive/slist.hpp> - -#include "crimson/common/gated.h" #include "Fwd.h" class AuthAuthorizer; namespace crimson::net { -class Dispatcher : public boost::intrusive::slist_base_hook< - boost::intrusive::link_mode< - boost::intrusive::safe_link>> { +class Dispatcher { public: virtual ~Dispatcher() {} - virtual seastar::future<> ms_dispatch(Connection* conn, MessageRef m) { - return seastar::make_ready_future<>(); - } + // Dispatchers are put into a chain as described by chain-of-responsibility + // pattern. If any of the dispatchers claims this message, it returns a valid + // future to prevent other dispatchers from processing it, and this is also + // used to throttle the connection if it's too busy. 
+ virtual std::optional<seastar::future<>> ms_dispatch(ConnectionRef, MessageRef) = 0; + virtual void ms_handle_accept(ConnectionRef conn) {} virtual void ms_handle_connect(ConnectionRef conn) {} diff --git a/src/crimson/net/Fwd.h b/src/crimson/net/Fwd.h index 22215339672..e10120571f3 100644 --- a/src/crimson/net/Fwd.h +++ b/src/crimson/net/Fwd.h @@ -14,6 +14,9 @@ #pragma once +#include <boost/container/small_vector.hpp> +#include <seastar/core/future.hh> +#include <seastar/core/future-util.hh> #include <seastar/core/shared_ptr.hh> #include <seastar/core/sharded.hh> @@ -21,6 +24,8 @@ #include "msg/MessageRef.h" #include "msg/msg_types.h" +#include "crimson/common/errorator.h" + using auth_proto_t = int; class AuthConnectionMeta; @@ -35,6 +40,9 @@ class Connection; using ConnectionRef = seastar::shared_ptr<Connection>; class Dispatcher; +class ChainedDispatchers; +constexpr std::size_t NUM_DISPATCHERS = 4u; +using dispatchers_t = boost::container::small_vector<Dispatcher*, NUM_DISPATCHERS>; class Messenger; using MessengerRef = seastar::shared_ptr<Messenger>; diff --git a/src/crimson/net/Messenger.h b/src/crimson/net/Messenger.h index 7065d2ad2af..2b39fbf63a6 100644 --- a/src/crimson/net/Messenger.h +++ b/src/crimson/net/Messenger.h @@ -14,11 +14,8 @@ #pragma once -#include <seastar/core/future.hh> - #include "Fwd.h" #include "crimson/common/throttle.h" -#include "crimson/net/chained_dispatchers.h" #include "msg/Message.h" #include "msg/Policy.h" @@ -65,15 +62,18 @@ public: return seastar::now(); } + using bind_ertr = crimson::errorator< + crimson::ct_error::address_in_use // The address (range) is already bound + >; /// bind to the given address - virtual seastar::future<> bind(const entity_addrvec_t& addr) = 0; + virtual bind_ertr::future<> bind(const entity_addrvec_t& addr) = 0; /// try to bind to the first unused port of given address - virtual seastar::future<> try_bind(const entity_addrvec_t& addr, - uint32_t min_port, uint32_t max_port) = 0; + virtual 
bind_ertr::future<> try_bind(const entity_addrvec_t& addr, + uint32_t min_port, uint32_t max_port) = 0; /// start the messenger - virtual seastar::future<> start(ChainedDispatchersRef) = 0; + virtual seastar::future<> start(const dispatchers_t&) = 0; /// either return an existing connection to the peer, /// or a new pending connection @@ -90,13 +90,13 @@ public: // wait for messenger shutdown virtual seastar::future<> wait() = 0; - virtual void add_dispatcher(Dispatcher&) = 0; + // stop dispatching events and messages + virtual void stop() = 0; - virtual void remove_dispatcher(Dispatcher&) = 0; + virtual bool is_started() const = 0; - virtual bool dispatcher_chain_empty() const = 0; - /// stop listenening and wait for all connections to close. safe to destruct - /// after this future becomes available + // free internal resources before destruction, must be called after stopped, + // and must be called if is bound. virtual seastar::future<> shutdown() = 0; uint32_t get_crc_flags() const { diff --git a/src/crimson/net/Protocol.cc b/src/crimson/net/Protocol.cc index 25712314bc0..6973aa5df5b 100644 --- a/src/crimson/net/Protocol.cc +++ b/src/crimson/net/Protocol.cc @@ -7,7 +7,7 @@ #include "crimson/common/log.h" #include "crimson/net/Errors.h" -#include "crimson/net/Dispatcher.h" +#include "crimson/net/chained_dispatchers.h" #include "crimson/net/Socket.h" #include "crimson/net/SocketConnection.h" #include "msg/Message.h" @@ -21,10 +21,10 @@ namespace { namespace crimson::net { Protocol::Protocol(proto_t type, - ChainedDispatchersRef& dispatcher, + ChainedDispatchers& dispatchers, SocketConnection& conn) : proto_type(type), - dispatcher(dispatcher), + dispatchers(dispatchers), conn(conn), auth_meta{seastar::make_lw_shared<AuthConnectionMeta>()} {} @@ -73,19 +73,14 @@ void Protocol::close(bool dispatch_reset, auto gate_closed = gate.close(); if (dispatch_reset) { - try { - dispatcher->ms_handle_reset( - 
seastar::static_pointer_cast<SocketConnection>(conn.shared_from_this()), - is_replace); - } catch (...) { - logger().error("{} got unexpected exception in ms_handle_reset() {}", - conn, std::current_exception()); - } + dispatchers.ms_handle_reset( + seastar::static_pointer_cast<SocketConnection>(conn.shared_from_this()), + is_replace); } // asynchronous operations assert(!close_ready.valid()); - close_ready = std::move(gate_closed).finally([this] { + close_ready = std::move(gate_closed).then([this] { if (socket) { return socket->close(); } else { diff --git a/src/crimson/net/Protocol.h b/src/crimson/net/Protocol.h index 3deb706acb4..dc4e4f2af8f 100644 --- a/src/crimson/net/Protocol.h +++ b/src/crimson/net/Protocol.h @@ -50,7 +50,7 @@ class Protocol { virtual void print(std::ostream&) const = 0; protected: Protocol(proto_t type, - ChainedDispatchersRef& dispatcher, + ChainedDispatchers& dispatchers, SocketConnection& conn); virtual void trigger_close() = 0; @@ -71,7 +71,7 @@ class Protocol { SocketRef socket; protected: - ChainedDispatchersRef dispatcher; + ChainedDispatchers& dispatchers; SocketConnection &conn; AuthConnectionMetaRef auth_meta; diff --git a/src/crimson/net/ProtocolV1.cc b/src/crimson/net/ProtocolV1.cc index 8a290c8f744..34fb14573e2 100644 --- a/src/crimson/net/ProtocolV1.cc +++ b/src/crimson/net/ProtocolV1.cc @@ -15,7 +15,7 @@ #include "crimson/auth/AuthClient.h" #include "crimson/auth/AuthServer.h" #include "crimson/common/log.h" -#include "Dispatcher.h" +#include "chained_dispatchers.h" #include "Errors.h" #include "Socket.h" #include "SocketConnection.h" @@ -125,10 +125,10 @@ void discard_up_to(std::deque<MessageRef>* queue, namespace crimson::net { -ProtocolV1::ProtocolV1(ChainedDispatchersRef& dispatcher, +ProtocolV1::ProtocolV1(ChainedDispatchers& dispatchers, SocketConnection& conn, SocketMessenger& messenger) - : Protocol(proto_t::v1, dispatcher, conn), messenger{messenger} {} + : Protocol(proto_t::v1, dispatchers, conn), 
messenger{messenger} {} ProtocolV1::~ProtocolV1() {} @@ -848,10 +848,10 @@ seastar::future<> ProtocolV1::read_message() }).then([this] (bufferlist bl) { auto p = bl.cbegin(); ::decode(m.footer, p); - auto pconn = seastar::static_pointer_cast<SocketConnection>( + auto conn_ref = seastar::static_pointer_cast<SocketConnection>( conn.shared_from_this()); auto msg = ::decode_message(nullptr, 0, m.header, m.footer, - m.front, m.middle, m.data, std::move(pconn)); + m.front, m.middle, m.data, conn_ref); if (unlikely(!msg)) { logger().warn("{} decode message failed", conn); throw std::system_error{make_error_code(error::corrupted_message)}; @@ -871,15 +871,13 @@ seastar::future<> ProtocolV1::read_message() if (unlikely(!conn.update_rx_seq(msg->get_seq()))) { // skip this message - return; + return seastar::now(); } - // start dispatch, ignoring exceptions from the application layer - gate.dispatch_in_background("ms_dispatch", *this, [this, msg = std::move(msg_ref)] { - logger().debug("{} <== #{} === {} ({})", - conn, msg->get_seq(), *msg, msg->get_type()); - return dispatcher->ms_dispatch(&conn, std::move(msg)); - }); + logger().debug("{} <== #{} === {} ({})", + conn, msg_ref->get_seq(), *msg_ref, msg_ref->get_type()); + // throttle the reading process by the returned future + return dispatchers.ms_dispatch(conn_ref, std::move(msg_ref)); }); } @@ -919,15 +917,11 @@ void ProtocolV1::execute_open(open_t type) set_write_state(write_state_t::open); if (type == open_t::connected) { - gate.dispatch_in_background("ms_handle_connect", *this, [this] { - return dispatcher->ms_handle_connect( - seastar::static_pointer_cast<SocketConnection>(conn.shared_from_this())); - }); + dispatchers.ms_handle_connect( + seastar::static_pointer_cast<SocketConnection>(conn.shared_from_this())); } else { // type == open_t::accepted - gate.dispatch_in_background("ms_handle_accept", *this, [this] { - return dispatcher->ms_handle_accept( - 
seastar::static_pointer_cast<SocketConnection>(conn.shared_from_this())); - }); + dispatchers.ms_handle_accept( + seastar::static_pointer_cast<SocketConnection>(conn.shared_from_this())); } gate.dispatch_in_background("execute_open", *this, [this] { diff --git a/src/crimson/net/ProtocolV1.h b/src/crimson/net/ProtocolV1.h index d7d642c5727..c71af598bcf 100644 --- a/src/crimson/net/ProtocolV1.h +++ b/src/crimson/net/ProtocolV1.h @@ -12,7 +12,7 @@ namespace crimson::net { class ProtocolV1 final : public Protocol { public: - ProtocolV1(ChainedDispatchersRef& dispatcher, + ProtocolV1(ChainedDispatchers& dispatchers, SocketConnection& conn, SocketMessenger& messenger); ~ProtocolV1() override; diff --git a/src/crimson/net/ProtocolV2.cc b/src/crimson/net/ProtocolV2.cc index 750f458bd9d..4d7d06d7a33 100644 --- a/src/crimson/net/ProtocolV2.cc +++ b/src/crimson/net/ProtocolV2.cc @@ -12,7 +12,7 @@ #include "crimson/auth/AuthServer.h" #include "crimson/common/formatter.h" -#include "Dispatcher.h" +#include "chained_dispatchers.h" #include "Errors.h" #include "Socket.h" #include "SocketConnection.h" @@ -143,10 +143,10 @@ seastar::future<> ProtocolV2::Timer::backoff(double seconds) }); } -ProtocolV2::ProtocolV2(ChainedDispatchersRef& dispatcher, +ProtocolV2::ProtocolV2(ChainedDispatchers& dispatchers, SocketConnection& conn, SocketMessenger& messenger) - : Protocol(proto_t::v2, dispatcher, conn), + : Protocol(proto_t::v2, dispatchers, conn), messenger{messenger}, protocol_timer{conn} {} @@ -385,7 +385,7 @@ void ProtocolV2::reset_session(bool full) client_cookie = generate_client_cookie(); peer_global_seq = 0; reset_write(); - dispatcher->ms_handle_remote_reset( + dispatchers.ms_handle_remote_reset( seastar::static_pointer_cast<SocketConnection>(conn.shared_from_this())); } } @@ -1601,7 +1601,7 @@ void ProtocolV2::execute_establishing( accept_me(); } - dispatcher->ms_handle_accept( + dispatchers.ms_handle_accept( 
seastar::static_pointer_cast<SocketConnection>(conn.shared_from_this())); gated_execute("execute_establishing", [this] { @@ -1699,7 +1699,7 @@ void ProtocolV2::trigger_replacing(bool reconnect, if (socket) { socket->shutdown(); } - dispatcher->ms_handle_accept( + dispatchers.ms_handle_accept( seastar::static_pointer_cast<SocketConnection>(conn.shared_from_this())); gate.dispatch_in_background("trigger_replacing", *this, [this, @@ -1883,11 +1883,10 @@ seastar::future<> ProtocolV2::read_message(utime_t throttle_stamp) ceph_msg_footer footer{init_le32(0), init_le32(0), init_le32(0), init_le64(0), current_header.flags}; - auto pconn = seastar::static_pointer_cast<SocketConnection>( + auto conn_ref = seastar::static_pointer_cast<SocketConnection>( conn.shared_from_this()); Message *message = decode_message(nullptr, 0, header, footer, - msg_frame.front(), msg_frame.middle(), msg_frame.data(), - std::move(pconn)); + msg_frame.front(), msg_frame.middle(), msg_frame.data(), conn_ref); if (!message) { logger().warn("{} decode message failed", conn); abort_in_fault(); @@ -1914,7 +1913,7 @@ seastar::future<> ProtocolV2::read_message(utime_t throttle_stamp) local_conf()->ms_die_on_old_message) { ceph_assert(0 == "old msgs despite reconnect_seq feature"); } - return; + return seastar::now(); } else if (message->get_seq() > cur_seq + 1) { logger().error("{} missed message? 
skipped from seq {} to {}", conn, cur_seq, message->get_seq()); @@ -1932,7 +1931,8 @@ seastar::future<> ProtocolV2::read_message(utime_t throttle_stamp) // TODO: change MessageRef with seastar::shared_ptr auto msg_ref = MessageRef{message, false}; - std::ignore = dispatcher->ms_dispatch(&conn, std::move(msg_ref)); + // throttle the reading process by the returned future + return dispatchers.ms_dispatch(conn_ref, std::move(msg_ref)); }); } @@ -1941,7 +1941,7 @@ void ProtocolV2::execute_ready(bool dispatch_connect) assert(conn.policy.lossy || (client_cookie != 0 && server_cookie != 0)); trigger_state(state_t::READY, write_state_t::open, false); if (dispatch_connect) { - dispatcher->ms_handle_connect( + dispatchers.ms_handle_connect( seastar::static_pointer_cast<SocketConnection>(conn.shared_from_this())); } #ifdef UNIT_TESTS_BUILT diff --git a/src/crimson/net/ProtocolV2.h b/src/crimson/net/ProtocolV2.h index d4672c4ce49..be9a2281668 100644 --- a/src/crimson/net/ProtocolV2.h +++ b/src/crimson/net/ProtocolV2.h @@ -13,7 +13,7 @@ namespace crimson::net { class ProtocolV2 final : public Protocol { public: - ProtocolV2(ChainedDispatchersRef& dispatcher, + ProtocolV2(ChainedDispatchers& dispatchers, SocketConnection& conn, SocketMessenger& messenger); ~ProtocolV2() override; diff --git a/src/crimson/net/Socket.cc b/src/crimson/net/Socket.cc index b5b044b994e..8ad106dbdd7 100644 --- a/src/crimson/net/Socket.cc +++ b/src/crimson/net/Socket.cc @@ -198,7 +198,8 @@ void Socket::set_trap(bp_type_t type, bp_action_t action, socket_blocker* blocke } #endif -seastar::future<> FixedCPUServerSocket::listen(entity_addr_t addr) +FixedCPUServerSocket::listen_ertr::future<> +FixedCPUServerSocket::listen(entity_addr_t addr) { assert(seastar::this_shard_id() == cpu); logger().trace("FixedCPUServerSocket::listen({})...", addr); @@ -209,15 +210,23 @@ seastar::future<> FixedCPUServerSocket::listen(entity_addr_t addr) lo.reuse_address = true; lo.set_fixed_cpu(ss.cpu); ss.listener = 
seastar::listen(s_addr, lo); + }).then([] { + return true; }).handle_exception_type([addr] (const std::system_error& e) { if (e.code() == std::errc::address_in_use) { logger().trace("FixedCPUServerSocket::listen({}): address in use", addr); - throw; } else { logger().error("FixedCPUServerSocket::listen({}): " "got unexpeted error {}", addr, e); ceph_abort(); } + return false; + }).then([] (bool success) -> listen_ertr::future<> { + if (success) { + return listen_ertr::now(); + } else { + return crimson::ct_error::address_in_use::make(); + } }); } diff --git a/src/crimson/net/Socket.h b/src/crimson/net/Socket.h index 8b05a884896..d39a2517f95 100644 --- a/src/crimson/net/Socket.h +++ b/src/crimson/net/Socket.h @@ -9,10 +9,10 @@ #include <seastar/net/packet.hh> #include "include/buffer.h" -#include "msg/msg_types.h" #include "crimson/common/log.h" #include "Errors.h" +#include "Fwd.h" #ifdef UNIT_TESTS_BUILT #include "Interceptor.h" @@ -197,7 +197,10 @@ public: FixedCPUServerSocket(const FixedCPUServerSocket&) = delete; FixedCPUServerSocket& operator=(const FixedCPUServerSocket&) = delete; - seastar::future<> listen(entity_addr_t addr); + using listen_ertr = crimson::errorator< + crimson::ct_error::address_in_use // The address is already bound + >; + listen_ertr::future<> listen(entity_addr_t addr); // fn_accept should be a nothrow function of type // seastar::future<>(SocketRef, entity_addr_t) diff --git a/src/crimson/net/SocketConnection.cc b/src/crimson/net/SocketConnection.cc index b0c7197eedb..623dca32f0b 100644 --- a/src/crimson/net/SocketConnection.cc +++ b/src/crimson/net/SocketConnection.cc @@ -26,14 +26,14 @@ using namespace crimson::net; using crimson::common::local_conf; SocketConnection::SocketConnection(SocketMessenger& messenger, - ChainedDispatchersRef& dispatcher, + ChainedDispatchers& dispatchers, bool is_msgr2) : messenger(messenger) { if (is_msgr2) { - protocol = std::make_unique<ProtocolV2>(dispatcher, *this, messenger); + protocol = 
std::make_unique<ProtocolV2>(dispatchers, *this, messenger); } else { - protocol = std::make_unique<ProtocolV1>(dispatcher, *this, messenger); + protocol = std::make_unique<ProtocolV1>(dispatchers, *this, messenger); } #ifdef UNIT_TESTS_BUILT if (messenger.interceptor) { diff --git a/src/crimson/net/SocketConnection.h b/src/crimson/net/SocketConnection.h index 0af08e0e4f2..9c977c7cf66 100644 --- a/src/crimson/net/SocketConnection.h +++ b/src/crimson/net/SocketConnection.h @@ -18,13 +18,11 @@ #include "msg/Policy.h" #include "crimson/common/throttle.h" -#include "crimson/net/chained_dispatchers.h" #include "crimson/net/Connection.h" #include "crimson/net/Socket.h" namespace crimson::net { -class Dispatcher; class Protocol; class SocketMessenger; class SocketConnection; @@ -55,7 +53,7 @@ class SocketConnection : public Connection { public: SocketConnection(SocketMessenger& messenger, - ChainedDispatchersRef& dispatcher, + ChainedDispatchers& dispatchers, bool is_msgr2); ~SocketConnection() override; diff --git a/src/crimson/net/SocketMessenger.cc b/src/crimson/net/SocketMessenger.cc index 11914d71bd3..db9421e79e2 100644 --- a/src/crimson/net/SocketMessenger.cc +++ b/src/crimson/net/SocketMessenger.cc @@ -19,7 +19,6 @@ #include "auth/Auth.h" #include "Errors.h" -#include "Dispatcher.h" #include "Socket.h" namespace { @@ -49,7 +48,7 @@ seastar::future<> SocketMessenger::set_myaddrs(const entity_addrvec_t& addrs) return Messenger::set_myaddrs(my_addrs); } -seastar::future<> SocketMessenger::do_bind(const entity_addrvec_t& addrs) +SocketMessenger::bind_ertr::future<> SocketMessenger::do_bind(const entity_addrvec_t& addrs) { assert(seastar::this_shard_id() == master_sid); ceph_assert(addrs.front().get_family() == AF_INET); @@ -61,60 +60,73 @@ seastar::future<> SocketMessenger::do_bind(const entity_addrvec_t& addrs) } else { return seastar::now(); } - }).then([this] { + }).then([this] () -> bind_ertr::future<> { const entity_addr_t listen_addr = get_myaddr(); 
logger().debug("{} do_bind: try listen {}...", *this, listen_addr); if (!listener) { logger().warn("{} do_bind: listener doesn't exist", *this); - return seastar::now(); + return bind_ertr::now(); } return listener->listen(listen_addr); }); } -seastar::future<> SocketMessenger::bind(const entity_addrvec_t& addrs) +SocketMessenger::bind_ertr::future<> +SocketMessenger::bind(const entity_addrvec_t& addrs) { - return do_bind(addrs).then([this] { + return do_bind(addrs).safe_then([this] { logger().info("{} bind: done", *this); }); } -seastar::future<> +SocketMessenger::bind_ertr::future<> SocketMessenger::try_bind(const entity_addrvec_t& addrs, uint32_t min_port, uint32_t max_port) { auto addr = addrs.front(); if (addr.get_port() != 0) { - return do_bind(addrs).then([this] { + return do_bind(addrs).safe_then([this] { logger().info("{} try_bind: done", *this); }); } ceph_assert(min_port <= max_port); return seastar::do_with(uint32_t(min_port), [this, max_port, addr] (auto& port) { - return seastar::repeat([this, max_port, addr, &port] { + return seastar::repeat_until_value([this, max_port, addr, &port] { auto to_bind = addr; to_bind.set_port(port); - return do_bind(entity_addrvec_t{to_bind}).then([this] { + return do_bind(entity_addrvec_t{to_bind} + ).safe_then([this] () -> seastar::future<std::optional<bool>> { logger().info("{} try_bind: done", *this); - return stop_t::yes; - }).handle_exception_type([this, max_port, &port] (const std::system_error& e) { - assert(e.code() == std::errc::address_in_use); + return seastar::make_ready_future<std::optional<bool>>( + std::make_optional<bool>(true)); + }, bind_ertr::all_same_way([this, max_port, &port] + (const std::error_code& e) mutable + -> seastar::future<std::optional<bool>> { + assert(e == std::errc::address_in_use); logger().trace("{} try_bind: {} already used", *this, port); if (port == max_port) { - throw; + return seastar::make_ready_future<std::optional<bool>>( + std::make_optional<bool>(false)); } ++port; - 
return stop_t::no; - }); + return seastar::make_ready_future<std::optional<bool>>(); + })); + }).then([] (bool success) -> bind_ertr::future<> { + if (success) { + return bind_ertr::now(); + } else { + return crimson::ct_error::address_in_use::make(); + } }); }); } -seastar::future<> SocketMessenger::start(ChainedDispatchersRef chained_dispatchers) { +seastar::future<> SocketMessenger::start( + const dispatchers_t& _dispatchers) { assert(seastar::this_shard_id() == master_sid); - dispatchers = chained_dispatchers; + dispatchers.assign(_dispatchers); if (listener) { // make sure we have already bound to a valid address ceph_assert(get_myaddr().is_legacy() || get_myaddr().is_msgr2()); @@ -154,9 +166,7 @@ seastar::future<> SocketMessenger::shutdown() { assert(seastar::this_shard_id() == master_sid); return seastar::futurize_invoke([this] { - if (dispatchers) { - assert(dispatchers->empty()); - } + assert(dispatchers.empty()); if (listener) { auto d_listener = listener; listener = nullptr; diff --git a/src/crimson/net/SocketMessenger.h b/src/crimson/net/SocketMessenger.h index e86a44d6719..44c1d3c2137 100644 --- a/src/crimson/net/SocketMessenger.h +++ b/src/crimson/net/SocketMessenger.h @@ -15,8 +15,8 @@ #pragma once #include <map> -#include <optional> #include <set> +#include <vector> #include <seastar/core/gate.hh> #include <seastar/core/reactor.hh> #include <seastar/core/sharded.hh> @@ -35,15 +35,7 @@ class SocketMessenger final : public Messenger { seastar::promise<> shutdown_promise; FixedCPUServerSocket* listener = nullptr; - // as we want to unregister a dispatcher from the messengers when stopping - // that dispatcher, we have to use intrusive slist which, when used with - // "boost::intrusive::linear<true>", can tolerate ongoing iteration of the - // list when removing an element. However, the downside of this is that an - // element can only be attached to one slist. 
So, as we need to make multiple - // messenger reference the same set of dispatchers, we have to make them share - // the same ChainedDispatchers, which means registering/unregistering an element - // to one messenger will affect other messengers that share the same ChainedDispatchers. - ChainedDispatchersRef dispatchers; + ChainedDispatchers dispatchers; std::map<entity_addr_t, SocketConnectionRef> connections; std::set<SocketConnectionRef> accepting_conns; std::vector<SocketConnectionRef> closing_conns; @@ -56,7 +48,7 @@ class SocketMessenger final : public Messenger { uint32_t global_seq = 0; bool started = false; - seastar::future<> do_bind(const entity_addrvec_t& addr); + bind_ertr::future<> do_bind(const entity_addrvec_t& addr); public: SocketMessenger(const entity_name_t& myname, @@ -68,15 +60,12 @@ class SocketMessenger final : public Messenger { // Messenger interfaces are assumed to be called from its own shard, but its // behavior should be symmetric when called from any shard. 
- seastar::future<> bind(const entity_addrvec_t& addr) override; + bind_ertr::future<> bind(const entity_addrvec_t& addr) override; - seastar::future<> try_bind(const entity_addrvec_t& addr, - uint32_t min_port, uint32_t max_port) override; + bind_ertr::future<> try_bind(const entity_addrvec_t& addr, + uint32_t min_port, uint32_t max_port) override; - seastar::future<> start(ChainedDispatchersRef dispatchers) override; - void add_dispatcher(Dispatcher& disp) { - dispatchers->push_back(disp); - } + seastar::future<> start(const dispatchers_t& dispatchers) override; ConnectionRef connect(const entity_addr_t& peer_addr, const entity_name_t& peer_name) override; @@ -86,12 +75,14 @@ class SocketMessenger final : public Messenger { return shutdown_promise.get_future(); } - void remove_dispatcher(Dispatcher& disp) override { - dispatchers->erase(disp); + void stop() override { + dispatchers.clear(); } - bool dispatcher_chain_empty() const override { - return !dispatchers || dispatchers->empty(); + + bool is_started() const override { + return !dispatchers.empty(); } + seastar::future<> shutdown() override; void print(ostream& out) const override { diff --git a/src/crimson/net/chained_dispatchers.cc b/src/crimson/net/chained_dispatchers.cc index 19c7d3d4863..b13d40c8f73 100644 --- a/src/crimson/net/chained_dispatchers.cc +++ b/src/crimson/net/chained_dispatchers.cc @@ -1,39 +1,93 @@ +#include "crimson/common/log.h" #include "crimson/net/chained_dispatchers.h" #include "crimson/net/Connection.h" +#include "crimson/net/Dispatcher.h" #include "msg/Message.h" +namespace { + seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_ms); + } +} + +namespace crimson::net { + seastar::future<> -ChainedDispatchers::ms_dispatch(crimson::net::Connection* conn, +ChainedDispatchers::ms_dispatch(crimson::net::ConnectionRef conn, MessageRef m) { - return seastar::do_for_each(dispatchers, [conn, m](Dispatcher& dispatcher) { - return dispatcher.ms_dispatch(conn, m); - }); + try 
{ + for (auto& dispatcher : dispatchers) { + auto dispatched = dispatcher->ms_dispatch(conn, m); + if (dispatched.has_value()) { + return std::move(*dispatched + ).handle_exception([conn] (std::exception_ptr eptr) { + logger().error("{} got unexpected exception in ms_dispatch() throttling {}", + *conn, eptr); + ceph_abort(); + }); + } + } + } catch (...) { + logger().error("{} got unexpected exception in ms_dispatch() {}", + *conn, std::current_exception()); + ceph_abort(); + } + if (!dispatchers.empty()) { + logger().error("ms_dispatch unhandled message {}", *m); + } + return seastar::now(); } void ChainedDispatchers::ms_handle_accept(crimson::net::ConnectionRef conn) { - for (auto& dispatcher : dispatchers) { - dispatcher.ms_handle_accept(conn); + try { + for (auto& dispatcher : dispatchers) { + dispatcher->ms_handle_accept(conn); + } + } catch (...) { + logger().error("{} got unexpected exception in ms_handle_accept() {}", + *conn, std::current_exception()); + ceph_abort(); } } void ChainedDispatchers::ms_handle_connect(crimson::net::ConnectionRef conn) { - for(auto& dispatcher : dispatchers) { - dispatcher.ms_handle_connect(conn); + try { + for(auto& dispatcher : dispatchers) { + dispatcher->ms_handle_connect(conn); + } + } catch (...) { + logger().error("{} got unexpected exception in ms_handle_connect() {}", + *conn, std::current_exception()); + ceph_abort(); } } void ChainedDispatchers::ms_handle_reset(crimson::net::ConnectionRef conn, bool is_replace) { - for (auto& dispatcher : dispatchers) { - dispatcher.ms_handle_reset(conn, is_replace); + try { + for (auto& dispatcher : dispatchers) { + dispatcher->ms_handle_reset(conn, is_replace); + } + } catch (...) 
{ + logger().error("{} got unexpected exception in ms_handle_reset() {}", + *conn, std::current_exception()); + ceph_abort(); } } void ChainedDispatchers::ms_handle_remote_reset(crimson::net::ConnectionRef conn) { - for (auto& dispatcher : dispatchers) { - dispatcher.ms_handle_remote_reset(conn); + try { + for (auto& dispatcher : dispatchers) { + dispatcher->ms_handle_remote_reset(conn); + } + } catch (...) { + logger().error("{} got unexpected exception in ms_handle_remote_reset() {}", + *conn, std::current_exception()); + ceph_abort(); } } + +} diff --git a/src/crimson/net/chained_dispatchers.h b/src/crimson/net/chained_dispatchers.h index a5facef2b96..712b0894b9f 100644 --- a/src/crimson/net/chained_dispatchers.h +++ b/src/crimson/net/chained_dispatchers.h @@ -3,43 +3,34 @@ #pragma once -#include <boost/intrusive/slist.hpp> - -#include "crimson/net/Dispatcher.h" +#include "Fwd.h" #include "crimson/common/log.h" -using crimson::net::Dispatcher; +namespace crimson::net { + +class Dispatcher; -// in existing Messenger, dispatchers are put into a chain as described by -// chain-of-responsibility pattern. we could do the same to stop processing -// the message once any of the dispatchers claims this message, and prevent -// other dispatchers from reading it. but this change is more involved as -// it requires changing the ms_ methods to return a bool. so as an intermediate -// solution, we are using an observer dispatcher to notify all the interested -// or unintersted parties. 
class ChainedDispatchers { - boost::intrusive::slist< - Dispatcher, - boost::intrusive::linear<true>, - boost::intrusive::cache_last<true>> dispatchers; public: - void push_front(Dispatcher& dispatcher) { - dispatchers.push_front(dispatcher); + void assign(const dispatchers_t& _dispatchers) { + assert(empty()); + assert(!_dispatchers.empty()); + dispatchers = _dispatchers; } - void push_back(Dispatcher& dispatcher) { - dispatchers.push_back(dispatcher); - } - void erase(Dispatcher& dispatcher) { - dispatchers.erase(dispatchers.iterator_to(dispatcher)); + void clear() { + dispatchers.clear(); } bool empty() const { return dispatchers.empty(); } - seastar::future<> ms_dispatch(crimson::net::Connection* conn, MessageRef m); + seastar::future<> ms_dispatch(crimson::net::ConnectionRef, MessageRef); void ms_handle_accept(crimson::net::ConnectionRef conn); void ms_handle_connect(crimson::net::ConnectionRef conn); void ms_handle_reset(crimson::net::ConnectionRef conn, bool is_replace); void ms_handle_remote_reset(crimson::net::ConnectionRef conn); + + private: + dispatchers_t dispatchers; }; -using ChainedDispatchersRef = seastar::lw_shared_ptr<ChainedDispatchers>; +} diff --git a/src/crimson/os/seastore/CMakeLists.txt b/src/crimson/os/seastore/CMakeLists.txt index 4c265bcf231..5a764f19cc3 100644 --- a/src/crimson/os/seastore/CMakeLists.txt +++ b/src/crimson/os/seastore/CMakeLists.txt @@ -15,6 +15,18 @@ add_library(crimson-seastore STATIC onode_manager/simple-fltree/onode_block.cc onode_manager/simple-fltree/onode_delta.cc onode_manager/simple-fltree/onode_node.cc + onode_manager/staged-fltree/node.cc + onode_manager/staged-fltree/node_extent_manager.cc + onode_manager/staged-fltree/node_extent_manager/seastore.cc + onode_manager/staged-fltree/node_extent_mutable.cc + onode_manager/staged-fltree/node_impl.cc + onode_manager/staged-fltree/stages/item_iterator_stage.cc + onode_manager/staged-fltree/stages/key_layout.cc + 
onode_manager/staged-fltree/stages/node_stage_layout.cc + onode_manager/staged-fltree/stages/node_stage.cc + onode_manager/staged-fltree/stages/sub_items_stage.cc + onode_manager/staged-fltree/super.cc + onode_manager/staged-fltree/tree.cc extentmap_manager.cc extentmap_manager/btree/extentmap_btree_node_impl.cc extentmap_manager/btree/btree_extentmap_manager.cc diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 1b2a22391ce..6a406c1b85a 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -8,6 +8,7 @@ #include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h" #include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h" #include "crimson/os/seastore/onode_manager/simple-fltree/onode_block.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h" #include "test/crimson/seastore/test_block.h" namespace { @@ -467,6 +468,8 @@ Cache::get_root_ret Cache::get_root(Transaction &t) } } +using StagedOnodeBlock = crimson::os::seastore::onode::SeastoreNodeExtent; + Cache::get_extent_ertr::future<CachedExtentRef> Cache::get_extent_by_type( extent_types_t type, paddr_t offset, @@ -503,6 +506,11 @@ Cache::get_extent_ertr::future<CachedExtentRef> Cache::get_extent_by_type( ).safe_then([](auto extent) { return CachedExtentRef(extent.detach(), false /* add_ref */); }); + case extent_types_t::ONODE_BLOCK_STAGED: + return get_extent<StagedOnodeBlock>(offset, length + ).safe_then([](auto extent) { + return CachedExtentRef(extent.detach(), false /* add_ref */); + }); case extent_types_t::TEST_BLOCK: return get_extent<TestBlock>(offset, length ).safe_then([](auto extent) { diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index eff6c52dad0..5eb68051c81 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -120,6 +120,16 @@ public: get_root_ret get_root(Transaction &t); /** + * get_root_fast 
+ * + * returns t.root and assume it is already present/read in t + */ + RootBlockRef get_root_fast(Transaction &t) { + assert(t.root); + return t.root; + } + + /** * get_extent * * returns ref to extent at offset~length of type T either from diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index 8c9312a6a92..b1875ba0d3b 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -34,6 +34,9 @@ using TCachedExtentRef = boost::intrusive_ptr<T>; /** * CachedExtent */ +namespace onode { + class DummyNodeExtent; +} class ExtentIndex; class CachedExtent : public boost::intrusive_ref_counter< CachedExtent, boost::thread_unsafe_counter> { @@ -47,6 +50,8 @@ class CachedExtent : public boost::intrusive_ref_counter< INVALID // Part of no ExtentIndex set } state = extent_state_t::INVALID; friend std::ostream &operator<<(std::ostream &, extent_state_t); + // allow a dummy onode extent to pretend it is a fresh block + friend class onode::DummyNodeExtent; uint32_t last_committed_crc = 0; diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc index 70356018078..a837ae37e62 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc @@ -31,12 +31,13 @@ BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs( lba_node_meta_t meta{0, L_ADDR_MAX, 1}; root_leaf->set_meta(meta); root_leaf->pin.set_range(meta); - croot->get_lba_root() = + croot->get_root() = root_t{ 1, 0, root_leaf->get_paddr(), - make_record_relative_paddr(0)}; + make_record_relative_paddr(0), + L_ADDR_NULL}; return mkfs_ertr::now(); }); } @@ -47,12 +48,12 @@ BtreeLBAManager::get_root(Transaction &t) return cache.get_root(t).safe_then([this, &t](auto croot) { logger().debug( "BtreeLBAManager::get_root: reading root at {} depth {}", - 
paddr_t{croot->get_lba_root().lba_root_addr}, - unsigned(croot->get_lba_root().lba_depth)); + paddr_t{croot->get_root().lba_root_addr}, + unsigned(croot->get_root().lba_depth)); return get_lba_btree_extent( get_context(t), - croot->get_lba_root().lba_depth, - croot->get_lba_root().lba_root_addr, + croot->get_root().lba_depth, + croot->get_root().lba_root_addr, paddr_t()); }); } @@ -486,8 +487,8 @@ BtreeLBAManager::insert_mapping_ret BtreeLBAManager::insert_mapping( L_ADDR_MIN, root->get_paddr(), nullptr); - croot->get_lba_root().lba_root_addr = nroot->get_paddr(); - croot->get_lba_root().lba_depth = root->get_node_meta().depth + 1; + croot->get_root().lba_root_addr = nroot->get_paddr(); + croot->get_root().lba_depth = root->get_node_meta().depth + 1; return nroot->split_entry( get_context(t), laddr, nroot->begin(), root); @@ -540,7 +541,7 @@ BtreeLBAManager::update_internal_mapping( paddr_t paddr) { return cache.get_root(t).safe_then([=, &t](RootBlockRef croot) { - if (depth == croot->get_lba_root().lba_depth) { + if (depth == croot->get_root().lba_depth) { logger().debug( "update_internal_mapping: updating lba root to: {}->{}", laddr, @@ -550,8 +551,8 @@ BtreeLBAManager::update_internal_mapping( croot = mut_croot->cast<RootBlock>(); } ceph_assert(laddr == 0); - auto old_paddr = croot->get_lba_root().lba_root_addr; - croot->get_lba_root().lba_root_addr = paddr; + auto old_paddr = croot->get_root().lba_root_addr; + croot->get_root().lba_root_addr = paddr; return update_internal_mapping_ret( update_internal_mapping_ertr::ready_future_marker{}, old_paddr); @@ -563,8 +564,8 @@ BtreeLBAManager::update_internal_mapping( paddr); return get_lba_btree_extent( get_context(t), - croot->get_lba_root().lba_depth, - croot->get_lba_root().lba_root_addr, + croot->get_root().lba_depth, + croot->get_root().lba_root_addr, paddr_t()).safe_then([=, &t](LBANodeRef broot) { return broot->mutate_internal_address( get_context(t), diff --git 
a/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h b/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h new file mode 100644 index 00000000000..c45d60505db --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h @@ -0,0 +1,91 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <algorithm> +#include <cstring> +#include <limits> +#include <memory> +#include <string> + +#include "crimson/common/errorator.h" +#include "crimson/os/seastore/cached_extent.h" +#include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/transaction.h" + +namespace crimson::os::seastore::onode { + +using crimson::os::seastore::Transaction; +using crimson::os::seastore::TransactionRef; +using crimson::os::seastore::make_transaction; +using crimson::os::seastore::laddr_t; +using crimson::os::seastore::L_ADDR_MIN; +using crimson::os::seastore::L_ADDR_NULL; +using crimson::os::seastore::extent_len_t; + +class DeltaRecorder; +class NodeExtent; +class NodeExtentManager; +class RootNodeTracker; +using DeltaRecorderURef = std::unique_ptr<DeltaRecorder>; +using NodeExtentRef = crimson::os::seastore::TCachedExtentRef<NodeExtent>; +using NodeExtentManagerURef = std::unique_ptr<NodeExtentManager>; +using RootNodeTrackerURef = std::unique_ptr<RootNodeTracker>; +struct context_t { + NodeExtentManager& nm; + Transaction& t; +}; + +class LeafNodeImpl; +class InternalNodeImpl; +class NodeImpl; +using LeafNodeImplURef = std::unique_ptr<LeafNodeImpl>; +using InternalNodeImplURef = std::unique_ptr<InternalNodeImpl>; +using NodeImplURef = std::unique_ptr<NodeImpl>; + +using level_t = uint8_t; +constexpr auto INDEX_END = std::numeric_limits<size_t>::max(); +constexpr auto INDEX_LAST = INDEX_END - 0xf; +constexpr auto INDEX_UPPER_BOUND = INDEX_END - 0xff; +inline bool is_valid_index(size_t index) { return index < INDEX_UPPER_BOUND; } + +// TODO: decide by NODE_BLOCK_SIZE +using 
node_offset_t = uint16_t; +constexpr node_offset_t DISK_BLOCK_SIZE = 1u << 12; +constexpr node_offset_t NODE_BLOCK_SIZE = DISK_BLOCK_SIZE * 1u; + +enum class MatchKindBS : int8_t { NE = -1, EQ = 0 }; + +enum class MatchKindCMP : int8_t { LT = -1, EQ = 0, GT }; +inline MatchKindCMP toMatchKindCMP(int value) { + if (value > 0) { + return MatchKindCMP::GT; + } else if (value < 0) { + return MatchKindCMP::LT; + } else { + return MatchKindCMP::EQ; + } +} +template <typename Type> +MatchKindCMP toMatchKindCMP(const Type& l, const Type& r) { + int match = l - r; + return toMatchKindCMP(match); +} + +inline MatchKindCMP toMatchKindCMP( + std::string_view l, std::string_view r) { + return toMatchKindCMP(l.compare(r)); +} + +inline MatchKindCMP reverse(MatchKindCMP cmp) { + if (cmp == MatchKindCMP::LT) { + return MatchKindCMP::GT; + } else if (cmp == MatchKindCMP::GT) { + return MatchKindCMP::LT; + } else { + return cmp; + } +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc new file mode 100644 index 00000000000..e1e73f69dd7 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc @@ -0,0 +1,805 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "node.h" + +#include <cassert> +#include <exception> +#include <sstream> + +#include "common/likely.h" +#include "crimson/common/log.h" +#include "node_extent_manager.h" +#include "node_impl.h" +#include "stages/node_stage_layout.h" + +namespace { + seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } +} + +namespace crimson::os::seastore::onode { + +using node_ertr = Node::node_ertr; +template <class ValueT=void> +using node_future = Node::node_future<ValueT>; + +/* + * tree_cursor_t + */ + +tree_cursor_t::tree_cursor_t(Ref<LeafNode> node, const search_position_t& pos) + : leaf_node{node}, position{pos} { + 
assert(!is_end()); + leaf_node->do_track_cursor<true>(*this); +} + +tree_cursor_t::tree_cursor_t( + Ref<LeafNode> node, const search_position_t& pos, + const key_view_t& key, const onode_t* _p_value, layout_version_t v) + : leaf_node{node}, position{pos} { + assert(!is_end()); + update_kv(key, _p_value, v); + leaf_node->do_track_cursor<true>(*this); +} + +tree_cursor_t::tree_cursor_t(Ref<LeafNode> node) + : leaf_node{node}, position{search_position_t::end()} { + assert(is_end()); + assert(leaf_node->is_level_tail()); +} + +tree_cursor_t::~tree_cursor_t() { + if (!is_end()) { + leaf_node->do_untrack_cursor(*this); + } +} + +const key_view_t& tree_cursor_t::get_key_view() const { + ensure_kv(); + return *key_view; +} + +const onode_t* tree_cursor_t::get_p_value() const { + ensure_kv(); + return p_value; +} + +template <bool VALIDATE> +void tree_cursor_t::update_track( + Ref<LeafNode> node, const search_position_t& pos) { + // the cursor must be already untracked + // track the new node and new pos + assert(!pos.is_end()); + assert(!is_end()); + leaf_node = node; + position = pos; + key_view.reset(); + p_value = nullptr; + leaf_node->do_track_cursor<VALIDATE>(*this); +} +template void tree_cursor_t::update_track<true>(Ref<LeafNode>, const search_position_t&); +template void tree_cursor_t::update_track<false>(Ref<LeafNode>, const search_position_t&); + +void tree_cursor_t::update_kv( + const key_view_t& key, const onode_t* _p_value, layout_version_t v) const { + assert(!is_end()); + assert(_p_value); + assert(std::make_tuple(key, _p_value, v) == leaf_node->get_kv(position)); + key_view = key; + p_value = _p_value; + node_version = v; +} + +void tree_cursor_t::ensure_kv() const { + assert(!is_end()); + if (!p_value || node_version != leaf_node->get_layout_version()) { + // NOTE: the leaf node is always present when we hold its reference. 
+ std::tie(key_view, p_value, node_version) = leaf_node->get_kv(position); + } + assert(p_value); +} + +/* + * Node + */ + +Node::Node(NodeImplURef&& impl) : impl{std::move(impl)} {} + +Node::~Node() { + // XXX: tolerate failure between allocate() and as_child() + if (is_root()) { + super->do_untrack_root(*this); + } else { + _parent_info->ptr->do_untrack_child(*this); + } +} + +level_t Node::level() const { + return impl->level(); +} + +node_future<Node::search_result_t> Node::lower_bound( + context_t c, const key_hobj_t& key) { + return seastar::do_with( + MatchHistory(), [this, c, &key](auto& history) { + return lower_bound_tracked(c, key, history); + } + ); +} + +node_future<std::pair<Ref<tree_cursor_t>, bool>> Node::insert( + context_t c, const key_hobj_t& key, const onode_t& value) { + return seastar::do_with( + MatchHistory(), [this, c, &key, &value](auto& history) { + return lower_bound_tracked(c, key, history + ).safe_then([c, &key, &value, &history](auto result) { + if (result.match() == MatchKindBS::EQ) { + return node_ertr::make_ready_future<std::pair<Ref<tree_cursor_t>, bool>>( + std::make_pair(result.p_cursor, false)); + } else { + auto leaf_node = result.p_cursor->get_leaf_node(); + return leaf_node->insert_value( + c, key, value, result.p_cursor->get_position(), history, result.mstat + ).safe_then([](auto p_cursor) { + return node_ertr::make_ready_future<std::pair<Ref<tree_cursor_t>, bool>>( + std::make_pair(p_cursor, true)); + }); + } + }); + } + ); +} + +node_future<tree_stats_t> Node::get_tree_stats(context_t c) { + return seastar::do_with( + tree_stats_t(), [this, c](auto& stats) { + return do_get_tree_stats(c, stats).safe_then([&stats] { + return stats; + }); + } + ); +} + +std::ostream& Node::dump(std::ostream& os) const { + return impl->dump(os); +} + +std::ostream& Node::dump_brief(std::ostream& os) const { + return impl->dump_brief(os); +} + +void Node::test_make_destructable( + context_t c, NodeExtentMutable& mut, Super::URef&& _super) { + 
impl->test_set_tail(mut); + make_root(c, std::move(_super)); +} + +node_future<> Node::mkfs(context_t c, RootNodeTracker& root_tracker) { + return LeafNode::allocate_root(c, root_tracker + ).safe_then([](auto ret) { /* FIXME: discard_result(); */ }); +} + +node_future<Ref<Node>> Node::load_root(context_t c, RootNodeTracker& root_tracker) { + return c.nm.get_super(c.t, root_tracker + ).safe_then([c, &root_tracker](auto&& _super) { + auto root_addr = _super->get_root_laddr(); + assert(root_addr != L_ADDR_NULL); + return Node::load(c, root_addr, true + ).safe_then([c, _super = std::move(_super), + &root_tracker](auto root) mutable { + assert(root->impl->field_type() == field_type_t::N0); + root->as_root(std::move(_super)); + assert(root == root_tracker.get_root(c.t)); + return node_ertr::make_ready_future<Ref<Node>>(root); + }); + }); +} + +void Node::make_root(context_t c, Super::URef&& _super) { + _super->write_root_laddr(c, impl->laddr()); + as_root(std::move(_super)); +} + +void Node::as_root(Super::URef&& _super) { + assert(!super && !_parent_info); + assert(_super->get_root_laddr() == impl->laddr()); + assert(impl->is_level_tail()); + super = std::move(_super); + super->do_track_root(*this); +} + +node_future<> Node::upgrade_root(context_t c) { + assert(is_root()); + assert(impl->is_level_tail()); + assert(impl->field_type() == field_type_t::N0); + super->do_untrack_root(*this); + return InternalNode::allocate_root(c, impl->level(), impl->laddr(), std::move(super) + ).safe_then([this](auto new_root) { + as_child(search_position_t::end(), new_root); + }); +} + +template <bool VALIDATE> +void Node::as_child(const search_position_t& pos, Ref<InternalNode> parent_node) { + assert(!super); + _parent_info = parent_info_t{pos, parent_node}; + parent_info().ptr->do_track_child<VALIDATE>(*this); +} +template void Node::as_child<true>(const search_position_t&, Ref<InternalNode>); +template void Node::as_child<false>(const search_position_t&, Ref<InternalNode>); + 
+node_future<> Node::insert_parent(context_t c, Ref<Node> right_node) { + assert(!is_root()); + // TODO(cross-node string dedup) + return parent_info().ptr->apply_child_split( + c, parent_info().position, this, right_node); +} + +node_future<Ref<Node>> Node::load( + context_t c, laddr_t addr, bool expect_is_level_tail) { + // NOTE: + // *option1: all types of node have the same length; + // option2: length is defined by node/field types; + // option3: length is totally flexible; + return c.nm.read_extent(c.t, addr, NODE_BLOCK_SIZE + ).safe_then([expect_is_level_tail](auto extent) { + auto [node_type, field_type] = extent->get_types(); + if (node_type == node_type_t::LEAF) { + auto impl = LeafNodeImpl::load(extent, field_type, expect_is_level_tail); + return Ref<Node>(new LeafNode(impl.get(), std::move(impl))); + } else if (node_type == node_type_t::INTERNAL) { + auto impl = InternalNodeImpl::load(extent, field_type, expect_is_level_tail); + return Ref<Node>(new InternalNode(impl.get(), std::move(impl))); + } else { + ceph_abort("impossible path"); + } + }); +} + +/* + * InternalNode + */ + +InternalNode::InternalNode(InternalNodeImpl* impl, NodeImplURef&& impl_ref) + : Node(std::move(impl_ref)), impl{impl} {} + +node_future<> InternalNode::apply_child_split( + context_t c, const search_position_t& pos, + Ref<Node> left_child, Ref<Node> right_child) { +#ifndef NDEBUG + if (pos.is_end()) { + assert(impl->is_level_tail()); + } +#endif + impl->prepare_mutate(c); + + auto left_key = left_child->impl->get_largest_key_view(); + auto left_child_addr = left_child->impl->laddr(); + auto left_child_addr_packed = laddr_packed_t{left_child_addr}; + auto right_key = right_child->impl->get_largest_key_view(); + auto right_child_addr = right_child->impl->laddr(); + logger().debug("OTree::Internal::Insert: " + "pos({}), left_child({}, {:#x}), right_child({}, {:#x}) ...", + pos, left_key, left_child_addr, right_key, right_child_addr); + // update pos => left_child to pos => 
right_child + impl->replace_child_addr(pos, right_child_addr, left_child_addr); + replace_track(pos, right_child, left_child); + + search_position_t insert_pos = pos; + auto [insert_stage, insert_size] = impl->evaluate_insert( + left_key, left_child_addr, insert_pos); + auto free_size = impl->free_size(); + if (free_size >= insert_size) { + // insert + [[maybe_unused]] auto p_value = impl->insert( + left_key, left_child_addr_packed, insert_pos, insert_stage, insert_size); + assert(impl->free_size() == free_size - insert_size); + assert(insert_pos <= pos); + assert(p_value->value == left_child_addr); + track_insert(insert_pos, insert_stage, left_child, right_child); + validate_tracked_children(); + return node_ertr::now(); + } + // split and insert + Ref<InternalNode> this_ref = this; + return (is_root() ? upgrade_root(c) : node_ertr::now() + ).safe_then([this, c] { + return InternalNode::allocate( + c, impl->field_type(), impl->is_level_tail(), impl->level()); + }).safe_then([this_ref, this, c, left_key, left_child, right_child, + insert_pos, insert_stage, insert_size](auto fresh_right) mutable { + auto right_node = fresh_right.node; + auto left_child_addr = left_child->impl->laddr(); + auto left_child_addr_packed = laddr_packed_t{left_child_addr}; + auto [split_pos, is_insert_left, p_value] = impl->split_insert( + fresh_right.mut, *right_node->impl, left_key, left_child_addr_packed, + insert_pos, insert_stage, insert_size); + assert(p_value->value == left_child_addr); + track_split(split_pos, right_node); + if (is_insert_left) { + track_insert(insert_pos, insert_stage, left_child); + } else { + right_node->track_insert(insert_pos, insert_stage, left_child); + } + validate_tracked_children(); + right_node->validate_tracked_children(); + + // propagate index to parent + return insert_parent(c, right_node); + // TODO (optimize) + // try to acquire space from siblings before split... 
see btrfs + }); +} + +node_future<Ref<InternalNode>> InternalNode::allocate_root( + context_t c, level_t old_root_level, + laddr_t old_root_addr, Super::URef&& super) { + return InternalNode::allocate(c, field_type_t::N0, true, old_root_level + 1 + ).safe_then([c, old_root_addr, + super = std::move(super)](auto fresh_node) mutable { + auto root = fresh_node.node; + auto p_value = root->impl->get_p_value(search_position_t::end()); + fresh_node.mut.copy_in_absolute( + const_cast<laddr_packed_t*>(p_value), old_root_addr); + root->make_root_from(c, std::move(super), old_root_addr); + return root; + }); +} + +node_future<Ref<tree_cursor_t>> +InternalNode::lookup_smallest(context_t c) { + auto position = search_position_t::begin(); + laddr_t child_addr = impl->get_p_value(position)->value; + return get_or_track_child(c, position, child_addr + ).safe_then([c](auto child) { + return child->lookup_smallest(c); + }); +} + +node_future<Ref<tree_cursor_t>> +InternalNode::lookup_largest(context_t c) { + // NOTE: unlike LeafNode::lookup_largest(), this only works for the tail + // internal node to return the tail child address. 
+ auto position = search_position_t::end(); + laddr_t child_addr = impl->get_p_value(position)->value; + return get_or_track_child(c, position, child_addr).safe_then([c](auto child) { + return child->lookup_largest(c); + }); +} + +node_future<Node::search_result_t> +InternalNode::lower_bound_tracked( + context_t c, const key_hobj_t& key, MatchHistory& history) { + auto result = impl->lower_bound(key, history); + return get_or_track_child(c, result.position, result.p_value->value + ).safe_then([c, &key, &history](auto child) { + // XXX(multi-type): pass result.mstat to child + return child->lower_bound_tracked(c, key, history); + }); +} + +node_future<> InternalNode::do_get_tree_stats( + context_t c, tree_stats_t& stats) { + auto nstats = impl->get_stats(); + stats.size_persistent_internal += nstats.size_persistent; + stats.size_filled_internal += nstats.size_filled; + stats.size_logical_internal += nstats.size_logical; + stats.size_overhead_internal += nstats.size_overhead; + stats.size_value_internal += nstats.size_value; + stats.num_kvs_internal += nstats.num_kvs; + stats.num_nodes_internal += 1; + + Ref<const InternalNode> this_ref = this; + return seastar::do_with( + search_position_t(), [this, this_ref, c, &stats](auto& pos) { + pos = search_position_t::begin(); + return crimson::do_until( + [this, this_ref, c, &stats, &pos]() -> node_future<bool> { + auto child_addr = impl->get_p_value(pos)->value; + return get_or_track_child(c, pos, child_addr + ).safe_then([c, &stats](auto child) { + return child->do_get_tree_stats(c, stats); + }).safe_then([this, this_ref, &pos] { + if (pos.is_end()) { + return node_ertr::make_ready_future<bool>(true); + } else { + impl->next_position(pos); + if (pos.is_end()) { + if (impl->is_level_tail()) { + return node_ertr::make_ready_future<bool>(false); + } else { + return node_ertr::make_ready_future<bool>(true); + } + } else { + return node_ertr::make_ready_future<bool>(false); + } + } + }); + }); + } + ); +} + +node_future<> 
InternalNode::test_clone_root( + context_t c_other, RootNodeTracker& tracker_other) const { + assert(is_root()); + assert(impl->is_level_tail()); + assert(impl->field_type() == field_type_t::N0); + Ref<const InternalNode> this_ref = this; + return InternalNode::allocate(c_other, field_type_t::N0, true, impl->level() + ).safe_then([this, c_other, &tracker_other](auto fresh_other) { + impl->test_copy_to(fresh_other.mut); + auto cloned_root = fresh_other.node; + return c_other.nm.get_super(c_other.t, tracker_other + ).safe_then([c_other, cloned_root](auto&& super_other) { + cloned_root->make_root_new(c_other, std::move(super_other)); + return cloned_root; + }); + }).safe_then([this_ref, this, c_other](auto cloned_root) { + // clone tracked children + // In some unit tests, the children are stubbed out that they + // don't exist in NodeExtentManager, and are only tracked in memory. + return crimson::do_for_each( + tracked_child_nodes.begin(), + tracked_child_nodes.end(), + [this_ref, c_other, cloned_root](auto& kv) { + assert(kv.first == kv.second->parent_info().position); + return kv.second->test_clone_non_root(c_other, cloned_root); + } + ); + }); +} + +node_future<Ref<Node>> InternalNode::get_or_track_child( + context_t c, const search_position_t& position, laddr_t child_addr) { + bool level_tail = position.is_end(); + Ref<Node> child; + auto found = tracked_child_nodes.find(position); + Ref<InternalNode> this_ref = this; + return (found == tracked_child_nodes.end() + ? 
(logger().trace("OTree::Internal: load child untracked at {:#x}, pos({}), level={}", + child_addr, position, level() - 1), + Node::load(c, child_addr, level_tail + ).safe_then([this, position] (auto child) { + child->as_child(position, this); + return child; + })) + : (logger().trace("OTree::Internal: load child tracked at {:#x}, pos({}), level={}", + child_addr, position, level() - 1), + node_ertr::make_ready_future<Ref<Node>>(found->second)) + ).safe_then([this_ref, this, position, child_addr] (auto child) { + assert(child_addr == child->impl->laddr()); + assert(position == child->parent_info().position); + validate_child(*child); + return child; + }); +} + +void InternalNode::track_insert( + const search_position_t& insert_pos, match_stage_t insert_stage, + Ref<Node> insert_child, Ref<Node> nxt_child) { + // update tracks + auto pos_upper_bound = insert_pos; + pos_upper_bound.index_by_stage(insert_stage) = INDEX_UPPER_BOUND; + auto first = tracked_child_nodes.lower_bound(insert_pos); + auto last = tracked_child_nodes.lower_bound(pos_upper_bound); + std::vector<Node*> nodes; + std::for_each(first, last, [&nodes](auto& kv) { + nodes.push_back(kv.second); + }); + tracked_child_nodes.erase(first, last); + for (auto& node : nodes) { + auto _pos = node->parent_info().position; + assert(!_pos.is_end()); + ++_pos.index_by_stage(insert_stage); + node->as_child(_pos, this); + } + // track insert + insert_child->as_child(insert_pos, this); + +#ifndef NDEBUG + // validate left_child is before right_child + if (nxt_child) { + auto iter = tracked_child_nodes.find(insert_pos); + ++iter; + assert(iter->second == nxt_child); + } +#endif +} + +void InternalNode::replace_track( + const search_position_t& position, Ref<Node> new_child, Ref<Node> old_child) { + assert(tracked_child_nodes[position] == old_child); + tracked_child_nodes.erase(position); + new_child->as_child(position, this); + assert(tracked_child_nodes[position] == new_child); +} + +void InternalNode::track_split( + 
const search_position_t& split_pos, Ref<InternalNode> right_node) { + auto first = tracked_child_nodes.lower_bound(split_pos); + auto iter = first; + while (iter != tracked_child_nodes.end()) { + search_position_t new_pos = iter->first; + new_pos -= split_pos; + iter->second->as_child<false>(new_pos, right_node); + ++iter; + } + tracked_child_nodes.erase(first, tracked_child_nodes.end()); +} + +void InternalNode::validate_child(const Node& child) const { +#ifndef NDEBUG + assert(impl->level() - 1 == child.impl->level()); + assert(this == child.parent_info().ptr); + auto& child_pos = child.parent_info().position; + assert(impl->get_p_value(child_pos)->value == child.impl->laddr()); + if (child_pos.is_end()) { + assert(impl->is_level_tail()); + assert(child.impl->is_level_tail()); + } else { + assert(!child.impl->is_level_tail()); + assert(impl->get_key_view(child_pos) == child.impl->get_largest_key_view()); + } + // XXX(multi-type) + assert(impl->field_type() <= child.impl->field_type()); +#endif +} + +node_future<InternalNode::fresh_node_t> InternalNode::allocate( + context_t c, field_type_t field_type, bool is_level_tail, level_t level) { + return InternalNodeImpl::allocate(c, field_type, is_level_tail, level + ).safe_then([](auto&& fresh_impl) { + auto node = Ref<InternalNode>(new InternalNode( + fresh_impl.impl.get(), std::move(fresh_impl.impl))); + return fresh_node_t{node, fresh_impl.mut}; + }); +} + +/* + * LeafNode + */ + +LeafNode::LeafNode(LeafNodeImpl* impl, NodeImplURef&& impl_ref) + : Node(std::move(impl_ref)), impl{impl} {} + +bool LeafNode::is_level_tail() const { + return impl->is_level_tail(); +} + +std::tuple<key_view_t, const onode_t*, layout_version_t> LeafNode::get_kv( + const search_position_t& pos) const { + key_view_t key_view; + auto p_value = impl->get_p_value(pos, &key_view); + return {key_view, p_value, layout_version}; +} + +node_future<Ref<tree_cursor_t>> +LeafNode::lookup_smallest(context_t) { + if (unlikely(impl->is_empty())) { + 
assert(is_root()); + return node_ertr::make_ready_future<Ref<tree_cursor_t>>( + new tree_cursor_t(this)); + } + auto pos = search_position_t::begin(); + key_view_t index_key; + auto p_value = impl->get_p_value(pos, &index_key); + return node_ertr::make_ready_future<Ref<tree_cursor_t>>( + get_or_track_cursor(pos, index_key, p_value)); +} + +node_future<Ref<tree_cursor_t>> +LeafNode::lookup_largest(context_t) { + if (unlikely(impl->is_empty())) { + assert(is_root()); + return node_ertr::make_ready_future<Ref<tree_cursor_t>>( + new tree_cursor_t(this)); + } + search_position_t pos; + const onode_t* p_value = nullptr; + key_view_t index_key; + impl->get_largest_slot(pos, index_key, &p_value); + return node_ertr::make_ready_future<Ref<tree_cursor_t>>( + get_or_track_cursor(pos, index_key, p_value)); +} + +node_future<Node::search_result_t> +LeafNode::lower_bound_tracked( + context_t c, const key_hobj_t& key, MatchHistory& history) { + key_view_t index_key; + auto result = impl->lower_bound(key, history, &index_key); + Ref<tree_cursor_t> cursor; + if (result.position.is_end()) { + assert(!result.p_value); + cursor = new tree_cursor_t(this); + } else { + cursor = get_or_track_cursor(result.position, index_key, result.p_value); + } + return node_ertr::make_ready_future<search_result_t>( + search_result_t{cursor, result.mstat}); +} + +node_future<> LeafNode::do_get_tree_stats(context_t, tree_stats_t& stats) { + auto nstats = impl->get_stats(); + stats.size_persistent_leaf += nstats.size_persistent; + stats.size_filled_leaf += nstats.size_filled; + stats.size_logical_leaf += nstats.size_logical; + stats.size_overhead_leaf += nstats.size_overhead; + stats.size_value_leaf += nstats.size_value; + stats.num_kvs_leaf += nstats.num_kvs; + stats.num_nodes_leaf += 1; + return node_ertr::now(); +} + +node_future<> LeafNode::test_clone_root( + context_t c_other, RootNodeTracker& tracker_other) const { + assert(is_root()); + assert(impl->is_level_tail()); + assert(impl->field_type() == 
field_type_t::N0); + Ref<const LeafNode> this_ref = this; + return LeafNode::allocate(c_other, field_type_t::N0, true + ).safe_then([this, c_other, &tracker_other](auto fresh_other) { + impl->test_copy_to(fresh_other.mut); + auto cloned_root = fresh_other.node; + return c_other.nm.get_super(c_other.t, tracker_other + ).safe_then([c_other, cloned_root](auto&& super_other) { + cloned_root->make_root_new(c_other, std::move(super_other)); + }); + }).safe_then([this_ref]{}); +} + +node_future<Ref<tree_cursor_t>> LeafNode::insert_value( + context_t c, const key_hobj_t& key, const onode_t& value, + const search_position_t& pos, const MatchHistory& history, + match_stat_t mstat) { +#ifndef NDEBUG + if (pos.is_end()) { + assert(impl->is_level_tail()); + } +#endif + logger().debug("OTree::Leaf::Insert: " + "pos({}), {}, {}, {}, mstat({}) ...", + pos, key, value, history, mstat); + search_position_t insert_pos = pos; + auto [insert_stage, insert_size] = impl->evaluate_insert( + key, value, history, mstat, insert_pos); + auto free_size = impl->free_size(); + if (free_size >= insert_size) { + // insert + on_layout_change(); + impl->prepare_mutate(c); + auto p_value = impl->insert(key, value, insert_pos, insert_stage, insert_size); + assert(impl->free_size() == free_size - insert_size); + assert(insert_pos <= pos); + assert(p_value->size == value.size); + auto ret = track_insert(insert_pos, insert_stage, p_value); + validate_tracked_cursors(); + return node_ertr::make_ready_future<Ref<tree_cursor_t>>(ret); + } + // split and insert + Ref<LeafNode> this_ref = this; + return (is_root() ? 
upgrade_root(c) : node_ertr::now() + ).safe_then([this, c] { + return LeafNode::allocate(c, impl->field_type(), impl->is_level_tail()); + }).safe_then([this_ref, this, c, &key, &value, &history, + insert_pos, insert_stage, insert_size](auto fresh_right) mutable { + auto right_node = fresh_right.node; + // no need to bump version for right node, as it is fresh + on_layout_change(); + impl->prepare_mutate(c); + auto [split_pos, is_insert_left, p_value] = impl->split_insert( + fresh_right.mut, *right_node->impl, key, value, + insert_pos, insert_stage, insert_size); + assert(p_value->size == value.size); + track_split(split_pos, right_node); + Ref<tree_cursor_t> ret; + if (is_insert_left) { + ret = track_insert(insert_pos, insert_stage, p_value); + } else { + ret = right_node->track_insert(insert_pos, insert_stage, p_value); + } + validate_tracked_cursors(); + right_node->validate_tracked_cursors(); + + // propagate insert to parent + return insert_parent(c, right_node).safe_then([ret] { + return ret; + }); + // TODO (optimize) + // try to acquire space from siblings before split... 
see btrfs + }); +} + +node_future<Ref<LeafNode>> LeafNode::allocate_root( + context_t c, RootNodeTracker& root_tracker) { + return LeafNode::allocate(c, field_type_t::N0, true + ).safe_then([c, &root_tracker](auto fresh_node) { + auto root = fresh_node.node; + return c.nm.get_super(c.t, root_tracker + ).safe_then([c, root](auto&& super) { + root->make_root_new(c, std::move(super)); + return root; + }); + }); +} + +Ref<tree_cursor_t> LeafNode::get_or_track_cursor( + const search_position_t& position, + const key_view_t& key, const onode_t* p_value) { + assert(!position.is_end()); + assert(p_value); + Ref<tree_cursor_t> p_cursor; + auto found = tracked_cursors.find(position); + if (found == tracked_cursors.end()) { + p_cursor = new tree_cursor_t(this, position, key, p_value, layout_version); + } else { + p_cursor = found->second; + assert(p_cursor->get_leaf_node() == this); + assert(p_cursor->get_position() == position); + p_cursor->update_kv(key, p_value, layout_version); + } + return p_cursor; +} + +void LeafNode::validate_cursor(tree_cursor_t& cursor) const { +#ifndef NDEBUG + assert(this == cursor.get_leaf_node().get()); + assert(!cursor.is_end()); + auto [key, val, ver] = get_kv(cursor.get_position()); + assert(key == cursor.get_key_view()); + assert(val == cursor.get_p_value()); +#endif +} + +Ref<tree_cursor_t> LeafNode::track_insert( + const search_position_t& insert_pos, match_stage_t insert_stage, + const onode_t* p_onode) { + // update cursor position + auto pos_upper_bound = insert_pos; + pos_upper_bound.index_by_stage(insert_stage) = INDEX_UPPER_BOUND; + auto first = tracked_cursors.lower_bound(insert_pos); + auto last = tracked_cursors.lower_bound(pos_upper_bound); + std::vector<tree_cursor_t*> p_cursors; + std::for_each(first, last, [&p_cursors](auto& kv) { + p_cursors.push_back(kv.second); + }); + tracked_cursors.erase(first, last); + for (auto& p_cursor : p_cursors) { + search_position_t new_pos = p_cursor->get_position(); + 
++new_pos.index_by_stage(insert_stage); + p_cursor->update_track<true>(this, new_pos); + } + + // track insert + // TODO: getting key_view_t from stage::proceed_insert() and + // stage::append_insert() has not supported yet + return new tree_cursor_t(this, insert_pos); +} + +void LeafNode::track_split( + const search_position_t& split_pos, Ref<LeafNode> right_node) { + // update cursor ownership and position + auto first = tracked_cursors.lower_bound(split_pos); + auto iter = first; + while (iter != tracked_cursors.end()) { + search_position_t new_pos = iter->first; + new_pos -= split_pos; + iter->second->update_track<false>(right_node, new_pos); + ++iter; + } + tracked_cursors.erase(first, tracked_cursors.end()); +} + +node_future<LeafNode::fresh_node_t> LeafNode::allocate( + context_t c, field_type_t field_type, bool is_level_tail) { + return LeafNodeImpl::allocate(c, field_type, is_level_tail + ).safe_then([](auto&& fresh_impl) { + auto node = Ref<LeafNode>(new LeafNode( + fresh_impl.impl.get(), std::move(fresh_impl.impl))); + return fresh_node_t{node, fresh_impl.mut}; + }); +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node.h new file mode 100644 index 00000000000..79f61d73b8f --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node.h @@ -0,0 +1,476 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <map> +#include <memory> +#include <ostream> +#include <boost/smart_ptr/intrusive_ref_counter.hpp> + +#include "crimson/common/type_helpers.h" + +#include "node_extent_mutable.h" +#include "stages/key_layout.h" +#include "stages/stage_types.h" +#include "super.h" +#include "tree_types.h" + +/** + * Tree example (2 levels): + * + * Root node keys: [ 3 7 ] + * values: [p1 p2 p3] + * / | \ + * ------- | ------- + * | | | + * V V V + * Leaf node keys: [ 1 2 3] [ 4 5 7] [ 9 11 12] + * 
values: [v1 v2 v3] [v4 v5 v6] [v7 v8 v9] + * + * Tree structure properties: + * - As illustrated above, the parent key is strictly equal to its left child's + * largest key; + * - If a tree is indexing multiple seastore transactions, each transaction + * will be mapped to a Super which points to a distinct root node. So the + * transactions are isolated at tree level. However, tree nodes from + * different transactions can reference the same seastore CachedExtent before + * modification; + * - The resources of the transactional tree are tracked by tree_cursor_ts held + * by users. As long as any cursor is alive, the according tree hierarchy is + * alive and keeps tracked. See the reversed resource management sections + * below; + */ + +namespace crimson::os::seastore::onode { + +class LeafNode; +class InternalNode; + +/** + * tree_cursor_t + * + * A cursor points to a position (LeafNode and search_position_t) of the tree + * where it can find the according key and value pair. The position is updated + * by LeafNode insert/split/delete/merge internally and is kept valid. It also + * caches the key-value information for a specific node layout version. + * + * Exposes public interfaces for Btree::Cursor. + */ +using layout_version_t = uint32_t; +class tree_cursor_t final + : public boost::intrusive_ref_counter< + tree_cursor_t, boost::thread_unsafe_counter> { + public: + // public to Btree + ~tree_cursor_t(); + tree_cursor_t(const tree_cursor_t&) = delete; + tree_cursor_t(tree_cursor_t&&) = delete; + tree_cursor_t& operator=(const tree_cursor_t&) = delete; + tree_cursor_t& operator=(tree_cursor_t&&) = delete; + + /** + * is_end + * + * Represents one-past-the-last of all the sorted key-value + * pairs in the tree. An end cursor won't contain valid key-value + * information. + */ + bool is_end() const { return position.is_end(); } + + /// Returns the key view in tree if it is not an end cursor. 
+ const key_view_t& get_key_view() const; + + /// Returns the value pointer in tree if it is not an end cursor. + const onode_t* get_p_value() const; + + private: + tree_cursor_t(Ref<LeafNode>, const search_position_t&); + tree_cursor_t(Ref<LeafNode>, const search_position_t&, + const key_view_t& key, const onode_t*, layout_version_t); + // lookup reaches the end, contain leaf node for further insert + tree_cursor_t(Ref<LeafNode>); + const search_position_t& get_position() const { return position; } + Ref<LeafNode> get_leaf_node() { return leaf_node; } + template <bool VALIDATE> + void update_track(Ref<LeafNode>, const search_position_t&); + void update_kv(const key_view_t&, const onode_t*, layout_version_t) const; + void ensure_kv() const; + + private: + /** + * Reversed resource management (tree_cursor_t) + * + * tree_cursor_t holds a reference to the LeafNode, so the LeafNode will be + * alive as long as any of it's cursors is still referenced by user. + */ + Ref<LeafNode> leaf_node; + search_position_t position; + + // cached information + mutable std::optional<key_view_t> key_view; + mutable const onode_t* p_value; + mutable layout_version_t node_version; + + friend class LeafNode; + friend class Node; // get_position(), get_leaf_node() +}; + +/** + * Node + * + * An abstracted class for both InternalNode and LeafNode. + * + * Exposes public interfaces for Btree. 
+ */ +class Node + : public boost::intrusive_ref_counter< + Node, boost::thread_unsafe_counter> { + public: + // public to Btree + using node_ertr = crimson::errorator< + crimson::ct_error::input_output_error, + crimson::ct_error::invarg, + crimson::ct_error::enoent, + crimson::ct_error::erange>; + template <class ValueT=void> + using node_future = node_ertr::future<ValueT>; + + struct search_result_t { + bool is_end() const { return p_cursor->is_end(); } + Ref<tree_cursor_t> p_cursor; + match_stat_t mstat; + + MatchKindBS match() const { + assert(mstat >= MSTAT_MIN && mstat <= MSTAT_MAX); + return (mstat == MSTAT_EQ ? MatchKindBS::EQ : MatchKindBS::NE); + } + }; + + virtual ~Node(); + Node(const Node&) = delete; + Node(Node&&) = delete; + Node& operator=(const Node&) = delete; + Node& operator=(Node&&) = delete; + + /** + * level + * + * A positive value denotes the level (or height) of this node in tree. + * 0 means LeafNode, positive means InternalNode. + */ + level_t level() const; + + /** + * lookup_smallest + * + * Returns a cursor pointing to the smallest key in the sub-tree formed by + * this node. + * + * Returns an end cursor if it is an empty root node. + */ + virtual node_future<Ref<tree_cursor_t>> lookup_smallest(context_t) = 0; + + /** + * lookup_largest + * + * Returns a cursor pointing to the largest key in the sub-tree formed by + * this node. + * + * Returns an end cursor if it is an empty root node. + */ + virtual node_future<Ref<tree_cursor_t>> lookup_largest(context_t) = 0; + + /** + * lower_bound + * + * Returns a cursor pointing to the first element in the range [first, last) + * of the sub-tree which does not compare less than the input key. The + * result also denotes whether the pointed key is equal to the input key. 
+ * + * Returns an end cursor with MatchKindBS::NE if: + * - It is an empty root node; + * - Or the input key is larger than all the keys in the sub-tree; + */ + node_future<search_result_t> lower_bound(context_t c, const key_hobj_t& key); + + /** + * insert + * + * Try to insert a key-value pair into the sub-tree formed by this node. + * + * Returns a boolean denoting whether the insertion is successful: + * - If true, the returned cursor points to the inserted element in tree; + * - If false, the returned cursor points to the conflicting element in tree; + */ + node_future<std::pair<Ref<tree_cursor_t>, bool>> insert( + context_t, const key_hobj_t&, const onode_t&); + + /// Recursively collects the statistics of the sub-tree formed by this node + node_future<tree_stats_t> get_tree_stats(context_t); + + /// Returns an ostream containing a dump of all the elements in the node. + std::ostream& dump(std::ostream&) const; + + /// Returns an ostream containing an one-line summary of this node. + std::ostream& dump_brief(std::ostream&) const; + + /// Initializes the tree by allocating an empty root node. + static node_future<> mkfs(context_t, RootNodeTracker&); + + /// Loads the tree root. The tree must be initialized. + static node_future<Ref<Node>> load_root(context_t, RootNodeTracker&); + + // Only for unit test purposes. 
+ void test_make_destructable(context_t, NodeExtentMutable&, Super::URef&&); + virtual node_future<> test_clone_root(context_t, RootNodeTracker&) const = 0; + + protected: + virtual node_future<> test_clone_non_root(context_t, Ref<InternalNode>) const { + ceph_abort("impossible path"); + } + virtual node_future<search_result_t> lower_bound_tracked( + context_t, const key_hobj_t&, MatchHistory&) = 0; + virtual node_future<> do_get_tree_stats(context_t, tree_stats_t&) = 0; + + protected: + Node(NodeImplURef&&); + bool is_root() const { + assert((super && !_parent_info.has_value()) || + (!super && _parent_info.has_value())); + return !_parent_info.has_value(); + } + + // as root + void make_root(context_t c, Super::URef&& _super); + void make_root_new(context_t c, Super::URef&& _super) { + assert(_super->get_root_laddr() == L_ADDR_NULL); + make_root(c, std::move(_super)); + } + void make_root_from(context_t c, Super::URef&& _super, laddr_t from_addr) { + assert(_super->get_root_laddr() == from_addr); + make_root(c, std::move(_super)); + } + void as_root(Super::URef&& _super); + node_future<> upgrade_root(context_t); + + // as child/non-root + template <bool VALIDATE = true> + void as_child(const search_position_t&, Ref<InternalNode>); + struct parent_info_t { + search_position_t position; + Ref<InternalNode> ptr; + }; + const parent_info_t& parent_info() const { return *_parent_info; } + node_future<> insert_parent(context_t, Ref<Node> right_node); + + private: + /** + * Reversed resource management (Node) + * + * Root Node holds a reference to its parent Super class, so its parent + * will be alive as long as this root node is alive. + * + * None-root Node holds a reference to its parent Node, so its parent will + * be alive as long as any of it's children is alive. 
+ */ + // as root + Super::URef super; + // as child/non-root + std::optional<parent_info_t> _parent_info; + + private: + static node_future<Ref<Node>> load(context_t, laddr_t, bool expect_is_level_tail); + + NodeImplURef impl; + friend class InternalNode; +}; +inline std::ostream& operator<<(std::ostream& os, const Node& node) { + return node.dump_brief(os); +} + +/** + * InternalNode + * + * A concrete implementation of Node class that represents an internal tree + * node. Its level is always positive and its values are logical block + * addresses to its child nodes. An internal node cannot be empty. + */ +class InternalNode final : public Node { + public: + // public to Node + InternalNode(InternalNodeImpl*, NodeImplURef&&); + ~InternalNode() override { assert(tracked_child_nodes.empty()); } + InternalNode(const InternalNode&) = delete; + InternalNode(InternalNode&&) = delete; + InternalNode& operator=(const InternalNode&) = delete; + InternalNode& operator=(InternalNode&&) = delete; + + node_future<> apply_child_split( + context_t, const search_position_t&, Ref<Node> left, Ref<Node> right); + template <bool VALIDATE> + void do_track_child(Node& child) { + if constexpr (VALIDATE) { + validate_child(child); + } + auto& child_pos = child.parent_info().position; + assert(tracked_child_nodes.find(child_pos) == tracked_child_nodes.end()); + tracked_child_nodes[child_pos] = &child; + } + void do_untrack_child(const Node& child) { + auto& child_pos = child.parent_info().position; + assert(tracked_child_nodes.find(child_pos)->second == &child); + [[maybe_unused]] auto removed = tracked_child_nodes.erase(child_pos); + assert(removed); + } + + static node_future<Ref<InternalNode>> allocate_root( + context_t, level_t, laddr_t, Super::URef&&); + + protected: + node_future<Ref<tree_cursor_t>> lookup_smallest(context_t) override; + node_future<Ref<tree_cursor_t>> lookup_largest(context_t) override; + node_future<search_result_t> lower_bound_tracked( + context_t, const 
key_hobj_t&, MatchHistory&) override; + node_future<> do_get_tree_stats(context_t, tree_stats_t&) override; + + node_future<> test_clone_root(context_t, RootNodeTracker&) const override; + + private: + // XXX: extract a common tracker for InternalNode to track Node, + // and LeafNode to track tree_cursor_t. + node_future<Ref<Node>> get_or_track_child(context_t, const search_position_t&, laddr_t); + void track_insert( + const search_position_t&, match_stage_t, Ref<Node>, Ref<Node> nxt_child = nullptr); + void replace_track(const search_position_t&, Ref<Node> new_child, Ref<Node> old_child); + void track_split(const search_position_t&, Ref<InternalNode>); + void validate_tracked_children() const { +#ifndef NDEBUG + for (auto& kv : tracked_child_nodes) { + assert(kv.first == kv.second->parent_info().position); + validate_child(*kv.second); + } +#endif + } + void validate_child(const Node& child) const; + + struct fresh_node_t { + Ref<InternalNode> node; + NodeExtentMutable mut; + std::pair<Ref<Node>, NodeExtentMutable> make_pair() { + return std::make_pair(Ref<Node>(node), mut); + } + }; + static node_future<fresh_node_t> allocate(context_t, field_type_t, bool, level_t); + + private: + /** + * Reversed resource management (InternalNode) + * + * InteralNode keeps track of its child nodes which are still alive in + * memory, and their positions will be updated throughout + * insert/split/delete/merge operations of this node. + */ + // XXX: leverage intrusive data structure to control memory overhead + std::map<search_position_t, Node*> tracked_child_nodes; + InternalNodeImpl* impl; +}; + +/** + * LeafNode + * + * A concrete implementation of Node class that represents a leaf tree node. + * Its level is always 0. A leaf node can only be empty if it is root. 
+ */ +class LeafNode final : public Node { + public: + // public to tree_cursor_t + ~LeafNode() override { assert(tracked_cursors.empty()); } + LeafNode(const LeafNode&) = delete; + LeafNode(LeafNode&&) = delete; + LeafNode& operator=(const LeafNode&) = delete; + LeafNode& operator=(LeafNode&&) = delete; + + bool is_level_tail() const; + layout_version_t get_layout_version() const { return layout_version; } + std::tuple<key_view_t, const onode_t*, layout_version_t> get_kv( + const search_position_t&) const; + template <bool VALIDATE> + void do_track_cursor(tree_cursor_t& cursor) { + if constexpr (VALIDATE) { + validate_cursor(cursor); + } + auto& cursor_pos = cursor.get_position(); + assert(tracked_cursors.find(cursor_pos) == tracked_cursors.end()); + tracked_cursors[cursor_pos] = &cursor; + } + void do_untrack_cursor(tree_cursor_t& cursor) { + validate_cursor(cursor); + auto& cursor_pos = cursor.get_position(); + assert(tracked_cursors.find(cursor_pos)->second == &cursor); + [[maybe_unused]] auto removed = tracked_cursors.erase(cursor_pos); + assert(removed); + } + + protected: + node_future<Ref<tree_cursor_t>> lookup_smallest(context_t) override; + node_future<Ref<tree_cursor_t>> lookup_largest(context_t) override; + node_future<search_result_t> lower_bound_tracked( + context_t, const key_hobj_t&, MatchHistory&) override; + node_future<> do_get_tree_stats(context_t, tree_stats_t&) override; + + node_future<> test_clone_root(context_t, RootNodeTracker&) const override; + + private: + LeafNode(LeafNodeImpl*, NodeImplURef&&); + node_future<Ref<tree_cursor_t>> insert_value( + context_t, const key_hobj_t&, const onode_t&, + const search_position_t&, const MatchHistory&, + match_stat_t mstat); + static node_future<Ref<LeafNode>> allocate_root(context_t, RootNodeTracker&); + friend class Node; + + private: + // XXX: extract a common tracker for InternalNode to track Node, + // and LeafNode to track tree_cursor_t. 
+ Ref<tree_cursor_t> get_or_track_cursor( + const search_position_t&, const key_view_t&, const onode_t*); + Ref<tree_cursor_t> track_insert( + const search_position_t&, match_stage_t, const onode_t*); + void track_split(const search_position_t&, Ref<LeafNode>); + void validate_tracked_cursors() const { +#ifndef NDEBUG + for (auto& kv : tracked_cursors) { + assert(kv.first == kv.second->get_position()); + validate_cursor(*kv.second); + } +#endif + } + void validate_cursor(tree_cursor_t& cursor) const; + // invalidate p_value pointers in tree_cursor_t + void on_layout_change() { ++layout_version; } + + struct fresh_node_t { + Ref<LeafNode> node; + NodeExtentMutable mut; + std::pair<Ref<Node>, NodeExtentMutable> make_pair() { + return std::make_pair(Ref<Node>(node), mut); + } + }; + static node_future<fresh_node_t> allocate(context_t, field_type_t, bool); + + private: + /** + * Reversed resource management (LeafNode) + * + * LeafNode keeps track of the referencing cursors which are still alive in + * memory, and their positions will be updated throughout + * insert/split/delete/merge operations of this node. 
+ */ + // XXX: leverage intrusive data structure to control memory overhead + std::map<search_position_t, tree_cursor_t*> tracked_cursors; + LeafNodeImpl* impl; + layout_version_t layout_version = 0; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h new file mode 100644 index 00000000000..e29fd5333ff --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h @@ -0,0 +1,42 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "include/buffer.h" +#include "node_types.h" + +namespace crimson::os::seastore::onode { + +/** + * DeltaRecorder + * + * An abstracted class to encapsulate different implementations to apply delta + * to a specific node layout. + */ +class DeltaRecorder { + public: + virtual ~DeltaRecorder() { + assert(is_empty()); + } + + bool is_empty() const { + return encoded.length() == 0; + } + + ceph::bufferlist get_delta() { + assert(!is_empty()); + return std::move(encoded); + } + + virtual node_type_t node_type() const = 0; + virtual field_type_t field_type() const = 0; + virtual void apply_delta(ceph::bufferlist::const_iterator&, + NodeExtentMutable&) = 0; + + protected: + DeltaRecorder() = default; + ceph::bufferlist encoded; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h new file mode 100644 index 00000000000..685055727f2 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h @@ -0,0 +1,229 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "node_extent_manager.h" +#include "node_delta_recorder.h" +#include "node_layout_replayable.h" + +namespace crimson::os::seastore::onode { 
+ +/** + * DeltaRecorderT + * + * Responsible to encode and decode delta, and apply delta for a specific node + * layout. + */ +template <typename FieldType, node_type_t NODE_TYPE> +class DeltaRecorderT final: public DeltaRecorder { + public: + using layout_t = NodeLayoutReplayableT<FieldType, NODE_TYPE>; + using position_t = typename layout_t::position_t; + using StagedIterator = typename layout_t::StagedIterator; + using value_t = typename layout_t::value_t; + static constexpr auto FIELD_TYPE = layout_t::FIELD_TYPE; + + ~DeltaRecorderT() override = default; + + template <KeyT KT> + void encode_insert( + const full_key_t<KT>& key, + const value_t& value, + const position_t& insert_pos, + const match_stage_t& insert_stage, + const node_offset_t& insert_size) { + // TODO encode to encoded + } + + void encode_split( + const StagedIterator& split_at, + const char* p_start) { + // TODO encode to encoded + } + + template <KeyT KT> + void encode_split_insert( + const StagedIterator& split_at, + const full_key_t<KT>& key, + const value_t& value, + const position_t& insert_pos, + const match_stage_t& insert_stage, + const node_offset_t& insert_size, + const char* p_start) { + // TODO encode to encoded + } + + void encode_update_child_addr( + const laddr_t new_addr, + const laddr_packed_t* p_addr, + const char* p_start) { + // TODO encode to encoded + } + + static DeltaRecorderURef create() { + return std::unique_ptr<DeltaRecorder>(new DeltaRecorderT()); + } + + private: + DeltaRecorderT() = default; + node_type_t node_type() const override { return NODE_TYPE; } + field_type_t field_type() const override { return FIELD_TYPE; } + void apply_delta(ceph::bufferlist::const_iterator& delta, + NodeExtentMutable& node) override { + assert(is_empty()); + // TODO decode and apply + assert(false && "not implemented"); + } +}; + +/** + * NodeExtentAccessorT + * + * This component is responsible to reference and mutate the underlying + * NodeExtent, record mutation parameters when 
needed, and apply the recorded + * modifications for a specific node layout. + */ +template <typename FieldType, node_type_t NODE_TYPE> +class NodeExtentAccessorT { + public: + using layout_t = NodeLayoutReplayableT<FieldType, NODE_TYPE>; + using node_stage_t = typename layout_t::node_stage_t; + using position_t = typename layout_t::position_t; + using recorder_t = DeltaRecorderT<FieldType, NODE_TYPE>; + using StagedIterator = typename layout_t::StagedIterator; + using value_t = typename layout_t::value_t; + static constexpr auto FIELD_TYPE = layout_t::FIELD_TYPE; + + NodeExtentAccessorT(NodeExtentRef extent) + : extent{extent}, + node_stage{reinterpret_cast<const FieldType*>(extent->get_read())} { + if (no_recording()) { + mut.emplace(extent->get_mutable()); + assert(extent->get_recorder() == nullptr); + recorder = nullptr; + } else if (needs_recording()) { + mut.emplace(extent->get_mutable()); + auto p_recorder = extent->get_recorder(); + assert(p_recorder != nullptr); + assert(p_recorder->node_type() == NODE_TYPE); + assert(p_recorder->field_type() == FIELD_TYPE); + recorder = static_cast<recorder_t*>(p_recorder); + } else if (needs_mutate()) { + // mut is empty + assert(extent->get_recorder() == nullptr || + extent->get_recorder()->is_empty()); + recorder = nullptr; + } else { + ceph_abort("impossible path"); + } + } + ~NodeExtentAccessorT() = default; + NodeExtentAccessorT(const NodeExtentAccessorT&) = delete; + NodeExtentAccessorT(NodeExtentAccessorT&&) = delete; + NodeExtentAccessorT& operator=(const NodeExtentAccessorT&) = delete; + NodeExtentAccessorT& operator=(NodeExtentAccessorT&&) = delete; + + const node_stage_t& read() const { return node_stage; } + laddr_t get_laddr() const { return extent->get_laddr(); } + + // must be called before any mutate attempes. + // for the safety of mixed read and mutate, call before read. 
+ void prepare_mutate(context_t c) { + if (needs_mutate()) { + auto ref_recorder = recorder_t::create(); + recorder = static_cast<recorder_t*>(ref_recorder.get()); + extent = extent->mutate(c, std::move(ref_recorder)); + assert(needs_recording()); + node_stage = node_stage_t( + reinterpret_cast<const FieldType*>(extent->get_read())); + assert(recorder == static_cast<recorder_t*>(extent->get_recorder())); + mut.emplace(extent->get_mutable()); + } + } + + template <KeyT KT> + const value_t* insert_replayable( + const full_key_t<KT>& key, + const value_t& value, + position_t& insert_pos, + match_stage_t& insert_stage, + node_offset_t& insert_size) { + assert(!needs_mutate()); + if (needs_recording()) { + recorder->template encode_insert<KT>( + key, value, insert_pos, insert_stage, insert_size); + } + return layout_t::template insert<KT>( + *mut, read(), key, value, + insert_pos, insert_stage, insert_size); + } + + void split_replayable(StagedIterator& split_at) { + assert(!needs_mutate()); + if (needs_recording()) { + recorder->encode_split(split_at, read().p_start()); + } + layout_t::split(*mut, read(), split_at); + } + + template <KeyT KT> + const value_t* split_insert_replayable( + StagedIterator& split_at, + const full_key_t<KT>& key, + const value_t& value, + position_t& insert_pos, + match_stage_t& insert_stage, + node_offset_t& insert_size) { + assert(!needs_mutate()); + if (needs_recording()) { + recorder->template encode_split_insert<KT>( + split_at, key, value, insert_pos, insert_stage, insert_size, + read().p_start()); + } + return layout_t::template split_insert<KT>( + *mut, read(), split_at, key, value, + insert_pos, insert_stage, insert_size); + } + + void update_child_addr_replayable( + const laddr_t new_addr, laddr_packed_t* p_addr) { + assert(!needs_mutate()); + if (needs_recording()) { + recorder->encode_update_child_addr(new_addr, p_addr, read().p_start()); + } + return layout_t::update_child_addr(*mut, new_addr, p_addr); + } + + void 
test_copy_to(NodeExtentMutable& to) const { + assert(extent->get_length() == to.get_length()); + std::memcpy(to.get_write(), extent->get_read(), extent->get_length()); + } + + private: + /** + * Possible states with CachedExtent::extent_state_t: + * INITIAL_WRITE_PENDING -- can mutate, no recording + * MUTATION_PENDING -- can mutate, needs recording + * CLEAN/DIRTY -- pending mutate + * INVALID -- impossible + */ + bool no_recording() const { + return extent->is_initial_pending(); + } + bool needs_recording() const { + return extent->is_mutation_pending(); + } + bool needs_mutate() const { + assert(extent->is_valid()); + return !extent->is_pending(); + } + + NodeExtentRef extent; + node_stage_t node_stage; + std::optional<NodeExtentMutable> mut; + // owned by extent + recorder_t* recorder; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.cc new file mode 100644 index 00000000000..c5bd5a3fb96 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.cc @@ -0,0 +1,35 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "node_extent_manager.h" + +#include "node_extent_manager/dummy.h" +#include "node_extent_manager/seastore.h" +#include "stages/node_stage_layout.h" + +namespace crimson::os::seastore::onode { + +std::pair<node_type_t, field_type_t> NodeExtent::get_types() const { + const auto header = reinterpret_cast<const node_header_t*>(get_read()); + auto node_type = header->get_node_type(); + auto field_type = header->get_field_type(); + if (!field_type.has_value()) { + throw std::runtime_error("load failed: bad field type"); + } + return {node_type, *field_type}; +} + +NodeExtentManagerURef NodeExtentManager::create_dummy(bool is_sync) { + if (is_sync) { + return NodeExtentManagerURef(new DummyNodeExtentManager<true>()); + } else { + return 
NodeExtentManagerURef(new DummyNodeExtentManager<false>()); + } +} + +NodeExtentManagerURef NodeExtentManager::create_seastore( + TransactionManager& tm, laddr_t min_laddr) { + return NodeExtentManagerURef(new SeastoreNodeExtentManager(tm, min_laddr)); +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h new file mode 100644 index 00000000000..a633633b04a --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h @@ -0,0 +1,82 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/common/type_helpers.h" +#include "crimson/os/seastore/cached_extent.h" +#include "crimson/os/seastore/transaction_manager.h" + +#include "fwd.h" +#include "super.h" +#include "node_extent_mutable.h" +#include "node_types.h" + +/** + * node_extent_manager.h + * + * Contains general interfaces for different backends (Dummy and Seastore). + */ + +namespace crimson::os::seastore::onode { + +using crimson::os::seastore::LogicalCachedExtent; +class NodeExtent : public LogicalCachedExtent { + public: + virtual ~NodeExtent() = default; + std::pair<node_type_t, field_type_t> get_types() const; + const char* get_read() const { + return get_bptr().c_str(); + } + NodeExtentMutable get_mutable() { + assert(is_pending()); + return do_get_mutable(); + } + + virtual DeltaRecorder* get_recorder() const = 0; + virtual NodeExtentRef mutate(context_t, DeltaRecorderURef&&) = 0; + + protected: + template <typename... T> + NodeExtent(T&&... t) : LogicalCachedExtent(std::forward<T>(t)...) 
{} + + NodeExtentMutable do_get_mutable() { + return NodeExtentMutable(*this); + } + + /** + * Abstracted interfaces to implement: + * - CachedExtent::duplicate_for_write() -> CachedExtentRef + * - CachedExtent::get_type() -> extent_types_t + * - CachedExtent::get_delta() -> ceph::bufferlist + * - LogicalCachedExtent::apply_delta(const ceph::bufferlist&) -> void + */ + + private: + friend class NodeExtentMutable; +}; + +using crimson::os::seastore::TransactionManager; +class NodeExtentManager { + public: + virtual ~NodeExtentManager() = default; + using tm_ertr = crimson::errorator< + crimson::ct_error::input_output_error, + crimson::ct_error::invarg, + crimson::ct_error::enoent, + crimson::ct_error::erange>; + template <class ValueT=void> + using tm_future = tm_ertr::future<ValueT>; + + virtual bool is_read_isolated() const = 0; + virtual tm_future<NodeExtentRef> read_extent( + Transaction&, laddr_t, extent_len_t) = 0; + virtual tm_future<NodeExtentRef> alloc_extent(Transaction&, extent_len_t) = 0; + virtual tm_future<Super::URef> get_super(Transaction&, RootNodeTracker&) = 0; + + static NodeExtentManagerURef create_dummy(bool is_sync); + static NodeExtentManagerURef create_seastore( + TransactionManager& tm, laddr_t min_laddr = L_ADDR_MIN); +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h new file mode 100644 index 00000000000..9a9975bf315 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h @@ -0,0 +1,152 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <chrono> +#include <seastar/core/sleep.hh> + +#include "include/buffer_raw.h" + +#include "crimson/common/log.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h" + +/** + * dummy.h + * + * Dummy backend implementations for 
test purposes. + */ + +namespace crimson::os::seastore::onode { + +class DummySuper final: public Super { + public: + DummySuper(Transaction& t, RootNodeTracker& tracker, laddr_t* p_root_laddr) + : Super(t, tracker), p_root_laddr{p_root_laddr} {} + ~DummySuper() override = default; + protected: + laddr_t get_root_laddr() const override { return *p_root_laddr; } + void write_root_laddr(context_t, laddr_t addr) override { + logger().info("OTree::Dummy: update root {:#x} ...", addr); + *p_root_laddr = addr; + } + private: + static seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } + laddr_t* p_root_laddr; +}; + +class DummyNodeExtent final: public NodeExtent { + public: + DummyNodeExtent(ceph::bufferptr &&ptr) : NodeExtent(std::move(ptr)) { + state = extent_state_t::INITIAL_WRITE_PENDING; + } + ~DummyNodeExtent() override = default; + protected: + NodeExtentRef mutate(context_t, DeltaRecorderURef&&) override { + ceph_abort("impossible path"); } + DeltaRecorder* get_recorder() const override { + return nullptr; } + CachedExtentRef duplicate_for_write() override { + ceph_abort("impossible path"); } + extent_types_t get_type() const override { + ceph_abort("impossible path"); } + ceph::bufferlist get_delta() override { + ceph_abort("impossible path"); } + void apply_delta(const ceph::bufferlist&) override { + ceph_abort("impossible path"); } +}; + +template <bool SYNC> +class DummyNodeExtentManager final: public NodeExtentManager { + static constexpr size_t ALIGNMENT = 4096; + public: + ~DummyNodeExtentManager() override = default; + protected: + bool is_read_isolated() const override { return false; } + + tm_future<NodeExtentRef> read_extent( + Transaction& t, laddr_t addr, extent_len_t len) override { + logger().trace("OTree::Dummy: reading {}B at {:#x} ...", len, addr); + if constexpr (SYNC) { + return read_extent_sync(t, addr, len); + } else { + using namespace std::chrono_literals; + return seastar::sleep(1us).then([this, &t, addr, 
len] { + return read_extent_sync(t, addr, len); + }); + } + } + + tm_future<NodeExtentRef> alloc_extent( + Transaction& t, extent_len_t len) override { + logger().trace("OTree::Dummy: allocating {}B ...", len); + if constexpr (SYNC) { + return alloc_extent_sync(t, len); + } else { + using namespace std::chrono_literals; + return seastar::sleep(1us).then([this, &t, len] { + return alloc_extent_sync(t, len); + }); + } + } + + tm_future<Super::URef> get_super( + Transaction& t, RootNodeTracker& tracker) override { + logger().trace("OTree::Dummy: get root ..."); + if constexpr (SYNC) { + return get_super_sync(t, tracker); + } else { + using namespace std::chrono_literals; + return seastar::sleep(1us).then([this, &t, &tracker] { + return get_super_sync(t, tracker); + }); + } + } + + private: + tm_future<NodeExtentRef> read_extent_sync( + Transaction& t, laddr_t addr, extent_len_t len) { + auto iter = allocate_map.find(addr); + assert(iter != allocate_map.end()); + auto extent = iter->second; + logger().trace("OTree::Dummy: read {}B at {:#x}", + extent->get_length(), extent->get_laddr()); + assert(extent->get_laddr() == addr); + assert(extent->get_length() == len); + return tm_ertr::make_ready_future<NodeExtentRef>(extent); + } + + tm_future<NodeExtentRef> alloc_extent_sync( + Transaction& t, extent_len_t len) { + assert(len % ALIGNMENT == 0); + auto r = ceph::buffer::create_aligned(len, ALIGNMENT); + auto addr = reinterpret_cast<laddr_t>(r->get_data()); + auto bp = ceph::bufferptr(std::move(r)); + auto extent = Ref<DummyNodeExtent>(new DummyNodeExtent(std::move(bp))); + extent->set_laddr(addr); + assert(allocate_map.find(extent->get_laddr()) == allocate_map.end()); + allocate_map.insert({extent->get_laddr(), extent}); + logger().debug("OTree::Dummy: allocated {}B at {:#x}", + extent->get_length(), extent->get_laddr()); + assert(extent->get_length() == len); + return tm_ertr::make_ready_future<NodeExtentRef>(extent); + } + + tm_future<Super::URef> get_super_sync( + 
Transaction& t, RootNodeTracker& tracker) { + logger().debug("OTree::Dummy: got root {:#x}", root_laddr); + return tm_ertr::make_ready_future<Super::URef>( + Super::URef(new DummySuper(t, tracker, &root_laddr))); + } + + static seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } + + std::map<laddr_t, Ref<DummyNodeExtent>> allocate_map; + laddr_t root_laddr = L_ADDR_NULL; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc new file mode 100644 index 00000000000..8d88485bf72 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc @@ -0,0 +1,88 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "seastore.h" + +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.h" + +namespace { + +seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); +} + +} + +namespace crimson::os::seastore::onode { + +static DeltaRecorderURef create_recorder( + node_type_t node_type, field_type_t field_type) { + if (node_type == node_type_t::LEAF) { + if (field_type == field_type_t::N0) { + return DeltaRecorderT<node_fields_0_t, node_type_t::LEAF>::create(); + } else if (field_type == field_type_t::N1) { + return DeltaRecorderT<node_fields_1_t, node_type_t::LEAF>::create(); + } else if (field_type == field_type_t::N2) { + return DeltaRecorderT<node_fields_2_t, node_type_t::LEAF>::create(); + } else if (field_type == field_type_t::N3) { + return DeltaRecorderT<leaf_fields_3_t, node_type_t::LEAF>::create(); + } else { + ceph_abort("impossible path"); + } + } else if (node_type == node_type_t::INTERNAL) { + if (field_type == field_type_t::N0) { + return 
DeltaRecorderT<node_fields_0_t, node_type_t::INTERNAL>::create(); + } else if (field_type == field_type_t::N1) { + return DeltaRecorderT<node_fields_1_t, node_type_t::INTERNAL>::create(); + } else if (field_type == field_type_t::N2) { + return DeltaRecorderT<node_fields_2_t, node_type_t::INTERNAL>::create(); + } else if (field_type == field_type_t::N3) { + return DeltaRecorderT<internal_fields_3_t, node_type_t::INTERNAL>::create(); + } else { + ceph_abort("impossible path"); + } + } else { + ceph_abort("impossible path"); + } +} + +void SeastoreSuper::write_root_laddr(context_t c, laddr_t addr) { + logger().info("OTree::Seastore: update root {:#x} ...", addr); + root_addr = addr; + auto nm = static_cast<SeastoreNodeExtentManager*>(&c.nm); + nm->get_tm().write_onode_root(c.t, addr); +} + +NodeExtentRef SeastoreNodeExtent::mutate( + context_t c, DeltaRecorderURef&& _recorder) { + logger().debug("OTree::Seastore: mutate {:#x} ...", get_laddr()); + auto nm = static_cast<SeastoreNodeExtentManager*>(&c.nm); + auto extent = nm->get_tm().get_mutable_extent(c.t, this); + auto ret = extent->cast<SeastoreNodeExtent>(); + assert(!ret->recorder || ret->recorder->is_empty()); + ret->recorder = std::move(_recorder); + return ret; +} + +void SeastoreNodeExtent::apply_delta(const ceph::bufferlist& bl) { + logger().debug("OTree::Seastore: replay {:#x} ...", get_laddr()); + if (!recorder) { + auto [node_type, field_type] = get_types(); + recorder = create_recorder(node_type, field_type); + } else { +#ifndef NDEBUG + auto [node_type, field_type] = get_types(); + assert(recorder->node_type() == node_type); + assert(recorder->field_type() == field_type); +#endif + } + assert(is_clean()); + auto node = do_get_mutable(); + auto p = bl.cbegin(); + while (p != bl.end()) { + recorder->apply_delta(p, node); + } +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h 
b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h new file mode 100644 index 00000000000..9f69d10adcf --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h @@ -0,0 +1,118 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/common/log.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h" + +/** + * seastore.h + * + * Seastore backend implementations. + */ + +namespace crimson::os::seastore::onode { + +class SeastoreSuper final: public Super { + public: + SeastoreSuper(Transaction& t, RootNodeTracker& tracker, + laddr_t root_addr, TransactionManager& tm) + : Super(t, tracker), root_addr{root_addr}, tm{tm} {} + ~SeastoreSuper() override = default; + protected: + laddr_t get_root_laddr() const override { + return root_addr; + } + void write_root_laddr(context_t c, laddr_t addr) override; + private: + static seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } + laddr_t root_addr; + TransactionManager& tm; +}; + +class SeastoreNodeExtent final: public NodeExtent { + public: + SeastoreNodeExtent(ceph::bufferptr &&ptr) + : NodeExtent(std::move(ptr)) {} + SeastoreNodeExtent(const SeastoreNodeExtent& other) + : NodeExtent(other) {} + ~SeastoreNodeExtent() override = default; + protected: + NodeExtentRef mutate(context_t, DeltaRecorderURef&&) override; + + DeltaRecorder* get_recorder() const override { + return recorder.get(); + } + + CachedExtentRef duplicate_for_write() override { + return CachedExtentRef(new SeastoreNodeExtent(*this)); + } + extent_types_t get_type() const override { + return extent_types_t::ONODE_BLOCK_STAGED; + } + ceph::bufferlist get_delta() override { + assert(recorder); + return recorder->get_delta(); + } + void apply_delta(const 
ceph::bufferlist&) override; + private: + DeltaRecorderURef recorder; +}; + +class SeastoreNodeExtentManager final: public NodeExtentManager { + public: + SeastoreNodeExtentManager(TransactionManager& tm, laddr_t min) + : tm{tm}, addr_min{min} {} + ~SeastoreNodeExtentManager() override = default; + TransactionManager& get_tm() { return tm; } + protected: + bool is_read_isolated() const override { return true; } + + tm_future<NodeExtentRef> read_extent( + Transaction& t, laddr_t addr, extent_len_t len) override { + logger().debug("OTree::Seastore: reading {}B at {:#x} ...", len, addr); + return tm.read_extents<SeastoreNodeExtent>(t, addr, len + ).safe_then([addr, len](auto&& extents) { + assert(extents.size() == 1); + [[maybe_unused]] auto [laddr, e] = extents.front(); + logger().trace("OTree::Seastore: read {}B at {:#x}", + e->get_length(), e->get_laddr()); + assert(e->get_laddr() == addr); + assert(e->get_length() == len); + return NodeExtentRef(e); + }); + } + + tm_future<NodeExtentRef> alloc_extent( + Transaction& t, extent_len_t len) override { + logger().debug("OTree::Seastore: allocating {}B ...", len); + return tm.alloc_extent<SeastoreNodeExtent>(t, addr_min, len + ).safe_then([len](auto extent) { + logger().debug("OTree::Seastore: allocated {}B at {:#x}", + extent->get_length(), extent->get_laddr()); + assert(extent->get_length() == len); + return NodeExtentRef(extent); + }); + } + + tm_future<Super::URef> get_super( + Transaction& t, RootNodeTracker& tracker) override { + logger().trace("OTree::Seastore: get root ..."); + return tm.read_onode_root(t).safe_then([this, &t, &tracker](auto root_addr) { + logger().debug("OTree::Seastore: got root {:#x}", root_addr); + return Super::URef(new SeastoreSuper(t, tracker, root_addr, tm)); + }); + } + private: + static seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } + TransactionManager& tm; // backend that serves read_extent/alloc_extent/get_super + const laddr_t addr_min; // forwarded to tm.alloc_extent() on every allocation +}; + +} diff --git 
a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_mutable.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_mutable.cc new file mode 100644 index 00000000000..de67500274e --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_mutable.cc @@ -0,0 +1,32 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "node_extent_mutable.h" +#include "node_extent_manager.h" + +namespace crimson::os::seastore::onode { + +NodeExtentMutable::NodeExtentMutable(NodeExtent& extent) + : extent{extent} { + assert(extent.is_pending()); +} + +const char* NodeExtentMutable::get_read() const { + assert(extent.is_pending()); + return extent.get_bptr().c_str(); +} + +char* NodeExtentMutable::get_write() { + assert(extent.is_pending()); + return extent.get_bptr().c_str(); +} + +extent_len_t NodeExtentMutable::get_length() const { + return extent.get_length(); +} + +const char* NodeExtentMutable::buf_upper_bound() const { + return get_read() + get_length(); // one past the last byte of the extent buffer +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_mutable.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_mutable.h new file mode 100644 index 00000000000..6e58421c4e7 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_mutable.h @@ -0,0 +1,79 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <cstring> + +#include "fwd.h" + +namespace crimson::os::seastore::onode { + +class NodeExtent; + +/** + * NodeExtentMutable + * + * A thin wrapper of NodeExtent to make sure that only the newly allocated + * or the duplicated NodeExtent is mutable, and the memory modifications are + * safe within the extent range. 
+ */ +class NodeExtentMutable { + public: + void copy_in_absolute(void* dst, const void* src, extent_len_t len) { + assert((char*)dst >= get_write()); + assert((char*)dst + len <= buf_upper_bound()); + std::memcpy(dst, src, len); + } + template <typename T> + void copy_in_absolute(void* dst, const T& src) { + copy_in_absolute(dst, &src, sizeof(T)); + } + + const void* copy_in_relative( + extent_len_t dst_offset, const void* src, extent_len_t len) { + auto dst = get_write() + dst_offset; + copy_in_absolute(dst, src, len); + return dst; + } + template <typename T> + const T* copy_in_relative( + extent_len_t dst_offset, const T& src) { + auto dst = copy_in_relative(dst_offset, &src, sizeof(T)); + return static_cast<const T*>(dst); + } + + void shift_absolute(const void* src, extent_len_t len, int offset) { + assert((const char*)src >= get_write()); + assert((const char*)src + len <= buf_upper_bound()); + char* to = (char*)src + offset; + assert(to >= get_write()); + assert(to + len <= buf_upper_bound()); + if (len != 0) { + std::memmove(to, src, len); + } + } + void shift_relative(extent_len_t src_offset, extent_len_t len, int offset) { + shift_absolute(get_write() + src_offset, len, offset); + } + + template <typename T> + void validate_inplace_update(const T& updated) { + assert((const char*)&updated >= get_write()); + assert((const char*)&updated + sizeof(T) <= buf_upper_bound()); + } + + char* get_write(); + extent_len_t get_length() const; + + private: + explicit NodeExtentMutable(NodeExtent&); + const char* get_read() const; + const char* buf_upper_bound() const; + + NodeExtent& extent; + + friend class NodeExtent; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.cc new file mode 100644 index 00000000000..e64ef91d8b8 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.cc @@ -0,0 +1,76 @@ +// -*- mode:C++; tab-width:8; 
c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "node_impl.h" +#include "node_layout.h" + +namespace crimson::os::seastore::onode { + +#ifdef UNIT_TESTS_BUILT +last_split_info_t last_split = {}; +#endif + +// XXX: branchless allocation +InternalNodeImpl::alloc_ertr::future<InternalNodeImpl::fresh_impl_t> +InternalNodeImpl::allocate( + context_t c, field_type_t type, bool is_level_tail, level_t level) { + if (type == field_type_t::N0) { + return InternalNode0::allocate(c, is_level_tail, level); + } else if (type == field_type_t::N1) { + return InternalNode1::allocate(c, is_level_tail, level); + } else if (type == field_type_t::N2) { + return InternalNode2::allocate(c, is_level_tail, level); + } else if (type == field_type_t::N3) { + return InternalNode3::allocate(c, is_level_tail, level); + } else { + ceph_abort("impossible path"); + } +} + +LeafNodeImpl::alloc_ertr::future<LeafNodeImpl::fresh_impl_t> +LeafNodeImpl::allocate( + context_t c, field_type_t type, bool is_level_tail) { + if (type == field_type_t::N0) { + return LeafNode0::allocate(c, is_level_tail, 0); + } else if (type == field_type_t::N1) { + return LeafNode1::allocate(c, is_level_tail, 0); + } else if (type == field_type_t::N2) { + return LeafNode2::allocate(c, is_level_tail, 0); + } else if (type == field_type_t::N3) { + return LeafNode3::allocate(c, is_level_tail, 0); + } else { + ceph_abort("impossible path"); + } +} + +InternalNodeImplURef InternalNodeImpl::load( + NodeExtentRef extent, field_type_t type, bool expect_is_level_tail) { + if (type == field_type_t::N0) { + return InternalNode0::load(extent, expect_is_level_tail); + } else if (type == field_type_t::N1) { + return InternalNode1::load(extent, expect_is_level_tail); + } else if (type == field_type_t::N2) { + return InternalNode2::load(extent, expect_is_level_tail); + } else if (type == field_type_t::N3) { + return InternalNode3::load(extent, expect_is_level_tail); + } else { + ceph_abort("impossible 
path"); + } +} + +LeafNodeImplURef LeafNodeImpl::load( + NodeExtentRef extent, field_type_t type, bool expect_is_level_tail) { + if (type == field_type_t::N0) { + return LeafNode0::load(extent, expect_is_level_tail); + } else if (type == field_type_t::N1) { + return LeafNode1::load(extent, expect_is_level_tail); + } else if (type == field_type_t::N2) { + return LeafNode2::load(extent, expect_is_level_tail); + } else if (type == field_type_t::N3) { + return LeafNode3::load(extent, expect_is_level_tail); + } else { + ceph_abort("impossible path"); + } +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.h new file mode 100644 index 00000000000..e7e9f33449f --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.h @@ -0,0 +1,197 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <ostream> + +#include "node_extent_mutable.h" +#include "node_types.h" +#include "stages/stage_types.h" + +namespace crimson::os::seastore::onode { + +#ifdef UNIT_TESTS_BUILT +enum class InsertType { BEGIN, LAST, MID }; +struct split_expectation_t { + uint8_t split_stage; + uint8_t insert_stage; + bool is_insert_left; + InsertType insert_type; +}; +struct last_split_info_t { + search_position_t split_pos; + uint8_t insert_stage; + bool is_insert_left; + InsertType insert_type; + bool match(const split_expectation_t& e) const { + match_stage_t split_stage; + if (split_pos.nxt.nxt.index == 0) { + if (split_pos.nxt.index == 0) { + split_stage = 2; + } else { + split_stage = 1; + } + } else { + split_stage = 0; + } + return split_stage == e.split_stage && + insert_stage == e.insert_stage && + is_insert_left == e.is_insert_left && + insert_type == e.insert_type; + } + bool match_split_pos(const search_position_t& pos) const { + return split_pos == pos; + } +}; +extern last_split_info_t last_split; 
+#endif + +struct key_hobj_t; +struct key_view_t; +class NodeExtentMutable; + +/** + * NodeImpl + * + * Hides type specific node layout implementations for Node. + */ +class NodeImpl { + public: + using alloc_ertr = crimson::errorator< + crimson::ct_error::input_output_error, + crimson::ct_error::invarg, + crimson::ct_error::enoent, + crimson::ct_error::erange>; + virtual ~NodeImpl() = default; + + virtual field_type_t field_type() const = 0; + virtual laddr_t laddr() const = 0; + virtual void prepare_mutate(context_t) = 0; + virtual bool is_level_tail() const = 0; + virtual bool is_empty() const = 0; + virtual level_t level() const = 0; + virtual node_offset_t free_size() const = 0; + virtual key_view_t get_key_view(const search_position_t&) const = 0; + virtual key_view_t get_largest_key_view() const = 0; + virtual void next_position(search_position_t&) const = 0; + + virtual node_stats_t get_stats() const = 0; + virtual std::ostream& dump(std::ostream&) const = 0; + virtual std::ostream& dump_brief(std::ostream&) const = 0; + virtual void validate_layout() const = 0; + + virtual void test_copy_to(NodeExtentMutable&) const = 0; + virtual void test_set_tail(NodeExtentMutable&) = 0; + + protected: + NodeImpl() = default; +}; + +/** + * InternalNodeImpl + * + * Hides type specific node layout implementations for InternalNode. 
+ */ +class InternalNodeImpl : public NodeImpl { + public: + struct internal_marker_t {}; + virtual ~InternalNodeImpl() = default; + + #pragma GCC diagnostic ignored "-Woverloaded-virtual" + virtual const laddr_packed_t* get_p_value( + const search_position_t&, + key_view_t* = nullptr, internal_marker_t = {}) const { + ceph_abort("impossible path"); + } + #pragma GCC diagnostic ignored "-Woverloaded-virtual" + virtual lookup_result_t<node_type_t::INTERNAL> lower_bound( + const key_hobj_t&, MatchHistory&, + key_view_t* = nullptr, internal_marker_t = {}) const { + ceph_abort("impossible path"); + } + #pragma GCC diagnostic ignored "-Woverloaded-virtual" + virtual const laddr_packed_t* insert( + const key_view_t&, const laddr_packed_t&, search_position_t&, match_stage_t&, node_offset_t&) { + ceph_abort("impossible path"); + } + #pragma GCC diagnostic ignored "-Woverloaded-virtual" + virtual std::tuple<search_position_t, bool, const laddr_packed_t*> split_insert( + NodeExtentMutable&, NodeImpl&, const key_view_t&, const laddr_packed_t&, + search_position_t&, match_stage_t&, node_offset_t&) { + ceph_abort("impossible path"); + } + + virtual void replace_child_addr(const search_position_t&, laddr_t dst, laddr_t src) = 0; + virtual std::tuple<match_stage_t, node_offset_t> evaluate_insert( + const key_view_t&, const laddr_t&, search_position_t&) const = 0; + + struct fresh_impl_t { + InternalNodeImplURef impl; + NodeExtentMutable mut; + std::pair<NodeImplURef, NodeExtentMutable> make_pair() { + return {std::move(impl), mut}; + } + }; + static alloc_ertr::future<fresh_impl_t> allocate(context_t, field_type_t, bool, level_t); + static InternalNodeImplURef load(NodeExtentRef, field_type_t, bool); + + protected: + InternalNodeImpl() = default; +}; + +/** + * LeafNodeImpl + * + * Hides type specific node layout implementations for LeafNode. 
+ */ +class LeafNodeImpl : public NodeImpl { + public: + struct leaf_marker_t {}; + virtual ~LeafNodeImpl() = default; + + #pragma GCC diagnostic ignored "-Woverloaded-virtual" + virtual const onode_t* get_p_value( + const search_position_t&, + key_view_t* = nullptr, leaf_marker_t={}) const { + ceph_abort("impossible path"); + } + #pragma GCC diagnostic ignored "-Woverloaded-virtual" + virtual lookup_result_t<node_type_t::LEAF> lower_bound( + const key_hobj_t&, MatchHistory&, + key_view_t* = nullptr, leaf_marker_t = {}) const { + ceph_abort("impossible path"); + } + #pragma GCC diagnostic ignored "-Woverloaded-virtual" + virtual const onode_t* insert( + const key_hobj_t&, const onode_t&, search_position_t&, match_stage_t&, node_offset_t&) { + ceph_abort("impossible path"); + } + #pragma GCC diagnostic ignored "-Woverloaded-virtual" + virtual std::tuple<search_position_t, bool, const onode_t*> split_insert( + NodeExtentMutable&, NodeImpl&, const key_hobj_t&, const onode_t&, + search_position_t&, match_stage_t&, node_offset_t&) { + ceph_abort("impossible path"); + } + + virtual void get_largest_slot( + search_position_t&, key_view_t&, const onode_t**) const = 0; + virtual std::tuple<match_stage_t, node_offset_t> evaluate_insert( + const key_hobj_t&, const onode_t&, + const MatchHistory&, match_stat_t, search_position_t&) const = 0; + + struct fresh_impl_t { + LeafNodeImplURef impl; + NodeExtentMutable mut; + std::pair<NodeImplURef, NodeExtentMutable> make_pair() { + return {std::move(impl), mut}; + } + }; + static alloc_ertr::future<fresh_impl_t> allocate(context_t, field_type_t, bool); + static LeafNodeImplURef load(NodeExtentRef, field_type_t, bool); + + protected: + LeafNodeImpl() = default; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h new file mode 100644 index 00000000000..4cf691675e8 --- /dev/null +++ 
b/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h @@ -0,0 +1,613 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <ostream> +#include <sstream> + +#include "common/likely.h" +#include "crimson/common/log.h" +#include "node_extent_accessor.h" +#include "node_impl.h" +#include "stages/node_stage_layout.h" + +namespace crimson::os::seastore::onode { + +template <node_type_t NODE_TYPE> struct insert_key_type; +template <> struct insert_key_type<node_type_t::INTERNAL> { + static constexpr auto type = KeyT::VIEW; }; +template <> struct insert_key_type<node_type_t::LEAF> { + static constexpr auto type = KeyT::HOBJ; }; + +template <node_type_t NODE_TYPE> struct node_impl_type; +template <> struct node_impl_type<node_type_t::INTERNAL> { + using type = InternalNodeImpl; }; +template <> struct node_impl_type<node_type_t::LEAF> { + using type = LeafNodeImpl; }; + +template <node_type_t NODE_TYPE> struct node_marker_type; +template <> struct node_marker_type<node_type_t::INTERNAL> { + using type = InternalNodeImpl::internal_marker_t; }; +template <> struct node_marker_type<node_type_t::LEAF> { + using type = LeafNodeImpl::leaf_marker_t; }; + +/** + * NodeLayoutT + * + * Contains templated and concrete implementations for both InternalNodeImpl + * and LeafNodeImpl under a specific node layout. 
+ */ +template <typename FieldType, node_type_t NODE_TYPE> +class NodeLayoutT final : public InternalNodeImpl, public LeafNodeImpl { + public: + using URef = std::unique_ptr<NodeLayoutT>; + using extent_t = NodeExtentAccessorT<FieldType, NODE_TYPE>; + using parent_t = typename node_impl_type<NODE_TYPE>::type; + using marker_t = typename node_marker_type<NODE_TYPE>::type; + using node_stage_t = typename extent_t::node_stage_t; + using position_t = typename extent_t::position_t; + using value_t = typename extent_t::value_t; + static constexpr auto FIELD_TYPE = extent_t::FIELD_TYPE; + static constexpr auto KEY_TYPE = insert_key_type<NODE_TYPE>::type; + static constexpr auto STAGE = STAGE_T::STAGE; + + NodeLayoutT(const NodeLayoutT&) = delete; + NodeLayoutT(NodeLayoutT&&) = delete; + NodeLayoutT& operator=(const NodeLayoutT&) = delete; + NodeLayoutT& operator=(NodeLayoutT&&) = delete; + ~NodeLayoutT() override = default; + + static URef load(NodeExtentRef extent, bool expect_is_level_tail) { + std::unique_ptr<NodeLayoutT> ret(new NodeLayoutT(extent)); + assert(ret->is_level_tail() == expect_is_level_tail); + return ret; + } + + using alloc_ertr = NodeExtentManager::tm_ertr; + static alloc_ertr::future<typename parent_t::fresh_impl_t> allocate( + context_t c, bool is_level_tail, level_t level) { + // NOTE: Currently, all the node types have the same size for simplicity. + // But depending on the requirement, we may need to make node size + // configurable by field_type_t and node_type_t, or totally flexible. 
+ return c.nm.alloc_extent(c.t, node_stage_t::EXTENT_SIZE + ).safe_then([is_level_tail, level](auto extent) { + assert(extent->is_initial_pending()); + auto mut = extent->get_mutable(); + node_stage_t::bootstrap_extent( + mut, FIELD_TYPE, NODE_TYPE, is_level_tail, level); + return typename parent_t::fresh_impl_t{ + std::unique_ptr<parent_t>(new NodeLayoutT(extent)), mut}; + }); + } + + protected: + /* + * NodeImpl + */ + field_type_t field_type() const override { return FIELD_TYPE; } + laddr_t laddr() const override { return extent.get_laddr(); } + void prepare_mutate(context_t c) override { return extent.prepare_mutate(c); } + bool is_level_tail() const override { return extent.read().is_level_tail(); } + bool is_empty() const override { return extent.read().keys() == 0; } + level_t level() const override { return extent.read().level(); } + node_offset_t free_size() const override { return extent.read().free_size(); } + + key_view_t get_key_view(const search_position_t& position) const override { + key_view_t ret; + STAGE_T::get_key_view(extent.read(), cast_down<STAGE>(position), ret); + return ret; + } + + key_view_t get_largest_key_view() const override { + key_view_t index_key; + STAGE_T::template lookup_largest_slot<false, true, false>( + extent.read(), nullptr, &index_key, nullptr); + return index_key; + } + + void next_position(search_position_t& pos) const override { + assert(!pos.is_end()); + bool find_next = STAGE_T::next_position(extent.read(), cast_down<STAGE>(pos)); + if (find_next) { + pos = search_position_t::end(); + } + } + + node_stats_t get_stats() const override { + node_stats_t stats; + auto& node_stage = extent.read(); + key_view_t index_key; + if (node_stage.keys()) { + STAGE_T::get_stats(node_stage, stats, index_key); + } + stats.size_persistent = node_stage_t::EXTENT_SIZE; + stats.size_filled = filled_size(); + if constexpr (NODE_TYPE == node_type_t::INTERNAL) { + if (is_level_tail()) { + stats.size_logical += sizeof(value_t); + 
stats.size_value += sizeof(value_t); + stats.num_kvs += 1; + } + } + return stats; + } + + std::ostream& dump(std::ostream& os) const override { + auto& node_stage = extent.read(); + auto p_start = node_stage.p_start(); + dump_brief(os); + auto stats = get_stats(); + os << " num_kvs=" << stats.num_kvs + << ", logical=" << stats.size_logical + << "B, overhead=" << stats.size_overhead + << "B, value=" << stats.size_value << "B"; + os << ":\n header: " << node_stage_t::header_size() << "B"; + size_t size = 0u; + if (node_stage.keys()) { + STAGE_T::dump(node_stage, os, " ", size, p_start); + } else { + size += node_stage_t::header_size(); + if (NODE_TYPE == node_type_t::LEAF || !node_stage.is_level_tail()) { + os << " empty!"; + } + } + if constexpr (NODE_TYPE == node_type_t::INTERNAL) { + if (node_stage.is_level_tail()) { + size += sizeof(laddr_t); + auto value_ptr = node_stage.get_end_p_laddr(); + int offset = reinterpret_cast<const char*>(value_ptr) - p_start; + os << "\n tail value: 0x" + << std::hex << value_ptr->value << std::dec + << " " << size << "B" + << " @" << offset << "B"; + } + } + assert(size == filled_size()); + return os; + } + + std::ostream& dump_brief(std::ostream& os) const override { + auto& node_stage = extent.read(); + os << "Node" << NODE_TYPE << FIELD_TYPE + << "@0x" << std::hex << extent.get_laddr() + << "+" << node_stage_t::EXTENT_SIZE << std::dec + << (node_stage.is_level_tail() ? 
"$" : "") + << "(level=" << (unsigned)node_stage.level() + << ", filled=" << filled_size() << "B" + << ", free=" << node_stage.free_size() << "B" + << ")"; + return os; + } + + void validate_layout() const override { +#ifndef NDEBUG + STAGE_T::validate(extent.read()); +#endif + } + + void test_copy_to(NodeExtentMutable& to) const override { + extent.test_copy_to(to); + } + + void test_set_tail(NodeExtentMutable& mut) override { + node_stage_t::update_is_level_tail(mut, extent.read(), true); + } + + /* + * Common + */ + const value_t* get_p_value(const search_position_t& position, + key_view_t* index_key=nullptr, marker_t={}) const override { + auto& node_stage = extent.read(); + if constexpr (NODE_TYPE == node_type_t::INTERNAL) { + assert(!index_key); + if (position.is_end()) { + assert(is_level_tail()); + return node_stage.get_end_p_laddr(); + } + } else { + assert(!position.is_end()); + } + if (index_key) { + return STAGE_T::template get_p_value<true>( + node_stage, cast_down<STAGE>(position), index_key); + } else { + return STAGE_T::get_p_value(node_stage, cast_down<STAGE>(position)); + } + } + + lookup_result_t<NODE_TYPE> lower_bound( + const key_hobj_t& key, MatchHistory& history, + key_view_t* index_key=nullptr, marker_t={}) const override { + auto& node_stage = extent.read(); + if constexpr (NODE_TYPE == node_type_t::LEAF) { + if (unlikely(node_stage.keys() == 0)) { + history.set<STAGE_LEFT>(MatchKindCMP::LT); + return lookup_result_t<NODE_TYPE>::end(); + } + } + + typename STAGE_T::result_t result_raw; + if (index_key) { + result_raw = STAGE_T::template lower_bound<true>( + node_stage, key, history, index_key); +#ifndef NDEBUG + if (!result_raw.is_end()) { + full_key_t<KeyT::VIEW> index; + STAGE_T::get_key_view(node_stage, result_raw.position, index); + assert(index == *index_key); + } +#endif + } else { + result_raw = STAGE_T::lower_bound(node_stage, key, history); + } +#ifndef NDEBUG + if (result_raw.is_end()) { + assert(result_raw.mstat == MSTAT_END); + 
} else { + full_key_t<KeyT::VIEW> index; + STAGE_T::get_key_view(node_stage, result_raw.position, index); + assert_mstat(key, index, result_raw.mstat); + } +#endif + + // calculate MSTAT_LT3 + if constexpr (FIELD_TYPE == field_type_t::N0) { + // currently only internal node checks mstat + if constexpr (NODE_TYPE == node_type_t::INTERNAL) { + if (result_raw.mstat == MSTAT_LT2) { + auto cmp = compare_to<KeyT::HOBJ>( + key, node_stage[result_raw.position.index].shard_pool); + assert(cmp != MatchKindCMP::GT); + if (cmp != MatchKindCMP::EQ) { + result_raw.mstat = MSTAT_LT3; + } + } + } + } + + auto result = normalize(std::move(result_raw)); + if (result.is_end()) { + assert(node_stage.is_level_tail()); + assert(result.p_value == nullptr); + if constexpr (NODE_TYPE == node_type_t::INTERNAL) { + result.p_value = node_stage.get_end_p_laddr(); + } + } else { + assert(result.p_value != nullptr); + } + return result; + } + + const value_t* insert( + const full_key_t<KEY_TYPE>& key, const value_t& value, + search_position_t& insert_pos, match_stage_t& insert_stage, + node_offset_t& insert_size) override { + logger().debug("OTree::Layout::Insert: begin at " + "insert_pos({}), insert_stage={}, insert_size={}B ...", + insert_pos, insert_stage, insert_size); + if (unlikely(logger().is_enabled(seastar::log_level::trace))) { + std::ostringstream sos; + dump(sos); + logger().trace("OTree::Layout::Insert: -- dump\n{}", sos.str()); + } + auto ret = extent.template insert_replayable<KEY_TYPE>( + key, value, cast_down<STAGE>(insert_pos), insert_stage, insert_size); + logger().debug("OTree::Layout::Insert: done at " + "insert_pos({}), insert_stage={}, insert_size={}B", + insert_pos, insert_stage, insert_size); + if (unlikely(logger().is_enabled(seastar::log_level::trace))) { + std::ostringstream sos; + dump(sos); + logger().trace("OTree::Layout::Insert: -- dump\n{}", sos.str()); + } + validate_layout(); + assert(get_key_view(insert_pos) == key); + return ret; + } + + 
std::tuple<search_position_t, bool, const value_t*> split_insert( + NodeExtentMutable& right_mut, NodeImpl& right_impl, + const full_key_t<KEY_TYPE>& key, const value_t& value, + search_position_t& _insert_pos, match_stage_t& insert_stage, + node_offset_t& insert_size) override { + logger().info("OTree::Layout::Split: begin at " + "insert_pos({}), insert_stage={}, insert_size={}B, " + "{:#x}=>{:#x} ...", + _insert_pos, insert_stage, insert_size, + laddr(), right_impl.laddr()); + if (unlikely(logger().is_enabled(seastar::log_level::debug))) { + std::ostringstream sos; + dump(sos); + logger().debug("OTree::Layout::Split: -- dump\n{}", sos.str()); + } +#ifdef UNIT_TESTS_BUILT + auto insert_stage_pre = insert_stage; +#endif + + auto& insert_pos = cast_down<STAGE>(_insert_pos); + auto& node_stage = extent.read(); + typename STAGE_T::StagedIterator split_at; + bool is_insert_left; + size_t split_size; + size_t target_split_size; + { + size_t empty_size = node_stage.size_before(0); + size_t filled_kv_size = filled_size() - empty_size; + /** NODE_BLOCK_SIZE considerations + * + * Generally, + * target_split_size = (filled_size + insert_size) / 2 + * We can have two locate_split() strategies: + * A. the simpler one is to locate the largest split position where + * the estimated left_node_size <= target_split_size; + * B. the fair one takes a further step to calculate the next slot of + * P KiB, and if left_node_size + P/2 < target_split_size, compensate + * the split position to include the next slot; (TODO) + * + * Say that the node_block_size = N KiB, the largest allowed + * insert_size = 1/I * N KiB (I > 1). We want to identify the minimal 'I' + * that won't lead to "double split" effect, meaning after a split, + * the right node size is still larger than N KiB and need to split + * again. I think "double split" makes split much more complicated and + * we can no longer identify whether the node is safe under concurrent + * operations. 
+ * + * We need to evaluate the worst case in order to identify 'I'. This means: + * - filled_size ~= N KiB + * - insert_size == N/I KiB + * - target_split_size ~= (I+1)/2I * N KiB + * To simplify the below calculations, node_block_size is normalized to 1. + * + * With strategy A, the worst case is when left_node_size cannot include + * the next slot that will just overflow the target_split_size: + * - left_node_size + 1/I ~= (I+1)/2I + * - left_node_size ~= (I-1)/2I + * - right_node_size ~= 1 + 1/I - left_node_size ~= (I+3)/2I + * The right_node_size cannot be larger than the node_block_size in the + * worst case, which means (I+3)/2I < 1, so I > 3, meaning the largest + * possible insert_size must be smaller than 1/3 of the node_block_size. + * + * With strategy B, the worst case is when left_node_size cannot include + * the next slot that will just overflow the threshold + * target_split_size - 1/2I, thus: + * - left_node_size ~= (I+1)/2I - 1/2I ~= 1/2 + * - right_node_size ~= 1 + 1/I - 1/2 ~= (I+2)/2I < node_block_size(1) + * - I > 2 + * This means the largest possible insert_size must be smaller than 1/2 of + * the node_block_size, which is better than strategy A. + + * In order to avoid "double split", there is another side-effect we need + * to take into consideration: if split happens with snap-gen indexes, the + * according ns-oid string needs to be copied to the right node. That is + * to say: right_node_size + string_size < node_block_size. + * + * Say that the largest allowed string size is 1/S of the largest allowed + * insert_size N/I KiB. 
If we go with strategy B, the equation should be + * changed to: + * - right_node_size ~= (I+2)/2I + 1/(I*S) < 1 + * - I > 2 + 2/S (S > 1) + * + * Now back to NODE_BLOCK_SIZE calculation, if we have limits of at most + * X KiB ns-oid string and Y KiB of onode_t to store in this BTree, then: + * - largest_insert_size ~= X+Y KiB + * - 1/S == X/(X+Y) + * - I > (4X+2Y)/(X+Y) + * - node_block_size(N) == I * insert_size > 4X+2Y KiB + * + * In conclusion, + * (TODO) the current node block size (4 KiB) is too small to + * store entire 2 KiB ns-oid string. We need to consider a larger + * node_block_size. + * + * We are setting X = Y = 640 B in order not to break the current + * implementations with 4KiB node. + * + * (TODO) Implement smarter logics to check when "double split" happens. + */ + target_split_size = empty_size + (filled_kv_size + insert_size) / 2; + assert(insert_size < (node_stage.total_size() - empty_size) / 2); + + std::optional<bool> _is_insert_left; + split_at.set(node_stage); + split_size = 0; + bool locate_nxt = STAGE_T::recursively_locate_split_inserted( + split_size, 0, target_split_size, insert_pos, + insert_stage, insert_size, _is_insert_left, split_at); + is_insert_left = *_is_insert_left; + logger().debug("OTree::Layout::Split: -- located " + "split_at({}), insert_pos({}), is_insert_left={}, " + "split_size={}B(target={}B, current={}B)", + split_at, insert_pos, is_insert_left, + split_size, target_split_size, filled_size()); + // split_size can be larger than target_split_size in strategy B + // assert(split_size <= target_split_size); + if (locate_nxt) { + assert(insert_stage == STAGE); + assert(split_at.get().is_last()); + split_at.set_end(); + assert(insert_pos.index == split_at.index()); + } + } + + auto append_at = split_at; + // TODO(cross-node string dedup) + typename STAGE_T::template StagedAppender<KEY_TYPE> right_appender; + right_appender.init(&right_mut, right_mut.get_write()); + const value_t* p_value = nullptr; + if (!is_insert_left) 
{ + // right node: append [start(append_at), insert_pos) + STAGE_T::template append_until<KEY_TYPE>( + append_at, right_appender, insert_pos, insert_stage); + logger().debug("OTree::Layout::Split: -- right appended until " + "insert_pos({}), insert_stage={}, insert/append the rest ...", + insert_pos, insert_stage); + // right node: append [insert_pos(key, value)] + bool is_front_insert = (insert_pos == position_t::begin()); + [[maybe_unused]] bool is_end = STAGE_T::template append_insert<KEY_TYPE>( + key, value, append_at, right_appender, + is_front_insert, insert_stage, p_value); + assert(append_at.is_end() == is_end); + } else { + logger().debug("OTree::Layout::Split: -- right appending ..."); + } + + // right node: append (insert_pos, end) + auto pos_end = position_t::end(); + STAGE_T::template append_until<KEY_TYPE>( + append_at, right_appender, pos_end, STAGE); + assert(append_at.is_end()); + right_appender.wrap(); + if (unlikely(logger().is_enabled(seastar::log_level::debug))) { + std::ostringstream sos; + right_impl.dump(sos); + logger().debug("OTree::Layout::Split: -- right node dump\n{}", sos.str()); + } + right_impl.validate_layout(); + + // mutate left node + if (is_insert_left) { + logger().debug("OTree::Layout::Split: -- left trim/insert at " + "insert_pos({}), insert_stage={} ...", + insert_pos, insert_stage); + p_value = extent.template split_insert_replayable<KEY_TYPE>( + split_at, key, value, insert_pos, insert_stage, insert_size); + assert(get_key_view(_insert_pos) == key); + } else { + logger().debug("OTree::Layout::Split: -- left trim ..."); + assert(right_impl.get_key_view(_insert_pos) == key); + extent.split_replayable(split_at); + } + if (unlikely(logger().is_enabled(seastar::log_level::debug))) { + std::ostringstream sos; + dump(sos); + logger().debug("OTree::Layout::Split: -- left node dump\n{}", sos.str()); + } + validate_layout(); + assert(p_value); + + auto split_pos = normalize(split_at.get_pos()); + logger().info("OTree::Layout::Split: 
done at " + "insert_pos({}), insert_stage={}, insert_size={}B, split_at({}), " + "is_insert_left={}, split_size={}B(target={}B)", + _insert_pos, insert_stage, insert_size, split_pos, + is_insert_left, split_size, target_split_size); + assert(split_size == filled_size()); + +#ifdef UNIT_TESTS_BUILT + InsertType insert_type; + search_position_t last_pos; + if (is_insert_left) { + STAGE_T::template lookup_largest_slot<true, false, false>( + extent.read(), &cast_down_fill_0<STAGE>(last_pos), nullptr, nullptr); + } else { + node_stage_t right_stage{reinterpret_cast<FieldType*>(right_mut.get_write())}; + STAGE_T::template lookup_largest_slot<true, false, false>( + right_stage, &cast_down_fill_0<STAGE>(last_pos), nullptr, nullptr); + } + if (_insert_pos == search_position_t::begin()) { + insert_type = InsertType::BEGIN; + } else if (_insert_pos == last_pos) { + insert_type = InsertType::LAST; + } else { + insert_type = InsertType::MID; + } + last_split = {split_pos, insert_stage_pre, is_insert_left, insert_type}; +#endif + return {split_pos, is_insert_left, p_value}; + } + + /* + * InternalNodeImpl + */ + void replace_child_addr( + const search_position_t& pos, laddr_t dst, laddr_t src) override { + if constexpr (NODE_TYPE == node_type_t::INTERNAL) { + const laddr_packed_t* p_value = get_p_value(pos); + assert(p_value->value == src); + extent.update_child_addr_replayable(dst, const_cast<laddr_packed_t*>(p_value)); + } else { + ceph_abort("impossible path"); + } + } + + std::tuple<match_stage_t, node_offset_t> evaluate_insert( + const key_view_t& key, const laddr_t& value, + search_position_t& insert_pos) const override { + if constexpr (NODE_TYPE == node_type_t::INTERNAL) { + auto packed_value = laddr_packed_t{value}; + auto& node_stage = extent.read(); + match_stage_t insert_stage; + node_offset_t insert_size; + if (unlikely(!node_stage.keys())) { + assert(insert_pos.is_end()); + insert_stage = STAGE; + insert_size = STAGE_T::template insert_size<KeyT::VIEW>(key, 
packed_value); + } else { + std::tie(insert_stage, insert_size) = STAGE_T::evaluate_insert( + node_stage, key, packed_value, cast_down<STAGE>(insert_pos), false); + } + return {insert_stage, insert_size}; + } else { + ceph_abort("impossible path"); + } + } + + /* + * LeafNodeImpl + */ + void get_largest_slot(search_position_t& pos, + key_view_t& index_key, const onode_t** pp_value) const override { + if constexpr (NODE_TYPE == node_type_t::LEAF) { + STAGE_T::template lookup_largest_slot<true, true, true>( + extent.read(), &cast_down_fill_0<STAGE>(pos), &index_key, pp_value); + } else { + ceph_abort("impossible path"); + } + } + + std::tuple<match_stage_t, node_offset_t> evaluate_insert( + const key_hobj_t& key, const onode_t& value, + const MatchHistory& history, match_stat_t mstat, + search_position_t& insert_pos) const override { + if constexpr (NODE_TYPE == node_type_t::LEAF) { + if (unlikely(is_empty())) { + assert(insert_pos.is_end()); + return {STAGE, STAGE_T::template insert_size<KeyT::HOBJ>(key, value)}; + } else { + return STAGE_T::evaluate_insert( + key, value, history, mstat, cast_down<STAGE>(insert_pos)); + } + } else { + ceph_abort("impossible path"); + } + } + + private: + NodeLayoutT(NodeExtentRef extent) : extent{extent} {} + + node_offset_t filled_size() const { + auto& node_stage = extent.read(); + auto ret = node_stage.size_before(node_stage.keys()); + assert(ret == node_stage.total_size() - node_stage.free_size()); + return ret; + } + + static seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } + + extent_t extent; +}; + +using InternalNode0 = NodeLayoutT<node_fields_0_t, node_type_t::INTERNAL>; +using InternalNode1 = NodeLayoutT<node_fields_1_t, node_type_t::INTERNAL>; +using InternalNode2 = NodeLayoutT<node_fields_2_t, node_type_t::INTERNAL>; +using InternalNode3 = NodeLayoutT<internal_fields_3_t, node_type_t::INTERNAL>; +using LeafNode0 = NodeLayoutT<node_fields_0_t, node_type_t::LEAF>; +using LeafNode1 = 
NodeLayoutT<node_fields_1_t, node_type_t::LEAF>; +using LeafNode2 = NodeLayoutT<node_fields_2_t, node_type_t::LEAF>; +using LeafNode3 = NodeLayoutT<leaf_fields_3_t, node_type_t::LEAF>; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout_replayable.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout_replayable.h new file mode 100644 index 00000000000..61c46403aaf --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout_replayable.h @@ -0,0 +1,75 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "node_extent_mutable.h" +#include "stages/node_stage.h" +#include "stages/stage.h" + +#define STAGE_T node_to_stage_t<node_stage_t> + +namespace crimson::os::seastore::onode { + +/** + * NodeLayoutReplayableT + * + * Contains templated logics to modify the layout of a NodeExtent which are + * also replayable. Used by NodeExtentAccessorT at runtime and by + * DeltaRecorderT during replay. 
+ */ +template <typename FieldType, node_type_t NODE_TYPE> +struct NodeLayoutReplayableT { + using node_stage_t = node_extent_t<FieldType, NODE_TYPE>; + using position_t = typename STAGE_T::position_t; + using StagedIterator = typename STAGE_T::StagedIterator; + using value_t = value_type_t<NODE_TYPE>; + static constexpr auto FIELD_TYPE = FieldType::FIELD_TYPE; + + template <KeyT KT> + static const value_t* insert( + NodeExtentMutable& mut, + const node_stage_t& node_stage, + const full_key_t<KT>& key, + const value_t& value, + position_t& insert_pos, + match_stage_t& insert_stage, + node_offset_t& insert_size) { + auto p_value = STAGE_T::template proceed_insert<KT, false>( + mut, node_stage, key, value, insert_pos, insert_stage, insert_size); + return p_value; + } + + static void split( + NodeExtentMutable& mut, + const node_stage_t& node_stage, + StagedIterator& split_at) { + node_stage_t::update_is_level_tail(mut, node_stage, false); + STAGE_T::trim(mut, split_at); + } + + template <KeyT KT> + static const value_t* split_insert( + NodeExtentMutable& mut, + const node_stage_t& node_stage, + StagedIterator& split_at, + const full_key_t<KT>& key, + const value_t& value, + position_t& insert_pos, + match_stage_t& insert_stage, + node_offset_t& insert_size) { + node_stage_t::update_is_level_tail(mut, node_stage, false); + STAGE_T::trim(mut, split_at); + auto p_value = STAGE_T::template proceed_insert<KT, true>( + mut, node_stage, key, value, insert_pos, insert_stage, insert_size); + return p_value; + } + + static void update_child_addr( + NodeExtentMutable& mut, const laddr_t new_addr, laddr_packed_t* p_addr) { + assert(NODE_TYPE == node_type_t::INTERNAL); + mut.copy_in_absolute(p_addr, new_addr); + } +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_types.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_types.h new file mode 100644 index 00000000000..8452168e40c --- /dev/null +++ 
b/src/crimson/os/seastore/onode_manager/staged-fltree/node_types.h @@ -0,0 +1,61 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <cassert> +#include <ostream> + +#include "fwd.h" + +namespace crimson::os::seastore::onode { + +constexpr uint8_t FIELD_TYPE_MAGIC = 0x25; +enum class field_type_t : uint8_t { + N0 = FIELD_TYPE_MAGIC, + N1, + N2, + N3, + _MAX +}; +inline uint8_t to_unsigned(field_type_t type) { + auto value = static_cast<uint8_t>(type); + assert(value >= FIELD_TYPE_MAGIC); + assert(value < static_cast<uint8_t>(field_type_t::_MAX)); + return value - FIELD_TYPE_MAGIC; +} +inline std::ostream& operator<<(std::ostream &os, field_type_t type) { + const char* const names[] = {"0", "1", "2", "3"}; + auto index = to_unsigned(type); + os << names[index]; + return os; +} + +enum class node_type_t : uint8_t { + LEAF = 0, + INTERNAL +}; +inline std::ostream& operator<<(std::ostream &os, const node_type_t& type) { + const char* const names[] = {"L", "I"}; + auto index = static_cast<uint8_t>(type); + assert(index <= 1u); + os << names[index]; + return os; +} + +struct laddr_packed_t { + laddr_t value; +} __attribute__((packed)); + +using match_stat_t = int8_t; +constexpr match_stat_t MSTAT_END = -2; // index is search_position_t::end() +constexpr match_stat_t MSTAT_EQ = -1; // key == index +constexpr match_stat_t MSTAT_LT0 = 0; // key == index [pool/shard crush ns/oid]; key < index [snap/gen] +constexpr match_stat_t MSTAT_LT1 = 1; // key == index [pool/shard crush]; key < index [ns/oid] +constexpr match_stat_t MSTAT_LT2 = 2; // key < index [pool/shard crush ns/oid] || + // key == index [pool/shard]; key < index [crush] +constexpr match_stat_t MSTAT_LT3 = 3; // key < index [pool/shard] +constexpr match_stat_t MSTAT_MIN = MSTAT_END; +constexpr match_stat_t MSTAT_MAX = MSTAT_LT3; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.cc 
b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.cc new file mode 100644 index 00000000000..826de16631b --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.cc @@ -0,0 +1,162 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "item_iterator_stage.h" + +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_mutable.h" + +namespace crimson::os::seastore::onode { + +#define ITER_T item_iterator_t<NODE_TYPE> +#define ITER_INST(NT) item_iterator_t<NT> +#define ITER_TEMPLATE(NT) template class ITER_INST(NT) +ITER_TEMPLATE(node_type_t::LEAF); +ITER_TEMPLATE(node_type_t::INTERNAL); + +template <node_type_t NODE_TYPE> +template <KeyT KT> +memory_range_t ITER_T::insert_prefix( + NodeExtentMutable& mut, const ITER_T& iter, const full_key_t<KT>& key, + bool is_end, node_offset_t size, const char* p_left_bound) { + // 1. insert range + char* p_insert; + if (is_end) { + assert(!iter.has_next()); + p_insert = const_cast<char*>(iter.p_start()); + } else { + p_insert = const_cast<char*>(iter.p_end()); + } + char* p_insert_front = p_insert - size; + + // 2. shift memory + const char* p_shift_start = p_left_bound; + const char* p_shift_end = p_insert; + mut.shift_absolute(p_shift_start, + p_shift_end - p_shift_start, + -(int)size); + + // 3. 
append header + p_insert -= sizeof(node_offset_t); + node_offset_t back_offset = (p_insert - p_insert_front); + mut.copy_in_absolute(p_insert, back_offset); + ns_oid_view_t::append<KT>(mut, key, p_insert); + + return {p_insert_front, p_insert}; +} +#define IP_TEMPLATE(NT, KT) \ + template memory_range_t ITER_INST(NT)::insert_prefix<KT>( \ + NodeExtentMutable&, const ITER_INST(NT)&, const full_key_t<KT>&, \ + bool, node_offset_t, const char*) +IP_TEMPLATE(node_type_t::LEAF, KeyT::VIEW); +IP_TEMPLATE(node_type_t::INTERNAL, KeyT::VIEW); +IP_TEMPLATE(node_type_t::LEAF, KeyT::HOBJ); +IP_TEMPLATE(node_type_t::INTERNAL, KeyT::HOBJ); + +template <node_type_t NODE_TYPE> +void ITER_T::update_size( + NodeExtentMutable& mut, const ITER_T& iter, int change) { + node_offset_t offset = iter.get_back_offset(); + int new_size = change + offset; + assert(new_size > 0 && new_size < NODE_BLOCK_SIZE); + mut.copy_in_absolute( + (void*)iter.get_item_range().p_end, node_offset_t(new_size)); +} + +template <node_type_t NODE_TYPE> +node_offset_t ITER_T::trim_until(NodeExtentMutable&, const ITER_T& iter) { + assert(iter.index() != 0); + size_t ret = iter.p_end() - iter.p_items_start; + assert(ret < NODE_BLOCK_SIZE); + return ret; +} + +template <node_type_t NODE_TYPE> +node_offset_t ITER_T::trim_at( + NodeExtentMutable& mut, const ITER_T& iter, node_offset_t trimmed) { + size_t trim_size = iter.p_start() - iter.p_items_start + trimmed; + assert(trim_size < NODE_BLOCK_SIZE); + assert(iter.get_back_offset() > trimmed); + node_offset_t new_offset = iter.get_back_offset() - trimmed; + mut.copy_in_absolute((void*)iter.item_range.p_end, new_offset); + return trim_size; +} + +#define APPEND_T ITER_T::Appender<KT> +template class ITER_INST(node_type_t::LEAF)::Appender<KeyT::VIEW>; +template class ITER_INST(node_type_t::INTERNAL)::Appender<KeyT::VIEW>; +template class ITER_INST(node_type_t::LEAF)::Appender<KeyT::HOBJ>; +template class ITER_INST(node_type_t::INTERNAL)::Appender<KeyT::HOBJ>; + 
+template <node_type_t NODE_TYPE> +template <KeyT KT> +bool APPEND_T::append(const ITER_T& src, size_t& items) { + auto p_end = src.p_end(); + bool append_till_end = false; + if (is_valid_index(items)) { + for (auto i = 1u; i <= items; ++i) { + if (!src.has_next()) { + assert(i == items); + append_till_end = true; + break; + } + ++src; + } + } else { + if (items == INDEX_END) { + append_till_end = true; + } else { + assert(items == INDEX_LAST); + } + items = 0; + while (src.has_next()) { + ++src; + ++items; + } + if (append_till_end) { + ++items; + } + } + + const char* p_start; + if (append_till_end) { + p_start = src.p_start(); + } else { + p_start = src.p_end(); + } + assert(p_end >= p_start); + size_t append_size = p_end - p_start; + p_append -= append_size; + p_mut->copy_in_absolute(p_append, p_start, append_size); + return append_till_end; +} + +template <node_type_t NODE_TYPE> +template <KeyT KT> +std::tuple<NodeExtentMutable*, char*> +APPEND_T::open_nxt(const key_get_type& partial_key) { + p_append -= sizeof(node_offset_t); + p_offset_while_open = p_append; + ns_oid_view_t::append(*p_mut, partial_key, p_append); + return {p_mut, p_append}; +} + +template <node_type_t NODE_TYPE> +template <KeyT KT> +std::tuple<NodeExtentMutable*, char*> +APPEND_T::open_nxt(const full_key_t<KT>& key) { + p_append -= sizeof(node_offset_t); + p_offset_while_open = p_append; + ns_oid_view_t::append<KT>(*p_mut, key, p_append); + return {p_mut, p_append}; +} + +template <node_type_t NODE_TYPE> +template <KeyT KT> +void APPEND_T::wrap_nxt(char* _p_append) { + assert(_p_append < p_append); + p_mut->copy_in_absolute( + p_offset_while_open, node_offset_t(p_offset_while_open - _p_append)); + p_append = _p_append; +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.h new file mode 100644 index 00000000000..bdf6bb95614 --- /dev/null +++ 
b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/item_iterator_stage.h @@ -0,0 +1,147 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/os/seastore/onode_manager/staged-fltree/node_types.h" +#include "key_layout.h" +#include "stage_types.h" + +namespace crimson::os::seastore::onode { + +class NodeExtentMutable; + +/** + * item_iterator_t + * + * The STAGE_STRING implementation for node N0/N1, implements staged contract + * as an iterative container to resolve crush hash conflicts. + * + * The layout of the container to index ns, oid strings storing n items: + * + * # <--------- container range ---------> # + * #<~># items [i+1, n) # + * # # items [0, i) #<~># + * # # <------ item i -------------> # # + * # # <--- item_range ---> | # # + * # # | # # + * # # next-stage | ns-oid | back_ # # + * # # container | strings | offset # # + * #...# range | | #...# + * ^ ^ | + * | | | + * | +---------------------------+ + * + p_items_start + */ +template <node_type_t NODE_TYPE> +class item_iterator_t { + using value_t = value_type_t<NODE_TYPE>; + public: + item_iterator_t(const memory_range_t& range) + : p_items_start(range.p_start) { next_item_range(range.p_end); } + + const char* p_start() const { return item_range.p_start; } + const char* p_end() const { return item_range.p_end + sizeof(node_offset_t); } + const memory_range_t& get_item_range() const { return item_range; } + node_offset_t get_back_offset() const { return back_offset; } + + // container type system + using key_get_type = const ns_oid_view_t&; + static constexpr auto CONTAINER_TYPE = ContainerType::ITERATIVE; + size_t index() const { return _index; } + key_get_type get_key() const { + if (!key.has_value()) { + key = ns_oid_view_t(item_range.p_end); + assert(item_range.p_start < (*key).p_start()); + } + return *key; + } + node_offset_t size() const { + size_t ret = item_range.p_end - item_range.p_start + 
sizeof(node_offset_t); + assert(ret < NODE_BLOCK_SIZE); + return ret; + }; + node_offset_t size_to_nxt() const { + size_t ret = get_key().size() + sizeof(node_offset_t); + assert(ret < NODE_BLOCK_SIZE); + return ret; + } + node_offset_t size_overhead() const { + return sizeof(node_offset_t) + get_key().size_overhead(); + } + memory_range_t get_nxt_container() const { + return {item_range.p_start, get_key().p_start()}; + } + bool has_next() const { + assert(p_items_start <= item_range.p_start); + return p_items_start < item_range.p_start; + } + const item_iterator_t<NODE_TYPE>& operator++() const { + assert(has_next()); + next_item_range(item_range.p_start); + key.reset(); + ++_index; + return *this; + } + + static node_offset_t header_size() { return 0u; } + + template <KeyT KT> + static node_offset_t estimate_insert( + const full_key_t<KT>& key, const value_t&) { + return ns_oid_view_t::estimate_size<KT>(key) + sizeof(node_offset_t); + } + + template <KeyT KT> + static memory_range_t insert_prefix( + NodeExtentMutable& mut, const item_iterator_t<NODE_TYPE>& iter, + const full_key_t<KT>& key, bool is_end, + node_offset_t size, const char* p_left_bound); + + static void update_size( + NodeExtentMutable& mut, const item_iterator_t<NODE_TYPE>& iter, int change); + + static node_offset_t trim_until(NodeExtentMutable&, const item_iterator_t<NODE_TYPE>&); + static node_offset_t trim_at( + NodeExtentMutable&, const item_iterator_t<NODE_TYPE>&, node_offset_t trimmed); + + template <KeyT KT> + class Appender; + + private: + void next_item_range(const char* p_end) const { + auto p_item_end = p_end - sizeof(node_offset_t); + assert(p_items_start < p_item_end); + back_offset = reinterpret_cast<const node_offset_packed_t*>(p_item_end)->value; + assert(back_offset); + const char* p_item_start = p_item_end - back_offset; + assert(p_items_start <= p_item_start); + item_range = {p_item_start, p_item_end}; + } + + const char* p_items_start; + mutable memory_range_t item_range; + 
mutable node_offset_t back_offset; + mutable std::optional<ns_oid_view_t> key; + mutable size_t _index = 0u; +}; + +template <node_type_t NODE_TYPE> +template <KeyT KT> +class item_iterator_t<NODE_TYPE>::Appender { + public: + Appender(NodeExtentMutable* p_mut, char* p_append) + : p_mut{p_mut}, p_append{p_append} {} + bool append(const item_iterator_t<NODE_TYPE>& src, size_t& items); + char* wrap() { return p_append; } + std::tuple<NodeExtentMutable*, char*> open_nxt(const key_get_type&); + std::tuple<NodeExtentMutable*, char*> open_nxt(const full_key_t<KT>&); + void wrap_nxt(char* _p_append); + + private: + NodeExtentMutable* p_mut; + char* p_append; + char* p_offset_while_open; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.cc new file mode 100644 index 00000000000..8b5e380bd9a --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.cc @@ -0,0 +1,33 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "key_layout.h" + +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_mutable.h" + +namespace crimson::os::seastore::onode { + +void string_key_view_t::append_str( + NodeExtentMutable& mut, std::string_view str, char*& p_append) { + p_append -= sizeof(string_size_t); + assert(str.length() < std::numeric_limits<string_size_t>::max()); + string_size_t len = str.length(); + assert(len != 0); + mut.copy_in_absolute(p_append, len); + p_append -= len; + mut.copy_in_absolute(p_append, str.data(), len); +} + +void string_key_view_t::append_dedup( + NodeExtentMutable& mut, const Type& dedup_type, char*& p_append) { + p_append -= sizeof(string_size_t); + if (dedup_type == Type::MIN) { + mut.copy_in_absolute(p_append, (string_size_t)0u); + } else if (dedup_type == Type::MAX) { + mut.copy_in_absolute(p_append, 
std::numeric_limits<string_size_t>::max()); + } else { + assert(false); + } +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h new file mode 100644 index 00000000000..d4e994ec46d --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h @@ -0,0 +1,761 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <cassert> +#include <limits> +#include <optional> +#include <ostream> + +#include "common/hobject.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/fwd.h" + +namespace crimson::os::seastore::onode { + +using shard_t = int8_t; +using pool_t = int64_t; +using crush_hash_t = uint32_t; +using snap_t = uint64_t; +using gen_t = uint64_t; +static_assert(sizeof(shard_t) == sizeof(ghobject_t().shard_id.id)); +static_assert(sizeof(pool_t) == sizeof(ghobject_t().hobj.pool)); +static_assert(sizeof(crush_hash_t) == sizeof(ghobject_t().hobj.get_hash())); +static_assert(sizeof(snap_t) == sizeof(ghobject_t().hobj.snap.val)); +static_assert(sizeof(gen_t) == sizeof(ghobject_t().generation)); + +class NodeExtentMutable; +class key_view_t; +class key_hobj_t; +enum class KeyT { VIEW, HOBJ }; +template <KeyT> struct _full_key_type; +template<> struct _full_key_type<KeyT::VIEW> { using type = key_view_t; }; +template<> struct _full_key_type<KeyT::HOBJ> { using type = key_hobj_t; }; +template <KeyT type> +using full_key_t = typename _full_key_type<type>::type; + +struct node_offset_packed_t { + node_offset_t value; +} __attribute__((packed)); + +// TODO: consider alignments +struct shard_pool_t { + bool operator==(const shard_pool_t& x) const { + return (shard == x.shard && pool == x.pool); + } + bool operator!=(const shard_pool_t& x) const { return !(*this == x); } + + template <KeyT KT> + static shard_pool_t from_key(const full_key_t<KT>& key); + + 
shard_t shard; + pool_t pool; +} __attribute__((packed)); +inline std::ostream& operator<<(std::ostream& os, const shard_pool_t& sp) { + return os << (unsigned)sp.shard << "," << sp.pool; +} +inline MatchKindCMP compare_to(const shard_pool_t& l, const shard_pool_t& r) { + auto ret = toMatchKindCMP(l.shard, r.shard); + if (ret != MatchKindCMP::EQ) + return ret; + return toMatchKindCMP(l.pool, r.pool); +} + +struct crush_t { + bool operator==(const crush_t& x) const { return crush == x.crush; } + bool operator!=(const crush_t& x) const { return !(*this == x); } + + template <KeyT KT> + static crush_t from_key(const full_key_t<KT>& key); + + crush_hash_t crush; +} __attribute__((packed)); +inline std::ostream& operator<<(std::ostream& os, const crush_t& c) { + return os << c.crush; +} +inline MatchKindCMP compare_to(const crush_t& l, const crush_t& r) { + return toMatchKindCMP(l.crush, r.crush); +} + +struct shard_pool_crush_t { + bool operator==(const shard_pool_crush_t& x) const { + return (shard_pool == x.shard_pool && crush == x.crush); + } + bool operator!=(const shard_pool_crush_t& x) const { return !(*this == x); } + + template <KeyT KT> + static shard_pool_crush_t from_key(const full_key_t<KT>& key); + + shard_pool_t shard_pool; + crush_t crush; +} __attribute__((packed)); +inline std::ostream& operator<<(std::ostream& os, const shard_pool_crush_t& spc) { + return os << spc.shard_pool << "," << spc.crush; +} +inline MatchKindCMP compare_to( + const shard_pool_crush_t& l, const shard_pool_crush_t& r) { + auto ret = compare_to(l.shard_pool, r.shard_pool); + if (ret != MatchKindCMP::EQ) + return ret; + return compare_to(l.crush, r.crush); +} + +struct snap_gen_t { + bool operator==(const snap_gen_t& x) const { + return (snap == x.snap && gen == x.gen); + } + bool operator!=(const snap_gen_t& x) const { return !(*this == x); } + + template <KeyT KT> + static snap_gen_t from_key(const full_key_t<KT>& key); + + snap_t snap; + gen_t gen; +} __attribute__((packed)); 
+inline std::ostream& operator<<(std::ostream& os, const snap_gen_t& sg) { + return os << sg.snap << "," << sg.gen; +} +inline MatchKindCMP compare_to(const snap_gen_t& l, const snap_gen_t& r) { + auto ret = toMatchKindCMP(l.snap, r.snap); + if (ret != MatchKindCMP::EQ) + return ret; + return toMatchKindCMP(l.gen, r.gen); +} + +/** + * string_key_view_t + * + * The layout to store char array as an oid or an ns string which may be + * compressed. + * + * If compressed, the physical block only stores an unsigned int of + * string_size_t, with value 0 denoting Type::MIN, and value max() denoting + * Type::MAX. + * + * If not compressed (Type::STR), the physical block stores the char array and + * a valid string_size_t value. + */ +struct string_key_view_t { + enum class Type {MIN, STR, MAX}; + // presumably the maximum string length is 2KiB + using string_size_t = uint16_t; + string_key_view_t(const char* p_end) { + p_length = p_end - sizeof(string_size_t); + std::memcpy(&length, p_length, sizeof(string_size_t)); + if (length && length != std::numeric_limits<string_size_t>::max()) { + auto _p_key = p_length - length; + p_key = static_cast<const char*>(_p_key); + } else { + p_key = nullptr; + } + } + Type type() const { + if (length == 0u) { + return Type::MIN; + } else if (length == std::numeric_limits<string_size_t>::max()) { + return Type::MAX; + } else { + return Type::STR; + } + } + const char* p_start() const { + if (p_key) { + return p_key; + } else { + return p_length; + } + } + const char* p_next_end() const { + if (p_key) { + return p_start(); + } else { + return p_length + sizeof(string_size_t); + } + } + node_offset_t size() const { + size_t ret = length + sizeof(string_size_t); + assert(ret < NODE_BLOCK_SIZE); + return ret; + } + node_offset_t size_logical() const { + assert(type() == Type::STR); + return length; + } + node_offset_t size_overhead() const { + assert(type() == Type::STR); + return sizeof(string_size_t); + } + + std::string_view 
to_string_view() const { + assert(type() == Type::STR); + return {p_key, length}; + } + bool operator==(const string_key_view_t& x) const { + if (type() == x.type() && type() != Type::STR) + return true; + if (type() != x.type()) + return false; + if (length != x.length) + return false; + return (memcmp(p_key, x.p_key, length) == 0); + } + bool operator!=(const string_key_view_t& x) const { return !(*this == x); } + + static void append_str( + NodeExtentMutable&, std::string_view, char*& p_append); + + static void test_append_str(std::string_view str, char*& p_append) { + p_append -= sizeof(string_size_t); + assert(str.length() < std::numeric_limits<string_size_t>::max()); + string_size_t len = str.length(); + assert(len != 0); + std::memcpy(p_append, &len, sizeof(string_size_t)); + p_append -= len; + std::memcpy(p_append, str.data(), len); + } + + static void append_dedup( + NodeExtentMutable&, const Type& dedup_type, char*& p_append); + + static void test_append_dedup(const Type& dedup_type, char*& p_append) { + p_append -= sizeof(string_size_t); + string_size_t len; + if (dedup_type == Type::MIN) { + len = 0u; + } else if (dedup_type == Type::MAX) { + len = std::numeric_limits<string_size_t>::max(); + } else { + assert(false); + } + std::memcpy(p_append, &len, sizeof(string_size_t)); + } + + const char* p_key; + const char* p_length; + // TODO: remove if p_length is aligned + string_size_t length; +}; + +/** + * string_view_masked_t + * + * A common class to hide the underlying string implementation regardless of a + * string_key_view_t (maybe compressed), a string/string_view, or a compressed + * string. And leverage this consistant class to do compare, print, convert and + * append operations. 
+ */ +class string_view_masked_t { + public: + using Type = string_key_view_t::Type; + explicit string_view_masked_t(const string_key_view_t& index) + : type{index.type()} { + if (type == Type::STR) { + view = index.to_string_view(); + } + } + explicit string_view_masked_t(std::string_view str) + : type{Type::STR}, view{str} {} + + Type get_type() const { return type; } + std::string_view to_string_view() const { + assert(get_type() == Type::STR); + return view; + } + size_t size() const { + assert(get_type() == Type::STR); + return view.size(); + } + bool operator==(const string_view_masked_t& x) const { + if (get_type() == x.get_type() && get_type() != Type::STR) + return true; + if (get_type() != x.get_type()) + return false; + if (size() != x.size()) + return false; + return (memcmp(view.data(), x.view.data(), size()) == 0); + } + bool operator!=(const string_view_masked_t& x) const { return !(*this == x); } + static auto min() { return string_view_masked_t{Type::MIN}; } + static auto max() { return string_view_masked_t{Type::MAX}; } + + private: + explicit string_view_masked_t(Type type) + : type{type} {} + Type type; + std::string_view view; +}; +inline MatchKindCMP compare_to(const string_view_masked_t& l, const string_view_masked_t& r) { + using Type = string_view_masked_t::Type; + auto l_type = l.get_type(); + auto r_type = r.get_type(); + if (l_type == Type::STR && r_type == Type::STR) { + assert(l.size() && r.size()); + return toMatchKindCMP(l.to_string_view(), r.to_string_view()); + } else if (l_type == r_type) { + return MatchKindCMP::EQ; + } else if (l_type == Type::MIN || r_type == Type::MAX) { + return MatchKindCMP::LT; + } else { // l_type == Type::MAX || r_type == Type::MIN + return MatchKindCMP::GT; + } +} +inline MatchKindCMP compare_to(std::string_view l, const string_view_masked_t& r) { + using Type = string_view_masked_t::Type; + assert(l.length()); + auto r_type = r.get_type(); + if (r_type == Type::MIN) { + return MatchKindCMP::GT; + } else 
if (r_type == Type::MAX) { + return MatchKindCMP::LT; + } else { // r_type == Type::STR + assert(r.size()); + return toMatchKindCMP(l, r.to_string_view()); + } +} +inline MatchKindCMP compare_to(const string_view_masked_t& l, std::string_view r) { + return reverse(compare_to(r, l)); +} +inline std::ostream& operator<<(std::ostream& os, const string_view_masked_t& masked) { + using Type = string_view_masked_t::Type; + auto type = masked.get_type(); + if (type == Type::MIN) { + return os << "MIN"; + } else if (type == Type::MAX) { + return os << "MAX"; + } else { // type == Type::STR + auto view = masked.to_string_view(); + if (view.length() <= 12) { + os << "\"" << view << "\""; + } else { + os << "\"" << std::string_view(view.data(), 4) << ".." + << std::string_view(view.data() + view.length() - 2, 2) + << "/" << view.length() << "B\""; + } + return os; + } +} + +struct ns_oid_view_t { + using string_size_t = string_key_view_t::string_size_t; + using Type = string_key_view_t::Type; + + ns_oid_view_t(const char* p_end) : nspace(p_end), oid(nspace.p_next_end()) {} + Type type() const { return oid.type(); } + const char* p_start() const { return oid.p_start(); } + node_offset_t size() const { + if (type() == Type::STR) { + size_t ret = nspace.size() + oid.size(); + assert(ret < NODE_BLOCK_SIZE); + return ret; + } else { + return sizeof(string_size_t); + } + } + node_offset_t size_logical() const { + assert(type() == Type::STR); + return nspace.size_logical() + oid.size_logical(); + } + node_offset_t size_overhead() const { + assert(type() == Type::STR); + return nspace.size_overhead() + oid.size_overhead(); + } + bool operator==(const ns_oid_view_t& x) const { + return (string_view_masked_t{nspace} == string_view_masked_t{x.nspace} && + string_view_masked_t{oid} == string_view_masked_t{x.oid}); + } + bool operator!=(const ns_oid_view_t& x) const { return !(*this == x); } + + template <KeyT KT> + static node_offset_t estimate_size(const full_key_t<KT>& key); + + 
template <KeyT KT> + static void append(NodeExtentMutable&, + const full_key_t<KT>& key, + char*& p_append); + + static void append(NodeExtentMutable& mut, + const ns_oid_view_t& view, + char*& p_append) { + if (view.type() == Type::STR) { + string_key_view_t::append_str(mut, view.nspace.to_string_view(), p_append); + string_key_view_t::append_str(mut, view.oid.to_string_view(), p_append); + } else { + string_key_view_t::append_dedup(mut, view.type(), p_append); + } + } + + template <KeyT KT> + static void test_append(const full_key_t<KT>& key, char*& p_append); + + string_key_view_t nspace; + string_key_view_t oid; +}; +inline std::ostream& operator<<(std::ostream& os, const ns_oid_view_t& ns_oid) { + return os << string_view_masked_t{ns_oid.nspace} << "," + << string_view_masked_t{ns_oid.oid}; +} +inline MatchKindCMP compare_to(const ns_oid_view_t& l, const ns_oid_view_t& r) { + auto ret = compare_to(string_view_masked_t{l.nspace}, + string_view_masked_t{r.nspace}); + if (ret != MatchKindCMP::EQ) + return ret; + return compare_to(string_view_masked_t{l.oid}, + string_view_masked_t{r.oid}); +} + +/** + * key_hobj_t + * + * A specialized implementation of a full_key_t storing a ghobject_t passed + * from user. 
+ */ +class key_hobj_t { + public: + explicit key_hobj_t(const ghobject_t& ghobj) : ghobj{ghobj} {} + /* + * common interfaces as a full_key_t + */ + shard_t shard() const { + return ghobj.shard_id; + } + pool_t pool() const { + return ghobj.hobj.pool; + } + crush_hash_t crush() const { + return ghobj.hobj.get_hash(); + } + std::string_view nspace() const { + return ghobj.hobj.nspace; + } + std::string_view oid() const { + return ghobj.hobj.oid.name; + } + ns_oid_view_t::Type dedup_type() const { + return _dedup_type; + } + snap_t snap() const { + return ghobj.hobj.snap; + } + gen_t gen() const { + return ghobj.generation; + } + + bool operator==(const full_key_t<KeyT::VIEW>& o) const; + bool operator==(const full_key_t<KeyT::HOBJ>& o) const; + bool operator!=(const full_key_t<KeyT::VIEW>& o) const { + return !operator==(o); + } + bool operator!=(const full_key_t<KeyT::HOBJ>& o) const { + return !operator==(o); + } + + std::ostream& dump(std::ostream& os) const { + os << "key_hobj(" << (unsigned)shard() << "," + << pool() << "," << crush() << "; " + << string_view_masked_t{nspace()} << "," + << string_view_masked_t{oid()} << "; " + << snap() << "," << gen() << ")"; + return os; + } + + private: + ns_oid_view_t::Type _dedup_type = ns_oid_view_t::Type::STR; + ghobject_t ghobj; +}; +inline std::ostream& operator<<(std::ostream& os, const key_hobj_t& key) { + return key.dump(os); +} + +/** + * key_view_t + * + * A specialized implementation of a full_key_t pointing to the locations + * storing the full key in a tree node. 
+ */ +class key_view_t { + public: + /** + * common interfaces as a full_key_t + */ + shard_t shard() const { + return shard_pool_packed().shard; + } + pool_t pool() const { + return shard_pool_packed().pool; + } + crush_hash_t crush() const { + return crush_packed().crush; + } + std::string_view nspace() const { + return ns_oid_view().nspace.to_string_view(); + } + std::string_view oid() const { + return ns_oid_view().oid.to_string_view(); + } + ns_oid_view_t::Type dedup_type() const { + return ns_oid_view().type(); + } + snap_t snap() const { + return snap_gen_packed().snap; + } + gen_t gen() const { + return snap_gen_packed().gen; + } + + bool operator==(const full_key_t<KeyT::VIEW>& o) const; + bool operator==(const full_key_t<KeyT::HOBJ>& o) const; + bool operator!=(const full_key_t<KeyT::VIEW>& o) const { + return !operator==(o); + } + bool operator!=(const full_key_t<KeyT::HOBJ>& o) const { + return !operator==(o); + } + + /** + * key_view_t specific interfaces + */ + bool has_shard_pool() const { + return p_shard_pool != nullptr; + } + bool has_crush() const { + return p_crush != nullptr; + } + bool has_ns_oid() const { + return p_ns_oid.has_value(); + } + bool has_snap_gen() const { + return p_snap_gen != nullptr; + } + + const shard_pool_t& shard_pool_packed() const { + assert(has_shard_pool()); + return *p_shard_pool; + } + const crush_t& crush_packed() const { + assert(has_crush()); + return *p_crush; + } + const ns_oid_view_t& ns_oid_view() const { + assert(has_ns_oid()); + return *p_ns_oid; + } + const snap_gen_t& snap_gen_packed() const { + assert(has_snap_gen()); + return *p_snap_gen; + } + + size_t size_logical() const { + return sizeof(shard_t) + sizeof(pool_t) + sizeof(crush_hash_t) + + sizeof(snap_t) + sizeof(gen_t) + ns_oid_view().size_logical(); + } + + ghobject_t to_ghobj() const { + ghobject_t ghobj; + ghobj.shard_id.id = shard(); + ghobj.hobj.pool = pool(); + ghobj.hobj.set_hash(crush()); + ghobj.hobj.nspace = nspace(); + 
ghobj.hobj.oid.name = oid(); + ghobj.hobj.snap = snap(); + ghobj.generation = gen(); + return ghobj; + } + + void replace(const crush_t& key) { p_crush = &key; } + void set(const crush_t& key) { + assert(!has_crush()); + replace(key); + } + void replace(const shard_pool_crush_t& key) { p_shard_pool = &key.shard_pool; } + void set(const shard_pool_crush_t& key) { + set(key.crush); + assert(!has_shard_pool()); + replace(key); + } + void replace(const ns_oid_view_t& key) { p_ns_oid = key; } + void set(const ns_oid_view_t& key) { + assert(!has_ns_oid()); + replace(key); + } + void replace(const snap_gen_t& key) { p_snap_gen = &key; } + void set(const snap_gen_t& key) { + assert(!has_snap_gen()); + replace(key); + } + + std::ostream& dump(std::ostream& os) const { + os << "key_view("; + if (has_shard_pool()) { + os << (unsigned)shard() << "," << pool() << ","; + } else { + os << "X,X,"; + } + if (has_crush()) { + os << crush() << "; "; + } else { + os << "X; "; + } + if (has_ns_oid()) { + os << ns_oid_view() << "; "; + } else { + os << "X,X; "; + } + if (has_snap_gen()) { + os << snap() << "," << gen() << ")"; + } else { + os << "X,X)"; + } + return os; + } + + private: + const shard_pool_t* p_shard_pool = nullptr; + const crush_t* p_crush = nullptr; + std::optional<ns_oid_view_t> p_ns_oid; + const snap_gen_t* p_snap_gen = nullptr; +}; + +inline MatchKindCMP compare_to(std::string_view l, std::string_view r) { + return toMatchKindCMP(l, r); +} +template <KeyT TypeL, KeyT TypeR> +bool compare_full_key(const full_key_t<TypeL>& l, const full_key_t<TypeR>& r) { + if (l.shard() != r.shard()) + return false; + if (l.pool() != r.pool()) + return false; + if (l.crush() != r.crush()) + return false; + if (compare_to(l.nspace(), r.nspace()) != MatchKindCMP::EQ) + return false; + if (compare_to(l.oid(), r.oid()) != MatchKindCMP::EQ) + return false; + if (l.snap() != r.snap()) + return false; + if (l.gen() != r.gen()) + return false; + return true; +} + +inline bool 
key_hobj_t::operator==(const full_key_t<KeyT::VIEW>& o) const { + return compare_full_key<KeyT::HOBJ, KeyT::VIEW>(*this, o); +} +inline bool key_hobj_t::operator==(const full_key_t<KeyT::HOBJ>& o) const { + return compare_full_key<KeyT::HOBJ, KeyT::HOBJ>(*this, o); +} +inline bool key_view_t::operator==(const full_key_t<KeyT::VIEW>& o) const { + return compare_full_key<KeyT::VIEW, KeyT::VIEW>(*this, o); +} +inline bool key_view_t::operator==(const full_key_t<KeyT::HOBJ>& o) const { + return compare_full_key<KeyT::VIEW, KeyT::HOBJ>(*this, o); +} + +inline std::ostream& operator<<(std::ostream& os, const key_view_t& key) { + return key.dump(os); +} + +template <KeyT Type> +MatchKindCMP compare_to(const full_key_t<Type>& key, const shard_pool_t& target) { + auto ret = toMatchKindCMP(key.shard(), target.shard); + if (ret != MatchKindCMP::EQ) + return ret; + return toMatchKindCMP(key.pool(), target.pool); +} + +template <KeyT Type> +MatchKindCMP compare_to(const full_key_t<Type>& key, const crush_t& target) { + return toMatchKindCMP(key.crush(), target.crush); +} + +template <KeyT Type> +MatchKindCMP compare_to(const full_key_t<Type>& key, const shard_pool_crush_t& target) { + auto ret = compare_to<Type>(key, target.shard_pool); + if (ret != MatchKindCMP::EQ) + return ret; + return compare_to<Type>(key, target.crush); +} + +template <KeyT Type> +MatchKindCMP compare_to(const full_key_t<Type>& key, const ns_oid_view_t& target) { + auto ret = compare_to(key.nspace(), string_view_masked_t{target.nspace}); + if (ret != MatchKindCMP::EQ) + return ret; + return compare_to(key.oid(), string_view_masked_t{target.oid}); +} + +template <KeyT Type> +MatchKindCMP compare_to(const full_key_t<Type>& key, const snap_gen_t& target) { + auto ret = toMatchKindCMP(key.snap(), target.snap); + if (ret != MatchKindCMP::EQ) + return ret; + return toMatchKindCMP(key.gen(), target.gen); +} + +template <KeyT KT> +shard_pool_t shard_pool_t::from_key(const full_key_t<KT>& key) { + if constexpr (KT 
== KeyT::VIEW) { + return key.shard_pool_packed(); + } else { + return {key.shard(), key.pool()}; + } +} + +template <KeyT KT> +crush_t crush_t::from_key(const full_key_t<KT>& key) { + if constexpr (KT == KeyT::VIEW) { + return key.crush_packed(); + } else { + return {key.crush()}; + } +} + +template <KeyT KT> +shard_pool_crush_t shard_pool_crush_t::from_key(const full_key_t<KT>& key) { + return {shard_pool_t::from_key<KT>(key), crush_t::from_key<KT>(key)}; +} + +template <KeyT KT> +snap_gen_t snap_gen_t::from_key(const full_key_t<KT>& key) { + if constexpr (KT == KeyT::VIEW) { + return key.snap_gen_packed(); + } else { + return {key.snap(), key.gen()}; + } +} + +template <KeyT KT> +node_offset_t ns_oid_view_t::estimate_size(const full_key_t<KT>& key) { + if constexpr (KT == KeyT::VIEW) { + return key.ns_oid_view().size(); + } else { + if (key.dedup_type() != Type::STR) { + // size after deduplication + return sizeof(string_size_t); + } else { + return 2 * sizeof(string_size_t) + key.nspace().size() + key.oid().size(); + } + } +} + +template <KeyT KT> +void ns_oid_view_t::append( + NodeExtentMutable& mut, const full_key_t<KT>& key, char*& p_append) { + if (key.dedup_type() == Type::STR) { + string_key_view_t::append_str(mut, key.nspace(), p_append); + string_key_view_t::append_str(mut, key.oid(), p_append); + } else { + string_key_view_t::append_dedup(mut, key.dedup_type(), p_append); + } +} + +template <KeyT KT> +void ns_oid_view_t::test_append(const full_key_t<KT>& key, char*& p_append) { + if (key.dedup_type() == Type::STR) { + string_key_view_t::test_append_str(key.nspace(), p_append); + string_key_view_t::test_append_str(key.oid(), p_append); + } else { + string_key_view_t::test_append_dedup(key.dedup_type(), p_append); + } +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.cc new file mode 100644 index 00000000000..4b7d3170f11 --- /dev/null +++ 
b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.cc @@ -0,0 +1,316 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "node_stage.h" + +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_mutable.h" +#include "node_stage_layout.h" + +namespace crimson::os::seastore::onode { + +#define NODE_T node_extent_t<FieldType, NODE_TYPE> +#define NODE_INST(FT, NT) node_extent_t<FT, NT> +#define NODE_TEMPLATE(FT, NT) template class NODE_INST(FT, NT) +NODE_TEMPLATE(node_fields_0_t, node_type_t::INTERNAL); +NODE_TEMPLATE(node_fields_1_t, node_type_t::INTERNAL); +NODE_TEMPLATE(node_fields_2_t, node_type_t::INTERNAL); +NODE_TEMPLATE(internal_fields_3_t, node_type_t::INTERNAL); +NODE_TEMPLATE(node_fields_0_t, node_type_t::LEAF); +NODE_TEMPLATE(node_fields_1_t, node_type_t::LEAF); +NODE_TEMPLATE(node_fields_2_t, node_type_t::LEAF); +NODE_TEMPLATE(leaf_fields_3_t, node_type_t::LEAF); + +template <typename FieldType, node_type_t NODE_TYPE> +const char* NODE_T::p_left_bound() const { + if constexpr (std::is_same_v<FieldType, internal_fields_3_t>) { + // N3 internal node doesn't have the right part + return nullptr; + } else { + auto ret = p_start() + fields().get_item_end_offset(keys()); + if constexpr (NODE_TYPE == node_type_t::INTERNAL) { + if (is_level_tail()) { + ret -= sizeof(laddr_t); + } + } + return ret; + } +} + +template <typename FieldType, node_type_t NODE_TYPE> +node_offset_t NODE_T::size_to_nxt_at(size_t index) const { + assert(index < keys()); + if constexpr (FIELD_TYPE == field_type_t::N0 || + FIELD_TYPE == field_type_t::N1) { + return FieldType::estimate_insert_one(); + } else if constexpr (FIELD_TYPE == field_type_t::N2) { + auto p_end = p_start() + p_fields->get_item_end_offset(index); + return FieldType::estimate_insert_one() + ns_oid_view_t(p_end).size(); + } else { + ceph_abort("N3 node is not nested"); + } +} + +template <typename FieldType, node_type_t 
NODE_TYPE> +memory_range_t NODE_T::get_nxt_container(size_t index) const { + if constexpr (std::is_same_v<FieldType, internal_fields_3_t>) { + ceph_abort("N3 internal node doesn't have the right part"); + } else { + node_offset_t item_start_offset = p_fields->get_item_start_offset(index); + node_offset_t item_end_offset = p_fields->get_item_end_offset(index); + assert(item_start_offset < item_end_offset); + auto item_p_start = p_start() + item_start_offset; + auto item_p_end = p_start() + item_end_offset; + if constexpr (FIELD_TYPE == field_type_t::N2) { + // range for sub_items_t<NODE_TYPE> + item_p_end = ns_oid_view_t(item_p_end).p_start(); + assert(item_p_start < item_p_end); + } else { + // range for item_iterator_t<NODE_TYPE> + } + return {item_p_start, item_p_end}; + } +} + +template <typename FieldType, node_type_t NODE_TYPE> +void NODE_T::bootstrap_extent( + NodeExtentMutable& mut, + field_type_t field_type, node_type_t node_type, + bool is_level_tail, level_t level) { + node_header_t::bootstrap_extent( + mut, field_type, node_type, is_level_tail, level); + mut.copy_in_relative( + sizeof(node_header_t), typename FieldType::num_keys_t(0u)); +} + +template <typename FieldType, node_type_t NODE_TYPE> +void NODE_T::update_is_level_tail( + NodeExtentMutable& mut, const node_extent_t& extent, bool value) { + node_header_t::update_is_level_tail(mut, extent.p_fields->header, value); +} + +template <typename FieldType, node_type_t NODE_TYPE> +template <KeyT KT> +memory_range_t NODE_T::insert_prefix_at( + NodeExtentMutable& mut, const node_extent_t& node, const full_key_t<KT>& key, + size_t index, node_offset_t size, const char* p_left_bound) { + if constexpr (FIELD_TYPE == field_type_t::N0 || + FIELD_TYPE == field_type_t::N1) { + assert(index <= node.keys()); + assert(p_left_bound == node.p_left_bound()); + assert(size > FieldType::estimate_insert_one()); + auto size_right = size - FieldType::estimate_insert_one(); + const char* p_insert = node.p_start() + 
node.fields().get_item_end_offset(index); + const char* p_insert_front = p_insert - size_right; + FieldType::template insert_at<KT>(mut, key, node.fields(), index, size_right); + mut.shift_absolute(p_left_bound, + p_insert - p_left_bound, + -(int)size_right); + return {p_insert_front, p_insert}; + } else if constexpr (FIELD_TYPE == field_type_t::N2) { + ceph_abort("not implemented"); + } else { + ceph_abort("impossible"); + } +} +#define IPA_TEMPLATE(FT, NT, KT) \ + template memory_range_t NODE_INST(FT, NT)::insert_prefix_at<KT>( \ + NodeExtentMutable&, const node_extent_t&, const full_key_t<KT>&, \ + size_t, node_offset_t, const char*) +IPA_TEMPLATE(node_fields_0_t, node_type_t::INTERNAL, KeyT::VIEW); +IPA_TEMPLATE(node_fields_1_t, node_type_t::INTERNAL, KeyT::VIEW); +IPA_TEMPLATE(node_fields_2_t, node_type_t::INTERNAL, KeyT::VIEW); +IPA_TEMPLATE(node_fields_0_t, node_type_t::LEAF, KeyT::VIEW); +IPA_TEMPLATE(node_fields_1_t, node_type_t::LEAF, KeyT::VIEW); +IPA_TEMPLATE(node_fields_2_t, node_type_t::LEAF, KeyT::VIEW); +IPA_TEMPLATE(node_fields_0_t, node_type_t::INTERNAL, KeyT::HOBJ); +IPA_TEMPLATE(node_fields_1_t, node_type_t::INTERNAL, KeyT::HOBJ); +IPA_TEMPLATE(node_fields_2_t, node_type_t::INTERNAL, KeyT::HOBJ); +IPA_TEMPLATE(node_fields_0_t, node_type_t::LEAF, KeyT::HOBJ); +IPA_TEMPLATE(node_fields_1_t, node_type_t::LEAF, KeyT::HOBJ); +IPA_TEMPLATE(node_fields_2_t, node_type_t::LEAF, KeyT::HOBJ); + +template <typename FieldType, node_type_t NODE_TYPE> +void NODE_T::update_size_at( + NodeExtentMutable& mut, const node_extent_t& node, size_t index, int change) { + assert(index < node.keys()); + FieldType::update_size_at(mut, node.fields(), index, change); +} + +template <typename FieldType, node_type_t NODE_TYPE> +node_offset_t NODE_T::trim_until( + NodeExtentMutable& mut, const node_extent_t& node, size_t index) { + assert(!node.is_level_tail()); + auto keys = node.keys(); + assert(index <= keys); + if (index == keys) { + return 0; + } + if constexpr 
(std::is_same_v<FieldType, internal_fields_3_t>) { + ceph_abort("not implemented"); + } else { + mut.copy_in_absolute( + (void*)&node.p_fields->num_keys, num_keys_t(index)); + } + // no need to calculate trim size for node + return 0; +} + +template <typename FieldType, node_type_t NODE_TYPE> +node_offset_t NODE_T::trim_at( + NodeExtentMutable& mut, const node_extent_t& node, + size_t index, node_offset_t trimmed) { + assert(!node.is_level_tail()); + assert(index < node.keys()); + if constexpr (std::is_same_v<FieldType, internal_fields_3_t>) { + ceph_abort("not implemented"); + } else { + node_offset_t offset = node.p_fields->get_item_start_offset(index); + size_t new_offset = offset + trimmed; + assert(new_offset < node.p_fields->get_item_end_offset(index)); + mut.copy_in_absolute(const_cast<void*>(node.p_fields->p_offset(index)), + node_offset_t(new_offset)); + mut.copy_in_absolute( + (void*)&node.p_fields->num_keys, num_keys_t(index + 1)); + } + // no need to calculate trim size for node + return 0; +} + +#define APPEND_T node_extent_t<FieldType, NODE_TYPE>::Appender<KT> +#define APPEND_TEMPLATE(FT, NT, KT) template class node_extent_t<FT, NT>::Appender<KT> +APPEND_TEMPLATE(node_fields_0_t, node_type_t::INTERNAL, KeyT::VIEW); +APPEND_TEMPLATE(node_fields_1_t, node_type_t::INTERNAL, KeyT::VIEW); +APPEND_TEMPLATE(node_fields_2_t, node_type_t::INTERNAL, KeyT::VIEW); +APPEND_TEMPLATE(internal_fields_3_t, node_type_t::INTERNAL, KeyT::VIEW); +APPEND_TEMPLATE(node_fields_0_t, node_type_t::LEAF, KeyT::VIEW); +APPEND_TEMPLATE(node_fields_1_t, node_type_t::LEAF, KeyT::VIEW); +APPEND_TEMPLATE(node_fields_2_t, node_type_t::LEAF, KeyT::VIEW); +APPEND_TEMPLATE(leaf_fields_3_t, node_type_t::LEAF, KeyT::VIEW); +APPEND_TEMPLATE(node_fields_0_t, node_type_t::INTERNAL, KeyT::HOBJ); +APPEND_TEMPLATE(node_fields_1_t, node_type_t::INTERNAL, KeyT::HOBJ); +APPEND_TEMPLATE(node_fields_2_t, node_type_t::INTERNAL, KeyT::HOBJ); +APPEND_TEMPLATE(internal_fields_3_t, node_type_t::INTERNAL, 
KeyT::HOBJ); +APPEND_TEMPLATE(node_fields_0_t, node_type_t::LEAF, KeyT::HOBJ); +APPEND_TEMPLATE(node_fields_1_t, node_type_t::LEAF, KeyT::HOBJ); +APPEND_TEMPLATE(node_fields_2_t, node_type_t::LEAF, KeyT::HOBJ); +APPEND_TEMPLATE(leaf_fields_3_t, node_type_t::LEAF, KeyT::HOBJ); + +template <typename FieldType, node_type_t NODE_TYPE> +template <KeyT KT> +void APPEND_T::append(const node_extent_t& src, size_t from, size_t items) { + assert(from <= src.keys()); + if (p_src == nullptr) { + p_src = &src; + } else { + assert(p_src == &src); + } + if (items == 0) { + return; + } + assert(from < src.keys()); + assert(from + items <= src.keys()); + num_keys += items; + if constexpr (std::is_same_v<FieldType, internal_fields_3_t>) { + ceph_abort("impossible path"); + } else { + // append left part forwards + node_offset_t offset_left_start = src.fields().get_key_start_offset(from); + node_offset_t offset_left_end = src.fields().get_key_start_offset(from + items); + node_offset_t left_size = offset_left_end - offset_left_start; + if (num_keys == 0) { + // no need to adjust offset + assert(from == 0); + assert(p_start + offset_left_start == p_append_left); + p_mut->copy_in_absolute(p_append_left, + src.p_start() + offset_left_start, left_size); + } else { + node_offset_t step_size = FieldType::estimate_insert_one(); + node_offset_t offset_base = src.fields().get_item_end_offset(from); + int offset_change = p_append_right - p_start - offset_base; + auto p_offset_dst = p_append_left; + if constexpr (FIELD_TYPE != field_type_t::N2) { + // copy keys + p_mut->copy_in_absolute(p_append_left, + src.p_start() + offset_left_start, left_size); + // point to offset for update + p_offset_dst += sizeof(typename FieldType::key_t); + } + for (auto i = from; i < from + items; ++i) { + p_mut->copy_in_absolute(p_offset_dst, + node_offset_t(src.fields().get_item_start_offset(i) + offset_change)); + p_offset_dst += step_size; + } + assert(p_append_left + left_size + sizeof(typename 
FieldType::key_t) == + p_offset_dst); + } + p_append_left += left_size; + + // append right part backwards + node_offset_t offset_right_start = src.fields().get_item_end_offset(from + items); + node_offset_t offset_right_end = src.fields().get_item_end_offset(from); + node_offset_t right_size = offset_right_end - offset_right_start; + p_append_right -= right_size; + p_mut->copy_in_absolute(p_append_right, + src.p_start() + offset_right_start, right_size); + } +} + +template <typename FieldType, node_type_t NODE_TYPE> +template <KeyT KT> +void APPEND_T::append( + const full_key_t<KT>& key, const value_t& value, const value_t*& p_value) { + if constexpr (FIELD_TYPE == field_type_t::N3) { + ceph_abort("not implemented"); + } else { + ceph_abort("should not happen"); + } +} + +template <typename FieldType, node_type_t NODE_TYPE> +template <KeyT KT> +std::tuple<NodeExtentMutable*, char*> +APPEND_T::open_nxt(const key_get_type& partial_key) { + if constexpr (FIELD_TYPE == field_type_t::N0 || + FIELD_TYPE == field_type_t::N1) { + FieldType::append_key(*p_mut, partial_key, p_append_left); + } else if constexpr (FIELD_TYPE == field_type_t::N2) { + FieldType::append_key(*p_mut, partial_key, p_append_right); + } else { + ceph_abort("impossible path"); + } + return {p_mut, p_append_right}; +} + +template <typename FieldType, node_type_t NODE_TYPE> +template <KeyT KT> +std::tuple<NodeExtentMutable*, char*> +APPEND_T::open_nxt(const full_key_t<KT>& key) { + if constexpr (FIELD_TYPE == field_type_t::N0 || + FIELD_TYPE == field_type_t::N1) { + FieldType::template append_key<KT>(*p_mut, key, p_append_left); + } else if constexpr (FIELD_TYPE == field_type_t::N2) { + FieldType::template append_key<KT>(*p_mut, key, p_append_right); + } else { + ceph_abort("impossible path"); + } + return {p_mut, p_append_right}; +} + +template <typename FieldType, node_type_t NODE_TYPE> +template <KeyT KT> +char* APPEND_T::wrap() { + assert(p_append_left <= p_append_right); + assert(p_src); + if 
constexpr (NODE_TYPE == node_type_t::INTERNAL) { + if (p_src->is_level_tail()) { + laddr_t tail_value = p_src->get_end_p_laddr()->value; + p_append_right -= sizeof(laddr_t); + assert(p_append_left <= p_append_right); + p_mut->copy_in_absolute(p_append_right, tail_value); + } + } + p_mut->copy_in_absolute(p_start + offsetof(FieldType, num_keys), num_keys); + return p_append_left; +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.h new file mode 100644 index 00000000000..d1a704d4a0a --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage.h @@ -0,0 +1,215 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/os/seastore/onode_manager/staged-fltree/node_types.h" +#include "key_layout.h" +#include "stage_types.h" + +namespace crimson::os::seastore::onode { + +class NodeExtentMutable; + +/** + * node_extent_t + * + * The top indexing stage implementation for node N0/N1/N2/N3, implements + * staged contract as an indexable container, and provides access to node + * header. + * + * The specific field layouts are defined by FieldType, which is one of + * node_fields_0_t, node_fields_1_t, node_fields_2_t, internal_fields_3_t and + * leaf_fields_3_t. For diagrams, see node_stage_layout.h. 
+ */ +template <typename FieldType, node_type_t _NODE_TYPE> +class node_extent_t { + public: + using value_t = value_type_t<_NODE_TYPE>; + using num_keys_t = typename FieldType::num_keys_t; + static constexpr node_type_t NODE_TYPE = _NODE_TYPE; + static constexpr field_type_t FIELD_TYPE = FieldType::FIELD_TYPE; + static constexpr node_offset_t EXTENT_SIZE = + (FieldType::SIZE + DISK_BLOCK_SIZE - 1u) / DISK_BLOCK_SIZE * DISK_BLOCK_SIZE; + + // TODO: remove + node_extent_t() = default; + + node_extent_t(const FieldType* p_fields) : p_fields{p_fields} { + validate(*p_fields); + } + + const char* p_start() const { return fields_start(*p_fields); } + + const char* off_to_ptr(node_offset_t off) const { + assert(off <= FieldType::SIZE); + return p_start() + off; + } + + node_offset_t ptr_to_off(const void* ptr) const { + auto _ptr = static_cast<const char*>(ptr); + assert(_ptr >= p_start()); + auto off = _ptr - p_start(); + assert(off <= FieldType::SIZE); + return off; + } + + bool is_level_tail() const { return p_fields->is_level_tail(); } + level_t level() const { return p_fields->header.level; } + node_offset_t free_size() const { + return p_fields->template free_size_before<NODE_TYPE>(keys()); + } + node_offset_t total_size() const { return p_fields->total_size(); } + const char* p_left_bound() const; + template <node_type_t T = NODE_TYPE> + std::enable_if_t<T == node_type_t::INTERNAL, const laddr_packed_t*> + get_end_p_laddr() const { + assert(is_level_tail()); + if constexpr (FIELD_TYPE == field_type_t::N3) { + return &p_fields->child_addrs[keys()]; + } else { + auto offset_start = p_fields->get_item_end_offset(keys()); + assert(offset_start <= FieldType::SIZE); + offset_start -= sizeof(laddr_packed_t); + auto p_addr = p_start() + offset_start; + return reinterpret_cast<const laddr_packed_t*>(p_addr); + } + } + + // container type system + using key_get_type = typename FieldType::key_get_type; + static constexpr auto CONTAINER_TYPE = ContainerType::INDEXABLE; + 
size_t keys() const { return p_fields->num_keys; } + key_get_type operator[] (size_t index) const { return p_fields->get_key(index); } + node_offset_t size_before(size_t index) const { + auto free_size = p_fields->template free_size_before<NODE_TYPE>(index); + assert(total_size() >= free_size); + return total_size() - free_size; + } + node_offset_t size_to_nxt_at(size_t index) const; + node_offset_t size_overhead_at(size_t index) const { + return FieldType::ITEM_OVERHEAD; } + memory_range_t get_nxt_container(size_t index) const; + + template <typename T = FieldType> + std::enable_if_t<T::FIELD_TYPE == field_type_t::N3, const value_t*> + get_p_value(size_t index) const { + assert(index < keys()); + if constexpr (NODE_TYPE == node_type_t::INTERNAL) { + return &p_fields->child_addrs[index]; + } else { + auto range = get_nxt_container(index); + auto ret = reinterpret_cast<const onode_t*>(range.p_start); + assert(range.p_start + ret->size == range.p_end); + return ret; + } + } + + static void validate(const FieldType& fields) { +#ifndef NDEBUG + assert(fields.header.get_node_type() == NODE_TYPE); + assert(fields.header.get_field_type() == FieldType::FIELD_TYPE); + if constexpr (NODE_TYPE == node_type_t::INTERNAL) { + assert(fields.header.level > 0u); + } else { + assert(fields.header.level == 0u); + } +#endif + } + + static void bootstrap_extent( + NodeExtentMutable&, field_type_t, node_type_t, bool, level_t); + + static void update_is_level_tail(NodeExtentMutable&, const node_extent_t&, bool); + + static node_offset_t header_size() { return FieldType::HEADER_SIZE; } + + template <KeyT KT> + static node_offset_t estimate_insert( + const full_key_t<KT>& key, const value_t& value) { + auto size = FieldType::estimate_insert_one(); + if constexpr (FIELD_TYPE == field_type_t::N2) { + size += ns_oid_view_t::estimate_size<KT>(key); + } else if constexpr (FIELD_TYPE == field_type_t::N3 && + NODE_TYPE == node_type_t::LEAF) { + size += value.size; + } + return size; + } + + 
template <KeyT KT> + static const value_t* insert_at( + NodeExtentMutable& mut, const node_extent_t&, + const full_key_t<KT>& key, const value_t& value, + size_t index, node_offset_t size, const char* p_left_bound) { + if constexpr (FIELD_TYPE == field_type_t::N3) { + ceph_abort("not implemented"); + } else { + ceph_abort("impossible"); + } + } + + template <KeyT KT> + static memory_range_t insert_prefix_at( + NodeExtentMutable&, const node_extent_t&, + const full_key_t<KT>& key, + size_t index, node_offset_t size, const char* p_left_bound); + + static void update_size_at( + NodeExtentMutable&, const node_extent_t&, size_t index, int change); + + static node_offset_t trim_until( + NodeExtentMutable&, const node_extent_t&, size_t index); + static node_offset_t trim_at(NodeExtentMutable&, const node_extent_t&, + size_t index, node_offset_t trimmed); + + template <KeyT KT> + class Appender; + + private: + const FieldType& fields() const { return *p_fields; } + const FieldType* p_fields; +}; + +template <typename FieldType, node_type_t NODE_TYPE> +template <KeyT KT> +class node_extent_t<FieldType, NODE_TYPE>::Appender { + public: + Appender(NodeExtentMutable* p_mut, char* p_append) + : p_mut{p_mut}, p_start{p_append} { +#ifndef NDEBUG + auto p_fields = reinterpret_cast<const FieldType*>(p_append); + assert(*(p_fields->header.get_field_type()) == FIELD_TYPE); + assert(p_fields->header.get_node_type() == NODE_TYPE); + assert(p_fields->num_keys == 0); +#endif + p_append_left = p_start + FieldType::HEADER_SIZE; + p_append_right = p_start + FieldType::SIZE; + } + void append(const node_extent_t& src, size_t from, size_t items); + void append(const full_key_t<KT>&, const value_t&, const value_t*&); + char* wrap(); + std::tuple<NodeExtentMutable*, char*> open_nxt(const key_get_type&); + std::tuple<NodeExtentMutable*, char*> open_nxt(const full_key_t<KT>&); + void wrap_nxt(char* p_append) { + if constexpr (FIELD_TYPE != field_type_t::N3) { + assert(p_append < p_append_right); 
+ assert(p_append_left < p_append); + p_append_right = p_append; + FieldType::append_offset(*p_mut, p_append - p_start, p_append_left); + ++num_keys; + } else { + ceph_abort("not implemented"); + } + } + + private: + const node_extent_t* p_src = nullptr; + NodeExtentMutable* p_mut; + char* p_start; + char* p_append_left; + char* p_append_right; + num_keys_t num_keys = 0; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.cc new file mode 100644 index 00000000000..2809803eb55 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.cc @@ -0,0 +1,95 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "node_stage_layout.h" + +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_mutable.h" + +namespace crimson::os::seastore::onode { + +void node_header_t::bootstrap_extent( + NodeExtentMutable& mut, + field_type_t field_type, node_type_t node_type, + bool is_level_tail, level_t level) { + node_header_t header; + header.set_field_type(field_type); + header.set_node_type(node_type); + header.set_is_level_tail(is_level_tail); + header.level = level; + mut.copy_in_relative(0, header); +} + +void node_header_t::update_is_level_tail( + NodeExtentMutable& mut, const node_header_t& header, bool value) { + auto& _header = const_cast<node_header_t&>(header); + _header.set_is_level_tail(value); + mut.validate_inplace_update(_header); +} + +#define F013_T _node_fields_013_t<SlotType> +#define F013_INST(ST) _node_fields_013_t<ST> +#define F013_TEMPLATE(ST) template struct F013_INST(ST) +F013_TEMPLATE(slot_0_t); +F013_TEMPLATE(slot_1_t); +F013_TEMPLATE(slot_3_t); + +template <typename SlotType> +void F013_T::update_size_at( + NodeExtentMutable& mut, const me_t& node, size_t index, int change) { + assert(index <= node.num_keys); + for 
(const auto* p_slot = &node.slots[index]; + p_slot < &node.slots[node.num_keys]; + ++p_slot) { + node_offset_t offset = p_slot->right_offset; + mut.copy_in_absolute( + (void*)&(p_slot->right_offset), + node_offset_t(offset - change)); + } +} + +template <typename SlotType> +void F013_T::append_key( + NodeExtentMutable& mut, const key_t& key, char*& p_append) { + mut.copy_in_absolute(p_append, key); + p_append += sizeof(key_t); +} + +template <typename SlotType> +void F013_T::append_offset( + NodeExtentMutable& mut, node_offset_t offset_to_right, char*& p_append) { + mut.copy_in_absolute(p_append, offset_to_right); + p_append += sizeof(node_offset_t); +} + +template <typename SlotType> +template <KeyT KT> +void F013_T::insert_at( + NodeExtentMutable& mut, const full_key_t<KT>& key, + const me_t& node, size_t index, node_offset_t size_right) { + assert(index <= node.num_keys); + update_size_at(mut, node, index, size_right); + auto p_insert = const_cast<char*>(fields_start(node)) + + node.get_key_start_offset(index); + auto p_shift_end = fields_start(node) + node.get_key_start_offset(node.num_keys); + mut.shift_absolute(p_insert, p_shift_end - p_insert, estimate_insert_one()); + mut.copy_in_absolute((void*)&node.num_keys, num_keys_t(node.num_keys + 1)); + append_key(mut, key_t::template from_key<KT>(key), p_insert); + append_offset(mut, node.get_item_end_offset(index) - size_right, p_insert); +} +#define IA_TEMPLATE(ST, KT) template void F013_INST(ST):: \ + insert_at<KT>(NodeExtentMutable&, const full_key_t<KT>&, \ + const F013_INST(ST)&, size_t, node_offset_t) +IA_TEMPLATE(slot_0_t, KeyT::VIEW); +IA_TEMPLATE(slot_1_t, KeyT::VIEW); +IA_TEMPLATE(slot_3_t, KeyT::VIEW); +IA_TEMPLATE(slot_0_t, KeyT::HOBJ); +IA_TEMPLATE(slot_1_t, KeyT::HOBJ); +IA_TEMPLATE(slot_3_t, KeyT::HOBJ); + +void node_fields_2_t::append_offset( + NodeExtentMutable& mut, node_offset_t offset_to_right, char*& p_append) { + mut.copy_in_absolute(p_append, offset_to_right); + p_append += 
sizeof(node_offset_t); +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.h new file mode 100644 index 00000000000..6305f1904da --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.h @@ -0,0 +1,366 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "key_layout.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_types.h" + +namespace crimson::os::seastore::onode { + +class NodeExtentMutable; + +struct node_header_t { + static constexpr unsigned FIELD_TYPE_BITS = 6u; + static_assert(static_cast<uint8_t>(field_type_t::_MAX) <= 1u << FIELD_TYPE_BITS); + static constexpr unsigned NODE_TYPE_BITS = 1u; + static constexpr unsigned B_LEVEL_TAIL_BITS = 1u; + using bits_t = uint8_t; + + node_header_t() {} + std::optional<field_type_t> get_field_type() const { + if (field_type >= FIELD_TYPE_MAGIC && + field_type < static_cast<uint8_t>(field_type_t::_MAX)) { + return static_cast<field_type_t>(field_type); + } else { + return std::nullopt; + } + } + node_type_t get_node_type() const { + return static_cast<node_type_t>(node_type); + } + bool get_is_level_tail() const { + return is_level_tail; + } + + static void bootstrap_extent( + NodeExtentMutable&, field_type_t, node_type_t, bool, level_t); + + static void update_is_level_tail(NodeExtentMutable&, const node_header_t&, bool); + + bits_t field_type : FIELD_TYPE_BITS; + bits_t node_type : NODE_TYPE_BITS; + bits_t is_level_tail : B_LEVEL_TAIL_BITS; + static_assert(sizeof(bits_t) * 8 == + FIELD_TYPE_BITS + NODE_TYPE_BITS + B_LEVEL_TAIL_BITS); + level_t level; + + private: + void set_field_type(field_type_t type) { + field_type = static_cast<uint8_t>(type); + } + void set_node_type(node_type_t type) { + node_type = static_cast<uint8_t>(type); + } + void 
set_is_level_tail(bool value) { + is_level_tail = static_cast<uint8_t>(value); + } +} __attribute__((packed)); + +template <typename FixedKeyType, field_type_t _FIELD_TYPE> +struct _slot_t { + using key_t = FixedKeyType; + static constexpr field_type_t FIELD_TYPE = _FIELD_TYPE; + static constexpr node_offset_t OVERHEAD = sizeof(_slot_t) - sizeof(key_t); + + key_t key; + node_offset_t right_offset; +} __attribute__((packed)); +using slot_0_t = _slot_t<shard_pool_crush_t, field_type_t::N0>; +using slot_1_t = _slot_t<crush_t, field_type_t::N1>; +using slot_3_t = _slot_t<snap_gen_t, field_type_t::N3>; + +struct node_range_t { + node_offset_t start; + node_offset_t end; +}; + +template <typename FieldType> +const char* fields_start(const FieldType& node) { + return reinterpret_cast<const char*>(&node); +} + +template <node_type_t NODE_TYPE, typename FieldType> +node_range_t fields_free_range_before( + const FieldType& node, size_t index) { + assert(index <= node.num_keys); + node_offset_t offset_start = node.get_key_start_offset(index); + node_offset_t offset_end = + (index == 0 ? FieldType::SIZE + : node.get_item_start_offset(index - 1)); + if constexpr (NODE_TYPE == node_type_t::INTERNAL) { + if (node.is_level_tail() && index == node.num_keys) { + offset_end -= sizeof(laddr_t); + } + } + assert(offset_start <= offset_end); + assert(offset_end - offset_start < FieldType::SIZE); + return {offset_start, offset_end}; +} + +/** + * _node_fields_013_t (node_fields_0_t, node_fields_1_t, leaf_fields_3_t) + * + * The STAGE_LEFT layout implementation for node N0/N1, or the STAGE_RIGHT + * layout implementation for leaf node N3. 
+ * + * The node layout storing n slots: + * + * # <----------------------------- node range --------------------------------------> # + * # #<~># free space # + * # <----- left part -----------------------------> # <~# <----- right slots -------> # + * # # <---- left slots -------------> #~> # # + * # # slots [2, n) |<~># #<~>| right slots [2, n) # + * # # <- slot 0 -> | <- slot 1 -> | # # | <-- s1 --> | <-- s0 --> # + * # # | | # # | | # + * # | num_ # | right | | right | # # | next-stage | next-stage # + * # header | keys # key | offset | key | offset | # # | container | container # + * # | # 0 | 0 | 1 | 1 |...#...#...| or onode 1 | or onode 0 # + * | | ^ ^ + * | | | | + * | +----------------+ | + * +--------------------------------------------+ + */ +template <typename SlotType> +struct _node_fields_013_t { + // TODO: decide by NODE_BLOCK_SIZE, sizeof(SlotType), sizeof(laddr_t) + // and the minimal size of variable_key. + using num_keys_t = uint8_t; + using key_t = typename SlotType::key_t; + using key_get_type = const key_t&; + using me_t = _node_fields_013_t<SlotType>; + static constexpr field_type_t FIELD_TYPE = SlotType::FIELD_TYPE; + static constexpr node_offset_t SIZE = NODE_BLOCK_SIZE; + static constexpr node_offset_t HEADER_SIZE = + sizeof(node_header_t) + sizeof(num_keys_t); + static constexpr node_offset_t ITEM_OVERHEAD = SlotType::OVERHEAD; + + bool is_level_tail() const { return header.get_is_level_tail(); } + node_offset_t total_size() const { return SIZE; } + key_get_type get_key(size_t index) const { + assert(index < num_keys); + return slots[index].key; + } + node_offset_t get_key_start_offset(size_t index) const { + assert(index <= num_keys); + auto offset = HEADER_SIZE + sizeof(SlotType) * index; + assert(offset < SIZE); + return offset; + } + node_offset_t get_item_start_offset(size_t index) const { + assert(index < num_keys); + auto offset = slots[index].right_offset; + assert(offset <= SIZE); + return offset; + } + const void* 
p_offset(size_t index) const { + assert(index < num_keys); + return &slots[index].right_offset; + } + node_offset_t get_item_end_offset(size_t index) const { + return index == 0 ? SIZE : get_item_start_offset(index - 1); + } + template <node_type_t NODE_TYPE> + node_offset_t free_size_before(size_t index) const { + auto range = fields_free_range_before<NODE_TYPE>(*this, index); + return range.end - range.start; + } + + static node_offset_t estimate_insert_one() { return sizeof(SlotType); } + template <KeyT KT> + static void insert_at( + NodeExtentMutable&, const full_key_t<KT>& key, + const me_t& node, size_t index, node_offset_t size_right); + static void update_size_at( + NodeExtentMutable&, const me_t& node, size_t index, int change); + static void append_key( + NodeExtentMutable&, const key_t& key, char*& p_append); + template <KeyT KT> + static void append_key( + NodeExtentMutable& mut, const full_key_t<KT>& key, char*& p_append) { + append_key(mut, key_t::template from_key<KT>(key), p_append); + } + static void append_offset( + NodeExtentMutable& mut, node_offset_t offset_to_right, char*& p_append); + + node_header_t header; + num_keys_t num_keys = 0u; + SlotType slots[]; +} __attribute__((packed)); +using node_fields_0_t = _node_fields_013_t<slot_0_t>; +using node_fields_1_t = _node_fields_013_t<slot_1_t>; + +/** + * node_fields_2_t + * + * The STAGE_STRING layout implementation for node N2. 
+ * + * The node layout storing n slots: + * + * # <--------------------------------- node range ----------------------------------------> # + * # #<~># free space # + * # <------- left part ---------------> # <~# <--------- right slots ---------------------> # + * # # <---- offsets ----> #~> #<~>| slots [2, n) # + * # # offsets [2, n) |<~># # | <----- slot 1 ----> | <----- slot 0 ----> # + * # # | # # | | # + * # | num_ # offset | offset | # # | next-stage | ns-oid | next-stage | ns-oid # + * # header | keys # 0 | 1 |...#...#...| container1 | 1 | container0 | 0 # + * | | ^ ^ + * | | | | + * | +----------------+ | + * +-----------------------------------------------+ + */ +struct node_fields_2_t { + // TODO: decide by NODE_BLOCK_SIZE, sizeof(node_off_t), sizeof(laddr_t) + // and the minimal size of variable_key. + using num_keys_t = uint8_t; + using key_t = ns_oid_view_t; + using key_get_type = key_t; + static constexpr field_type_t FIELD_TYPE = field_type_t::N2; + static constexpr node_offset_t SIZE = NODE_BLOCK_SIZE; + static constexpr node_offset_t HEADER_SIZE = + sizeof(node_header_t) + sizeof(num_keys_t); + static constexpr node_offset_t ITEM_OVERHEAD = sizeof(node_offset_t); + + bool is_level_tail() const { return header.get_is_level_tail(); } + node_offset_t total_size() const { return SIZE; } + key_get_type get_key(size_t index) const { + assert(index < num_keys); + node_offset_t item_end_offset = + (index == 0 ? 
SIZE : offsets[index - 1]); + assert(item_end_offset <= SIZE); + const char* p_start = fields_start(*this); + return key_t(p_start + item_end_offset); + } + node_offset_t get_key_start_offset(size_t index) const { + assert(index <= num_keys); + auto offset = HEADER_SIZE + sizeof(node_offset_t) * num_keys; + assert(offset <= SIZE); + return offset; + } + node_offset_t get_item_start_offset(size_t index) const { + assert(index < num_keys); + auto offset = offsets[index]; + assert(offset <= SIZE); + return offset; + } + const void* p_offset(size_t index) const { + assert(index < num_keys); + return &offsets[index]; + } + node_offset_t get_item_end_offset(size_t index) const { + return index == 0 ? SIZE : get_item_start_offset(index - 1); + } + template <node_type_t NODE_TYPE> + node_offset_t free_size_before(size_t index) const { + auto range = fields_free_range_before<NODE_TYPE>(*this, index); + return range.end - range.start; + } + + static node_offset_t estimate_insert_one() { return sizeof(node_offset_t); } + template <KeyT KT> + static void insert_at( + NodeExtentMutable& mut, const full_key_t<KT>& key, + const node_fields_2_t& node, size_t index, node_offset_t size_right) { + ceph_abort("not implemented"); + } + static void update_size_at( + NodeExtentMutable& mut, const node_fields_2_t& node, size_t index, int change) { + ceph_abort("not implemented"); + } + static void append_key( + NodeExtentMutable& mut, const key_t& key, char*& p_append) { + ns_oid_view_t::append(mut, key, p_append); + } + template <KeyT KT> + static void append_key( + NodeExtentMutable& mut, const full_key_t<KT>& key, char*& p_append) { + ns_oid_view_t::append<KT>(mut, key, p_append); + } + static void append_offset( + NodeExtentMutable& mut, node_offset_t offset_to_right, char*& p_append); + + node_header_t header; + num_keys_t num_keys = 0u; + node_offset_t offsets[]; +} __attribute__((packed)); + +/** + * internal_fields_3_t + * + * The STAGE_RIGHT layout implementation for internal node N3. 
+ * + * The node layout storing 3 children: + * + * # <---------------- node range ---------------------------> # + * # # <-- keys ---> # <---- laddrs -----------> # + * # free space: # |<~># |<~># + * # # | # | # + * # | num_ # key | key | # laddr | laddr | laddr | # + * # header | keys # 0 | 1 |...# 0 | 1 | 2 |...# + */ +// TODO: decide by NODE_BLOCK_SIZE, sizeof(snap_gen_t), sizeof(laddr_t) +static constexpr unsigned MAX_NUM_KEYS_I3 = 170u; +template <unsigned MAX_NUM_KEYS> +struct _internal_fields_3_t { + using key_get_type = const snap_gen_t&; + using me_t = _internal_fields_3_t<MAX_NUM_KEYS>; + // TODO: decide by NODE_BLOCK_SIZE, sizeof(snap_gen_t), sizeof(laddr_t) + using num_keys_t = uint8_t; + static constexpr field_type_t FIELD_TYPE = field_type_t::N3; + static constexpr node_offset_t SIZE = sizeof(me_t); + static constexpr node_offset_t HEADER_SIZE = + sizeof(node_header_t) + sizeof(num_keys_t); + static constexpr node_offset_t ITEM_OVERHEAD = 0u; + + bool is_level_tail() const { return header.get_is_level_tail(); } + node_offset_t total_size() const { + if (is_level_tail()) { + return SIZE - sizeof(snap_gen_t); + } else { + return SIZE; + } + } + key_get_type get_key(size_t index) const { + assert(index < num_keys); + return keys[index]; + } + template <node_type_t NODE_TYPE> + std::enable_if_t<NODE_TYPE == node_type_t::INTERNAL, node_offset_t> + free_size_before(size_t index) const { + assert(index <= num_keys); + assert(num_keys <= (is_level_tail() ? 
MAX_NUM_KEYS - 1 : MAX_NUM_KEYS)); + auto free = (MAX_NUM_KEYS - index) * (sizeof(snap_gen_t) + sizeof(laddr_t)); + if (is_level_tail() && index == num_keys) { + free -= (sizeof(snap_gen_t) + sizeof(laddr_t)); + } + assert(free < SIZE); + return free; + } + + static node_offset_t estimate_insert_one() { + return sizeof(snap_gen_t) + sizeof(laddr_t); + } + template <KeyT KT> + static void insert_at( + NodeExtentMutable& mut, const full_key_t<KT>& key, + const me_t& node, size_t index, node_offset_t size_right) { + ceph_abort("not implemented"); + } + static void update_size_at( + NodeExtentMutable& mut, const me_t& node, size_t index, int change) { + ceph_abort("not implemented"); + } + + node_header_t header; + num_keys_t num_keys = 0u; + snap_gen_t keys[MAX_NUM_KEYS]; + laddr_packed_t child_addrs[MAX_NUM_KEYS]; +} __attribute__((packed)); +static_assert(_internal_fields_3_t<MAX_NUM_KEYS_I3>::SIZE <= NODE_BLOCK_SIZE && + _internal_fields_3_t<MAX_NUM_KEYS_I3 + 1>::SIZE > NODE_BLOCK_SIZE); +using internal_fields_3_t = _internal_fields_3_t<MAX_NUM_KEYS_I3>; + +using leaf_fields_3_t = _node_fields_013_t<slot_3_t>; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/stage.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/stage.h new file mode 100644 index 00000000000..9d8a5e1abd4 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/stage.h @@ -0,0 +1,2120 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <cassert> +#include <optional> +#include <ostream> +#include <sstream> +#include <type_traits> + +#include "common/likely.h" + +#include "sub_items_stage.h" +#include "item_iterator_stage.h" + +namespace crimson::os::seastore::onode { + +struct search_result_bs_t { + size_t index; + MatchKindBS match; +}; +template <typename FGetKey> +search_result_bs_t binary_search( + const full_key_t<KeyT::HOBJ>& key, + size_t begin, size_t 
end, FGetKey&& f_get_key) { + assert(begin <= end); + while (begin < end) { + auto total = begin + end; + auto mid = total >> 1; + // do not copy if return value is reference + decltype(f_get_key(mid)) target = f_get_key(mid); + auto match = compare_to<KeyT::HOBJ>(key, target); + if (match == MatchKindCMP::LT) { + end = mid; + } else if (match == MatchKindCMP::GT) { + begin = mid + 1; + } else { + return {mid, MatchKindBS::EQ}; + } + } + return {begin , MatchKindBS::NE}; +} + +template <typename PivotType, typename FGet> +search_result_bs_t binary_search_r( + size_t rend, size_t rbegin, FGet&& f_get, const PivotType& key) { + assert(rend <= rbegin); + while (rend < rbegin) { + auto total = rend + rbegin + 1; + auto mid = total >> 1; + // do not copy if return value is reference + decltype(f_get(mid)) target = f_get(mid); + int match = target - key; + if (match < 0) { + rend = mid; + } else if (match > 0) { + rbegin = mid - 1; + } else { + return {mid, MatchKindBS::EQ}; + } + } + return {rbegin, MatchKindBS::NE}; +} + +inline bool matchable(field_type_t type, match_stat_t mstat) { + assert(mstat >= MSTAT_MIN && mstat <= MSTAT_MAX); + /* + * compressed prefix by field type: + * N0: NONE + * N1: pool/shard + * N2: pool/shard crush + * N3: pool/shard crush ns/oid + * + * if key matches the node's compressed prefix, return true + * else, return false + */ +#ifndef NDEBUG + if (mstat == MSTAT_END) { + assert(type == field_type_t::N0); + } +#endif + return mstat + to_unsigned(type) < 4; +} + +inline void assert_mstat( + const full_key_t<KeyT::HOBJ>& key, + const full_key_t<KeyT::VIEW>& index, + match_stat_t mstat) { + assert(mstat >= MSTAT_MIN && mstat <= MSTAT_LT2); + // key < index ... 
+ switch (mstat) { + case MSTAT_EQ: + break; + case MSTAT_LT0: + assert(compare_to<KeyT::HOBJ>(key, index.snap_gen_packed()) == MatchKindCMP::LT); + break; + case MSTAT_LT1: + assert(compare_to<KeyT::HOBJ>(key, index.ns_oid_view()) == MatchKindCMP::LT); + break; + case MSTAT_LT2: + if (index.has_shard_pool()) { + assert(compare_to<KeyT::HOBJ>(key, shard_pool_crush_t{ + index.shard_pool_packed(), index.crush_packed()}) == MatchKindCMP::LT); + } else { + assert(compare_to<KeyT::HOBJ>(key, index.crush_packed()) == MatchKindCMP::LT); + } + break; + default: + ceph_abort("impossible path"); + } + // key == index ... + switch (mstat) { + case MSTAT_EQ: + assert(compare_to<KeyT::HOBJ>(key, index.snap_gen_packed()) == MatchKindCMP::EQ); + case MSTAT_LT0: + if (!index.has_ns_oid()) + break; + assert(index.ns_oid_view().type() == ns_oid_view_t::Type::MAX || + compare_to<KeyT::HOBJ>(key, index.ns_oid_view()) == MatchKindCMP::EQ); + case MSTAT_LT1: + if (!index.has_crush()) + break; + assert(compare_to<KeyT::HOBJ>(key, index.crush_packed()) == MatchKindCMP::EQ); + if (!index.has_shard_pool()) + break; + assert(compare_to<KeyT::HOBJ>(key, index.shard_pool_packed()) == MatchKindCMP::EQ); + default: + break; + } +} + +#define NXT_STAGE_T staged<next_param_t> + +enum class TrimType { BEFORE, AFTER, AT }; + +/** + * staged + * + * Implements recursive logic that modifies or reads the node layout + * (N0/N1/N2/N3 * LEAF/INTERNAL) with the multi-stage design. The specific + * stage implementation is flexible. So the implementations for different + * stages can be assembled independently, as long as they follow the + * definitions of container interfaces. + * + * Multi-stage is designed to index different portions of onode keys + * stage-by-stage. 
There are at most 3 stages for a node: + * - STAGE_LEFT: index shard-pool-crush for N0, or index crush for N1 node; + * - STAGE_STRING: index ns-oid for N0/N1/N2 nodes; + * - STAGE_RIGHT: index snap-gen for N0/N1/N2/N3 nodes; + * + * The intention is to consolidate the high-level indexing implementations at + * the level of stage, so we don't need to write them repeatedly for every + * stage and for every node type. + */ +template <typename Params> +struct staged { + static_assert(Params::STAGE >= STAGE_BOTTOM); + static_assert(Params::STAGE <= STAGE_TOP); + using container_t = typename Params::container_t; + using key_get_type = typename container_t::key_get_type; + using next_param_t = typename Params::next_param_t; + using position_t = staged_position_t<Params::STAGE>; + using result_t = staged_result_t<Params::NODE_TYPE, Params::STAGE>; + using value_t = value_type_t<Params::NODE_TYPE>; + static constexpr auto CONTAINER_TYPE = container_t::CONTAINER_TYPE; + static constexpr bool IS_BOTTOM = (Params::STAGE == STAGE_BOTTOM); + static constexpr auto NODE_TYPE = Params::NODE_TYPE; + static constexpr auto STAGE = Params::STAGE; + + template <bool is_exclusive> + static void _left_or_right(size_t& split_index, size_t insert_index, + std::optional<bool>& is_insert_left) { + assert(!is_insert_left.has_value()); + assert(is_valid_index(split_index)); + if constexpr (is_exclusive) { + if (split_index <= insert_index) { + // ...[s_index-1] |!| (i_index) [s_index]... + // offset i_position to right + is_insert_left = false; + } else { + // ...[s_index-1] (i_index)) |?[s_index]| ... + // ...(i_index)...[s_index-1] |?[s_index]| ... + is_insert_left = true; + --split_index; + } + } else { + if (split_index < insert_index) { + // ...[s_index-1] |?[s_index]| ...[(i_index)[s_index_k]... + is_insert_left = false; + } else if (split_index > insert_index) { + // ...[(i_index)s_index-1] |?[s_index]| ... + // ...[(i_index)s_index_k]...[s_index-1] |?[s_index]| ... 
+ is_insert_left = true; + } else { + // ...[s_index-1] |?[(i_index)s_index]| ... + // i_to_left = std::nullopt; + } + } + } + + template <ContainerType CTYPE, typename Enable = void> class _iterator_t; + template <ContainerType CTYPE> + class _iterator_t<CTYPE, std::enable_if_t<CTYPE == ContainerType::INDEXABLE>> { + /* + * indexable container type system: + * CONTAINER_TYPE = ContainerType::INDEXABLE + * keys() const -> size_t + * operator[](size_t) const -> key_get_type + * size_before(size_t) const -> node_offset_t + * size_overhead_at(size_t) const -> node_offset_t + * (IS_BOTTOM) get_p_value(size_t) const -> const value_t* + * (!IS_BOTTOM) size_to_nxt_at(size_t) const -> node_offset_t + * (!IS_BOTTOM) get_nxt_container(size_t) const + * static: + * header_size() -> node_offset_t + * estimate_insert(key, value) -> node_offset_t + * (IS_BOTTOM) insert_at(mut, src, key, value, + * index, size, p_left_bound) -> const value_t* + * (!IS_BOTTOM) insert_prefix_at(mut, src, key, + * index, size, p_left_bound) -> memory_range_t + * (!IS_BOTTOM) update_size_at(mut, src, index, size) + * trim_until(mut, container, index) -> trim_size + * (!IS_BOTTOM) trim_at(mut, container, index, trimmed) -> trim_size + * + * Appender::append(const container_t& src, from, items) + */ + public: + using me_t = _iterator_t<CTYPE>; + + _iterator_t(const container_t& container) : container{container} { + assert(container.keys()); + } + + size_t index() const { + return _index; + } + key_get_type get_key() const { + assert(!is_end()); + return container[_index]; + } + node_offset_t size_to_nxt() const { + assert(!is_end()); + return container.size_to_nxt_at(_index); + } + template <typename T = typename NXT_STAGE_T::container_t> + std::enable_if_t<!IS_BOTTOM, T> get_nxt_container() const { + assert(!is_end()); + return container.get_nxt_container(_index); + } + template <typename T = value_t> + std::enable_if_t<IS_BOTTOM, const T*> get_p_value() const { + assert(!is_end()); + return 
container.get_p_value(_index); + } + bool is_last() const { + return _index + 1 == container.keys(); + } + bool is_end() const { return _index == container.keys(); } + node_offset_t size() const { + assert(!is_end()); + assert(header_size() == container.size_before(0)); + assert(container.size_before(_index + 1) > container.size_before(_index)); + return container.size_before(_index + 1) - + container.size_before(_index); + } + node_offset_t size_overhead() const { + assert(!is_end()); + return container.size_overhead_at(_index); + } + + me_t& operator++() { + assert(!is_end()); + assert(!is_last()); + ++_index; + return *this; + } + void seek_at(size_t index) { + assert(index < container.keys()); + seek_till_end(index); + } + void seek_till_end(size_t index) { + assert(!is_end()); + assert(this->index() == 0); + assert(index <= container.keys()); + _index = index; + } + void seek_last() { + assert(!is_end()); + assert(index() == 0); + _index = container.keys() - 1; + } + void set_end() { + assert(!is_end()); + assert(is_last()); + ++_index; + } + // Note: possible to return an end iterator + MatchKindBS seek(const full_key_t<KeyT::HOBJ>& key, bool exclude_last) { + assert(!is_end()); + assert(index() == 0); + size_t end_index = container.keys(); + if (exclude_last) { + assert(end_index); + --end_index; + assert(compare_to<KeyT::HOBJ>(key, container[end_index]) == MatchKindCMP::LT); + } + auto ret = binary_search(key, _index, end_index, + [this] (size_t index) { return container[index]; }); + _index = ret.index; + return ret.match; + } + + template <KeyT KT, typename T = value_t> + std::enable_if_t<IS_BOTTOM, const T*> insert( + NodeExtentMutable& mut, const full_key_t<KT>& key, + const value_t& value, node_offset_t insert_size, const char* p_left_bound) { + return container_t::template insert_at<KT>( + mut, container, key, value, _index, insert_size, p_left_bound); + } + + template <KeyT KT, typename T = memory_range_t> + std::enable_if_t<!IS_BOTTOM, T> 
// Seeks to the split index of a container that is about to receive an
// insert of insert_size bytes at insert_index, and reports which side of
// the split the insert lands on.
//
// Steps:
//  - resolve the INDEX_LAST (non-exclusive) / INDEX_END (exclusive)
//    placeholder in insert_index to a concrete index;
//  - run binary_search_r() over [0, s_end] with f_get_used_size() against
//    target_size and store the resulting index in _index;
//  - call _left_or_right<is_exclusive>() to fill is_insert_left, which may
//    also move _index back by one in the exclusive case.
//
// Returns f_get_used_size() evaluated at the index chosen by the search
// (asserted to be <= target_size).
//
// Note: possible to return an end iterator when is_exclusive is true
template <bool is_exclusive>
size_t seek_split_inserted(
    size_t start_size, size_t extra_size, size_t target_size,
    size_t& insert_index, size_t insert_size,
    std::optional<bool>& is_insert_left) {
  assert(!is_end());
  assert(index() == 0);
  // replace insert_index placeholder
  if constexpr (!is_exclusive) {
    if (insert_index == INDEX_LAST) {
      insert_index = container.keys() - 1;
    }
  } else {
    if (insert_index == INDEX_END) {
      insert_index = container.keys();
    }
  }
  assert(insert_index <= container.keys());

  auto start_size_1 = start_size + extra_size;
  // used size if the split were placed at <index>:
  //  - index 0 is charged start_size only;
  //  - any later index is charged start_size + extra_size plus
  //    container.size_before(index);
  //  - indexes past insert_index additionally carry insert_size, and in the
  //    exclusive case are shifted back by one before the size_before lookup.
  auto f_get_used_size = [this, start_size, start_size_1,
                          insert_index, insert_size] (size_t index) {
    size_t current_size;
    if (unlikely(index == 0)) {
      current_size = start_size;
    } else {
      current_size = start_size_1;
      if (index > insert_index) {
        current_size += insert_size;
        if constexpr (is_exclusive) {
          --index;
        }
      }
      // already includes header size
      current_size += container.size_before(index);
    }
    return current_size;
  };
  // the exclusive search may land on keys(), i.e. one past the last item
  size_t s_end;
  if constexpr (is_exclusive) {
    s_end = container.keys();
  } else {
    s_end = container.keys() - 1;
  }
  _index = binary_search_r(0, s_end, f_get_used_size, target_size).index;
  size_t current_size = f_get_used_size(_index);
  assert(current_size <= target_size);

  _left_or_right<is_exclusive>(_index, insert_index, is_insert_left);
  return current_size;
}
extra_size, size_t target_size) { + assert(!is_end()); + assert(index() == 0); + auto start_size_1 = start_size + extra_size; + auto f_get_used_size = [this, start_size, start_size_1] (size_t index) { + size_t current_size; + if (unlikely(index == 0)) { + current_size = start_size; + } else { + // already includes header size + current_size = start_size_1 + container.size_before(index); + } + return current_size; + }; + _index = binary_search_r( + 0, container.keys() - 1, f_get_used_size, target_size).index; + size_t current_size = f_get_used_size(_index); + assert(current_size <= target_size); + return current_size; + } + + // Note: possible to return an end iterater if to_index == INDEX_END + template <KeyT KT> + void copy_out_until( + typename container_t::template Appender<KT>& appender, size_t& to_index) { + auto num_keys = container.keys(); + size_t items; + if (to_index == INDEX_END) { + items = num_keys - _index; + appender.append(container, _index, items); + _index = num_keys; + to_index = _index; + } else if (to_index == INDEX_LAST) { + assert(!is_end()); + items = num_keys - 1 - _index; + appender.append(container, _index, items); + _index = num_keys - 1; + to_index = _index; + } else { + assert(_index <= to_index); + assert(to_index <= num_keys); + items = to_index - _index; + appender.append(container, _index, items); + _index = to_index; + } + } + + node_offset_t trim_until(NodeExtentMutable& mut) { + return container_t::trim_until(mut, container, _index); + } + + template <typename T = node_offset_t> + std::enable_if_t<!IS_BOTTOM, T> + trim_at(NodeExtentMutable& mut, node_offset_t trimmed) { + return container_t::trim_at(mut, container, _index, trimmed); + } + + static node_offset_t header_size() { + return container_t::header_size(); + } + + template <KeyT KT> + static node_offset_t estimate_insert( + const full_key_t<KT>& key, const value_t& value) { + return container_t::template estimate_insert<KT>(key, value); + } + + private: + container_t 
container; + size_t _index = 0; + }; + + template <ContainerType CTYPE> + class _iterator_t<CTYPE, std::enable_if_t<CTYPE == ContainerType::ITERATIVE>> { + /* + * iterative container type system (!IS_BOTTOM): + * CONTAINER_TYPE = ContainerType::ITERATIVE + * index() const -> size_t + * get_key() const -> key_get_type + * size() const -> node_offset_t + * size_to_nxt() const -> node_offset_t + * size_overhead() const -> node_offset_t + * get_nxt_container() const + * has_next() const -> bool + * operator++() + * static: + * header_size() -> node_offset_t + * estimate_insert(key, value) -> node_offset_t + * insert_prefix(mut, src, key, is_end, size, p_left_bound) -> memory_range_t + * update_size(mut, src, size) + * trim_until(mut, container) -> trim_size + * trim_at(mut, container, trimmed) -> trim_size + */ + // currently the iterative iterator is only implemented with STAGE_STRING + // for in-node space efficiency + static_assert(STAGE == STAGE_STRING); + public: + using me_t = _iterator_t<CTYPE>; + + _iterator_t(const container_t& container) : container{container} { + assert(index() == 0); + } + + size_t index() const { + if (is_end()) { + return end_index; + } else { + return container.index(); + } + } + key_get_type get_key() const { + assert(!is_end()); + return container.get_key(); + } + node_offset_t size_to_nxt() const { + assert(!is_end()); + return container.size_to_nxt(); + } + const typename NXT_STAGE_T::container_t get_nxt_container() const { + assert(!is_end()); + return container.get_nxt_container(); + } + bool is_last() const { + assert(!is_end()); + return !container.has_next(); + } + bool is_end() const { return _is_end; } + node_offset_t size() const { + assert(!is_end()); + return container.size(); + } + node_offset_t size_overhead() const { + assert(!is_end()); + return container.size_overhead(); + } + + me_t& operator++() { + assert(!is_end()); + assert(!is_last()); + ++container; + return *this; + } + void seek_at(size_t index) { + 
assert(!is_end()); + assert(this->index() == 0); + while (index > 0) { + assert(container.has_next()); + ++container; + --index; + } + } + void seek_till_end(size_t index) { + assert(!is_end()); + assert(this->index() == 0); + while (index > 0) { + if (!container.has_next()) { + assert(index == 1); + set_end(); + break; + } + ++container; + --index; + } + } + void seek_last() { + assert(!is_end()); + assert(index() == 0); + while (container.has_next()) { + ++container; + } + } + void set_end() { + assert(!is_end()); + assert(is_last()); + _is_end = true; + end_index = container.index() + 1; + } + // Note: possible to return an end iterator + MatchKindBS seek(const full_key_t<KeyT::HOBJ>& key, bool exclude_last) { + assert(!is_end()); + assert(index() == 0); + do { + if (exclude_last && is_last()) { + assert(compare_to<KeyT::HOBJ>(key, get_key()) == MatchKindCMP::LT); + return MatchKindBS::NE; + } + auto match = compare_to<KeyT::HOBJ>(key, get_key()); + if (match == MatchKindCMP::LT) { + return MatchKindBS::NE; + } else if (match == MatchKindCMP::EQ) { + return MatchKindBS::EQ; + } else { + if (container.has_next()) { + ++container; + } else { + // end + break; + } + } + } while (true); + assert(!exclude_last); + set_end(); + return MatchKindBS::NE; + } + + template <KeyT KT> + memory_range_t insert_prefix( + NodeExtentMutable& mut, const full_key_t<KT>& key, + node_offset_t size, const char* p_left_bound) { + return container_t::template insert_prefix<KT>( + mut, container, key, is_end(), size, p_left_bound); + } + + void update_size(NodeExtentMutable& mut, node_offset_t insert_size) { + assert(!is_end()); + container_t::update_size(mut, container, insert_size); + } + + // Note: possible to return an end iterator when is_exclusive is true + // insert_index can still be INDEX_LAST or INDEX_END + template <bool is_exclusive> + size_t seek_split_inserted( + size_t start_size, size_t extra_size, size_t target_size, + size_t& insert_index, size_t insert_size, + 
std::optional<bool>& is_insert_left) { + assert(!is_end()); + assert(index() == 0); + size_t current_size = start_size; + size_t split_index = 0; + extra_size += header_size(); + do { + if constexpr (!is_exclusive) { + if (is_last()) { + assert(split_index == index()); + if (insert_index == INDEX_LAST) { + insert_index = index(); + } + assert(insert_index <= index()); + break; + } + } + + size_t nxt_size = current_size; + if (split_index == 0) { + nxt_size += extra_size; + } + if (split_index == insert_index) { + nxt_size += insert_size; + if constexpr (is_exclusive) { + if (nxt_size > target_size) { + break; + } + current_size = nxt_size; + ++split_index; + } + } + nxt_size += size(); + if (nxt_size > target_size) { + break; + } + current_size = nxt_size; + + if constexpr (is_exclusive) { + if (is_last()) { + assert(split_index == index()); + set_end(); + split_index = index(); + if (insert_index == INDEX_END) { + insert_index = index(); + } + assert(insert_index == index()); + break; + } else { + ++(*this); + ++split_index; + } + } else { + ++(*this); + ++split_index; + } + } while (true); + assert(current_size <= target_size); + + _left_or_right<is_exclusive>(split_index, insert_index, is_insert_left); + assert(split_index == index()); + return current_size; + } + + size_t seek_split(size_t start_size, size_t extra_size, size_t target_size) { + assert(!is_end()); + assert(index() == 0); + size_t current_size = start_size; + do { + if (is_last()) { + break; + } + + size_t nxt_size = current_size; + if (index() == 0) { + nxt_size += extra_size; + } + nxt_size += size(); + if (nxt_size > target_size) { + break; + } + current_size = nxt_size; + ++(*this); + } while (true); + assert(current_size <= target_size); + return current_size; + } + + // Note: possible to return an end iterater if to_index == INDEX_END + template <KeyT KT> + void copy_out_until( + typename container_t::template Appender<KT>& appender, size_t& to_index) { + if (is_end()) { + 
assert(!container.has_next()); + if (to_index == INDEX_END) { + to_index = index(); + } + assert(to_index == index()); + return; + } + size_t items; + if (to_index == INDEX_END || to_index == INDEX_LAST) { + items = to_index; + } else { + assert(is_valid_index(to_index)); + assert(index() <= to_index); + items = to_index - index(); + } + if (appender.append(container, items)) { + set_end(); + } + to_index = index(); + } + + node_offset_t trim_until(NodeExtentMutable& mut) { + if (is_end()) { + return 0; + } + return container_t::trim_until(mut, container); + } + + node_offset_t trim_at(NodeExtentMutable& mut, node_offset_t trimmed) { + assert(!is_end()); + return container_t::trim_at(mut, container, trimmed); + } + + static node_offset_t header_size() { + return container_t::header_size(); + } + + template <KeyT KT> + static node_offset_t estimate_insert(const full_key_t<KT>& key, const value_t& value) { + return container_t::template estimate_insert<KT>(key, value); + } + + private: + container_t container; + bool _is_end = false; + size_t end_index; + }; + + /* + * iterator_t encapsulates both indexable and iterative implementations + * from a *non-empty* container. 
+ * cstr(const container_t&) + * access: + * index() -> size_t + * get_key() -> key_get_type (const reference or value type) + * is_last() -> bool + * is_end() -> bool + * size() -> node_offset_t + * size_overhead() -> node_offset_t + * (IS_BOTTOM) get_p_value() -> const value_t* + * (!IS_BOTTOM) get_nxt_container() -> nxt_stage::container_t + * (!IS_BOTTOM) size_to_nxt() -> node_offset_t + * seek: + * operator++() -> iterator_t& + * seek_at(index) + * seek_till_end(index) + * seek_last() + * set_end() + * seek(key, exclude_last) -> MatchKindBS + * insert: + * (IS_BOTTOM) insert(mut, key, value, size, p_left_bound) -> p_value + * (!IS_BOTTOM) insert_prefix(mut, key, size, p_left_bound) -> memory_range_t + * (!IS_BOTTOM) update_size(mut, size) + * split: + * seek_split_inserted<bool is_exclusive>( + * start_size, extra_size, target_size, insert_index, insert_size, + * std::optional<bool>& is_insert_left) + * -> insert to left/right/unknown (!exclusive) + * -> insert to left/right (exclusive, can be end) + * -> split_size + * seek_split(start_size, extra_size, target_size) -> split_size + * copy_out_until(appender, to_index) (can be end) + * trim_until(mut) -> trim_size + * (!IS_BOTTOM) trim_at(mut, trimmed) -> trim_size + * static: + * header_size() -> node_offset_t + * estimate_insert(key, value) -> node_offset_t + */ + using iterator_t = _iterator_t<CONTAINER_TYPE>; + /* TODO: detailed comments + * - trim_until(mut) -> trim_size + * * keep 0 to i - 1, and remove the rest, return the size trimmed. + * * if this is the end iterator, do nothing and return 0. + * * if this is the start iterator, normally needs to go to the higher + * stage to trim the entire container. + * - trim_at(mut, trimmed) -> trim_size + * * trim happens inside the current iterator, reducing its size by + * <trimmed>; returns the total size trimmed. + */ + + /* + * Lookup internals (hide?) 
+ */ + + template <bool GET_KEY> + static result_t smallest_result( + const iterator_t& iter, full_key_t<KeyT::VIEW>* index_key) { + static_assert(!IS_BOTTOM); + assert(!iter.is_end()); + auto pos_smallest = NXT_STAGE_T::position_t::begin(); + auto nxt_container = iter.get_nxt_container(); + auto value_ptr = NXT_STAGE_T::template get_p_value<GET_KEY>( + nxt_container, pos_smallest, index_key); + if constexpr (GET_KEY) { + index_key->set(iter.get_key()); + } + return result_t{{iter.index(), pos_smallest}, value_ptr, STAGE}; + } + + template <bool GET_KEY> + static result_t nxt_lower_bound( + const full_key_t<KeyT::HOBJ>& key, iterator_t& iter, + MatchHistory& history, full_key_t<KeyT::VIEW>* index_key) { + static_assert(!IS_BOTTOM); + assert(!iter.is_end()); + auto nxt_container = iter.get_nxt_container(); + auto nxt_result = NXT_STAGE_T::template lower_bound<GET_KEY>( + nxt_container, key, history, index_key); + if (nxt_result.is_end()) { + if (iter.is_last()) { + return result_t::end(); + } else { + return smallest_result<GET_KEY>(++iter, index_key); + } + } else { + if constexpr (GET_KEY) { + index_key->set(iter.get_key()); + } + return result_t::from_nxt(iter.index(), nxt_result); + } + } + + template <bool GET_POS, bool GET_KEY, bool GET_VAL> + static void lookup_largest_slot( + const container_t& container, position_t* p_position, + full_key_t<KeyT::VIEW>* p_index_key, const value_t** pp_value) { + auto iter = iterator_t(container); + iter.seek_last(); + if constexpr (GET_KEY) { + assert(p_index_key); + p_index_key->set(iter.get_key()); + } + if constexpr (GET_POS) { + assert(p_position); + p_position->index = iter.index(); + } + if constexpr (IS_BOTTOM) { + if constexpr (GET_VAL) { + assert(pp_value); + *pp_value = iter.get_p_value(); + } + } else { + auto nxt_container = iter.get_nxt_container(); + if constexpr (GET_POS) { + NXT_STAGE_T::template lookup_largest_slot<true, GET_KEY, GET_VAL>( + nxt_container, &p_position->nxt, p_index_key, pp_value); + } else 
{ + NXT_STAGE_T::template lookup_largest_slot<false, GET_KEY, GET_VAL>( + nxt_container, nullptr, p_index_key, pp_value); + } + } + } + + template <bool GET_KEY = false> + static const value_t* get_p_value( + const container_t& container, const position_t& position, + full_key_t<KeyT::VIEW>* index_key = nullptr) { + auto iter = iterator_t(container); + iter.seek_at(position.index); + if constexpr (GET_KEY) { + index_key->set(iter.get_key()); + } + if constexpr (!IS_BOTTOM) { + auto nxt_container = iter.get_nxt_container(); + return NXT_STAGE_T::template get_p_value<GET_KEY>( + nxt_container, position.nxt, index_key); + } else { + return iter.get_p_value(); + } + } + + static void get_key_view( + const container_t& container, + const position_t& position, + full_key_t<KeyT::VIEW>& index_key) { + auto iter = iterator_t(container); + iter.seek_at(position.index); + index_key.set(iter.get_key()); + if constexpr (!IS_BOTTOM) { + auto nxt_container = iter.get_nxt_container(); + return NXT_STAGE_T::get_key_view(nxt_container, position.nxt, index_key); + } + } + + template <bool GET_KEY = false> + static result_t lower_bound( + const container_t& container, + const full_key_t<KeyT::HOBJ>& key, + MatchHistory& history, + full_key_t<KeyT::VIEW>* index_key = nullptr) { + bool exclude_last = false; + if (history.get<STAGE>().has_value()) { + if (*history.get<STAGE>() == MatchKindCMP::EQ) { + // lookup is short-circuited + if constexpr (!IS_BOTTOM) { + assert(history.get<STAGE - 1>().has_value()); + if (history.is_GT<STAGE - 1>()) { + auto iter = iterator_t(container); + bool test_key_equal; + if constexpr (STAGE == STAGE_STRING) { + // TODO(cross-node string dedup) + // test_key_equal = (iter.get_key().type() == ns_oid_view_t::Type::MIN); + auto cmp = compare_to<KeyT::HOBJ>(key, iter.get_key()); + assert(cmp != MatchKindCMP::GT); + test_key_equal = (cmp == MatchKindCMP::EQ); + } else { + auto cmp = compare_to<KeyT::HOBJ>(key, iter.get_key()); + // From history, key[stage] == 
parent[stage][index - 1] + // which should be the smallest possible value for all + // index[stage][*] + assert(cmp != MatchKindCMP::GT); + test_key_equal = (cmp == MatchKindCMP::EQ); + } + if (test_key_equal) { + return nxt_lower_bound<GET_KEY>(key, iter, history, index_key); + } else { + // key[stage] < index[stage][left-most] + return smallest_result<GET_KEY>(iter, index_key); + } + } + } + // IS_BOTTOM || !history.is_GT<STAGE - 1>() + auto iter = iterator_t(container); + iter.seek_last(); + if constexpr (STAGE == STAGE_STRING) { + // TODO(cross-node string dedup) + // assert(iter.get_key().type() == ns_oid_view_t::Type::MAX); + assert(compare_to<KeyT::HOBJ>(key, iter.get_key()) == MatchKindCMP::EQ); + } else { + assert(compare_to<KeyT::HOBJ>(key, iter.get_key()) == MatchKindCMP::EQ); + } + if constexpr (GET_KEY) { + index_key->set(iter.get_key()); + } + if constexpr (IS_BOTTOM) { + auto value_ptr = iter.get_p_value(); + return result_t{{iter.index()}, value_ptr, MSTAT_EQ}; + } else { + auto nxt_container = iter.get_nxt_container(); + auto nxt_result = NXT_STAGE_T::template lower_bound<GET_KEY>( + nxt_container, key, history, index_key); + // !history.is_GT<STAGE - 1>() means + // key[stage+1 ...] <= index[stage+1 ...][*] + assert(!nxt_result.is_end()); + return result_t::from_nxt(iter.index(), nxt_result); + } + } else if (*history.get<STAGE>() == MatchKindCMP::LT) { + exclude_last = true; + } + } + auto iter = iterator_t(container); + auto bs_match = iter.seek(key, exclude_last); + if (iter.is_end()) { + assert(!exclude_last); + assert(bs_match == MatchKindBS::NE); + history.set<STAGE>(MatchKindCMP::GT); + return result_t::end(); + } + history.set<STAGE>(bs_match == MatchKindBS::EQ ? + MatchKindCMP::EQ : MatchKindCMP::LT); + if constexpr (IS_BOTTOM) { + if constexpr (GET_KEY) { + index_key->set(iter.get_key()); + } + auto value_ptr = iter.get_p_value(); + return result_t{{iter.index()}, value_ptr, + (bs_match == MatchKindBS::EQ ? 
MSTAT_EQ : MSTAT_LT0)}; + } else { + if (bs_match == MatchKindBS::EQ) { + return nxt_lower_bound<GET_KEY>(key, iter, history, index_key); + } else { + return smallest_result<GET_KEY>(iter, index_key); + } + } + } + + template <KeyT KT> + static node_offset_t insert_size(const full_key_t<KT>& key, const value_t& value) { + if constexpr (IS_BOTTOM) { + return iterator_t::template estimate_insert<KT>(key, value); + } else { + return iterator_t::template estimate_insert<KT>(key, value) + + NXT_STAGE_T::iterator_t::header_size() + + NXT_STAGE_T::template insert_size<KT>(key, value); + } + } + + template <KeyT KT> + static node_offset_t insert_size_at( + match_stage_t stage, const full_key_t<KeyT::HOBJ>& key, const value_t& value) { + if (stage == STAGE) { + return insert_size<KT>(key, value); + } else { + assert(stage < STAGE); + return NXT_STAGE_T::template insert_size_at<KT>(stage, key, value); + } + } + + template <typename T = std::tuple<match_stage_t, node_offset_t>> + static std::enable_if_t<NODE_TYPE == node_type_t::INTERNAL, T> evaluate_insert( + const container_t& container, const full_key_t<KeyT::VIEW>& key, + const value_t& value, position_t& position, bool evaluate_last) { + auto iter = iterator_t(container); + auto& index = position.index; + if (evaluate_last || index == INDEX_END) { + iter.seek_last(); + index = iter.index(); + // evaluate the previous index + } else { + assert(is_valid_index(index)); + // evaluate the current index + iter.seek_at(index); + auto match = compare_to<KeyT::VIEW>(key, iter.get_key()); + if (match == MatchKindCMP::EQ) { + if constexpr (IS_BOTTOM) { + ceph_abort("insert conflict at current index!"); + } else { + // insert into the current index + auto nxt_container = iter.get_nxt_container(); + return NXT_STAGE_T::evaluate_insert( + nxt_container, key, value, position.nxt, false); + } + } else { + assert(match == MatchKindCMP::LT); + if (index == 0) { + // already the first index, so insert at the current index + return {STAGE, 
insert_size<KeyT::VIEW>(key, value)}; + } + --index; + iter = iterator_t(container); + iter.seek_at(index); + // proceed to evaluate the previous index + } + } + + // XXX(multi-type): when key is from a different type of node + auto match = compare_to<KeyT::VIEW>(key, iter.get_key()); + if (match == MatchKindCMP::GT) { + // key doesn't match both indexes, so insert at the current index + ++index; + return {STAGE, insert_size<KeyT::VIEW>(key, value)}; + } else { + assert(match == MatchKindCMP::EQ); + if constexpr (IS_BOTTOM) { + // ceph_abort? + ceph_abort("insert conflict at the previous index!"); + } else { + // insert into the previous index + auto nxt_container = iter.get_nxt_container(); + return NXT_STAGE_T::evaluate_insert( + nxt_container, key, value, position.nxt, true); + } + } + } + + template <typename T = bool> + static std::enable_if_t<NODE_TYPE == node_type_t::LEAF, T> + compensate_insert_position_at(match_stage_t stage, position_t& position) { + auto& index = position.index; + if (stage == STAGE) { + assert(index == 0); + // insert at the end of the current stage + index = INDEX_END; + return true; + } else { + if constexpr (IS_BOTTOM) { + ceph_abort("impossible path"); + } else { + assert(stage < STAGE); + bool compensate = NXT_STAGE_T:: + compensate_insert_position_at(stage, position.nxt); + if (compensate) { + assert(is_valid_index(index)); + if (index == 0) { + // insert into the *last* index of the current stage + index = INDEX_LAST; + return true; + } else { + --index; + return false; + } + } else { + return false; + } + } + } + } + + static void patch_insert_end(position_t& insert_pos, match_stage_t insert_stage) { + assert(insert_stage <= STAGE); + if (insert_stage == STAGE) { + insert_pos.index = INDEX_END; + } else if constexpr (!IS_BOTTOM) { + insert_pos.index = INDEX_LAST; + NXT_STAGE_T::patch_insert_end(insert_pos.nxt, insert_stage); + } + } + + template <typename T = std::tuple<match_stage_t, node_offset_t>> + static 
std::enable_if_t<NODE_TYPE == node_type_t::LEAF, T> evaluate_insert( + const full_key_t<KeyT::HOBJ>& key, const onode_t& value, + const MatchHistory& history, match_stat_t mstat, position_t& position) { + match_stage_t insert_stage = STAGE_TOP; + while (*history.get_by_stage(insert_stage) == MatchKindCMP::EQ) { + assert(insert_stage != STAGE_BOTTOM && "insert conflict!"); + --insert_stage; + } + + if (history.is_GT()) { + if (position.is_end()) { + // no need to compensate insert position + assert(insert_stage <= STAGE && "impossible insert stage"); + } else if (position == position_t::begin()) { + // I must be short-circuited by staged::smallest_result() + // in staged::lower_bound(), so we need to rely on mstat instead + assert(mstat >= MSTAT_LT0 && mstat <= MSTAT_LT3); + if (mstat == MSTAT_LT0) { + insert_stage = STAGE_RIGHT; + } else if (mstat == MSTAT_LT1) { + insert_stage = STAGE_STRING; + } else { + insert_stage = STAGE_LEFT; + } + // XXX(multi-type): need to upgrade node type before inserting an + // incompatible index at front. 
+ assert(insert_stage <= STAGE && "incompatible insert"); + } else { + assert(insert_stage <= STAGE && "impossible insert stage"); + [[maybe_unused]] bool ret = compensate_insert_position_at(insert_stage, position); + assert(!ret); + } + } + + if (position.is_end()) { + patch_insert_end(position, insert_stage); + } + + node_offset_t insert_size = insert_size_at<KeyT::HOBJ>(insert_stage, key, value); + + return {insert_stage, insert_size}; + } + + template <KeyT KT> + static const value_t* insert_new( + NodeExtentMutable& mut, const memory_range_t& range, + const full_key_t<KT>& key, const value_t& value) { + char* p_insert = const_cast<char*>(range.p_end); + const value_t* p_value = nullptr; + StagedAppender<KT> appender; + appender.init(&mut, p_insert); + appender.append(key, value, p_value); + [[maybe_unused]] const char* p_insert_front = appender.wrap(); + assert(p_insert_front == range.p_start); + return p_value; + } + + template <KeyT KT, bool SPLIT> + static const value_t* proceed_insert_recursively( + NodeExtentMutable& mut, const container_t& container, + const full_key_t<KT>& key, const value_t& value, + position_t& position, match_stage_t& stage, + node_offset_t& _insert_size, const char* p_left_bound) { + // proceed insert from right to left + assert(stage <= STAGE); + auto iter = iterator_t(container); + auto& index = position.index; + + bool do_insert = false; + if (stage == STAGE) { + if (index == INDEX_END) { + iter.seek_last(); + iter.set_end(); + index = iter.index(); + } else { + assert(is_valid_index(index)); + iter.seek_till_end(index); + } + do_insert = true; + } else { // stage < STAGE + if (index == INDEX_LAST) { + iter.seek_last(); + index = iter.index(); + } else { + assert(is_valid_index(index)); + iter.seek_till_end(index); + } + if constexpr (SPLIT) { + if (iter.is_end()) { + // insert at the higher stage due to split + do_insert = true; + _insert_size = insert_size<KT>(key, value); + stage = STAGE; + } + } else { + 
assert(!iter.is_end()); + } + } + + if (do_insert) { + if constexpr (!IS_BOTTOM) { + position.nxt = position_t::nxt_t::begin(); + } + assert(_insert_size == insert_size<KT>(key, value)); + if constexpr (IS_BOTTOM) { + return iter.template insert<KT>( + mut, key, value, _insert_size, p_left_bound); + } else { + auto range = iter.template insert_prefix<KT>( + mut, key, _insert_size, p_left_bound); + return NXT_STAGE_T::template insert_new<KT>(mut, range, key, value); + } + } else { + if constexpr (!IS_BOTTOM) { + auto nxt_container = iter.get_nxt_container(); + auto p_value = NXT_STAGE_T::template proceed_insert_recursively<KT, SPLIT>( + mut, nxt_container, key, value, + position.nxt, stage, _insert_size, p_left_bound); + iter.update_size(mut, _insert_size); + return p_value; + } else { + ceph_abort("impossible path"); + } + } + } + + template <KeyT KT, bool SPLIT> + static const value_t* proceed_insert( + NodeExtentMutable& mut, const container_t& container, + const full_key_t<KT>& key, const value_t& value, + position_t& position, match_stage_t& stage, node_offset_t& _insert_size) { + auto p_left_bound = container.p_left_bound(); + if (unlikely(!container.keys())) { + if (position.is_end()) { + position = position_t::begin(); + assert(stage == STAGE); + assert(_insert_size == insert_size<KT>(key, value)); + } else if (position == position_t::begin()) { + // when insert into a trimmed and empty left node + stage = STAGE; + _insert_size = insert_size<KT>(key, value); + } else { + ceph_abort("impossible path"); + } + if constexpr (IS_BOTTOM) { + return container_t::template insert_at<KT>( + mut, container, key, value, 0, _insert_size, p_left_bound); + } else { + auto range = container_t::template insert_prefix_at<KT>( + mut, container, key, 0, _insert_size, p_left_bound); + return NXT_STAGE_T::template insert_new<KT>(mut, range, key, value); + } + } else { + return proceed_insert_recursively<KT, SPLIT>( + mut, container, key, value, + position, stage, _insert_size, 
p_left_bound); + } + } + + static std::ostream& dump(const container_t& container, + std::ostream& os, + const std::string& prefix, + size_t& size, + const char* p_start) { + auto iter = iterator_t(container); + assert(!iter.is_end()); + std::string prefix_blank(prefix.size(), ' '); + const std::string* p_prefix = &prefix; + size += iterator_t::header_size(); + do { + std::ostringstream sos; + sos << *p_prefix << iter.get_key() << ": "; + std::string i_prefix = sos.str(); + if constexpr (!IS_BOTTOM) { + auto nxt_container = iter.get_nxt_container(); + size += iter.size_to_nxt(); + NXT_STAGE_T::dump(nxt_container, os, i_prefix, size, p_start); + } else { + auto value_ptr = iter.get_p_value(); + int offset = reinterpret_cast<const char*>(value_ptr) - p_start; + size += iter.size(); + os << "\n" << i_prefix; + if constexpr (NODE_TYPE == node_type_t::LEAF) { + os << *value_ptr; + } else { + os << "0x" << std::hex << value_ptr->value << std::dec; + } + os << " " << size << "B" + << " @" << offset << "B"; + } + if (iter.is_last()) { + break; + } else { + ++iter; + p_prefix = &prefix_blank; + } + } while (true); + return os; + } + + static void validate(const container_t& container) { + auto iter = iterator_t(container); + assert(!iter.is_end()); + auto key = iter.get_key(); + do { + if constexpr (!IS_BOTTOM) { + auto nxt_container = iter.get_nxt_container(); + NXT_STAGE_T::validate(nxt_container); + } + if (iter.is_last()) { + break; + } else { + ++iter; + assert(compare_to(key, iter.get_key()) == MatchKindCMP::LT); + key = iter.get_key(); + } + } while (true); + } + + static void get_stats(const container_t& container, node_stats_t& stats, + full_key_t<KeyT::VIEW>& index_key) { + auto iter = iterator_t(container); + assert(!iter.is_end()); + stats.size_overhead += iterator_t::header_size(); + do { + index_key.replace(iter.get_key()); + stats.size_overhead += iter.size_overhead(); + if constexpr (!IS_BOTTOM) { + auto nxt_container = iter.get_nxt_container(); + 
NXT_STAGE_T::get_stats(nxt_container, stats, index_key); + } else { + ++stats.num_kvs; + size_t kv_logical_size = index_key.size_logical(); + size_t value_size; + if constexpr (NODE_TYPE == node_type_t::LEAF) { + value_size = iter.get_p_value()->size; + } else { + value_size = sizeof(value_t); + } + stats.size_value += value_size; + kv_logical_size += value_size; + stats.size_logical += kv_logical_size; + } + if (iter.is_last()) { + break; + } else { + ++iter; + } + } while (true); + } + + static bool next_position(const container_t& container, position_t& pos) { + auto iter = iterator_t(container); + assert(!iter.is_end()); + iter.seek_at(pos.index); + bool find_next; + if constexpr (!IS_BOTTOM) { + auto nxt_container = iter.get_nxt_container(); + find_next = NXT_STAGE_T::next_position(nxt_container, pos.nxt); + } else { + find_next = true; + } + if (find_next) { + if (iter.is_last()) { + return true; + } else { + pos.index = iter.index() + 1; + if constexpr (!IS_BOTTOM) { + pos.nxt = NXT_STAGE_T::position_t::begin(); + } + return false; + } + } else { + return false; + } + } + + struct _BaseEmpty {}; + class _BaseWithNxtIterator { + protected: + typename NXT_STAGE_T::StagedIterator _nxt; + }; + class StagedIterator + : std::conditional_t<IS_BOTTOM, _BaseEmpty, _BaseWithNxtIterator> { + public: + StagedIterator() = default; + bool valid() const { return iter.has_value(); } + size_t index() const { + return iter->index(); + } + bool is_end() const { return iter->is_end(); } + bool in_progress() const { + assert(valid()); + if constexpr (!IS_BOTTOM) { + if (this->_nxt.valid()) { + if (this->_nxt.index() == 0) { + return this->_nxt.in_progress(); + } else { + return true; + } + } else { + return false; + } + } else { + return false; + } + } + key_get_type get_key() const { return iter->get_key(); } + + iterator_t& get() { return *iter; } + void set(const container_t& container) { + assert(!valid()); + iter = iterator_t(container); + } + void set_end() { 
iter->set_end(); } + typename NXT_STAGE_T::StagedIterator& nxt() { + if constexpr (!IS_BOTTOM) { + if (!this->_nxt.valid()) { + auto nxt_container = iter->get_nxt_container(); + this->_nxt.set(nxt_container); + } + return this->_nxt; + } else { + ceph_abort("impossible path"); + } + } + typename NXT_STAGE_T::StagedIterator& get_nxt() { + if constexpr (!IS_BOTTOM) { + return this->_nxt; + } else { + ceph_abort("impossible path"); + } + } + StagedIterator& operator++() { + if (iter->is_last()) { + iter->set_end(); + } else { + ++(*iter); + } + if constexpr (!IS_BOTTOM) { + this->_nxt.reset(); + } + return *this; + } + void reset() { + if (valid()) { + iter.reset(); + if constexpr (!IS_BOTTOM) { + this->_nxt.reset(); + } + } + } + std::ostream& print(std::ostream& os, bool is_top) const { + if (valid()) { + if (iter->is_end()) { + return os << "END"; + } else { + os << index(); + } + } else { + if (is_top) { + return os << "invalid StagedIterator!"; + } else { + os << "0!"; + } + } + if constexpr (!IS_BOTTOM) { + os << ", "; + return this->_nxt.print(os, false); + } else { + return os; + } + } + position_t get_pos() const { + if (valid()) { + if constexpr (IS_BOTTOM) { + return position_t{index()}; + } else { + return position_t{index(), this->_nxt.get_pos()}; + } + } else { + return position_t::begin(); + } + } + friend std::ostream& operator<<(std::ostream& os, const StagedIterator& iter) { + return iter.print(os, true); + } + private: + std::optional<iterator_t> iter; + }; + + static bool recursively_locate_split( + size_t& current_size, size_t extra_size, + size_t target_size, StagedIterator& split_at) { + assert(current_size <= target_size); + iterator_t& split_iter = split_at.get(); + current_size = split_iter.seek_split(current_size, extra_size, target_size); + assert(current_size <= target_size); + assert(!split_iter.is_end()); + if (split_iter.index() == 0) { + extra_size += iterator_t::header_size(); + } else { + extra_size = 0; + } + bool locate_nxt; + if 
constexpr (!IS_BOTTOM) { + locate_nxt = NXT_STAGE_T::recursively_locate_split( + current_size, extra_size + split_iter.size_to_nxt(), + target_size, split_at.nxt()); + } else { // IS_BOTTOM + // located upper_bound, fair split strategy + size_t nxt_size = split_iter.size() + extra_size; + assert(current_size + nxt_size > target_size); + if (current_size + nxt_size/2 < target_size) { + // include next + current_size += nxt_size; + locate_nxt = true; + } else { + // exclude next + locate_nxt = false; + } + } + if (locate_nxt) { + if (split_iter.is_last()) { + return true; + } else { + ++split_at; + return false; + } + } else { + return false; + } + } + + static bool recursively_locate_split_inserted( + size_t& current_size, size_t extra_size, size_t target_size, + position_t& insert_pos, match_stage_t insert_stage, size_t insert_size, + std::optional<bool>& is_insert_left, StagedIterator& split_at) { + assert(current_size <= target_size); + assert(!is_insert_left.has_value()); + iterator_t& split_iter = split_at.get(); + auto& insert_index = insert_pos.index; + if (insert_stage == STAGE) { + current_size = split_iter.template seek_split_inserted<true>( + current_size, extra_size, target_size, + insert_index, insert_size, is_insert_left); + assert(is_insert_left.has_value()); + assert(current_size <= target_size); + if (split_iter.index() == 0) { + if (insert_index == 0) { + if (*is_insert_left == false) { + extra_size += iterator_t::header_size(); + } else { + extra_size = 0; + } + } else { + extra_size += iterator_t::header_size(); + } + } else { + extra_size = 0; + } + if (*is_insert_left == false && split_iter.index() == insert_index) { + // split_iter can be end + // found the lower-bound of target_size + // ...[s_index-1] |!| (i_index) [s_index]... 
+ + // located upper-bound, fair split strategy + // look at the next slot (the insert item) + size_t nxt_size = insert_size + extra_size; + assert(current_size + nxt_size > target_size); + if (current_size + nxt_size/2 < target_size) { + // include next + *is_insert_left = true; + current_size += nxt_size; + if (split_iter.is_end()) { + // ...[s_index-1] (i_index) |!| + return true; + } else { + return false; + } + } else { + // exclude next + return false; + } + } else { + // Already considered insert effect in the current stage. + // Look into the next stage to identify the target_size lower-bound w/o + // insert effect. + assert(!split_iter.is_end()); + bool locate_nxt; + if constexpr (!IS_BOTTOM) { + locate_nxt = NXT_STAGE_T::recursively_locate_split( + current_size, extra_size + split_iter.size_to_nxt(), + target_size, split_at.nxt()); + } else { // IS_BOTTOM + // located upper-bound, fair split strategy + // look at the next slot + size_t nxt_size = split_iter.size() + extra_size; + assert(current_size + nxt_size > target_size); + if (current_size + nxt_size/2 < target_size) { + // include next + current_size += nxt_size; + locate_nxt = true; + } else { + // exclude next + locate_nxt = false; + } + } + if (locate_nxt) { + if (split_iter.is_last()) { + auto end_index = split_iter.index() + 1; + if (insert_index == INDEX_END) { + insert_index = end_index; + } + assert(insert_index <= end_index); + if (insert_index == end_index) { + assert(*is_insert_left == false); + split_iter.set_end(); + // ...[s_index-1] |!| (i_index) + return false; + } else { + assert(*is_insert_left == true); + return true; + } + } else { + ++split_at; + return false; + } + } else { + return false; + } + } + } else { + if constexpr (!IS_BOTTOM) { + assert(insert_stage < STAGE); + current_size = split_iter.template seek_split_inserted<false>( + current_size, extra_size, target_size, + insert_index, insert_size, is_insert_left); + assert(!split_iter.is_end()); + assert(current_size <= 
target_size); + if (split_iter.index() == 0) { + extra_size += iterator_t::header_size(); + } else { + extra_size = 0; + } + bool locate_nxt; + if (!is_insert_left.has_value()) { + // Considered insert effect in the current stage, and insert happens + // in the lower stage. + // Look into the next stage to identify the target_size lower-bound w/ + // insert effect. + assert(split_iter.index() == insert_index); + locate_nxt = NXT_STAGE_T::recursively_locate_split_inserted( + current_size, extra_size + split_iter.size_to_nxt(), target_size, + insert_pos.nxt, insert_stage, insert_size, + is_insert_left, split_at.nxt()); + assert(is_insert_left.has_value()); +#ifndef NDEBUG + if (locate_nxt) { + assert(*is_insert_left == true); + } +#endif + } else { + // is_insert_left.has_value() == true + // Insert will *not* happen in the lower stage. + // Need to look into the next stage to identify the target_size + // lower-bound w/ insert effect + assert(split_iter.index() != insert_index); + locate_nxt = NXT_STAGE_T::recursively_locate_split( + current_size, extra_size + split_iter.size_to_nxt(), + target_size, split_at.nxt()); +#ifndef NDEBUG + if (split_iter.index() < insert_index) { + assert(*is_insert_left == false); + } else { + assert(*is_insert_left == true); + } +#endif + } + if (locate_nxt) { + if (split_iter.is_last()) { + return true; + } else { + ++split_at; + return false; + } + } else { + return false; + } + } else { + ceph_abort("impossible path"); + return false;; + } + } + } + + /* + * container appender type system + * container_t::Appender(NodeExtentMutable& mut, char* p_append) + * append(const container_t& src, size_t from, size_t items) + * wrap() -> char* + * IF !IS_BOTTOM: + * open_nxt(const key_get_type&) + * open_nxt(const full_key_t&) + * -> std::tuple<NodeExtentMutable&, char*> + * wrap_nxt(char* p_append) + * ELSE + * append(const full_key_t& key, const value_t& value) + */ + template <KeyT KT> + struct _BaseWithNxtAppender { + typename 
NXT_STAGE_T::template StagedAppender<KT> _nxt; + }; + template <KeyT KT> + class StagedAppender + : std::conditional_t<IS_BOTTOM, _BaseEmpty, _BaseWithNxtAppender<KT>> { + public: + StagedAppender() = default; + ~StagedAppender() { + assert(!require_wrap_nxt); + assert(!valid()); + } + bool valid() const { return appender.has_value(); } + size_t index() const { + assert(valid()); + return _index; + } + bool in_progress() const { return require_wrap_nxt; } + // TODO: pass by reference + void init(NodeExtentMutable* p_mut, char* p_start) { + assert(!valid()); + appender = typename container_t::template Appender<KT>(p_mut, p_start); + _index = 0; + } + // possible to make src_iter end if to_index == INDEX_END + void append_until(StagedIterator& src_iter, size_t& to_index) { + assert(!require_wrap_nxt); + auto s_index = src_iter.index(); + src_iter.get().template copy_out_until<KT>(*appender, to_index); + assert(src_iter.index() == to_index); + assert(to_index >= s_index); + auto increment = (to_index - s_index); + if (increment) { + _index += increment; + if constexpr (!IS_BOTTOM) { + src_iter.get_nxt().reset(); + } + } + } + void append(const full_key_t<KT>& key, + const value_t& value, const value_t*& p_value) { + assert(!require_wrap_nxt); + if constexpr (!IS_BOTTOM) { + auto& nxt = open_nxt(key); + nxt.append(key, value, p_value); + wrap_nxt(); + } else { + appender->append(key, value, p_value); + ++_index; + } + } + char* wrap() { + assert(valid()); + assert(_index > 0); + if constexpr (!IS_BOTTOM) { + if (require_wrap_nxt) { + wrap_nxt(); + } + } + auto ret = appender->wrap(); + appender.reset(); + return ret; + } + typename NXT_STAGE_T::template StagedAppender<KT>& + open_nxt(key_get_type paritial_key) { + assert(!require_wrap_nxt); + if constexpr (!IS_BOTTOM) { + require_wrap_nxt = true; + auto [p_mut, p_append] = appender->open_nxt(paritial_key); + this->_nxt.init(p_mut, p_append); + return this->_nxt; + } else { + ceph_abort("impossible path"); + } + } + 
typename NXT_STAGE_T::template StagedAppender<KT>& + open_nxt(const full_key_t<KT>& key) { + assert(!require_wrap_nxt); + if constexpr (!IS_BOTTOM) { + require_wrap_nxt = true; + auto [p_mut, p_append] = appender->open_nxt(key); + this->_nxt.init(p_mut, p_append); + return this->_nxt; + } else { + ceph_abort("impossible path"); + } + } + typename NXT_STAGE_T::template StagedAppender<KT>& get_nxt() { + if constexpr (!IS_BOTTOM) { + assert(require_wrap_nxt); + return this->_nxt; + } else { + ceph_abort("impossible path"); + } + } + void wrap_nxt() { + if constexpr (!IS_BOTTOM) { + assert(require_wrap_nxt); + require_wrap_nxt = false; + auto p_append = this->_nxt.wrap(); + appender->wrap_nxt(p_append); + ++_index; + } else { + ceph_abort("impossible path"); + } + } + private: + std::optional<typename container_t::template Appender<KT>> appender; + size_t _index; + bool require_wrap_nxt = false; + }; + + template <KeyT KT> + static void _append_range( + StagedIterator& src_iter, StagedAppender<KT>& appender, size_t& to_index) { + if (src_iter.is_end()) { + // append done + assert(to_index == INDEX_END); + to_index = src_iter.index(); + } else if constexpr (!IS_BOTTOM) { + if (appender.in_progress()) { + // appender has appended something at the current item, + // cannot append the current item as-a-whole + size_t to_index_nxt = INDEX_END; + NXT_STAGE_T::template _append_range<KT>( + src_iter.nxt(), appender.get_nxt(), to_index_nxt); + ++src_iter; + appender.wrap_nxt(); + } else if (src_iter.in_progress()) { + // src_iter is not at the beginning of the current item, + // cannot append the current item as-a-whole + size_t to_index_nxt = INDEX_END; + NXT_STAGE_T::template _append_range<KT>( + src_iter.nxt(), appender.open_nxt(src_iter.get_key()), to_index_nxt); + ++src_iter; + appender.wrap_nxt(); + } else { + // we can safely append the current item as-a-whole + } + } + appender.append_until(src_iter, to_index); + } + + template <KeyT KT> + static void 
_append_into(StagedIterator& src_iter, StagedAppender<KT>& appender, + position_t& position, match_stage_t stage) { + assert(position.index == src_iter.index()); + // reaches the last item + if (stage == STAGE) { + // done, end recursion + if constexpr (!IS_BOTTOM) { + position.nxt = position_t::nxt_t::begin(); + } + } else { + assert(stage < STAGE); + // proceed append in the next stage + NXT_STAGE_T::template append_until<KT>( + src_iter.nxt(), appender.open_nxt(src_iter.get_key()), + position.nxt, stage); + } + } + + template <KeyT KT> + static void append_until(StagedIterator& src_iter, StagedAppender<KT>& appender, + position_t& position, match_stage_t stage) { + size_t from_index = src_iter.index(); + size_t& to_index = position.index; + assert(from_index <= to_index); + if constexpr (IS_BOTTOM) { + assert(stage == STAGE); + appender.append_until(src_iter, to_index); + } else { + assert(stage <= STAGE); + if (src_iter.index() == to_index) { + _append_into<KT>(src_iter, appender, position, stage); + } else { + if (to_index == INDEX_END) { + assert(stage == STAGE); + } else if (to_index == INDEX_LAST) { + assert(stage < STAGE); + } + _append_range<KT>(src_iter, appender, to_index); + _append_into<KT>(src_iter, appender, position, stage); + } + } + to_index -= from_index; + } + + template <KeyT KT> + static bool append_insert( + const full_key_t<KT>& key, const value_t& value, + StagedIterator& src_iter, StagedAppender<KT>& appender, + bool is_front_insert, match_stage_t& stage, const value_t*& p_value) { + assert(src_iter.valid()); + if (stage == STAGE) { + appender.append(key, value, p_value); + if (src_iter.is_end()) { + return true; + } else { + return false; + } + } else { + assert(stage < STAGE); + if constexpr (!IS_BOTTOM) { + auto nxt_is_end = NXT_STAGE_T::template append_insert<KT>( + key, value, src_iter.get_nxt(), appender.get_nxt(), + is_front_insert, stage, p_value); + if (nxt_is_end) { + appender.wrap_nxt(); + ++src_iter; + if (is_front_insert) { + 
stage = STAGE; + } + if (src_iter.is_end()) { + return true; + } + } + return false; + } else { + ceph_abort("impossible path"); + } + } + } + + /* TrimType: + * BEFORE: remove the entire container, normally means the corresponding higher + * stage iterator needs to be trimmed as-a-whole. + * AFTER: retain the entire container, normally means the trim should + * start from the next iterator at the higher stage. + * AT: trim happens in the current container, and the corresponding higher + * stage iterator needs to be adjusted by the trimmed size. + * + * recursively_trim() returns the TrimType decided at this stage together + * with the trimmed size, which is 0 for BEFORE and AFTER. + */ + static std::tuple<TrimType, node_offset_t> + recursively_trim(NodeExtentMutable& mut, StagedIterator& trim_at) { + if (!trim_at.valid()) { + return {TrimType::BEFORE, 0u}; + } + if (trim_at.is_end()) { + return {TrimType::AFTER, 0u}; + } + + auto& iter = trim_at.get(); + if constexpr (!IS_BOTTOM) { + auto [type, trimmed] = NXT_STAGE_T::recursively_trim( + mut, trim_at.get_nxt()); + node_offset_t trim_size; + if (type == TrimType::AFTER) { + if (iter.is_last()) { + return {TrimType::AFTER, 0u}; + } + ++trim_at; + trim_size = iter.trim_until(mut); + } else if (type == TrimType::BEFORE) { + if (iter.index() == 0) { + return {TrimType::BEFORE, 0u}; + } + trim_size = iter.trim_until(mut); + } else { + trim_size = iter.trim_at(mut, trimmed); + } + return {TrimType::AT, trim_size}; + } else { + if (iter.index() == 0) { + return {TrimType::BEFORE, 0u}; + } else { + auto trimmed = iter.trim_until(mut); + return {TrimType::AT, trimmed}; + } + } + } + + /* Trim at trim_at; a BEFORE result from recursively_trim() is resolved + * here by calling trim_until() on this stage's iterator. + */ + static void trim(NodeExtentMutable& mut, StagedIterator& trim_at) { + auto [type, trimmed] = recursively_trim(mut, trim_at); + if (type == TrimType::BEFORE) { + assert(trim_at.valid()); + auto& iter = trim_at.get(); + iter.trim_until(mut); + } + } +}; + +/** + * Configurations for struct staged + * + * staged_params_* assembles different container_t implementations (defined by + * staged::_iterator_t) by STAGE, and constructs the final multi-stage + * implementations for different 
node layouts defined by + * node_extent_t<FieldType, NODE_TYPE>. + * + * The specialized implementations for different layouts are accessible through + * the helper type node_to_stage_t<node_extent_t<FieldType, NODE_TYPE>>. + * + * Specifically, the settings of 8 layouts are: + * + * The layout (N0, LEAF/INTERNAL) has 3 stages: + * - STAGE_LEFT: node_extent_t<node_fields_0_t, LEAF/INTERNAL> + * - STAGE_STRING: item_iterator_t<LEAF/INTERNAL> + * - STAGE_RIGHT: sub_items_t<LEAF/INTERNAL> + * + * The layout (N1, LEAF/INTERNAL) has 3 stages: + * - STAGE_LEFT: node_extent_t<node_fields_1_t, LEAF/INTERNAL> + * - STAGE_STRING: item_iterator_t<LEAF/INTERNAL> + * - STAGE_RIGHT: sub_items_t<LEAF/INTERNAL> + * + * The layout (N2, LEAF/INTERNAL) has 2 stages: + * - STAGE_STRING: node_extent_t<node_fields_2_t, LEAF/INTERNAL> + * - STAGE_RIGHT: sub_items_t<LEAF/INTERNAL> + * + * The layout (N3, LEAF) has 1 stage: + * - STAGE_RIGHT: node_extent_t<leaf_fields_3_t, LEAF> + * + * The layout (N3, INTERNAL) has 1 stage: + * - STAGE_RIGHT: node_extent_t<internal_fields_3_t, INTERNAL> + */ + +template <node_type_t _NODE_TYPE> +struct staged_params_subitems { + using container_t = sub_items_t<_NODE_TYPE>; + static constexpr auto NODE_TYPE = _NODE_TYPE; + static constexpr auto STAGE = STAGE_RIGHT; + + // dummy type in order to make our type system work + // any better solution to get rid of this? 
+ using next_param_t = staged_params_subitems<NODE_TYPE>; +}; + +template <node_type_t _NODE_TYPE> +struct staged_params_item_iterator { + using container_t = item_iterator_t<_NODE_TYPE>; + static constexpr auto NODE_TYPE = _NODE_TYPE; + static constexpr auto STAGE = STAGE_STRING; + + using next_param_t = staged_params_subitems<NODE_TYPE>; +}; + +template <typename NodeType> +struct staged_params_node_01 { + using container_t = NodeType; + static constexpr auto NODE_TYPE = NodeType::NODE_TYPE; + static constexpr auto STAGE = STAGE_LEFT; + + using next_param_t = staged_params_item_iterator<NODE_TYPE>; +}; + +template <typename NodeType> +struct staged_params_node_2 { + using container_t = NodeType; + static constexpr auto NODE_TYPE = NodeType::NODE_TYPE; + static constexpr auto STAGE = STAGE_STRING; + + using next_param_t = staged_params_subitems<NODE_TYPE>; +}; + +template <typename NodeType> +struct staged_params_node_3 { + using container_t = NodeType; + static constexpr auto NODE_TYPE = NodeType::NODE_TYPE; + static constexpr auto STAGE = STAGE_RIGHT; + + // dummy type in order to make our type system work + // any better solution to get rid of this? 
+ using next_param_t = staged_params_node_3<NodeType>; +}; + +template <typename NodeType, typename Enable = void> struct _node_to_stage_t; +template <typename NodeType> +struct _node_to_stage_t<NodeType, + std::enable_if_t<NodeType::FIELD_TYPE == field_type_t::N0 || + NodeType::FIELD_TYPE == field_type_t::N1>> { + using type = staged<staged_params_node_01<NodeType>>; +}; +template <typename NodeType> +struct _node_to_stage_t<NodeType, + std::enable_if_t<NodeType::FIELD_TYPE == field_type_t::N2>> { + using type = staged<staged_params_node_2<NodeType>>; +}; +template <typename NodeType> +struct _node_to_stage_t<NodeType, + std::enable_if_t<NodeType::FIELD_TYPE == field_type_t::N3>> { + using type = staged<staged_params_node_3<NodeType>>; +}; +template <typename NodeType> +using node_to_stage_t = typename _node_to_stage_t<NodeType>::type; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/stage_types.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/stage_types.h new file mode 100644 index 00000000000..60a0fbec72f --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/stage_types.h @@ -0,0 +1,389 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <algorithm> +#include <cassert> +#include <optional> +#include <ostream> + +#include "crimson/os/seastore/onode_manager/staged-fltree/fwd.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_types.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/tree_types.h" + +namespace crimson::os::seastore::onode { + +using match_stage_t = uint8_t; +constexpr match_stage_t STAGE_LEFT = 2u; // shard/pool/crush +constexpr match_stage_t STAGE_STRING = 1u; // nspace/oid +constexpr match_stage_t STAGE_RIGHT = 0u; // snap/gen +constexpr auto STAGE_TOP = STAGE_LEFT; +constexpr auto STAGE_BOTTOM = STAGE_RIGHT; +constexpr bool is_valid_stage(match_stage_t stage) { + return std::clamp(stage, 
STAGE_BOTTOM, STAGE_TOP) == stage; +} +// TODO: replace by +// using match_history_t = int8_t; +// left_m, str_m, right_m +// 3: GT, +// 2: EQ, GT, +// 1: EQ, EQ, GT +// 0: EQ, EQ, EQ +// -1: EQ, EQ, LT +// -2: EQ, LT, +// -3: LT, + +/* Holds one optional MatchKindCMP per stage (left, string, right). */ +struct MatchHistory { + /* Compile-time accessor: returns the slot that belongs to STAGE. */ + template <match_stage_t STAGE> + const std::optional<MatchKindCMP>& get() const { + static_assert(is_valid_stage(STAGE)); + if constexpr (STAGE == STAGE_RIGHT) { + return right_match; + } else if constexpr (STAGE == STAGE_STRING) { + return string_match; + } else { + return left_match; + } + } + + /* Run-time counterpart of get<STAGE>(). */ + const std::optional<MatchKindCMP>& + get_by_stage(match_stage_t stage) const { + assert(is_valid_stage(stage)); + if (stage == STAGE_RIGHT) { + return right_match; + } else if (stage == STAGE_STRING) { + return string_match; + } else { + return left_match; + } + } + + template <match_stage_t STAGE = STAGE_TOP> + const bool is_GT() const; + + /* Record the result for STAGE. Debug builds assert that the stage above + * is EQ and that this stage does not already hold EQ. + */ + template <match_stage_t STAGE> + void set(MatchKindCMP match) { + static_assert(is_valid_stage(STAGE)); + if constexpr (STAGE < STAGE_TOP) { + assert(*get<STAGE + 1>() == MatchKindCMP::EQ); + } + assert(!get<STAGE>().has_value() || *get<STAGE>() != MatchKindCMP::EQ); + const_cast<std::optional<MatchKindCMP>&>(get<STAGE>()) = match; + } + + /* Print the three slots as "history(left, string, right)". */ + std::ostream& dump(std::ostream& os) const { + os << "history("; + dump_each(os, left_match) << ", "; + dump_each(os, string_match) << ", "; + dump_each(os, right_match) << ")"; + return os; + } + + /* Print one slot: "--" when empty, otherwise LT/EQ/GT. */ + std::ostream& dump_each( + std::ostream& os, const std::optional<MatchKindCMP>& match) const { + if (!match.has_value()) { + return os << "--"; + } else if (*match == MatchKindCMP::LT) { + return os << "LT"; + } else if (*match == MatchKindCMP::EQ) { + return os << "EQ"; + } else if (*match == MatchKindCMP::GT) { + return os << "GT"; + } else { + ceph_abort("impossble path"); + } + } + + std::optional<MatchKindCMP> left_match; + std::optional<MatchKindCMP> string_match; + std::optional<MatchKindCMP> right_match; +}; +inline std::ostream& 
operator<<(std::ostream& os, const MatchHistory& pos) { + return pos.dump(os); +} + +template <match_stage_t STAGE> +struct _check_GT_t { + static bool eval(const MatchHistory* history) { + return history->get<STAGE>() && + (*history->get<STAGE>() == MatchKindCMP::GT || + (*history->get<STAGE>() == MatchKindCMP::EQ && + _check_GT_t<STAGE - 1>::eval(history))); + } +}; +template <> +struct _check_GT_t<STAGE_RIGHT> { + static bool eval(const MatchHistory* history) { + return history->get<STAGE_RIGHT>() && + *history->get<STAGE_RIGHT>() == MatchKindCMP::GT; + } +}; +template <match_stage_t STAGE> +const bool MatchHistory::is_GT() const { + static_assert(is_valid_stage(STAGE)); + if constexpr (STAGE < STAGE_TOP) { + assert(get<STAGE + 1>() == MatchKindCMP::EQ); + } + return _check_GT_t<STAGE>::eval(this); +} + +template <match_stage_t STAGE> +struct staged_position_t { + static_assert(is_valid_stage(STAGE)); + using me_t = staged_position_t<STAGE>; + using nxt_t = staged_position_t<STAGE - 1>; + bool is_end() const { + if (index == INDEX_END) { + return true; + } else { + assert(is_valid_index(index)); + return false; + } + } + size_t& index_by_stage(match_stage_t stage) { + assert(stage <= STAGE); + if (STAGE == stage) { + return index; + } else { + return nxt.index_by_stage(stage); + } + } + + int cmp(const me_t& o) const { + if (index > o.index) { + return 1; + } else if (index < o.index) { + return -1; + } else { + return nxt.cmp(o.nxt); + } + } + bool operator>(const me_t& o) const { return cmp(o) > 0; } + bool operator>=(const me_t& o) const { return cmp(o) >= 0; } + bool operator<(const me_t& o) const { return cmp(o) < 0; } + bool operator<=(const me_t& o) const { return cmp(o) <= 0; } + bool operator==(const me_t& o) const { return cmp(o) == 0; } + bool operator!=(const me_t& o) const { return cmp(o) != 0; } + + me_t& operator-=(const me_t& o) { + assert(is_valid_index(o.index)); + assert(index >= o.index); + if (index != INDEX_END) { + 
assert(is_valid_index(index)); + index -= o.index; + if (index == 0) { + nxt -= o.nxt; + } + } + return *this; + } + + static me_t begin() { return {0u, nxt_t::begin()}; } + static me_t end() { + return {INDEX_END, nxt_t::end()}; + } + + size_t index; + nxt_t nxt; +}; +template <match_stage_t STAGE> +std::ostream& operator<<(std::ostream& os, const staged_position_t<STAGE>& pos) { + if (pos.index == INDEX_END) { + os << "END"; + } else if (pos.index == INDEX_LAST) { + os << "LAST"; + } else { + os << pos.index; + assert(is_valid_index(pos.index)); + } + return os << ", " << pos.nxt; +} + +template <> +struct staged_position_t<STAGE_BOTTOM> { + using me_t = staged_position_t<STAGE_BOTTOM>; + bool is_end() const { + if (index == INDEX_END) { + return true; + } else { + assert(is_valid_index(index)); + return false; + } + } + size_t& index_by_stage(match_stage_t stage) { + assert(stage == STAGE_BOTTOM); + return index; + } + + int cmp(const staged_position_t<STAGE_BOTTOM>& o) const { + if (index > o.index) { + return 1; + } else if (index < o.index) { + return -1; + } else { + return 0; + } + } + bool operator>(const me_t& o) const { return cmp(o) > 0; } + bool operator>=(const me_t& o) const { return cmp(o) >= 0; } + bool operator<(const me_t& o) const { return cmp(o) < 0; } + bool operator<=(const me_t& o) const { return cmp(o) <= 0; } + bool operator==(const me_t& o) const { return cmp(o) == 0; } + bool operator!=(const me_t& o) const { return cmp(o) != 0; } + + me_t& operator-=(const me_t& o) { + assert(is_valid_index(o.index)); + assert(index >= o.index); + if (index != INDEX_END) { + assert(is_valid_index(index)); + index -= o.index; + } + return *this; + } + + static me_t begin() { return {0u}; } + static me_t end() { return {INDEX_END}; } + + size_t index; +}; +template <> +inline std::ostream& operator<<(std::ostream& os, const staged_position_t<STAGE_BOTTOM>& pos) { + if (pos.index == INDEX_END) { + os << "END"; + } else if (pos.index == INDEX_LAST) { + os 
<< "LAST"; + } else { + os << pos.index; + assert(is_valid_index(pos.index)); + } + return os; +} + +using search_position_t = staged_position_t<STAGE_TOP>; + +template <match_stage_t STAGE> +const staged_position_t<STAGE>& cast_down(const search_position_t& pos) { + if constexpr (STAGE == STAGE_LEFT) { + return pos; + } else if constexpr (STAGE == STAGE_STRING) { +#ifndef NDEBUG + if (pos.is_end()) { + assert(pos.nxt.is_end()); + } else { + assert(pos.index == 0u); + } +#endif + return pos.nxt; + } else if constexpr (STAGE == STAGE_RIGHT) { +#ifndef NDEBUG + if (pos.is_end()) { + assert(pos.nxt.nxt.is_end()); + } else { + assert(pos.index == 0u); + assert(pos.nxt.index == 0u); + } +#endif + return pos.nxt.nxt; + } else { + ceph_abort("impossible path"); + } +} + +template <match_stage_t STAGE> +staged_position_t<STAGE>& cast_down(search_position_t& pos) { + const search_position_t& _pos = pos; + return const_cast<staged_position_t<STAGE>&>(cast_down<STAGE>(_pos)); +} + +/* Return the STAGE-level part of pos and overwrite the index of every stage + * above STAGE with 0 (cast_down only asserts those indexes in debug builds). + * The branches form a single if-constexpr chain, so exactly one of them is + * instantiated per STAGE. + */ +template <match_stage_t STAGE> +staged_position_t<STAGE>& cast_down_fill_0(search_position_t& pos) { + if constexpr (STAGE == STAGE_LEFT) { + return pos; + } else if constexpr (STAGE == STAGE_STRING) { + pos.index = 0; + return pos.nxt; + } else if constexpr (STAGE == STAGE_RIGHT) { + pos.index = 0; + pos.nxt.index = 0; + return pos.nxt.nxt; + } else { + ceph_abort("impossible path"); + } +} + +inline search_position_t&& normalize(search_position_t&& pos) { return std::move(pos); } + +/* Lift a lower-stage position to a search_position_t: an end position maps to + * search_position_t::end(), otherwise the higher-stage indexes are set to 0. + */ +template <match_stage_t STAGE, typename = std::enable_if_t<STAGE != STAGE_TOP>> +search_position_t normalize(staged_position_t<STAGE>&& pos) { + if (pos.is_end()) { + return search_position_t::end(); + } + if constexpr (STAGE == STAGE_STRING) { + return {0u, std::move(pos)}; + } else if constexpr (STAGE == STAGE_RIGHT) { + return {0u, {0u, std::move(pos)}}; + } else { + ceph_abort("impossible path"); + } +} + +struct memory_range_t { + const char* p_start; + const char* p_end; +}; + +enum class ContainerType { ITERATIVE, INDEXABLE 
}; + +template <node_type_t> struct value_type; +template<> struct value_type<node_type_t::INTERNAL> { using type = laddr_packed_t; }; +template<> struct value_type<node_type_t::LEAF> { using type = onode_t; }; +template <node_type_t NODE_TYPE> +using value_type_t = typename value_type<NODE_TYPE>::type; + +template <node_type_t NODE_TYPE, match_stage_t STAGE> +struct staged_result_t { + using me_t = staged_result_t<NODE_TYPE, STAGE>; + bool is_end() const { return position.is_end(); } + + static me_t end() { + return {staged_position_t<STAGE>::end(), nullptr, MSTAT_END}; + } + template <typename T = me_t> + static std::enable_if_t<STAGE != STAGE_BOTTOM, T> from_nxt( + size_t index, const staged_result_t<NODE_TYPE, STAGE - 1>& nxt_stage_result) { + return {{index, nxt_stage_result.position}, + nxt_stage_result.p_value, + nxt_stage_result.mstat}; + } + + staged_position_t<STAGE> position; + const value_type_t<NODE_TYPE>* p_value; + match_stat_t mstat; +}; + +template <node_type_t NODE_TYPE> +using lookup_result_t = staged_result_t<NODE_TYPE, STAGE_TOP>; + +template <node_type_t NODE_TYPE> +lookup_result_t<NODE_TYPE>&& normalize( + lookup_result_t<NODE_TYPE>&& result) { return std::move(result); } + +template <node_type_t NODE_TYPE, match_stage_t STAGE, + typename = std::enable_if_t<STAGE != STAGE_TOP>> +lookup_result_t<NODE_TYPE> normalize( + staged_result_t<NODE_TYPE, STAGE>&& result) { + // FIXME: assert result.mstat correct + return {normalize(std::move(result.position)), result.p_value, result.mstat}; +} + +struct node_stats_t { + size_t size_persistent = 0; + size_t size_filled = 0; + // filled by staged::get_stats() + size_t size_logical = 0; + size_t size_overhead = 0; + size_t size_value = 0; + unsigned num_kvs = 0; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.cc new file mode 100644 index 00000000000..bb3c64b97e3 --- /dev/null 
+++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.cc @@ -0,0 +1,205 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "sub_items_stage.h" + +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_mutable.h" + +namespace crimson::os::seastore::onode { + +template <KeyT KT> +const laddr_packed_t* internal_sub_items_t::insert_at( + NodeExtentMutable& mut, const internal_sub_items_t& sub_items, + const full_key_t<KT>& key, const laddr_packed_t& value, + size_t index, node_offset_t size, const char* p_left_bound) { + assert(index <= sub_items.keys()); + assert(size == estimate_insert<KT>(key, value)); + const char* p_shift_start = p_left_bound; + const char* p_shift_end = reinterpret_cast<const char*>( + sub_items.p_first_item + 1 - index); + mut.shift_absolute(p_shift_start, p_shift_end - p_shift_start, -(int)size); + + auto p_insert = const_cast<char*>(p_shift_end) - size; + auto item = internal_sub_item_t{snap_gen_t::from_key<KT>(key), value}; + mut.copy_in_absolute(p_insert, item); + return &reinterpret_cast<internal_sub_item_t*>(p_insert)->value; +} +template const laddr_packed_t* internal_sub_items_t::insert_at<KeyT::VIEW>( + NodeExtentMutable&, const internal_sub_items_t&, const full_key_t<KeyT::VIEW>&, + const laddr_packed_t&, size_t, node_offset_t, const char*); + +node_offset_t internal_sub_items_t::trim_until( + NodeExtentMutable&, internal_sub_items_t& items, size_t index) { + assert(index != 0); + auto keys = items.keys(); + assert(index <= keys); + size_t ret = sizeof(internal_sub_item_t) * (keys - index); + assert(ret < NODE_BLOCK_SIZE); + return ret; +} + +template class internal_sub_items_t::Appender<KeyT::VIEW>; +template class internal_sub_items_t::Appender<KeyT::HOBJ>; + +template <KeyT KT> +void internal_sub_items_t::Appender<KT>::append( + const internal_sub_items_t& src, size_t from, size_t items) { + assert(from <= src.keys()); + if 
(items == 0) { + return; + } + assert(from < src.keys()); + assert(from + items <= src.keys()); + node_offset_t size = sizeof(internal_sub_item_t) * items; + p_append -= size; + p_mut->copy_in_absolute(p_append, src.p_first_item + 1 - from - items, size); +} + +template <KeyT KT> +void internal_sub_items_t::Appender<KT>::append( + const full_key_t<KT>& key, const laddr_packed_t& value, + const laddr_packed_t*& p_value) { + p_append -= sizeof(internal_sub_item_t); + auto item = internal_sub_item_t{snap_gen_t::from_key<KT>(key), value}; + p_mut->copy_in_absolute(p_append, item); + p_value = &reinterpret_cast<internal_sub_item_t*>(p_append)->value; +} + +template <KeyT KT> +const onode_t* leaf_sub_items_t::insert_at( + NodeExtentMutable& mut, const leaf_sub_items_t& sub_items, + const full_key_t<KT>& key, const onode_t& value, + size_t index, node_offset_t size, const char* p_left_bound) { + assert(index <= sub_items.keys()); + assert(size == estimate_insert<KT>(key, value)); + // a. [... item(index)] << size + const char* p_shift_start = p_left_bound; + const char* p_shift_end = sub_items.get_item_end(index); + mut.shift_absolute(p_shift_start, p_shift_end - p_shift_start, -(int)size); + + // b. insert item + auto p_insert = const_cast<char*>(p_shift_end - size); + auto p_value = reinterpret_cast<const onode_t*>(p_insert); + mut.copy_in_absolute(p_insert, &value, value.size); + p_insert += value.size; + mut.copy_in_absolute(p_insert, snap_gen_t::template from_key<KT>(key)); + assert(p_insert + sizeof(snap_gen_t) + sizeof(node_offset_t) == p_shift_end); + + // c. compensate affected offsets + auto item_size = value.size + sizeof(snap_gen_t); + for (auto i = index; i < sub_items.keys(); ++i) { + const node_offset_packed_t& offset_i = sub_items.get_offset(i); + mut.copy_in_absolute((void*)&offset_i, node_offset_t(offset_i.value + item_size)); + } + + // d. [item(index-1) ... item(0) ... offset(index)] <<< sizeof(node_offset_t) + const char* p_offset = (index == 0 ? 
+ (const char*)&sub_items.get_offset(0) + sizeof(node_offset_t) : + (const char*)&sub_items.get_offset(index - 1)); + p_shift_start = p_shift_end; + p_shift_end = p_offset; + mut.shift_absolute(p_shift_start, p_shift_end - p_shift_start, -(int)sizeof(node_offset_t)); + + // e. insert offset + node_offset_t offset_to_item_start = item_size + sub_items.get_offset_to_end(index); + mut.copy_in_absolute( + const_cast<char*>(p_shift_end) - sizeof(node_offset_t), offset_to_item_start); + + // f. update num_sub_keys + mut.copy_in_absolute((void*)sub_items.p_num_keys, num_keys_t(sub_items.keys() + 1)); + + return p_value; +} +template const onode_t* leaf_sub_items_t::insert_at<KeyT::HOBJ>( + NodeExtentMutable&, const leaf_sub_items_t&, const full_key_t<KeyT::HOBJ>&, + const onode_t&, size_t, node_offset_t, const char*); + +node_offset_t leaf_sub_items_t::trim_until( + NodeExtentMutable& mut, leaf_sub_items_t& items, size_t index) { + assert(index != 0); + auto keys = items.keys(); + assert(index <= keys); + if (index == keys) { + return 0; + } + size_t trim_items = keys - index; + const char* p_items_start = items.p_start(); + const char* p_shift_start = items.get_item_end(index); + const char* p_shift_end = items.get_item_end(0); + size_t size_trim_offsets = sizeof(node_offset_t) * trim_items; + mut.shift_absolute(p_shift_start, p_shift_end - p_shift_start, + size_trim_offsets); + mut.copy_in_absolute((void*)items.p_num_keys, num_keys_t(index)); + size_t ret = size_trim_offsets + (p_shift_start - p_items_start); + assert(ret < NODE_BLOCK_SIZE); + return ret; +} + +// helper type for the visitor +template<class... Ts> struct overloaded : Ts... { using Ts::operator()...; }; +// explicit deduction guide +template<class... Ts> overloaded(Ts...) 
-> overloaded<Ts...>; + +template class leaf_sub_items_t::Appender<KeyT::VIEW>; +template class leaf_sub_items_t::Appender<KeyT::HOBJ>; + +template <KeyT KT> +char* leaf_sub_items_t::Appender<KT>::wrap() { + auto p_cur = p_append; + num_keys_t num_keys = 0; + for (auto i = 0u; i < cnt; ++i) { + auto& a = appends[i]; + std::visit(overloaded { + [&] (const range_items_t& arg) { num_keys += arg.items; }, + [&] (const kv_item_t& arg) { ++num_keys; } + }, a); + } + assert(num_keys); + p_cur -= sizeof(num_keys_t); + p_mut->copy_in_absolute(p_cur, num_keys); + + node_offset_t last_offset = 0; + for (auto i = 0u; i < cnt; ++i) { + auto& a = appends[i]; + std::visit(overloaded { + [&] (const range_items_t& arg) { + int compensate = (last_offset - op_src->get_offset_to_end(arg.from)); + node_offset_t offset; + for (auto i = arg.from; i < arg.from + arg.items; ++i) { + offset = op_src->get_offset(i).value + compensate; + p_cur -= sizeof(node_offset_t); + p_mut->copy_in_absolute(p_cur, offset); + } + last_offset = offset; + }, + [&] (const kv_item_t& arg) { + last_offset += sizeof(snap_gen_t) + arg.p_value->size; + p_cur -= sizeof(node_offset_t); + p_mut->copy_in_absolute(p_cur, last_offset); + } + }, a); + } + + for (auto i = 0u; i < cnt; ++i) { + auto& a = appends[i]; + std::visit(overloaded { + [&] (const range_items_t& arg) { + auto _p_start = op_src->get_item_end(arg.from + arg.items); + size_t _len = op_src->get_item_end(arg.from) - _p_start; + p_cur -= _len; + p_mut->copy_in_absolute(p_cur, _p_start, _len); + }, + [&] (const kv_item_t& arg) { + assert(pp_value); + p_cur -= sizeof(snap_gen_t); + p_mut->copy_in_absolute(p_cur, snap_gen_t::template from_key<KT>(*arg.p_key)); + p_cur -= arg.p_value->size; + p_mut->copy_in_absolute(p_cur, arg.p_value, arg.p_value->size); + *pp_value = reinterpret_cast<const onode_t*>(p_cur); + } + }, a); + } + return p_cur; +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.h 
b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.h new file mode 100644 index 00000000000..37262afacea --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/sub_items_stage.h @@ -0,0 +1,294 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <variant> + +#include "crimson/os/seastore/onode_manager/staged-fltree/node_types.h" +#include "key_layout.h" +#include "stage_types.h" + +namespace crimson::os::seastore::onode { + +class NodeExtentMutable; + +struct internal_sub_item_t { + const snap_gen_t& get_key() const { return key; } + const laddr_packed_t* get_p_value() const { return &value; } + + snap_gen_t key; + laddr_packed_t value; +} __attribute__((packed)); + +/** + * internal_sub_items_t + * + * The STAGE_RIGHT implementation for internal node N0/N1/N2, implements staged + * contract as an indexable container to index snap-gen to child node + * addresses. 
+ * + * The layout of the container storing n sub-items: + * + * # <--------- container range -----------> # + * #<~># sub-items [2, n) # + * # # <- sub-item 1 -> # <- sub-item 0 -> # + * #...# snap-gen | laddr # snap-gen | laddr # + * ^ + * | + * p_first_item + + */ +class internal_sub_items_t { + public: + using num_keys_t = size_t; + + internal_sub_items_t(const memory_range_t& range) { + assert(range.p_start < range.p_end); + assert((range.p_end - range.p_start) % sizeof(internal_sub_item_t) == 0); + num_items = (range.p_end - range.p_start) / sizeof(internal_sub_item_t); + assert(num_items > 0); + auto _p_first_item = range.p_end - sizeof(internal_sub_item_t); + p_first_item = reinterpret_cast<const internal_sub_item_t*>(_p_first_item); + } + + // container type system + using key_get_type = const snap_gen_t&; + static constexpr auto CONTAINER_TYPE = ContainerType::INDEXABLE; + num_keys_t keys() const { return num_items; } + key_get_type operator[](size_t index) const { + assert(index < num_items); + return (p_first_item - index)->get_key(); + } + node_offset_t size_before(size_t index) const { + size_t ret = index * sizeof(internal_sub_item_t); + assert(ret < NODE_BLOCK_SIZE); + return ret; + } + const laddr_packed_t* get_p_value(size_t index) const { + assert(index < num_items); + return (p_first_item - index)->get_p_value(); + } + node_offset_t size_overhead_at(size_t index) const { return 0u; } + + static node_offset_t header_size() { return 0u; } + + template <KeyT KT> + static node_offset_t estimate_insert( + const full_key_t<KT>&, const laddr_packed_t&) { + return sizeof(internal_sub_item_t); + } + + template <KeyT KT> + static const laddr_packed_t* insert_at( + NodeExtentMutable&, const internal_sub_items_t&, + const full_key_t<KT>&, const laddr_packed_t&, + size_t index, node_offset_t size, const char* p_left_bound); + + static node_offset_t trim_until(NodeExtentMutable&, internal_sub_items_t&, size_t); + + template <KeyT KT> + class Appender; + +
private: + size_t num_items; + const internal_sub_item_t* p_first_item; +}; + +template <KeyT KT> +class internal_sub_items_t::Appender { + public: + Appender(NodeExtentMutable* p_mut, char* p_append) + : p_mut{p_mut}, p_append{p_append} {} + void append(const internal_sub_items_t& src, size_t from, size_t items); + void append(const full_key_t<KT>&, const laddr_packed_t&, const laddr_packed_t*&); + char* wrap() { return p_append; } + private: + NodeExtentMutable* p_mut; + char* p_append; +}; + +/** + * leaf_sub_items_t + * + * The STAGE_RIGHT implementation for leaf node N0/N1/N2, implements staged + * contract as an indexable container to index snap-gen to onode_t. + * + * The layout of the container storing n sub-items: + * + * # <------------------------ container range -------------------------------> # + * # <---------- sub-items ----------------> # <--- offsets ---------# # + * #<~># sub-items [2, n) #<~>| offsets [2, n) # # + * # # <- sub-item 1 -> # <- sub-item 0 -> # | # # + * #...# snap-gen | onode # snap-gen | onode #...| offset1 | offset0 # num_keys # + * ^ ^ ^ + * | | | + * p_items_end + p_offsets + | + * p_num_keys + + */ +class leaf_sub_items_t { + public: + // TODO: decide by NODE_BLOCK_SIZE, sizeof(snap_gen_t), + // and the minimal size of onode_t + using num_keys_t = uint8_t; + + // Builds a view over range: num_keys is read from the last byte(s) of the + // range, the offsets precede it, and the items precede the offsets. + leaf_sub_items_t(const memory_range_t& range) { + assert(range.p_start < range.p_end); + auto _p_num_keys = range.p_end - sizeof(num_keys_t); + assert(range.p_start < _p_num_keys); + p_num_keys = reinterpret_cast<const num_keys_t*>(_p_num_keys); + assert(keys()); + auto _p_offsets = _p_num_keys - sizeof(node_offset_t); + assert(range.p_start < _p_offsets); + p_offsets = reinterpret_cast<const node_offset_packed_t*>(_p_offsets); + p_items_end = reinterpret_cast<const char*>(&get_offset(keys() - 1)); + assert(range.p_start < p_items_end); + assert(range.p_start == p_start()); + } + + // Identity comparison of the three tracked pointers; const so that const + // views can be compared as well. + bool operator==(const leaf_sub_items_t& x) const { + return (p_num_keys == x.p_num_keys && +
p_offsets == x.p_offsets && + p_items_end == x.p_items_end); + } + + const char* p_start() const { return get_item_end(keys()); } + + const node_offset_packed_t& get_offset(size_t index) const { + assert(index < keys()); + return *(p_offsets - index); + } + + const node_offset_t get_offset_to_end(size_t index) const { + assert(index <= keys()); + return index == 0 ? 0 : get_offset(index - 1).value; + } + + const char* get_item_start(size_t index) const { + return p_items_end - get_offset(index).value; + } + + const char* get_item_end(size_t index) const { + return p_items_end - get_offset_to_end(index); + } + + // container type system + using key_get_type = const snap_gen_t&; + static constexpr auto CONTAINER_TYPE = ContainerType::INDEXABLE; + num_keys_t keys() const { return *p_num_keys; } + key_get_type operator[](size_t index) const { + assert(index < keys()); + auto pointer = get_item_end(index); + assert(get_item_start(index) < pointer); + pointer -= sizeof(snap_gen_t); + assert(get_item_start(index) < pointer); + return *reinterpret_cast<const snap_gen_t*>(pointer); + } + node_offset_t size_before(size_t index) const { + assert(index <= keys()); + size_t ret; + if (index == 0) { + ret = sizeof(num_keys_t); + } else { + --index; + ret = sizeof(num_keys_t) + + (index + 1) * sizeof(node_offset_t) + + get_offset(index).value; + } + assert(ret < NODE_BLOCK_SIZE); + return ret; + } + node_offset_t size_overhead_at(size_t index) const { return sizeof(node_offset_t); } + const onode_t* get_p_value(size_t index) const { + assert(index < keys()); + auto pointer = get_item_start(index); + auto value = reinterpret_cast<const onode_t*>(pointer); + assert(pointer + value->size + sizeof(snap_gen_t) == get_item_end(index)); + return value; + } + + static node_offset_t header_size() { return sizeof(num_keys_t); } + + template <KeyT KT> + static node_offset_t estimate_insert(const full_key_t<KT>&, const onode_t& value) { + return value.size + sizeof(snap_gen_t) + 
sizeof(node_offset_t); + } + + template <KeyT KT> + static const onode_t* insert_at( + NodeExtentMutable&, const leaf_sub_items_t&, + const full_key_t<KT>&, const onode_t&, + size_t index, node_offset_t size, const char* p_left_bound); + + static node_offset_t trim_until(NodeExtentMutable&, leaf_sub_items_t&, size_t index); + + template <KeyT KT> + class Appender; + + private: + // TODO: support unaligned access + const num_keys_t* p_num_keys; + const node_offset_packed_t* p_offsets; + const char* p_items_end; +}; + +auto constexpr APPENDER_LIMIT = 3u; + +// Queues up to APPENDER_LIMIT append requests: ranges of items taken from a +// single source container and/or one new key-value item. Nothing is copied +// here; the requests are only recorded in appends[]. +template <KeyT KT> +class leaf_sub_items_t::Appender { + struct range_items_t { + size_t from; + size_t items; + }; + struct kv_item_t { + const full_key_t<KT>* p_key; + const onode_t* p_value; + }; + using var_t = std::variant<range_items_t, kv_item_t>; + + public: + Appender(NodeExtentMutable* p_mut, char* p_append) + : p_mut{p_mut}, p_append{p_append} { + } + + // Queues items [from, from + items) of src; a zero-length request is a no-op. + // Every ranged request must refer to the same src container. + void append(const leaf_sub_items_t& src, size_t from, size_t items) { + assert(cnt <= APPENDER_LIMIT); + assert(from <= src.keys()); + if (items == 0) { + return; + } + if (op_src) { + assert(*op_src == src); + } else { + op_src = src; + } + assert(from < src.keys()); + assert(from + items <= src.keys()); + // appends[] has exactly APPENDER_LIMIT slots, a free one must remain + assert(cnt < APPENDER_LIMIT); + appends[cnt] = range_items_t{from, items}; + ++cnt; + } + // Queues one new key-value item and remembers where to report the address + // of the stored value; only a single such request is allowed. + void append(const full_key_t<KT>& key, + const onode_t& value, const onode_t*& p_value) { + assert(pp_value == nullptr); + // appends[] has exactly APPENDER_LIMIT slots, a free one must remain + assert(cnt < APPENDER_LIMIT); + appends[cnt] = kv_item_t{&key, &value}; + ++cnt; + pp_value = &p_value; + } + char* wrap(); + + private: + std::optional<leaf_sub_items_t> op_src; + const onode_t** pp_value = nullptr; + NodeExtentMutable* p_mut; + char* p_append; + var_t appends[APPENDER_LIMIT]; + size_t cnt = 0; +}; + +template <node_type_t> struct _sub_items_t; +template<> struct _sub_items_t<node_type_t::INTERNAL> { using type = internal_sub_items_t; }; +template<> struct _sub_items_t<node_type_t::LEAF> { using type = leaf_sub_items_t; }; +template <node_type_t
NODE_TYPE> +using sub_items_t = typename _sub_items_t<NODE_TYPE>::type; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/super.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/super.cc new file mode 100644 index 00000000000..2828dd33e27 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/super.cc @@ -0,0 +1,26 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "super.h" +#include "node.h" + +namespace crimson::os::seastore::onode { + +Ref<Node> RootNodeTrackerIsolated::get_root(Transaction& t) const { + auto iter = tracked_supers.find(&t); + if (iter == tracked_supers.end()) { + return nullptr; + } else { + return iter->second->get_p_root(); + } +} + +Ref<Node> RootNodeTrackerShared::get_root(Transaction&) const { + if (is_clean()) { + return nullptr; + } else { + return tracked_super->get_p_root(); + } +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/super.h b/src/crimson/os/seastore/onode_manager/staged-fltree/super.h new file mode 100644 index 00000000000..3d809e66fbd --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/super.h @@ -0,0 +1,143 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <memory> + +#include "crimson/common/type_helpers.h" + +#include "fwd.h" + +namespace crimson::os::seastore::onode { + +class Node; +class Super; + +/** + * RootNodeTracker + * + * An abstracted tracker to get the root node by Transaction. 
+ */ +class RootNodeTracker { + public: + virtual ~RootNodeTracker() = default; + virtual bool is_clean() const = 0; + virtual Ref<Node> get_root(Transaction&) const = 0; + static RootNodeTrackerURef create(bool read_isolated); + protected: + RootNodeTracker() = default; + RootNodeTracker(const RootNodeTracker&) = delete; + RootNodeTracker(RootNodeTracker&&) = delete; + RootNodeTracker& operator=(const RootNodeTracker&) = delete; + RootNodeTracker& operator=(RootNodeTracker&&) = delete; + virtual void do_track_super(Transaction&, Super&) = 0; + virtual void do_untrack_super(Transaction&, Super&) = 0; + friend class Super; +}; + +/** + * Super + * + * The parent of root node. It contains the relationship between a Transaction + * and a root node address. + */ +class Super { + public: + using URef = std::unique_ptr<Super>; + Super(const Super&) = delete; + Super(Super&&) = delete; + Super& operator=(const Super&) = delete; + Super& operator=(Super&&) = delete; + virtual ~Super() { + assert(tracked_root_node == nullptr); + tracker.do_untrack_super(t, *this); + } + + virtual laddr_t get_root_laddr() const = 0; + virtual void write_root_laddr(context_t, laddr_t) = 0; + + void do_track_root(Node& root) { + assert(tracked_root_node == nullptr); + tracked_root_node = &root; + } + void do_untrack_root(Node& root) { + assert(tracked_root_node == &root); + tracked_root_node = nullptr; + } + Node* get_p_root() const { + assert(tracked_root_node != nullptr); + return tracked_root_node; + } + + protected: + Super(Transaction& t, RootNodeTracker& tracker) + : t{t}, tracker{tracker} { + tracker.do_track_super(t, *this); + } + + private: + Transaction& t; + RootNodeTracker& tracker; + Node* tracked_root_node = nullptr; +}; + +/** + * RootNodeTrackerIsolated + * + * A concrete RootNodeTracker implementation which provides root node isolation + * between Transactions for Seastore backend. 
+ */ +class RootNodeTrackerIsolated final : public RootNodeTracker { + public: + ~RootNodeTrackerIsolated() override { assert(is_clean()); } + protected: + bool is_clean() const override { + return tracked_supers.empty(); + } + void do_track_super(Transaction& t, Super& super) override { + assert(tracked_supers.find(&t) == tracked_supers.end()); + tracked_supers[&t] = &super; + } + void do_untrack_super(Transaction& t, Super& super) override { + [[maybe_unused]] auto removed = tracked_supers.erase(&t); + assert(removed); + } + ::Ref<Node> get_root(Transaction& t) const override; + std::map<Transaction*, Super*> tracked_supers; +}; + +/** + * RootNodeTrackerShared + * + * A concrete RootNodeTracker implementation which has no isolation between + * Transactions for Dummy backend. + */ +class RootNodeTrackerShared final : public RootNodeTracker { + public: + ~RootNodeTrackerShared() override { assert(is_clean()); } + protected: + bool is_clean() const override { + return tracked_super == nullptr; + } + void do_track_super(Transaction&, Super& super) override { + assert(is_clean()); + tracked_super = &super; + } + void do_untrack_super(Transaction&, Super& super) override { + assert(tracked_super == &super); + tracked_super = nullptr; + } + ::Ref<Node> get_root(Transaction&) const override; + Super* tracked_super = nullptr; +}; + +inline RootNodeTrackerURef RootNodeTracker::create(bool read_isolated) { + if (read_isolated) { + return RootNodeTrackerURef(new RootNodeTrackerIsolated()); + } else { + return RootNodeTrackerURef(new RootNodeTrackerShared()); + } +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/tree.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/tree.cc new file mode 100644 index 00000000000..a4e7ef451f4 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/tree.cc @@ -0,0 +1,231 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tree.h" + 
+#include "node.h" +#include "node_extent_manager.h" +#include "stages/key_layout.h" +#include "super.h" + +namespace crimson::os::seastore::onode { + +using btree_ertr = Btree::btree_ertr; +template <class ValueT=void> +using btree_future = Btree::btree_future<ValueT>; +using Cursor = Btree::Cursor; + +Cursor::Cursor(Btree* p_tree, Ref<tree_cursor_t> _p_cursor) + : p_tree(p_tree) { + if (_p_cursor->is_end()) { + // no need to hold the leaf node + } else { + p_cursor = _p_cursor; + } +} +Cursor::Cursor(Btree* p_tree) : p_tree{p_tree} {} +Cursor::Cursor(const Cursor&) = default; +Cursor::Cursor(Cursor&&) noexcept = default; +Cursor& Cursor::operator=(const Cursor&) = default; +Cursor& Cursor::operator=(Cursor&&) = default; +Cursor::~Cursor() = default; + +bool Cursor::is_end() const { + if (p_cursor) { + assert(!p_cursor->is_end()); + return false; + } else { + return true; + } +} + +ghobject_t Cursor::get_ghobj() const { + return p_cursor->get_key_view().to_ghobj(); +} + +const onode_t* Cursor::value() const { + return p_cursor->get_p_value(); +} + +bool Cursor::operator==(const Cursor& x) const { + return p_cursor == x.p_cursor; +} + +Cursor& Cursor::operator++() { + // TODO + return *this; +} + +Cursor Cursor::operator++(int) { + Cursor tmp = *this; + ++*this; + return tmp; +} + +Cursor Cursor::make_end(Btree* p_tree) { + return {p_tree}; +} + +Btree::Btree(NodeExtentManagerURef&& _nm) + : nm{std::move(_nm)}, + root_tracker{RootNodeTracker::create(nm->is_read_isolated())} {} + +Btree::~Btree() { assert(root_tracker->is_clean()); } + +btree_future<> Btree::mkfs(Transaction& t) { + return Node::mkfs(get_context(t), *root_tracker); +} + +btree_future<Cursor> Btree::begin(Transaction& t) { + return get_root(t).safe_then([this, &t](auto root) { + return root->lookup_smallest(get_context(t)); + }).safe_then([this](auto cursor) { + return Cursor{this, cursor}; + }); +} + +btree_future<Cursor> Btree::last(Transaction& t) { + return get_root(t).safe_then([this, &t](auto 
root) { + return root->lookup_largest(get_context(t)); + }).safe_then([this](auto cursor) { + return Cursor(this, cursor); + }); +} + +Cursor Btree::end() { + return Cursor::make_end(this); +} + +btree_future<bool> +Btree::contains(Transaction& t, const ghobject_t& obj) { + return seastar::do_with( + full_key_t<KeyT::HOBJ>(obj), + [this, &t](auto& key) -> btree_future<bool> { + return get_root(t).safe_then([this, &t, &key](auto root) { + // TODO: improve lower_bound() + return root->lower_bound(get_context(t), key); + }).safe_then([](auto result) { + return MatchKindBS::EQ == result.match(); + }); + } + ); +} + +btree_future<Cursor> +Btree::find(Transaction& t, const ghobject_t& obj) { + return seastar::do_with( + full_key_t<KeyT::HOBJ>(obj), + [this, &t](auto& key) -> btree_future<Cursor> { + return get_root(t).safe_then([this, &t, &key](auto root) { + // TODO: improve lower_bound() + return root->lower_bound(get_context(t), key); + }).safe_then([this](auto result) { + if (result.match() == MatchKindBS::EQ) { + return Cursor(this, result.p_cursor); + } else { + return Cursor::make_end(this); + } + }); + } + ); +} + +btree_future<Cursor> +Btree::lower_bound(Transaction& t, const ghobject_t& obj) { + return seastar::do_with( + full_key_t<KeyT::HOBJ>(obj), + [this, &t](auto& key) -> btree_future<Cursor> { + return get_root(t).safe_then([this, &t, &key](auto root) { + return root->lower_bound(get_context(t), key); + }).safe_then([this](auto result) { + return Cursor(this, result.p_cursor); + }); + } + ); +} + +btree_future<std::pair<Cursor, bool>> +Btree::insert(Transaction& t, const ghobject_t& obj, const onode_t& value) { + return seastar::do_with( + full_key_t<KeyT::HOBJ>(obj), + [this, &t, &value](auto& key) -> btree_future<std::pair<Cursor, bool>> { + return get_root(t).safe_then([this, &t, &key, &value](auto root) { + return root->insert(get_context(t), key, value); + }).safe_then([this](auto ret) { + auto& [cursor, success] = ret; + return 
std::make_pair(Cursor(this, cursor), success); + }); + } + ); +} + +btree_future<size_t> Btree::erase(Transaction& t, const ghobject_t& obj) { + // TODO + return btree_ertr::make_ready_future<size_t>(0u); +} + +btree_future<Cursor> Btree::erase(Cursor& pos) { + // TODO + return btree_ertr::make_ready_future<Cursor>( + Cursor::make_end(this)); +} + +btree_future<Cursor> +Btree::erase(Cursor& first, Cursor& last) { + // TODO + return btree_ertr::make_ready_future<Cursor>( + Cursor::make_end(this)); +} + +btree_future<size_t> Btree::height(Transaction& t) { + return get_root(t).safe_then([](auto root) { + return size_t(root->level() + 1); + }); +} + +btree_future<tree_stats_t> Btree::get_stats_slow(Transaction& t) { + return get_root(t).safe_then([this, &t](auto root) { + unsigned height = root->level() + 1; + return root->get_tree_stats(get_context(t) + ).safe_then([height](auto stats) { + stats.height = height; + return btree_ertr::make_ready_future<tree_stats_t>(stats); + }); + }); +} + +std::ostream& Btree::dump(Transaction& t, std::ostream& os) { + auto root = root_tracker->get_root(t); + if (root) { + root->dump(os); + } else { + os << "empty tree!"; + } + return os; +} + +btree_future<Ref<Node>> Btree::get_root(Transaction& t) { + auto root = root_tracker->get_root(t); + if (root) { + return btree_ertr::make_ready_future<Ref<Node>>(root); + } else { + return Node::load_root(get_context(t), *root_tracker); + } +} + +bool Btree::test_is_clean() const { + return root_tracker->is_clean(); +} + +btree_future<> Btree::test_clone_from( + Transaction& t, Transaction& t_from, Btree& from) { + // Note: assume the tree to clone is tracked correctly in memory. + // In some unit tests, parts of the tree are stubbed out that they + // should not be loaded from NodeExtentManager. 
+ return from.get_root(t_from + ).safe_then([this, &t](auto root_from) { + return root_from->test_clone_root(get_context(t), *root_tracker); + }); +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/tree.h b/src/crimson/os/seastore/onode_manager/staged-fltree/tree.h new file mode 100644 index 00000000000..7276303fba4 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/tree.h @@ -0,0 +1,115 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <ostream> + +#include "common/hobject.h" +#include "crimson/common/type_helpers.h" + +#include "fwd.h" +#include "tree_types.h" + +/** + * tree.h + * + * An example implementation to expose tree interfaces to users. The current + * interface design is based on: + * - ceph::os::Transaction::create/touch/remove() + * - ceph::ObjectStore::collection_list() + * - ceph::BlueStore::get_onode() + * - db->get_iterator(PREFIX_OBJ) by ceph::BlueStore::fsck() + * + * TODO: Redesign the interfaces based on real onode manager requirements.
+ */ + +namespace crimson::os::seastore::onode { + +class Node; +class Btree { + public: + using btree_ertr = crimson::errorator< + crimson::ct_error::input_output_error, + crimson::ct_error::invarg, + crimson::ct_error::enoent, + crimson::ct_error::erange>; + template <class ValueT=void> + using btree_future = btree_ertr::future<ValueT>; + + Btree(NodeExtentManagerURef&& nm); + Btree(const Btree&) = delete; + Btree(Btree&&) = delete; + Btree& operator=(const Btree&) = delete; + Btree& operator=(Btree&&) = delete; + ~Btree(); + + btree_future<> mkfs(Transaction&); + + class Cursor; + // lookup + btree_future<Cursor> begin(Transaction&); + btree_future<Cursor> last(Transaction&); + Cursor end(); + btree_future<bool> contains(Transaction&, const ghobject_t&); + btree_future<Cursor> find(Transaction&, const ghobject_t&); + btree_future<Cursor> lower_bound(Transaction&, const ghobject_t&); + + // modifiers + // TODO: replace onode_t + btree_future<std::pair<Cursor, bool>> + insert(Transaction&, const ghobject_t&, const onode_t&); + btree_future<size_t> erase(Transaction&, const ghobject_t& key); + btree_future<Cursor> erase(Cursor& pos); + btree_future<Cursor> erase(Cursor& first, Cursor& last); + + // stats + btree_future<size_t> height(Transaction&); + btree_future<tree_stats_t> get_stats_slow(Transaction&); + std::ostream& dump(Transaction&, std::ostream&); + + // test_only + bool test_is_clean() const; + btree_future<> test_clone_from(Transaction& t, Transaction& t_from, Btree& from); + + private: + context_t get_context(Transaction& t) { return {*nm, t}; } + btree_future<Ref<Node>> get_root(Transaction& t); + + NodeExtentManagerURef nm; + RootNodeTrackerURef root_tracker; + + friend class DummyChildPool; +}; + +class tree_cursor_t; +class Btree::Cursor { + public: + Cursor(const Cursor&); + Cursor(Cursor&&) noexcept; + Cursor& operator=(const Cursor&); + Cursor& operator=(Cursor&&); + ~Cursor(); + + bool is_end() const; + // XXX: return key_view_t to avoid 
unnecessary ghobject_t constructions + ghobject_t get_ghobj() const; + const onode_t* value() const; + bool operator==(const Cursor& x) const; + bool operator!=(const Cursor& x) const { return !(*this == x); } + Cursor& operator++(); + Cursor operator++(int); + + private: + Cursor(Btree*, Ref<tree_cursor_t>); + Cursor(Btree*); + + static Cursor make_end(Btree*); + + Btree* p_tree; + Ref<tree_cursor_t> p_cursor; + + friend class Btree; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/tree_types.h b/src/crimson/os/seastore/onode_manager/staged-fltree/tree_types.h new file mode 100644 index 00000000000..c9e731b3f33 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/tree_types.h @@ -0,0 +1,91 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <ostream> + +namespace crimson::os::seastore::onode { + +// TODO: Redesign according to real requirement from onode manager +struct onode_t { + // onode should be smaller than a node + uint16_t size; // address up to 64 KiB sized node + uint16_t id; + // omap, extent_map, inline data + + bool operator==(const onode_t& o) const { return size == o.size && id == o.id; } + bool operator!=(const onode_t& o) const { return !(*this == o); } +} __attribute__((packed)); +inline std::ostream& operator<<(std::ostream& os, const onode_t& node) { + return os << "onode(" << node.id << ", " << node.size << "B)"; +} + +struct tree_stats_t { + size_t size_persistent_leaf = 0; + size_t size_persistent_internal = 0; + size_t size_filled_leaf = 0; + size_t size_filled_internal = 0; + size_t size_logical_leaf = 0; + size_t size_logical_internal = 0; + size_t size_overhead_leaf = 0; + size_t size_overhead_internal = 0; + size_t size_value_leaf = 0; + size_t size_value_internal = 0; + unsigned num_kvs_leaf = 0; + unsigned num_kvs_internal = 0; + unsigned num_nodes_leaf = 0; + unsigned num_nodes_internal = 0; + unsigned height = 0; 
+ + size_t size_persistent() const { + return size_persistent_leaf + size_persistent_internal; } + size_t size_filled() const { + return size_filled_leaf + size_filled_internal; } + size_t size_logical() const { + return size_logical_leaf + size_logical_internal; } + size_t size_overhead() const { + return size_overhead_leaf + size_overhead_internal; } + size_t size_value() const { + return size_value_leaf + size_value_internal; } + unsigned num_kvs() const { + return num_kvs_leaf + num_kvs_internal; } + unsigned num_nodes() const { + return num_nodes_leaf + num_nodes_internal; } + + double ratio_fullness() const { + return (double)size_filled() / size_persistent(); } + double ratio_key_compression() const { + return (double)(size_filled() - size_value()) / (size_logical() - size_value()); } + double ratio_overhead() const { + return (double)size_overhead() / size_filled(); } + double ratio_keys_leaf() const { + return (double)num_kvs_leaf / num_kvs(); } + double ratio_nodes_leaf() const { + return (double)num_nodes_leaf / num_nodes(); } + double ratio_filled_leaf() const { + return (double)size_filled_leaf / size_filled(); } +}; +inline std::ostream& operator<<(std::ostream& os, const tree_stats_t& stats) { + os << "Tree stats:" + << "\n height = " << stats.height + << "\n num values = " << stats.num_kvs_leaf + << "\n num nodes = " << stats.num_nodes() + << " (leaf=" << stats.num_nodes_leaf + << ", internal=" << stats.num_nodes_internal << ")" + << "\n size persistent = " << stats.size_persistent() << "B" + << "\n size filled = " << stats.size_filled() << "B" + << " (value=" << stats.size_value_leaf << "B" + << ", rest=" << stats.size_filled() - stats.size_value_leaf << "B)" + << "\n size logical = " << stats.size_logical() << "B" + << "\n size overhead = " << stats.size_overhead() << "B" + << "\n ratio fullness = " << stats.ratio_fullness() + << "\n ratio keys leaf = " << stats.ratio_keys_leaf() + << "\n ratio nodes leaf = " << stats.ratio_nodes_leaf() + << "\n 
ratio filled leaf = " << stats.ratio_filled_leaf() + << "\n ratio key compression = " << stats.ratio_key_compression(); + assert(stats.num_kvs_internal + 1 == stats.num_nodes()); + return os; +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h b/src/crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h new file mode 100644 index 00000000000..5ea848e6130 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h @@ -0,0 +1,317 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <cassert> +#include <cstring> +#include <random> +#include <string> +#include <sstream> +#include <utility> +#include <vector> + +#include "crimson/common/log.h" +#include "stages/key_layout.h" +#include "tree.h" + +/** + * tree_utils.h + * + * Contains shared logic for unit tests and perf tool. + */ + +namespace crimson::os::seastore::onode { + +class Onodes { + public: + Onodes(size_t n) { + for (size_t i = 1; i <= n; ++i) { + auto p_onode = &create(i * 8); + onodes.push_back(p_onode); + } + } + + Onodes(std::vector<size_t> sizes) { + for (auto& size : sizes) { + auto p_onode = &create(size); + onodes.push_back(p_onode); + } + } + + ~Onodes() { + std::for_each(tracked_onodes.begin(), tracked_onodes.end(), + [] (onode_t* onode) { + std::free(onode); + }); + } + + const onode_t& create(size_t size) { + ceph_assert(size >= sizeof(onode_t) + sizeof(uint32_t)); + uint32_t target = size * 137; + auto p_mem = (char*)std::malloc(size); + auto p_onode = (onode_t*)p_mem; + tracked_onodes.push_back(p_onode); + p_onode->size = size; + p_onode->id = id++; + p_mem += (size - sizeof(uint32_t)); + std::memcpy(p_mem, &target, sizeof(uint32_t)); + validate(*p_onode); + return *p_onode; + } + + const onode_t& pick() const { + auto index = rd() % onodes.size(); + return *onodes[index]; + } + + const onode_t& pick_largest() const { + return *onodes[onodes.size() - 1]; 
+ } + + static void validate_cursor( + const Btree::Cursor& cursor, const ghobject_t& key, const onode_t& onode) { + ceph_assert(!cursor.is_end()); + ceph_assert(cursor.get_ghobj() == key); + ceph_assert(cursor.value()); + ceph_assert(cursor.value() != &onode); + ceph_assert(*cursor.value() == onode); + validate(*cursor.value()); + } + + private: + static void validate(const onode_t& node) { + auto p_target = (const char*)&node + node.size - sizeof(uint32_t); + uint32_t target; + std::memcpy(&target, p_target, sizeof(uint32_t)); + ceph_assert(target == node.size * 137); + } + + uint16_t id = 0; + mutable std::random_device rd; + std::vector<const onode_t*> onodes; + std::vector<onode_t*> tracked_onodes; +}; + +class KVPool { + struct kv_conf_t { + unsigned index2; + unsigned index1; + unsigned index0; + size_t ns_size; + size_t oid_size; + const onode_t* p_value; + + ghobject_t get_ghobj() const { + assert(index1 < 10); + std::ostringstream os_ns; + os_ns << "ns" << index1; + unsigned current_size = (unsigned)os_ns.tellp(); + assert(ns_size >= current_size); + os_ns << std::string(ns_size - current_size, '_'); + + std::ostringstream os_oid; + os_oid << "oid" << index1; + current_size = (unsigned)os_oid.tellp(); + assert(oid_size >= current_size); + os_oid << std::string(oid_size - current_size, '_'); + + return ghobject_t(shard_id_t(index2), index2, index2, + os_ns.str(), os_oid.str(), index0, index0); + } + }; + using kv_vector_t = std::vector<kv_conf_t>; + + public: + using kv_t = std::pair<ghobject_t, const onode_t*>; + + KVPool(const std::vector<size_t>& str_sizes, + const std::vector<size_t>& onode_sizes, + const std::pair<unsigned, unsigned>& range2, + const std::pair<unsigned, unsigned>& range1, + const std::pair<unsigned, unsigned>& range0) + : str_sizes{str_sizes}, onodes{onode_sizes} { + ceph_assert(range2.first < range2.second); + ceph_assert(range2.second - 1 <= (unsigned)std::numeric_limits<shard_t>::max()); + ceph_assert(range2.second - 1 <= 
std::numeric_limits<pool_t>::max()); + ceph_assert(range2.second - 1 <= std::numeric_limits<crush_hash_t>::max()); + ceph_assert(range1.first < range1.second); + ceph_assert(range1.second - 1 <= 9); + ceph_assert(range0.first < range0.second); + ceph_assert(range0.second - 1 <= std::numeric_limits<snap_t>::max()); + ceph_assert(range0.second - 1 <= std::numeric_limits<gen_t>::max()); + std::random_device rd; + for (unsigned i = range2.first; i < range2.second; ++i) { + for (unsigned j = range1.first; j < range1.second; ++j) { + auto ns_size = (unsigned)str_sizes[rd() % str_sizes.size()]; + auto oid_size = (unsigned)str_sizes[rd() % str_sizes.size()]; + for (unsigned k = range0.first; k < range0.second; ++k) { + kvs.emplace_back(kv_conf_t{i, j, k, ns_size, oid_size, &onodes.pick()}); + } + } + } + random_kvs = kvs; + std::random_shuffle(random_kvs.begin(), random_kvs.end()); + } + + class iterator_t { + public: + iterator_t() = default; + iterator_t(const iterator_t&) = default; + iterator_t(iterator_t&&) = default; + iterator_t& operator=(const iterator_t&) = default; + iterator_t& operator=(iterator_t&&) = default; + + kv_t get_kv() const { + assert(!is_end()); + auto& conf = (*p_kvs)[i]; + return std::make_pair(conf.get_ghobj(), conf.p_value); + } + bool is_end() const { return !p_kvs || i >= p_kvs->size(); } + size_t index() const { return i; } + + iterator_t& operator++() { + assert(!is_end()); + ++i; + return *this; + } + + iterator_t operator++(int) { + iterator_t tmp = *this; + ++*this; + return tmp; + } + + private: + iterator_t(const kv_vector_t& kvs) : p_kvs{&kvs} {} + + const kv_vector_t* p_kvs = nullptr; + size_t i = 0; + friend class KVPool; + }; + + iterator_t begin() const { + return iterator_t(kvs); + } + + iterator_t random_begin() const { + return iterator_t(random_kvs); + } + + size_t size() const { + return kvs.size(); + } + + private: + std::vector<size_t> str_sizes; + Onodes onodes; + kv_vector_t kvs; + kv_vector_t random_kvs; +}; + +template 
<bool TRACK> +class TreeBuilder { + public: + using ertr = Btree::btree_ertr; + template <class ValueT=void> + using future = ertr::future<ValueT>; + + TreeBuilder(KVPool& kvs, NodeExtentManagerURef&& nm) + : kvs{kvs}, ref_t{make_transaction()}, t{*ref_t}, tree{std::move(nm)} {} + + future<> bootstrap() { + return tree.mkfs(t); + } + + future<> run() { + std::ostringstream oss; +#ifndef NDEBUG + oss << "debug on, "; +#else + oss << "debug off, "; +#endif + if constexpr (TRACK) { + oss << "track on"; + } else { + oss << "track off"; + } + kv_iter = kvs.random_begin(); + logger().warn("start inserting {} kvs ({}) ...", kvs.size(), oss.str()); + auto start_time = mono_clock::now(); + return crimson::do_until([this]() -> future<bool> { + if (kv_iter.is_end()) { + return ertr::make_ready_future<bool>(true); + } + auto [key, p_value] = kv_iter.get_kv(); + logger().debug("[{}] {} -> {}", kv_iter.index(), key_hobj_t{key}, *p_value); + return tree.insert(t, key, *p_value).safe_then([this](auto ret) { + auto& [cursor, success] = ret; + assert(success == true); + if constexpr (TRACK) { + cursors.emplace_back(cursor); + } +#ifndef NDEBUG + auto [key, p_value] = kv_iter.get_kv(); + Onodes::validate_cursor(cursor, key, *p_value); + return tree.lower_bound(t, key).safe_then([this, cursor](auto cursor_) { + auto [key, p_value] = kv_iter.get_kv(); + ceph_assert(cursor_.get_ghobj() == key); + ceph_assert(cursor_.value() == cursor.value()); + ++kv_iter; + return ertr::make_ready_future<bool>(false); + }); +#else + ++kv_iter; + return ertr::make_ready_future<bool>(false); +#endif + }); + }).safe_then([this, start_time] { + std::chrono::duration<double> duration = mono_clock::now() - start_time; + logger().warn("Insert done! 
{}s", duration.count()); + return tree.get_stats_slow(t); + }).safe_then([this](auto stats) { + logger().warn("{}", stats); + if (!cursors.empty()) { + logger().info("Verifing tracked cursors ..."); + kv_iter = kvs.random_begin(); + return seastar::do_with( + cursors.begin(), [this](auto& c_iter) { + return crimson::do_until([this, &c_iter]() -> future<bool> { + if (kv_iter.is_end()) { + logger().info("Verify done!"); + return ertr::make_ready_future<bool>(true); + } + assert(c_iter != cursors.end()); + auto [k, v] = kv_iter.get_kv(); + // validate values in tree keep intact + return tree.lower_bound(t, k).safe_then([this, &c_iter](auto cursor) { + auto [k, v] = kv_iter.get_kv(); + Onodes::validate_cursor(cursor, k, *v); + // validate values in cursors keep intact + Onodes::validate_cursor(*c_iter, k, *v); + ++kv_iter; + ++c_iter; + return ertr::make_ready_future<bool>(false); + }); + }); + }); + } else { + return ertr::now(); + } + }); + } + + private: + static seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } + + KVPool& kvs; + TransactionRef ref_t; + Transaction& t; + Btree tree; + KVPool::iterator_t kv_iter; + std::vector<Btree::Cursor> cursors; +}; + +} diff --git a/src/crimson/os/seastore/root_block.h b/src/crimson/os/seastore/root_block.h index a5aac321144..4a5024caa62 100644 --- a/src/crimson/os/seastore/root_block.h +++ b/src/crimson/os/seastore/root_block.h @@ -18,6 +18,7 @@ struct __attribute__((aligned(8), packed)) root_t { depth_t segment_depth = 0; paddr_t lba_root_addr; paddr_t segment_root; + laddr_t onode_root = L_ADDR_NULL; void adjust_addrs_from_base(paddr_t base) { if (lba_root_addr.is_relative()) { @@ -101,7 +102,7 @@ struct RootBlock : CachedExtent { ceph_abort_msg("Root is only written via deltas"); } - root_t &get_lba_root() { return root; } + root_t &get_root() { return root; } }; using RootBlockRef = RootBlock::Ref; diff --git a/src/crimson/os/seastore/seastore_types.cc 
b/src/crimson/os/seastore/seastore_types.cc index 9c20fc3f0e8..ff43b1e515b 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -57,6 +57,8 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t) return out << "EXTMAP_INNER"; case extent_types_t::EXTMAP_LEAF: return out << "EXTMAP_LEAF"; + case extent_types_t::ONODE_BLOCK_STAGED: + return out << "ONODE_BLOCK_STAGED"; case extent_types_t::TEST_BLOCK: return out << "TEST_BLOCK"; case extent_types_t::TEST_BLOCK_PHYSICAL: diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 56bf53b72fb..26875cbb3f7 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -269,6 +269,7 @@ enum class extent_types_t : uint8_t { ONODE_BLOCK = 3, EXTMAP_INNER = 4, EXTMAP_LEAF = 5, + ONODE_BLOCK_STAGED = 6, // Test Block Types TEST_BLOCK = 0xF0, diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 18259a45b3c..674c33feabf 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -250,6 +250,32 @@ public: return segment_manager.release(id); } + /** + * read_onode_root + * + * Get onode-tree root logical address + */ + using read_onode_root_ertr = crimson::errorator< + crimson::ct_error::input_output_error + >; + using read_onode_root_ret = read_onode_root_ertr::future<laddr_t>; + read_onode_root_ret read_onode_root(Transaction &t) { + return cache.get_root(t).safe_then([](auto croot) { + return croot->get_root().onode_root; + }); + } + + /** + * write_onode_root + * + * Write onode-tree root logical address, must be called after read. 
+ */ + void write_onode_root(Transaction &t, laddr_t addr) { + auto croot = cache.get_root_fast(t); + croot = cache.duplicate_for_write(t, croot)->cast<RootBlock>(); + croot->get_root().onode_root = addr; + } + ~TransactionManager(); private: diff --git a/src/crimson/osd/backfill_facades.h b/src/crimson/osd/backfill_facades.h index d6d6f91e363..683dc6ea649 100644 --- a/src/crimson/osd/backfill_facades.h +++ b/src/crimson/osd/backfill_facades.h @@ -9,40 +9,46 @@ namespace crimson::osd { -// PeeringFacade -- a facade (in the GoF-defined meaning) simplifying -// the interface of PeeringState. The motivation is to have an inventory -// of behaviour that must be provided by a unit test's mock. -struct BackfillState::PeeringFacade { +// PeeringFacade -- main implementation of the BackfillState::PeeringFacade +// interface. We have the abstraction to decouple BackfillState from Peering +// State, and thus cut dependencies in unit testing. The second implementation +// is BackfillFixture::PeeringFacade and sits in test_backfill.cc. 
+struct PeeringFacade final : BackfillState::PeeringFacade { PeeringState& peering_state; - decltype(auto) earliest_backfill() const { + hobject_t earliest_backfill() const override { return peering_state.earliest_backfill(); } - decltype(auto) get_backfill_targets() const { + const std::set<pg_shard_t>& get_backfill_targets() const override { return peering_state.get_backfill_targets(); } - decltype(auto) get_peer_info(pg_shard_t peer) const { - return peering_state.get_peer_info(peer); + const hobject_t& get_peer_last_backfill(pg_shard_t peer) const override { + return peering_state.get_peer_info(peer).last_backfill; + } + + const eversion_t& get_last_update() const override { + return peering_state.get_info().last_update; } - decltype(auto) get_info() const { - return peering_state.get_info(); + const eversion_t& get_log_tail() const override { + return peering_state.get_info().log_tail; } - decltype(auto) get_pg_log() const { - return peering_state.get_pg_log(); + void scan_log_after(eversion_t v, scan_log_func_t f) const override { + peering_state.get_pg_log().get_log().scan_log_after(v, std::move(f)); } - bool is_backfill_target(pg_shard_t peer) const { + + bool is_backfill_target(pg_shard_t peer) const override { return peering_state.is_backfill_target(peer); } void update_complete_backfill_object_stats(const hobject_t &hoid, - const pg_stat_t &stats) { - return peering_state.update_complete_backfill_object_stats(hoid, stats); + const pg_stat_t &stats) override { + peering_state.update_complete_backfill_object_stats(hoid, stats); } - bool is_backfilling() const { + bool is_backfilling() const override { return peering_state.is_backfilling(); } @@ -54,10 +60,10 @@ struct BackfillState::PeeringFacade { // PGFacade -- a facade (in the GoF-defined meaning) simplifying the huge // interface of crimson's PG class. The motivation is to have an inventory // of behaviour that must be provided by a unit test's mock. 
-struct BackfillState::PGFacade { +struct PGFacade final : BackfillState::PGFacade { PG& pg; - decltype(auto) get_projected_last_update() const { + const eversion_t& get_projected_last_update() const override { return pg.projected_last_update; } diff --git a/src/crimson/osd/backfill_state.cc b/src/crimson/osd/backfill_state.cc index 8fcfdc65b3d..57f845f92f5 100644 --- a/src/crimson/osd/backfill_state.cc +++ b/src/crimson/osd/backfill_state.cc @@ -5,9 +5,6 @@ #include <boost/type_index.hpp> #include "crimson/osd/backfill_state.h" -#include "crimson/osd/backfill_facades.h" -#include "crimson/osd/pg.h" -#include "osd/PeeringState.h" namespace { seastar::logger& logger() { @@ -70,7 +67,7 @@ BackfillState::Initial::Initial(my_context ctx) backfill_state().last_backfill_started); for (const auto& bt : peering_state().get_backfill_targets()) { logger().debug("{}: target shard {} from {}", - __func__, bt, peering_state().get_peer_info(bt).last_backfill); + __func__, bt, peering_state().get_peer_last_backfill(bt)); } ceph_assert(peering_state().get_backfill_targets().size()); ceph_assert(!backfill_state().last_backfill_started.is_max()); @@ -86,7 +83,7 @@ BackfillState::Initial::react(const BackfillState::Triggered& evt) // initialize BackfillIntervals for (const auto& bt : peering_state().get_backfill_targets()) { backfill_state().peer_backfill_info[bt].reset( - peering_state().get_peer_info(bt).last_backfill); + peering_state().get_peer_last_backfill(bt)); } backfill_state().backfill_info.reset(backfill_state().last_backfill_started); if (Enqueuing::all_enqueued(peering_state(), @@ -108,7 +105,7 @@ void BackfillState::Enqueuing::maybe_update_range() primary_bi.version >= pg().get_projected_last_update()) { logger().info("{}: bi is current", __func__); ceph_assert(primary_bi.version == pg().get_projected_last_update()); - } else if (primary_bi.version >= peering_state().get_info().log_tail) { + } else if (primary_bi.version >= peering_state().get_log_tail()) { #if 0 if 
(peering_state().get_pg_log().get_log().empty() && pg().get_projected_log().empty()) { @@ -127,7 +124,7 @@ void BackfillState::Enqueuing::maybe_update_range() primary_bi.version, pg().get_projected_last_update()); logger().debug("{}: scanning pg log first", __func__); - peering_state().get_pg_log().get_log().scan_log_after(primary_bi.version, + peering_state().scan_log_after(primary_bi.version, [&](const pg_log_entry_t& e) { logger().debug("maybe_update_range(lambda): updating from version {}", e.version); @@ -156,7 +153,7 @@ void BackfillState::Enqueuing::trim_backfill_infos() { for (const auto& bt : peering_state().get_backfill_targets()) { backfill_state().peer_backfill_info[bt].trim_to( - std::max(peering_state().get_peer_info(bt).last_backfill, + std::max(peering_state().get_peer_last_backfill(bt), backfill_state().last_backfill_started)); } backfill_state().backfill_info.trim_to( @@ -257,27 +254,39 @@ BackfillState::Enqueuing::update_on_peers(const hobject_t& check) // Find all check peers that have the wrong version if (const eversion_t& obj_v = primary_bi.objects.begin()->second; check == primary_bi.begin && check == peer_bi.begin) { - if(peer_bi.objects.begin()->second != obj_v) { - backfill_state().progress_tracker->enqueue_push(primary_bi.begin); - backfill_listener().enqueue_push(bt, primary_bi.begin, obj_v); + if(peer_bi.objects.begin()->second != obj_v && + backfill_state().progress_tracker->enqueue_push(primary_bi.begin)) { + backfill_listener().enqueue_push(primary_bi.begin, obj_v); } else { - // it's fine, keep it! + // it's fine, keep it! OR already recovering } result.pbi_targets.insert(bt); } else { - const pg_info_t& pinfo = peering_state().get_peer_info(bt); // Only include peers that we've caught up to their backfill line // otherwise, they only appear to be missing this object // because their peer_bi.begin > backfill_info.begin. 
- if (primary_bi.begin > pinfo.last_backfill) { - backfill_state().progress_tracker->enqueue_push(primary_bi.begin); - backfill_listener().enqueue_push(bt, primary_bi.begin, obj_v); + if (primary_bi.begin > peering_state().get_peer_last_backfill(bt) && + backfill_state().progress_tracker->enqueue_push(primary_bi.begin)) { + backfill_listener().enqueue_push(primary_bi.begin, obj_v); } } } return result; } +bool BackfillState::Enqueuing::Enqueuing::all_emptied( + const BackfillInterval& local_backfill_info, + const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const +{ + const auto& targets = peering_state().get_backfill_targets(); + const auto replicas_emptied = + std::all_of(std::begin(targets), std::end(targets), + [&] (const auto& bt) { + return peer_backfill_info.at(bt).empty(); + }); + return local_backfill_info.empty() && replicas_emptied; +} + BackfillState::Enqueuing::Enqueuing(my_context ctx) : my_base(ctx) { @@ -285,7 +294,7 @@ BackfillState::Enqueuing::Enqueuing(my_context ctx) // update our local interval to cope with recent changes primary_bi.begin = backfill_state().last_backfill_started; - if (primary_bi.version < peering_state().get_info().log_tail) { + if (primary_bi.version < peering_state().get_log_tail()) { // it might be that the OSD is so flooded with modifying operations // that backfill will be spinning here over and over. For the sake // of performance and complexity we don't synchronize with entire PG. 
@@ -299,7 +308,7 @@ BackfillState::Enqueuing::Enqueuing(my_context ctx) } trim_backfill_infos(); - while (!primary_bi.empty()) { + while (!all_emptied(primary_bi, backfill_state().peer_backfill_info)) { if (!backfill_listener().budget_available()) { post_event(RequestWaiting{}); return; @@ -327,8 +336,9 @@ BackfillState::Enqueuing::Enqueuing(my_context ctx) trim_backfilled_object_from_intervals(std::move(result), backfill_state().last_backfill_started, backfill_state().peer_backfill_info); - backfill_state().backfill_info.pop_front(); + primary_bi.pop_front(); } + backfill_listener().maybe_flush(); } if (should_rescan_primary(backfill_state().peer_backfill_info, @@ -351,8 +361,7 @@ BackfillState::Enqueuing::Enqueuing(my_context ctx) BackfillState::PrimaryScanning::PrimaryScanning(my_context ctx) : my_base(ctx) { - backfill_state().backfill_info.version = \ - peering_state().get_info().last_update; + backfill_state().backfill_info.version = peering_state().get_last_update(); backfill_listener().request_primary_scan( backfill_state().backfill_info.begin); } @@ -497,16 +506,17 @@ bool BackfillState::ProgressTracker::tracked_objects_completed() const return registry.empty(); } -void BackfillState::ProgressTracker::enqueue_push(const hobject_t& obj) +bool BackfillState::ProgressTracker::enqueue_push(const hobject_t& obj) { - ceph_assert(registry.count(obj) == 0); - registry[obj] = registry_item_t{ op_stage_t::enqueued_push, std::nullopt }; + [[maybe_unused]] const auto [it, first_seen] = registry.try_emplace( + obj, registry_item_t{op_stage_t::enqueued_push, std::nullopt}); + return first_seen; } void BackfillState::ProgressTracker::enqueue_drop(const hobject_t& obj) { - ceph_assert(registry.count(obj) == 0); - registry[obj] = registry_item_t{ op_stage_t::enqueued_drop, pg_stat_t{} }; + registry.try_emplace( + obj, registry_item_t{op_stage_t::enqueued_drop, pg_stat_t{}}); } void BackfillState::ProgressTracker::complete_to( diff --git a/src/crimson/osd/backfill_state.h 
b/src/crimson/osd/backfill_state.h index 549daa28242..4bd2991fb62 100644 --- a/src/crimson/osd/backfill_state.h +++ b/src/crimson/osd/backfill_state.h @@ -13,7 +13,6 @@ #include <boost/statechart/state_machine.hpp> #include <boost/statechart/transition.hpp> -#include "osd/PeeringState.h" #include "osd/recovery_types.h" namespace crimson::osd { @@ -170,6 +169,9 @@ public: // these methods take BackfillIntervals instead of extracting them from // the state to emphasize the relationships across the main loop. + bool all_emptied( + const BackfillInterval& local_backfill_info, + const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const; hobject_t earliest_peer_backfill( const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const; bool should_rescan_replicas( @@ -288,7 +290,6 @@ struct BackfillState::BackfillListener { const hobject_t& begin) = 0; virtual void enqueue_push( - const pg_shard_t& target, const hobject_t& obj, const eversion_t& v) = 0; @@ -297,6 +298,8 @@ struct BackfillState::BackfillListener { const hobject_t& obj, const eversion_t& v) = 0; + virtual void maybe_flush() = 0; + virtual void update_peers_last_backfill( const hobject_t& new_last_backfill) = 0; @@ -307,6 +310,37 @@ struct BackfillState::BackfillListener { virtual ~BackfillListener() = default; }; +// PeeringFacade -- a facade (in the GoF-defined meaning) simplifying +// the interface of PeeringState. The motivation is to have an inventory +// of behaviour that must be provided by a unit test's mock. +struct BackfillState::PeeringFacade { + virtual hobject_t earliest_backfill() const = 0; + virtual const std::set<pg_shard_t>& get_backfill_targets() const = 0; + virtual const hobject_t& get_peer_last_backfill(pg_shard_t peer) const = 0; + virtual const eversion_t& get_last_update() const = 0; + virtual const eversion_t& get_log_tail() const = 0; + + // the performance impact of `std::function` has not been considered yet. + // If there is any proof (from e.g. 
profiling) about its significance, we + // can switch back to the template variant. + using scan_log_func_t = std::function<void(const pg_log_entry_t&)>; + virtual void scan_log_after(eversion_t, scan_log_func_t) const = 0; + + virtual bool is_backfill_target(pg_shard_t peer) const = 0; + virtual void update_complete_backfill_object_stats(const hobject_t &hoid, + const pg_stat_t &stats) = 0; + virtual bool is_backfilling() const = 0; + virtual ~PeeringFacade() {} +}; + +// PGFacade -- a facade (in the GoF-defined meaning) simplifying the huge +// interface of crimson's PG class. The motivation is to have an inventory +// of behaviour that must be provided by a unit test's mock. +struct BackfillState::PGFacade { + virtual const eversion_t& get_projected_last_update() const = 0; + virtual ~PGFacade() {} +}; + class BackfillState::ProgressTracker { // TODO: apply_stat, enum class op_stage_t { @@ -340,7 +374,7 @@ public: bool tracked_objects_completed() const; - void enqueue_push(const hobject_t&); + bool enqueue_push(const hobject_t&); void enqueue_drop(const hobject_t&); void complete_to(const hobject_t&, const pg_stat_t&); }; diff --git a/src/crimson/osd/heartbeat.cc b/src/crimson/osd/heartbeat.cc index d8dc1e550e1..81ec06ecd5d 100644 --- a/src/crimson/osd/heartbeat.cc +++ b/src/crimson/osd/heartbeat.cc @@ -57,14 +57,10 @@ seastar::future<> Heartbeat::start(entity_addrvec_t front_addrs, SocketPolicy::lossy_client(0)); back_msgr->set_policy(entity_name_t::TYPE_OSD, SocketPolicy::lossy_client(0)); - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(*this); return seastar::when_all_succeed(start_messenger(*front_msgr, - front_addrs, - chained_dispatchers), + front_addrs), start_messenger(*back_msgr, - back_addrs, - chained_dispatchers)) + back_addrs)) .then_unpack([this] { timer.arm_periodic( std::chrono::seconds(local_conf()->osd_heartbeat_interval)); @@ -73,25 +69,26 @@ seastar::future<> 
Heartbeat::start(entity_addrvec_t front_addrs, seastar::future<> Heartbeat::start_messenger(crimson::net::Messenger& msgr, - const entity_addrvec_t& addrs, - ChainedDispatchersRef chained_dispatchers) + const entity_addrvec_t& addrs) { return msgr.try_bind(addrs, local_conf()->ms_bind_port_min, local_conf()->ms_bind_port_max) - .then([&msgr, chained_dispatchers]() mutable { - return msgr.start(chained_dispatchers); - }); + .safe_then([this, &msgr]() mutable { + return msgr.start({this}); + }, crimson::net::Messenger::bind_ertr::all_same_way( + [] (const std::error_code& e) { + logger().error("heartbeat messenger try_bind(): address range is unavailable."); + ceph_abort(); + })); } seastar::future<> Heartbeat::stop() { logger().info("{}", __func__); timer.cancel(); - if (!front_msgr->dispatcher_chain_empty()) - front_msgr->remove_dispatcher(*this); - if (!back_msgr->dispatcher_chain_empty()) - back_msgr->remove_dispatcher(*this); + front_msgr->stop(); + back_msgr->stop(); return gate.close().then([this] { return seastar::when_all_succeed(front_msgr->shutdown(), back_msgr->shutdown()); @@ -206,17 +203,20 @@ void Heartbeat::remove_peer(osd_id_t peer) peers.erase(peer); } -seastar::future<> Heartbeat::ms_dispatch(crimson::net::Connection* conn, - MessageRef m) +std::optional<seastar::future<>> +Heartbeat::ms_dispatch(crimson::net::ConnectionRef conn, MessageRef m) { - return gate.dispatch(__func__, *this, [this, conn, &m] { + bool dispatched = true; + gate.dispatch_in_background(__func__, *this, [this, conn, &m, &dispatched] { switch (m->get_type()) { case MSG_OSD_PING: return handle_osd_ping(conn, boost::static_pointer_cast<MOSDPing>(m)); default: + dispatched = false; return seastar::now(); } }); + return (dispatched ? 
std::make_optional(seastar::now()) : std::nullopt); } void Heartbeat::ms_handle_reset(crimson::net::ConnectionRef conn, bool is_replace) @@ -258,7 +258,7 @@ void Heartbeat::ms_handle_accept(crimson::net::ConnectionRef conn) } } -seastar::future<> Heartbeat::handle_osd_ping(crimson::net::Connection* conn, +seastar::future<> Heartbeat::handle_osd_ping(crimson::net::ConnectionRef conn, Ref<MOSDPing> m) { switch (m->op) { @@ -273,7 +273,7 @@ seastar::future<> Heartbeat::handle_osd_ping(crimson::net::Connection* conn, } } -seastar::future<> Heartbeat::handle_ping(crimson::net::Connection* conn, +seastar::future<> Heartbeat::handle_ping(crimson::net::ConnectionRef conn, Ref<MOSDPing> m) { auto min_message = static_cast<uint32_t>( @@ -291,7 +291,7 @@ seastar::future<> Heartbeat::handle_ping(crimson::net::Connection* conn, return conn->send(reply); } -seastar::future<> Heartbeat::handle_reply(crimson::net::Connection* conn, +seastar::future<> Heartbeat::handle_reply(crimson::net::ConnectionRef conn, Ref<MOSDPing> m) { const osd_id_t from = m->get_source().num(); @@ -373,9 +373,9 @@ Heartbeat::Connection::~Connection() } } -bool Heartbeat::Connection::matches(crimson::net::Connection* _conn) const +bool Heartbeat::Connection::matches(crimson::net::ConnectionRef _conn) const { - return (conn && conn.get() == _conn); + return (conn && conn == _conn); } void Heartbeat::Connection::accepted(crimson::net::ConnectionRef accepted_conn) @@ -551,7 +551,7 @@ void Heartbeat::Peer::send_heartbeat( } seastar::future<> Heartbeat::Peer::handle_reply( - crimson::net::Connection* conn, Ref<MOSDPing> m) + crimson::net::ConnectionRef conn, Ref<MOSDPing> m) { if (!session.is_started()) { // we haven't sent any ping yet diff --git a/src/crimson/osd/heartbeat.h b/src/crimson/osd/heartbeat.h index b8d13ee3567..4947e871ff5 100644 --- a/src/crimson/osd/heartbeat.h +++ b/src/crimson/osd/heartbeat.h @@ -6,7 +6,7 @@ #include <cstdint> #include <seastar/core/future.hh> #include "common/ceph_time.h" 
-#include "crimson/net/chained_dispatchers.h" +#include "crimson/common/gated.h" #include "crimson/net/Dispatcher.h" #include "crimson/net/Fwd.h" @@ -48,19 +48,19 @@ public: void set_require_authorizer(bool); // Dispatcher methods - seastar::future<> ms_dispatch(crimson::net::Connection* conn, - MessageRef m) override; + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef conn, MessageRef m) override; void ms_handle_reset(crimson::net::ConnectionRef conn, bool is_replace) override; void ms_handle_connect(crimson::net::ConnectionRef conn) override; void ms_handle_accept(crimson::net::ConnectionRef conn) override; void print(std::ostream&) const; private: - seastar::future<> handle_osd_ping(crimson::net::Connection* conn, + seastar::future<> handle_osd_ping(crimson::net::ConnectionRef conn, Ref<MOSDPing> m); - seastar::future<> handle_ping(crimson::net::Connection* conn, + seastar::future<> handle_ping(crimson::net::ConnectionRef conn, Ref<MOSDPing> m); - seastar::future<> handle_reply(crimson::net::Connection* conn, + seastar::future<> handle_reply(crimson::net::ConnectionRef conn, Ref<MOSDPing> m); seastar::future<> handle_you_died(); @@ -71,8 +71,7 @@ private: void add_reporter_peers(int whoami); seastar::future<> start_messenger(crimson::net::Messenger& msgr, - const entity_addrvec_t& addrs, - ChainedDispatchersRef); + const entity_addrvec_t& addrs); private: const osd_id_t whoami; const crimson::osd::ShardServices& service; @@ -183,10 +182,7 @@ class Heartbeat::Connection { ~Connection(); - bool matches(crimson::net::Connection* _conn) const; - bool matches(crimson::net::ConnectionRef conn) const { - return matches(conn.get()); - } + bool matches(crimson::net::ConnectionRef _conn) const; void connected() { set_connected(); } @@ -411,7 +407,7 @@ class Heartbeat::Peer final : private Heartbeat::ConnectionListener { } void send_heartbeat( clock::time_point, ceph::signedspan, std::vector<seastar::future<>>&); - seastar::future<> 
handle_reply(crimson::net::Connection*, Ref<MOSDPing>); + seastar::future<> handle_reply(crimson::net::ConnectionRef, Ref<MOSDPing>); void handle_reset(crimson::net::ConnectionRef conn, bool is_replace) { for_each_conn([&] (auto& _conn) { if (_conn.matches(conn)) { diff --git a/src/crimson/osd/object_context.h b/src/crimson/osd/object_context.h index 7e14ac3e16f..e479ac8ee56 100644 --- a/src/crimson/osd/object_context.h +++ b/src/crimson/osd/object_context.h @@ -111,18 +111,28 @@ private: make_blocking_future(std::forward<LockF>(lockf))); } + template <typename Lock, typename Func> + auto _with_lock(Lock&& lock, Func&& func) { + Ref obc = this; + return lock.lock().then([&lock, func = std::forward<Func>(func), obc]() mutable { + return seastar::futurize_invoke(func).finally([&lock, obc] { + lock.unlock(); + }); + }); + } + public: template<RWState::State Type, typename Func> auto with_lock(Func&& func) { switch (Type) { case RWState::RWWRITE: - return seastar::with_lock(lock.for_write(), std::forward<Func>(func)); + return _with_lock(lock.for_write(), std::forward<Func>(func)); case RWState::RWREAD: - return seastar::with_lock(lock.for_read(), std::forward<Func>(func)); + return _with_lock(lock.for_read(), std::forward<Func>(func)); case RWState::RWEXCL: - return seastar::with_lock(lock.for_excl(), std::forward<Func>(func)); + return _with_lock(lock.for_excl(), std::forward<Func>(func)); case RWState::RWNONE: - return seastar::futurize_invoke(func); + return seastar::futurize_invoke(std::forward<Func>(func)); default: assert(0 == "noop"); } @@ -131,11 +141,11 @@ public: auto with_promoted_lock(Func&& func) { switch (Type) { case RWState::RWWRITE: - return seastar::with_lock(lock.excl_from_write(), std::forward<Func>(func)); + return _with_lock(lock.excl_from_write(), std::forward<Func>(func)); case RWState::RWREAD: - return seastar::with_lock(lock.excl_from_read(), std::forward<Func>(func)); + return _with_lock(lock.excl_from_read(), std::forward<Func>(func)); case 
RWState::RWEXCL: - return seastar::with_lock(lock.excl_from_excl(), std::forward<Func>(func)); + return _with_lock(lock.excl_from_excl(), std::forward<Func>(func)); default: assert(0 == "noop"); } @@ -163,10 +173,8 @@ public: return false; } } - seastar::future<> wait_recovery_read() { - return lock.lock_for_read().then([this] { - recovery_read_marker = true; - }); + void wait_recovery_read() { + recovery_read_marker = true; } void drop_recovery_read() { assert(recovery_read_marker); diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc index 027fac9a703..6b6614e93d8 100644 --- a/src/crimson/osd/ops_executer.cc +++ b/src/crimson/osd/ops_executer.cc @@ -16,6 +16,7 @@ #include <seastar/core/thread.hh> #include "crimson/osd/exceptions.h" +#include "crimson/osd/pg.h" #include "crimson/osd/watch.h" #include "osd/ClassHandler.h" diff --git a/src/crimson/osd/ops_executer.h b/src/crimson/osd/ops_executer.h index 0efe6b43c7a..42fcf61b800 100644 --- a/src/crimson/osd/ops_executer.h +++ b/src/crimson/osd/ops_executer.h @@ -25,12 +25,12 @@ #include "crimson/osd/shard_services.h" #include "crimson/osd/osdmap_gate.h" -#include "crimson/osd/pg.h" #include "crimson/osd/pg_backend.h" #include "crimson/osd/exceptions.h" #include "messages/MOSDOp.h" +class PG; class PGLSFilter; class OSDOp; @@ -85,9 +85,9 @@ private: ObjectContextRef obc; const OpInfo& op_info; - PG& pg; + const pg_pool_t& pool_info; // for the sake of the ObjClass API PGBackend& backend; - Ref<MOSDOp> msg; + const MOSDOp& msg; std::optional<osd_op_params_t> osd_op_params; bool user_modify = false; ceph::os::Transaction txn; @@ -166,12 +166,16 @@ private: } public: - OpsExecuter(ObjectContextRef obc, const OpInfo& op_info, PG& pg, Ref<MOSDOp> msg) + OpsExecuter(ObjectContextRef obc, + const OpInfo& op_info, + const pg_pool_t& pool_info, + PGBackend& backend, + const MOSDOp& msg) : obc(std::move(obc)), op_info(op_info), - pg(pg), - backend(pg.get_backend()), - msg(std::move(msg)) { + 
pool_info(pool_info), + backend(backend), + msg(msg) { } osd_op_errorator::future<> execute_op(class OSDOp& osd_op); @@ -180,7 +184,7 @@ public: osd_op_errorator::future<> flush_changes(Func&& func, MutFunc&& mut_func) &&; const auto& get_message() const { - return *msg; + return msg; } size_t get_processed_rw_ops_num() const { @@ -188,7 +192,11 @@ public: } uint32_t get_pool_stripe_width() const { - return pg.get_pool().info.get_stripe_width(); + return pool_info.get_stripe_width(); + } + + bool has_seen_write() const { + return num_write > 0; } }; @@ -233,29 +241,21 @@ OpsExecuter::osd_op_errorator::future<> OpsExecuter::flush_changes( Func&& func, MutFunc&& mut_func) && { - assert(obc); const bool want_mutate = !txn.empty(); - if (want_mutate) { - // osd_op_params are instantiated by every wr-like operation. - assert(osd_op_params); - osd_op_params->req = std::move(msg); - osd_op_params->at_version = pg.next_version(); - osd_op_params->pg_trim_to = pg.get_pg_trim_to(); - osd_op_params->min_last_complete_ondisk = pg.get_min_last_complete_ondisk(); - osd_op_params->last_complete = pg.get_info().last_complete; - if (user_modify) { - osd_op_params->user_at_version = osd_op_params->at_version.version; - } - } + // osd_op_params are instantiated by every wr-like operation. + assert(osd_op_params || !want_mutate); + assert(obc); if (__builtin_expect(op_effects.empty(), true)) { return want_mutate ? std::forward<MutFunc>(mut_func)(std::move(txn), std::move(obc), - std::move(*osd_op_params)) + std::move(*osd_op_params), + user_modify) : std::forward<Func>(func)(std::move(obc)); } else { return (want_mutate ? 
std::forward<MutFunc>(mut_func)(std::move(txn), std::move(obc), - std::move(*osd_op_params)) + std::move(*osd_op_params), + user_modify) : std::forward<Func>(func)(std::move(obc)) ).safe_then([this] { // let's do the cleaning of `op_effects` in destructor diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 430d8ed2ebb..521cb9ba3bb 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -265,23 +265,28 @@ seastar::future<> OSD::start() cluster_msgr->set_policy(entity_name_t::TYPE_CLIENT, SocketPolicy::stateless_server(0)); - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_front(*mgrc); - chained_dispatchers->push_front(*monc); - chained_dispatchers->push_front(*this); + crimson::net::dispatchers_t dispatchers{this, monc.get(), mgrc.get()}; return seastar::when_all_succeed( cluster_msgr->try_bind(pick_addresses(CEPH_PICK_ADDRESS_CLUSTER), local_conf()->ms_bind_port_min, local_conf()->ms_bind_port_max) - .then([this, chained_dispatchers]() mutable { - return cluster_msgr->start(chained_dispatchers); - }), + .safe_then([this, dispatchers]() mutable { + return cluster_msgr->start(dispatchers); + }, crimson::net::Messenger::bind_ertr::all_same_way( + [] (const std::error_code& e) { + logger().error("cluster messenger try_bind(): address range is unavailable."); + ceph_abort(); + })), public_msgr->try_bind(pick_addresses(CEPH_PICK_ADDRESS_PUBLIC), local_conf()->ms_bind_port_min, local_conf()->ms_bind_port_max) - .then([this, chained_dispatchers]() mutable { - return public_msgr->start(chained_dispatchers); - })); + .safe_then([this, dispatchers]() mutable { + return public_msgr->start(dispatchers); + }, crimson::net::Messenger::bind_ertr::all_same_way( + [] (const std::error_code& e) { + logger().error("public messenger try_bind(): address range is unavailable."); + ceph_abort(); + }))); }).then_unpack([this] { return seastar::when_all_succeed(monc->start(), mgrc->start()); @@ -412,7 +417,7 
@@ seastar::future<> OSD::_add_me_to_crush() }); } -seastar::future<> OSD::handle_command(crimson::net::Connection* conn, +seastar::future<> OSD::handle_command(crimson::net::ConnectionRef conn, Ref<MCommand> m) { return asok->handle_command(conn, std::move(m)); @@ -452,16 +457,8 @@ seastar::future<> OSD::stop() return prepare_to_stop().then([this] { state.set_stopping(); logger().debug("prepared to stop"); - if (!public_msgr->dispatcher_chain_empty()) { - public_msgr->remove_dispatcher(*this); - public_msgr->remove_dispatcher(*mgrc); - public_msgr->remove_dispatcher(*monc); - } - if (!cluster_msgr->dispatcher_chain_empty()) { - cluster_msgr->remove_dispatcher(*this); - cluster_msgr->remove_dispatcher(*mgrc); - cluster_msgr->remove_dispatcher(*monc); - } + public_msgr->stop(); + cluster_msgr->stop(); auto gate_close_fut = gate.close(); return asok->stop().then([this] { return heartbeat->stop(); @@ -617,12 +614,14 @@ seastar::future<Ref<PG>> OSD::load_pg(spg_t pgid) }); } -seastar::future<> OSD::ms_dispatch(crimson::net::Connection* conn, MessageRef m) +std::optional<seastar::future<>> +OSD::ms_dispatch(crimson::net::ConnectionRef conn, MessageRef m) { - return gate.dispatch(__func__, *this, [this, conn, &m] { - if (state.is_stopping()) { - return seastar::now(); - } + if (state.is_stopping()) { + return {}; + } + bool dispatched = true; + gate.dispatch_in_background(__func__, *this, [this, conn, &m, &dispatched] { switch (m->get_type()) { case CEPH_MSG_OSD_MAP: return handle_osd_map(conn, boost::static_pointer_cast<MOSDMap>(m)); @@ -631,7 +630,7 @@ seastar::future<> OSD::ms_dispatch(crimson::net::Connection* conn, MessageRef m) case MSG_OSD_PG_CREATE2: shard_services.start_operation<CompoundPeeringRequest>( *this, - conn->get_shared(), + conn, m); return seastar::now(); case MSG_COMMAND: @@ -651,6 +650,8 @@ seastar::future<> OSD::ms_dispatch(crimson::net::Connection* conn, MessageRef m) case MSG_OSD_PG_SCAN: [[fallthrough]]; case MSG_OSD_PG_BACKFILL: + 
[[fallthrough]]; + case MSG_OSD_PG_BACKFILL_REMOVE: return handle_recovery_subreq(conn, boost::static_pointer_cast<MOSDFastDispatchOp>(m)); case MSG_OSD_PG_LEASE: [[fallthrough]]; @@ -675,10 +676,11 @@ seastar::future<> OSD::ms_dispatch(crimson::net::Connection* conn, MessageRef m) case MSG_OSD_SCRUB2: return handle_scrub(conn, boost::static_pointer_cast<MOSDScrub2>(m)); default: - logger().info("ms_dispatch unhandled message {}", *m); + dispatched = false; return seastar::now(); } }); + return (dispatched ? std::make_optional(seastar::now()) : std::nullopt); } void OSD::ms_handle_reset(crimson::net::ConnectionRef conn, bool is_replace) @@ -949,7 +951,7 @@ seastar::future<Ref<PG>> OSD::handle_pg_create_info( }); } -seastar::future<> OSD::handle_osd_map(crimson::net::Connection* conn, +seastar::future<> OSD::handle_osd_map(crimson::net::ConnectionRef conn, Ref<MOSDMap> m) { logger().info("handle_osd_map {}", *m); @@ -1086,17 +1088,17 @@ seastar::future<> OSD::committed_osd_maps(version_t first, }); } -seastar::future<> OSD::handle_osd_op(crimson::net::Connection* conn, +seastar::future<> OSD::handle_osd_op(crimson::net::ConnectionRef conn, Ref<MOSDOp> m) { (void) shard_services.start_operation<ClientRequest>( *this, - conn->get_shared(), + conn, std::move(m)); return seastar::now(); } -seastar::future<> OSD::send_incremental_map(crimson::net::Connection* conn, +seastar::future<> OSD::send_incremental_map(crimson::net::ConnectionRef conn, epoch_t first) { if (first >= superblock.oldest_map) { @@ -1122,18 +1124,18 @@ seastar::future<> OSD::send_incremental_map(crimson::net::Connection* conn, } } -seastar::future<> OSD::handle_rep_op(crimson::net::Connection* conn, +seastar::future<> OSD::handle_rep_op(crimson::net::ConnectionRef conn, Ref<MOSDRepOp> m) { m->finish_decode(); (void) shard_services.start_operation<RepRequest>( *this, - conn->get_shared(), + std::move(conn), std::move(m)); return seastar::now(); } -seastar::future<> 
OSD::handle_rep_op_reply(crimson::net::Connection* conn, +seastar::future<> OSD::handle_rep_op_reply(crimson::net::ConnectionRef conn, Ref<MOSDRepOpReply> m) { const auto& pgs = pg_map.get_pgs(); @@ -1146,7 +1148,7 @@ seastar::future<> OSD::handle_rep_op_reply(crimson::net::Connection* conn, return seastar::now(); } -seastar::future<> OSD::handle_scrub(crimson::net::Connection* conn, +seastar::future<> OSD::handle_scrub(crimson::net::ConnectionRef conn, Ref<MOSDScrub2> m) { if (m->fsid != superblock.cluster_fsid) { @@ -1154,7 +1156,7 @@ seastar::future<> OSD::handle_scrub(crimson::net::Connection* conn, return seastar::now(); } return seastar::parallel_for_each(std::move(m->scrub_pgs), - [m, conn=conn->get_shared(), this](spg_t pgid) { + [m, conn, this](spg_t pgid) { pg_shard_t from_shard{static_cast<int>(m->get_source().num()), pgid.shard}; PeeringState::RequestScrub scrub_request{m->deep, m->repair}; @@ -1168,7 +1170,7 @@ seastar::future<> OSD::handle_scrub(crimson::net::Connection* conn, }); } -seastar::future<> OSD::handle_mark_me_down(crimson::net::Connection* conn, +seastar::future<> OSD::handle_mark_me_down(crimson::net::ConnectionRef conn, Ref<MOSDMarkMeDown> m) { if (state.is_prestop()) { @@ -1177,12 +1179,12 @@ seastar::future<> OSD::handle_mark_me_down(crimson::net::Connection* conn, return seastar::now(); } -seastar::future<> OSD::handle_recovery_subreq(crimson::net::Connection* conn, +seastar::future<> OSD::handle_recovery_subreq(crimson::net::ConnectionRef conn, Ref<MOSDFastDispatchOp> m) { (void) shard_services.start_operation<RecoverySubRequest>( *this, - conn->get_shared(), + conn, std::move(m)); return seastar::now(); } @@ -1266,7 +1268,7 @@ void OSD::update_heartbeat_peers() } seastar::future<> OSD::handle_peering_op( - crimson::net::Connection* conn, + crimson::net::ConnectionRef conn, Ref<MOSDPeeringOp> m) { const int from = m->get_source().num(); @@ -1274,7 +1276,7 @@ seastar::future<> OSD::handle_peering_op( std::unique_ptr<PGPeeringEvent> 
evt(m->get_event()); (void) shard_services.start_operation<RemotePeeringEvent>( *this, - conn->get_shared(), + conn, shard_services, pg_shard_t{from, m->get_spg().shard}, m->get_spg(), diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h index 72a892fb2ff..889960ced8d 100644 --- a/src/crimson/osd/osd.h +++ b/src/crimson/osd/osd.h @@ -13,7 +13,6 @@ #include "crimson/common/type_helpers.h" #include "crimson/common/auth_handler.h" #include "crimson/common/gated.h" -#include "crimson/net/chained_dispatchers.h" #include "crimson/admin/admin_socket.h" #include "crimson/common/simple_lru.h" #include "crimson/common/shared_lru.h" @@ -97,7 +96,7 @@ class OSD final : public crimson::net::Dispatcher, OSDSuperblock superblock; // Dispatcher methods - seastar::future<> ms_dispatch(crimson::net::Connection* conn, MessageRef m) final; + std::optional<seastar::future<>> ms_dispatch(crimson::net::ConnectionRef, MessageRef) final; void ms_handle_reset(crimson::net::ConnectionRef conn, bool is_replace) final; void ms_handle_remote_reset(crimson::net::ConnectionRef conn) final; @@ -137,7 +136,7 @@ public: void dump_pg_state_history(Formatter*) const; void print(std::ostream&) const; - seastar::future<> send_incremental_map(crimson::net::Connection* conn, + seastar::future<> send_incremental_map(crimson::net::ConnectionRef conn, epoch_t first); /// @return the seq id of the pg stats being sent @@ -179,21 +178,21 @@ private: seastar::future<Ref<PG>> handle_pg_create_info( std::unique_ptr<PGCreateInfo> info); - seastar::future<> handle_osd_map(crimson::net::Connection* conn, + seastar::future<> handle_osd_map(crimson::net::ConnectionRef conn, Ref<MOSDMap> m); - seastar::future<> handle_osd_op(crimson::net::Connection* conn, + seastar::future<> handle_osd_op(crimson::net::ConnectionRef conn, Ref<MOSDOp> m); - seastar::future<> handle_rep_op(crimson::net::Connection* conn, + seastar::future<> handle_rep_op(crimson::net::ConnectionRef conn, Ref<MOSDRepOp> m); - seastar::future<> 
handle_rep_op_reply(crimson::net::Connection* conn, + seastar::future<> handle_rep_op_reply(crimson::net::ConnectionRef conn, Ref<MOSDRepOpReply> m); - seastar::future<> handle_peering_op(crimson::net::Connection* conn, + seastar::future<> handle_peering_op(crimson::net::ConnectionRef conn, Ref<MOSDPeeringOp> m); - seastar::future<> handle_recovery_subreq(crimson::net::Connection* conn, + seastar::future<> handle_recovery_subreq(crimson::net::ConnectionRef conn, Ref<MOSDFastDispatchOp> m); - seastar::future<> handle_scrub(crimson::net::Connection* conn, + seastar::future<> handle_scrub(crimson::net::ConnectionRef conn, Ref<MOSDScrub2> m); - seastar::future<> handle_mark_me_down(crimson::net::Connection* conn, + seastar::future<> handle_mark_me_down(crimson::net::ConnectionRef conn, Ref<MOSDMarkMeDown> m); seastar::future<> committed_osd_maps(version_t first, @@ -202,7 +201,7 @@ private: void check_osdmap_features(); - seastar::future<> handle_command(crimson::net::Connection* conn, + seastar::future<> handle_command(crimson::net::ConnectionRef conn, Ref<MCommand> m); seastar::future<> start_asok_admin(); diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc index f2fa0e20507..87b8fc788e3 100644 --- a/src/crimson/osd/osd_operations/client_request.cc +++ b/src/crimson/osd/osd_operations/client_request.cc @@ -70,13 +70,13 @@ seastar::future<> ClientRequest::start() }).then([this, opref](Ref<PG> pgref) { PG &pg = *pgref; if (pg.can_discard_op(*m)) { - return osd.send_incremental_map(conn.get(), m->get_map_epoch()); + return osd.send_incremental_map(conn, m->get_map_epoch()); } return with_blocking_future( handle.enter(pp(pg).await_map) ).then([this, &pg]() mutable { return with_blocking_future( - pg.osdmap_gate.wait_for_map(m->get_map_epoch())); + pg.osdmap_gate.wait_for_map(m->get_min_epoch())); }).then([this, &pg](auto map) mutable { return with_blocking_future( handle.enter(pp(pg).wait_for_active)); diff 
--git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 5c78a5d115b..812b31bb8a8 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -308,10 +308,12 @@ void PG::prepare_write(pg_info_t &info, } } -void PG::do_delete_work(ceph::os::Transaction &t) +ghobject_t PG::do_delete_work(ceph::os::Transaction &t, + ghobject_t _next) { // TODO shard_services.dec_pg_num(); + return _next; } void PG::scrub_requested(bool deep, bool repair, bool need_auto) @@ -596,6 +598,69 @@ seastar::future<> PG::submit_transaction(const OpInfo& op_info, }); } +osd_op_params_t&& PG::fill_op_params_bump_pg_version( + osd_op_params_t&& osd_op_p, + Ref<MOSDOp> m, + const bool user_modify) +{ + osd_op_p.req = std::move(m); + osd_op_p.at_version = next_version(); + osd_op_p.pg_trim_to = get_pg_trim_to(); + osd_op_p.min_last_complete_ondisk = get_min_last_complete_ondisk(); + osd_op_p.last_complete = get_info().last_complete; + if (user_modify) { + osd_op_p.user_at_version = osd_op_p.at_version.version; + } + return std::move(osd_op_p); +} + +seastar::future<Ref<MOSDOpReply>> PG::handle_failed_op( + const std::error_code& e, + ObjectContextRef obc, + const OpsExecuter& ox, + const MOSDOp& m) const +{ + // Oops, an operation had failed. do_osd_ops() altogether with + // OpsExecuter already dropped the ObjectStore::Transaction if + // there was any. However, this is not enough to completely + // rollback as we gave OpsExecuter the very single copy of `obc` + // we maintain and we did it for both reading and writing. + // Now all modifications must be reverted. + // + // Let's just reload from the store. Evicting from the shared + // LRU would be tricky as next MOSDOp (the one at `get_obc` + // phase) could actually already finished the lookup. Fortunately, + // this is supposed to live on cold paths, so performance is not + // a concern -- simplicity wins. + // + // The conditional's purpose is to efficiently handle hot errors + // which may appear as a result of e.g. 
CEPH_OSD_OP_CMPXATTR or + // CEPH_OSD_OP_OMAP_CMP. These are read-like ops and clients + // typically append them before any write. If OpsExecuter hasn't + // seen any modifying operation, `obc` is supposed to be kept + // unchanged. + assert(e.value() > 0); + const bool need_reload_obc = ox.has_seen_write(); + logger().debug( + "{}: {} - object {} got error code {}, {}; need_reload_obc {}", + __func__, + m, + obc->obs.oi.soid, + e.value(), + e.message(), + need_reload_obc); + return (need_reload_obc ? reload_obc(*obc) + : load_obc_ertr::now() + ).safe_then([&e, &m, obc = std::move(obc), this] { + auto reply = make_message<MOSDOpReply>( + &m, -e.value(), get_osdmap_epoch(), 0, false); + reply->set_enoent_reply_versions( + peering_state.get_info().last_update, + peering_state.get_info().last_user_version); + return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply)); + }, load_obc_ertr::assert_all{ "can't live with object state messed up" }); +} + seastar::future<Ref<MOSDOpReply>> PG::do_osd_ops( Ref<MOSDOp> m, ObjectContextRef obc, @@ -608,9 +673,8 @@ seastar::future<Ref<MOSDOpReply>> PG::do_osd_ops( using osd_op_errorator = OpsExecuter::osd_op_errorator; const auto oid = m->get_snapid() == CEPH_SNAPDIR ? 
m->get_hobj().get_head() : m->get_hobj(); - auto ox = - std::make_unique<OpsExecuter>(obc, op_info, *this/* as const& */, m); - + auto ox = std::make_unique<OpsExecuter>( + obc, op_info, get_pool().info, get_backend(), *m); return crimson::do_for_each( m->ops, [obc, m, ox = ox.get()](OSDOp& osd_op) { logger().debug( @@ -634,18 +698,26 @@ seastar::future<Ref<MOSDOpReply>> PG::do_osd_ops( }, [this, m, &op_info] (auto&& txn, auto&& obc, - auto&& osd_op_p) -> osd_op_errorator::future<> { + auto&& osd_op_p, + bool user_modify) -> osd_op_errorator::future<> { logger().debug( "do_osd_ops: {} - object {} submitting txn", *m, obc->obs.oi.soid); + auto filled_osd_op_p = fill_op_params_bump_pg_version( + std::move(osd_op_p), + std::move(m), + user_modify); return submit_transaction( - op_info, m->ops, std::move(obc), std::move(txn), std::move(osd_op_p)); + op_info, + filled_osd_op_p.req->ops, + std::move(obc), + std::move(txn), + std::move(filled_osd_op_p)); }); }).safe_then([this, m, obc, - ox_deleter = std::move(ox), rvec = op_info.allows_returnvec()] { // TODO: should stop at the first op which returns a negative retval, // cmpext uses it for returning the index of first unmatched byte @@ -664,32 +736,18 @@ seastar::future<Ref<MOSDOpReply>> PG::do_osd_ops( *m, obc->obs.oi.soid); return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply)); - }, OpsExecuter::osd_op_errorator::all_same_way([=] (const std::error_code& e) { - assert(e.value() > 0); - logger().debug( - "do_osd_ops: {} - object {} got error code {}, {}", - *m, - obc->obs.oi.soid, - e.value(), - e.message()); - auto reply = make_message<MOSDOpReply>( - m.get(), -e.value(), get_osdmap_epoch(), 0, false); - reply->set_enoent_reply_versions(peering_state.get_info().last_update, - peering_state.get_info().last_user_version); - return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply)); - })).handle_exception_type([=](const crimson::osd::error& e) { + }, osd_op_errorator::all_same_way([ox = 
ox.get(), + m, + obc, + this] (const std::error_code& e) { + return handle_failed_op(e, std::move(obc), *ox, *m); + })).handle_exception_type([ox_deleter = std::move(ox), + m, + obc, + this] (const crimson::osd::error& e) { // we need this handler because throwing path which aren't errorated yet. - logger().debug( - "do_osd_ops: {} - object {} got unhandled exception {} ({})", - *m, - obc->obs.oi.soid, - e.code(), - e.what()); - auto reply = make_message<MOSDOpReply>( - m.get(), -e.code().value(), get_osdmap_epoch(), 0, false); - reply->set_enoent_reply_versions(peering_state.get_info().last_update, - peering_state.get_info().last_user_version); - return seastar::make_ready_future<Ref<MOSDOpReply>>(std::move(reply)); + logger().debug("encountered the legacy error handling path!"); + return handle_failed_op(e.code(), std::move(obc), *ox_deleter, *m); }); } @@ -771,7 +829,7 @@ std::optional<hobject_t> PG::resolve_oid( } template<RWState::State State> -seastar::future<> +PG::load_obc_ertr::future<> PG::with_head_obc(hobject_t oid, with_obc_func_t&& func) { assert(oid.is_head()); @@ -779,44 +837,45 @@ PG::with_head_obc(hobject_t oid, with_obc_func_t&& func) return obc->with_lock<State>( [oid=std::move(oid), existed=existed, obc=std::move(obc), func=std::move(func), this] { - auto loaded = seastar::make_ready_future<ObjectContextRef>(obc); + auto loaded = load_obc_ertr::make_ready_future<ObjectContextRef>(obc); if (existed) { logger().debug("with_head_obc: found {} in cache", oid); } else { logger().debug("with_head_obc: cache miss on {}", oid); - loaded = obc->with_promoted_lock<RWState::RWEXCL>([this, obc] { + loaded = obc->with_promoted_lock<State>([this, obc] { return load_head_obc(obc); }); } - return loaded.then([func = std::move(func)](auto obc) { + return loaded.safe_then([func=std::move(func)](auto obc) { return func(std::move(obc)); }); }); } template<RWState::State State> -seastar::future<> +PG::load_obc_ertr::future<> PG::with_clone_obc(hobject_t oid, 
with_obc_func_t&& func) { assert(!oid.is_head()); return with_head_obc<RWState::RWREAD>(oid.get_head(), - [oid, func=std::move(func), this](auto head) { + [oid, func=std::move(func), this](auto head) -> load_obc_ertr::future<> { auto coid = resolve_oid(head->get_ro_ss(), oid); if (!coid) { // TODO: return crimson::ct_error::enoent::make(); logger().error("with_clone_obc: {} clone not found", coid); - return seastar::make_ready_future<>(); + return load_obc_ertr::make_ready_future<>(); } auto [clone, existed] = shard_services.obc_registry.get_cached_obc(*coid); return clone->template with_lock<State>( [coid=*coid, existed=existed, - head=std::move(head), clone=std::move(clone), func=std::move(func), this] { - auto loaded = seastar::make_ready_future<ObjectContextRef>(clone); + head=std::move(head), clone=std::move(clone), + func=std::move(func), this]() -> load_obc_ertr::future<> { + auto loaded = load_obc_ertr::make_ready_future<ObjectContextRef>(clone); if (existed) { logger().debug("with_clone_obc: found {} in cache", coid); } else { logger().debug("with_clone_obc: cache miss on {}", coid); - loaded = clone->template with_promoted_lock<RWState::RWEXCL>( + loaded = clone->template with_promoted_lock<State>( [coid, clone, head, this] { return backend->load_metadata(coid).safe_then( [coid, clone=std::move(clone), head=std::move(head)](auto md) mutable { @@ -825,7 +884,7 @@ PG::with_clone_obc(hobject_t oid, with_obc_func_t&& func) }); }); } - return loaded.then([func = std::move(func)](auto clone) { + return loaded.safe_then([func=std::move(func)](auto clone) { return func(std::move(clone)); }); }); @@ -833,7 +892,7 @@ PG::with_clone_obc(hobject_t oid, with_obc_func_t&& func) } // explicitly instantiate the used instantiations -template seastar::future<> +template PG::load_obc_ertr::future<> PG::with_head_obc<RWState::RWNONE>(hobject_t, with_obc_func_t&&); PG::load_obc_ertr::future<crimson::osd::ObjectContextRef> @@ -860,6 +919,29 @@ 
PG::load_head_obc(ObjectContextRef obc) } PG::load_obc_ertr::future<> +PG::reload_obc(crimson::osd::ObjectContext& obc) const +{ + assert(obc.is_head()); + return backend->load_metadata(obc.get_oid()).safe_then([&obc](auto md) + -> load_obc_ertr::future<> { + logger().debug( + "{}: reloaded obs {} for {}", + __func__, + md->os.oi, + obc.get_oid()); + if (!md->ss) { + logger().error( + "{}: oid {} missing snapset", + __func__, + obc.get_oid()); + return crimson::ct_error::object_corrupted::make(); + } + obc.set_head_state(std::move(md->os), std::move(*(md->ss))); + return load_obc_ertr::now(); + }); +} + +PG::load_obc_ertr::future<> PG::with_locked_obc(Ref<MOSDOp> &m, const OpInfo &op_info, Operation *op, PG::with_obc_func_t &&f) { @@ -887,7 +969,7 @@ PG::with_locked_obc(Ref<MOSDOp> &m, const OpInfo &op_info, return with_clone_obc<RWState::RWWRITE>(oid, std::move(f)); } default: - assert(0); + ceph_abort(); }; } @@ -922,7 +1004,7 @@ seastar::future<> PG::handle_rep_op(Ref<MOSDRepOp> req) }); } -void PG::handle_rep_op_reply(crimson::net::Connection* conn, +void PG::handle_rep_op_reply(crimson::net::ConnectionRef conn, const MOSDRepOpReply& m) { if (!can_discard_replica_op(m)) { diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index 33782da01d3..feefb6d70d0 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -34,8 +34,8 @@ #include "crimson/osd/pg_recovery_listener.h" #include "crimson/osd/recovery_backend.h" -class OSDMap; class MQuery; +class OSDMap; class PGBackend; class PGPeeringEvent; class osd_op_params_t; @@ -54,6 +54,7 @@ namespace crimson::os { namespace crimson::osd { class ClientRequest; +class OpsExecuter; class PG : public boost::intrusive_ref_counter< PG, @@ -327,7 +328,8 @@ public: void on_removal(ceph::os::Transaction &t) final { // TODO } - void do_delete_work(ceph::os::Transaction &t) final; + ghobject_t do_delete_work(ceph::os::Transaction &t, + ghobject_t _next) final; // merge/split not ready void clear_ready_to_merge() final 
{} @@ -361,6 +363,7 @@ public: bool try_reserve_recovery_space( int64_t primary_num_bytes, int64_t local_num_bytes) final { + // TODO return true; } void unreserve_recovery_space() final {} @@ -500,11 +503,15 @@ public: load_obc_ertr::future<crimson::osd::ObjectContextRef> load_head_obc(ObjectContextRef obc); + load_obc_ertr::future<> + reload_obc(crimson::osd::ObjectContext& obc) const; + public: - using with_obc_func_t = std::function<seastar::future<> (ObjectContextRef)>; + using with_obc_func_t = + std::function<load_obc_ertr::future<> (ObjectContextRef)>; template<RWState::State State> - seastar::future<> with_head_obc(hobject_t oid, with_obc_func_t&& func); + load_obc_ertr::future<> with_head_obc(hobject_t oid, with_obc_func_t&& func); load_obc_ertr::future<> with_locked_obc( Ref<MOSDOp> &m, @@ -513,7 +520,7 @@ public: with_obc_func_t&& f); seastar::future<> handle_rep_op(Ref<MOSDRepOp> m); - void handle_rep_op_reply(crimson::net::Connection* conn, + void handle_rep_op_reply(crimson::net::ConnectionRef conn, const MOSDRepOpReply& m); void print(std::ostream& os) const; @@ -521,7 +528,7 @@ public: private: template<RWState::State State> - seastar::future<> with_clone_obc(hobject_t oid, with_obc_func_t&& func); + load_obc_ertr::future<> with_clone_obc(hobject_t oid, with_obc_func_t&& func); load_obc_ertr::future<ObjectContextRef> get_locked_obc( Operation *op, @@ -531,18 +538,20 @@ private: void do_peering_event( const boost::statechart::event_base &evt, PeeringCtx &rctx); + osd_op_params_t&& fill_op_params_bump_pg_version( + osd_op_params_t&& osd_op_p, + Ref<MOSDOp> m, + const bool user_modify); + seastar::future<Ref<MOSDOpReply>> handle_failed_op( + const std::error_code& e, + ObjectContextRef obc, + const OpsExecuter& ox, + const MOSDOp& m) const; seastar::future<Ref<MOSDOpReply>> do_osd_ops( Ref<MOSDOp> m, ObjectContextRef obc, const OpInfo &op_info); seastar::future<Ref<MOSDOpReply>> do_pg_ops(Ref<MOSDOp> m); - seastar::future<> do_osd_op( - ObjectState& 
os, - OSDOp& op, - ceph::os::Transaction& txn); - seastar::future<ceph::bufferlist> do_pgnls(ceph::bufferlist& indata, - const std::string& nspace, - uint64_t limit); seastar::future<> submit_transaction(const OpInfo& op_info, const std::vector<OSDOp>& ops, ObjectContextRef&& obc, @@ -663,7 +672,7 @@ private: friend class PeeringEvent; friend class RepRequest; friend class BackfillRecovery; - friend struct BackfillState::PGFacade; + friend struct PGFacade; private: seastar::future<bool> find_unfound() { return seastar::make_ready_future<bool>(true); diff --git a/src/crimson/osd/pg_recovery.cc b/src/crimson/osd/pg_recovery.cc index 14fc62565a4..1ef692783e7 100644 --- a/src/crimson/osd/pg_recovery.cc +++ b/src/crimson/osd/pg_recovery.cc @@ -430,12 +430,11 @@ void PGRecovery::request_primary_scan( } void PGRecovery::enqueue_push( - const pg_shard_t& target, const hobject_t& obj, const eversion_t& v) { - logger().debug("{}: target={} obj={} v={}", - __func__, target, obj, v); + logger().debug("{}: obj={} v={}", + __func__, obj, v); pg->get_recovery_backend()->add_recovering(obj); std::ignore = pg->get_recovery_backend()->recover_object(obj, v).\ handle_exception([] (auto) { @@ -453,7 +452,24 @@ void PGRecovery::enqueue_drop( const hobject_t& obj, const eversion_t& v) { - ceph_abort_msg("Not implemented"); + // allocate a pair if target is seen for the first time + auto& req = backfill_drop_requests[target]; + if (!req) { + req = ceph::make_message<MOSDPGBackfillRemove>( + spg_t(pg->get_pgid().pgid, target.shard), pg->get_osdmap_epoch()); + } + req->ls.emplace_back(obj, v); +} + +void PGRecovery::maybe_flush() +{ + for (auto& [target, req] : backfill_drop_requests) { + std::ignore = pg->get_shard_services().send_to_osd( + target.osd, + std::move(req), + pg->get_osdmap_epoch()); + } + backfill_drop_requests.clear(); } void PGRecovery::update_peers_last_backfill( @@ -524,8 +540,8 @@ void PGRecovery::on_backfill_reserved() using BackfillState = crimson::osd::BackfillState; 
backfill_state = std::make_unique<BackfillState>( *this, - std::make_unique<BackfillState::PeeringFacade>(pg->get_peering_state()), - std::make_unique<BackfillState::PGFacade>( + std::make_unique<crimson::osd::PeeringFacade>(pg->get_peering_state()), + std::make_unique<crimson::osd::PGFacade>( *static_cast<crimson::osd::PG*>(pg))); // yes, it's **not** backfilling yet. The PG_STATE_BACKFILLING // will be set after on_backfill_reserved() returns. diff --git a/src/crimson/osd/pg_recovery.h b/src/crimson/osd/pg_recovery.h index e55547c95b5..54f3744bcec 100644 --- a/src/crimson/osd/pg_recovery.h +++ b/src/crimson/osd/pg_recovery.h @@ -13,6 +13,7 @@ #include "osd/object_state.h" +class MOSDPGBackfillRemove; class PGBackend; class PGRecovery : public crimson::osd::BackfillState::BackfillListener { @@ -82,6 +83,8 @@ private: // backfill begin std::unique_ptr<crimson::osd::BackfillState> backfill_state; + std::map<pg_shard_t, + ceph::ref_t<MOSDPGBackfillRemove>> backfill_drop_requests; template <class EventT> void start_backfill_recovery( @@ -93,13 +96,13 @@ private: void request_primary_scan( const hobject_t& begin) final; void enqueue_push( - const pg_shard_t& target, const hobject_t& obj, const eversion_t& v) final; void enqueue_drop( const pg_shard_t& target, const hobject_t& obj, const eversion_t& v) final; + void maybe_flush() final; void update_peers_last_backfill( const hobject_t& new_last_backfill) final; bool budget_available() const final; diff --git a/src/crimson/osd/recovery_backend.cc b/src/crimson/osd/recovery_backend.cc index 9444379be17..91e4cc334c7 100644 --- a/src/crimson/osd/recovery_backend.cc +++ b/src/crimson/osd/recovery_backend.cc @@ -136,6 +136,25 @@ seastar::future<> RecoveryBackend::handle_backfill( } } +seastar::future<> RecoveryBackend::handle_backfill_remove( + MOSDPGBackfillRemove& m) +{ + logger().debug("{} m.ls=", __func__, m.ls); + assert(m.get_type() == MSG_OSD_PG_BACKFILL_REMOVE); + + ObjectStore::Transaction t; + for ([[maybe_unused]] 
const auto& [soid, ver] : m.ls) { + // TODO: the reserved space management. PG::try_reserve_recovery_space(). + t.remove(pg.get_collection_ref()->get_cid(), + ghobject_t(soid, ghobject_t::NO_GEN, pg.get_pg_whoami().shard)); + } + return shard_services.get_store().do_transaction( + pg.get_collection_ref(), std::move(t) + ).handle_exception([] (auto) { + ceph_abort_msg("this transaction shall not fail"); + }); +} + seastar::future<BackfillInterval> RecoveryBackend::scan_for_backfill( const hobject_t& start, [[maybe_unused]] const std::int64_t min, @@ -274,6 +293,8 @@ seastar::future<> RecoveryBackend::handle_recovery_op( switch (m->get_header().type) { case MSG_OSD_PG_BACKFILL: return handle_backfill(*boost::static_pointer_cast<MOSDPGBackfill>(m)); + case MSG_OSD_PG_BACKFILL_REMOVE: + return handle_backfill_remove(*boost::static_pointer_cast<MOSDPGBackfillRemove>(m)); case MSG_OSD_PG_SCAN: return handle_scan(*boost::static_pointer_cast<MOSDPGScan>(m)); default: diff --git a/src/crimson/osd/recovery_backend.h b/src/crimson/osd/recovery_backend.h index 89f242b2b0c..2bffaed6a64 100644 --- a/src/crimson/osd/recovery_backend.h +++ b/src/crimson/osd/recovery_backend.h @@ -12,6 +12,7 @@ #include "crimson/osd/shard_services.h" #include "messages/MOSDPGBackfill.h" +#include "messages/MOSDPGBackfillRemove.h" #include "messages/MOSDPGScan.h" #include "osd/recovery_types.h" #include "osd/osd_types.h" @@ -31,6 +32,8 @@ class RecoveryBackend { MOSDPGBackfill& m); seastar::future<> handle_backfill(MOSDPGBackfill& m); + seastar::future<> handle_backfill_remove(MOSDPGBackfillRemove& m); + seastar::future<> handle_scan_get_digest( MOSDPGScan& m); seastar::future<> handle_scan_digest( diff --git a/src/crimson/osd/replicated_recovery_backend.cc b/src/crimson/osd/replicated_recovery_backend.cc index edd2513e97a..274b06cfecf 100644 --- a/src/crimson/osd/replicated_recovery_backend.cc +++ b/src/crimson/osd/replicated_recovery_backend.cc @@ -30,20 +30,20 @@ seastar::future<> 
ReplicatedRecoveryBackend::recover_object( [this, soid, need](auto& pops, auto& shards) { return maybe_pull_missing_obj(soid, need).then([this, soid](bool pulled) { return load_obc_for_recovery(soid, pulled); - }).then([this, soid, need, &pops, &shards] { + }).safe_then([this, soid, need, &pops, &shards] { if (!shards.empty()) { return prep_push(soid, need, &pops, shards); } else { return seastar::now(); } - }).handle_exception([this, soid](auto e) { + }, crimson::ct_error::all_same_way([this, soid](const std::error_code& e) { auto recovery_waiter = recovering.find(soid); if (auto obc = recovery_waiter->second.obc; obc) { obc->drop_recovery_read(); } recovering.erase(recovery_waiter); return seastar::make_exception_future<>(e); - }).then([this, &pops, &shards, soid] { + })).then([this, &pops, &shards, soid] { return seastar::parallel_for_each(shards, [this, &pops, soid](auto shard) { auto msg = make_message<MOSDPGPush>(); @@ -114,18 +114,20 @@ ReplicatedRecoveryBackend::maybe_pull_missing_obj( }); } -seastar::future<> ReplicatedRecoveryBackend::load_obc_for_recovery( +auto ReplicatedRecoveryBackend::load_obc_for_recovery( const hobject_t& soid, - bool pulled) + bool pulled) -> + load_obc_ertr::future<> { auto& recovery_waiter = recovering.at(soid); if (recovery_waiter.obc) { - return seastar::now(); + return load_obc_ertr::now(); } return pg.with_head_obc<RWState::RWREAD>(soid, [&recovery_waiter](auto obc) { logger().debug("load_obc_for_recovery: loaded obc: {}", obc->obs.oi.soid); recovery_waiter.obc = obc; - return recovery_waiter.obc->wait_recovery_read(); + recovery_waiter.obc->wait_recovery_read(); + return seastar::now(); }); } @@ -649,8 +651,8 @@ seastar::future<bool> ReplicatedRecoveryBackend::_handle_pull_response( recovery_waiter.obc = obc; obc->obs.oi.decode(pop.attrset[OI_ATTR]); pi.recovery_info.oi = obc->obs.oi; - return seastar::make_ready_future<>(); - }); + return crimson::osd::PG::load_obc_ertr::now(); + 
}).handle_error(crimson::ct_error::assert_all{}); }; return prepare_waiter.then([this, first=pi.recovery_progress.first, &pi, &pop, t, response]() mutable { diff --git a/src/crimson/osd/replicated_recovery_backend.h b/src/crimson/osd/replicated_recovery_backend.h index 4bb353c5aa0..919aa3a703b 100644 --- a/src/crimson/osd/replicated_recovery_backend.h +++ b/src/crimson/osd/replicated_recovery_backend.h @@ -117,7 +117,9 @@ private: eversion_t need); /// load object context for recovery if it is not ready yet - seastar::future<> load_obc_for_recovery( + using load_obc_ertr = crimson::errorator< + crimson::ct_error::object_corrupted>; + load_obc_ertr::future<> load_obc_for_recovery( const hobject_t& soid, bool pulled); diff --git a/src/doc/mon-janitorial-queue.txt b/src/doc/mon-janitorial-queue.txt index bc9972b903d..9114acbe7d7 100644 --- a/src/doc/mon-janitorial-queue.txt +++ b/src/doc/mon-janitorial-queue.txt @@ -6,11 +6,6 @@ Low-hanging fruit: where possible, get rid of those put(). No one expects helpers to put() messages and that may lead to double frees. -Medium complexity: - -- get rid of QuorumServices. It seemed like a neat idea, but we only have - one or two and they just add complexity and noise. - Time consuming / complex: - Split the OSDMonitor.cc file into auxiliary files. This will mean: diff --git a/src/doc/mon-wishlist.txt b/src/doc/mon-wishlist.txt index 3d29a3f6260..a5fb9422cd7 100644 --- a/src/doc/mon-wishlist.txt +++ b/src/doc/mon-wishlist.txt @@ -8,12 +8,6 @@ Low-hanging fruit where possible, get rid of those put(). No one expects helpers to put() messages and that may lead to double frees. (issue #9378) -Medium complexity ------------------ - -* get rid of QuorumServices. It seemed like a neat idea, but we only have - one or two and they just add complexity and noise. 
(issue #10506) - Time consuming / complex ------------------------ diff --git a/src/global/global_init.cc b/src/global/global_init.cc index 3387a0e55bd..32e36d45fe7 100644 --- a/src/global/global_init.cc +++ b/src/global/global_init.cc @@ -515,6 +515,9 @@ int reopen_as_null(CephContext *cct, int fd) void global_init_postfork_start(CephContext *cct) { + // reexpand the meta in child process + cct->_conf.finalize_reexpand_meta(); + // restart log thread cct->_log->start(); cct->notify_post_fork(); diff --git a/src/include/config-h.in.cmake b/src/include/config-h.in.cmake index f3931951322..d952983c55b 100644 --- a/src/include/config-h.in.cmake +++ b/src/include/config-h.in.cmake @@ -360,6 +360,9 @@ /* Define if PWL-SSD is enabled */ #cmakedefine WITH_RBD_SSD_CACHE +/* Define if libcryptsetup version < 2.0.5 */ +#cmakedefine LIBCRYPTSETUP_LEGACY_DATA_ALIGNMENT + /* Shared library extension, such as .so, .dll or .dylib */ #cmakedefine CMAKE_SHARED_LIBRARY_SUFFIX "@CMAKE_SHARED_LIBRARY_SUFFIX@" diff --git a/src/include/rados/librados.h b/src/include/rados/librados.h index 27031419f1f..17fa5b7ce2d 100644 --- a/src/include/rados/librados.h +++ b/src/include/rados/librados.h @@ -2805,6 +2805,10 @@ CEPH_RADOS_API int rados_set_alloc_hint2(rados_ioctx_t io, const char *o, * to be performed atomically. You must call rados_release_write_op when you are * finished with it. * + * @note the ownership of a write operation is passed to the function + * performing the operation, so the same instance of @c rados_write_op_t + * cannot be used again after being performed. + * * @returns non-NULL on success, NULL on memory allocation error. */ CEPH_RADOS_API rados_write_op_t rados_create_write_op(void); @@ -3188,11 +3192,15 @@ CEPH_RADOS_API int rados_aio_write_op_operate(rados_write_op_t write_op, int flags); /** - * Create a new rados_read_op_t write operation. This will store all + * Create a new rados_read_op_t read operation.
This will store all * actions to be performed atomically. You must call * rados_release_read_op when you are finished with it (after it * completes, or you decide not to send it in the first place). * + * @note the ownership of a read operation is passed to the function + * performing the operation, so the same instance of @c rados_read_op_t + * cannot be used again after being performed. + * * @returns non-NULL on success, NULL on memory allocation error. */ CEPH_RADOS_API rados_read_op_t rados_create_read_op(void); diff --git a/src/init-rbdmap b/src/init-rbdmap deleted file mode 100755 index 6058e397e40..00000000000 --- a/src/init-rbdmap +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env bash -# -# rbdmap Ceph RBD Mapping -# -# chkconfig: 2345 20 80 -# description: Ceph RBD Mapping - -### BEGIN INIT INFO -# Provides: rbdmap -# Required-Start: $network $remote_fs -# Required-Stop: $network $remote_fs -# Should-Start: ceph -# Should-Stop: ceph -# X-Start-Before: $x-display-manager -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Ceph RBD Mapping -# Description: Ceph RBD Mapping -### END INIT INFO - -RBDMAPFILE="/etc/ceph/rbdmap" - -if [ -e /lib/lsb/init-functions ]; then - . /lib/lsb/init-functions -fi - - - - -case "$1" in - start) - rbdmap device map - ;; - - stop) - rbdmap device unmap - ;; - - restart|force-reload) - $0 stop - $0 start - ;; - - reload) - rbdmap device map - ;; - - status) - rbd device list - ;; - - *) - echo "Usage: rbdmap {start|stop|restart|force-reload|reload|status}" - exit 1 - ;; -esac diff --git a/src/json_spirit/json_spirit_reader_template.h b/src/json_spirit/json_spirit_reader_template.h index 7d394b26e7d..a5790b9b5e1 100644 --- a/src/json_spirit/json_spirit_reader_template.h +++ b/src/json_spirit/json_spirit_reader_template.h @@ -20,27 +20,18 @@ #include <boost/bind/bind.hpp>
#include <boost/function.hpp>
#include <boost/version.hpp>
-
-#if BOOST_VERSION >= 103800
- #include <boost/spirit/include/classic_core.hpp>
- #include <boost/spirit/include/classic_confix.hpp>
- #include <boost/spirit/include/classic_escape_char.hpp>
- #include <boost/spirit/include/classic_multi_pass.hpp>
- #include <boost/spirit/include/classic_position_iterator.hpp>
- #define spirit_namespace boost::spirit::classic
-#else
- #include <boost/spirit/core.hpp>
- #include <boost/spirit/utility/confix.hpp>
- #include <boost/spirit/utility/escape_char.hpp>
- #include <boost/spirit/iterator/multi_pass.hpp>
- #include <boost/spirit/iterator/position_iterator.hpp>
- #define spirit_namespace boost::spirit
-#endif
+#include <boost/spirit/include/classic_core.hpp>
+#include <boost/spirit/include/classic_confix.hpp>
+#include <boost/spirit/include/classic_escape_char.hpp>
+#include <boost/spirit/include/classic_multi_pass.hpp>
+#include <boost/spirit/include/classic_position_iterator.hpp>
#include "include/ceph_assert.h"
namespace json_spirit
{
+ namespace spirit_namespace = boost::spirit::classic;
+
const spirit_namespace::int_parser < boost::int64_t > int64_p = spirit_namespace::int_parser < boost::int64_t >();
const spirit_namespace::uint_parser< boost::uint64_t > uint64_p = spirit_namespace::uint_parser< boost::uint64_t >();
diff --git a/src/kv/MemDB.cc b/src/kv/MemDB.cc index 4846042e1fe..465aab9e999 100644 --- a/src/kv/MemDB.cc +++ b/src/kv/MemDB.cc @@ -151,7 +151,7 @@ int MemDB::_load() int MemDB::_init(bool create) { - int r; + int r = 0; dout(1) << __func__ << dendl; if (create) { if (fs::exists(m_db_path)) { diff --git a/src/librados/RadosClient.cc b/src/librados/RadosClient.cc index fa996d45224..47b928058c4 100644 --- a/src/librados/RadosClient.cc +++ b/src/librados/RadosClient.cc @@ -56,7 +56,13 @@ namespace ca = ceph::async; namespace cb = ceph::buffer; librados::RadosClient::RadosClient(CephContext *cct_) - : Dispatcher(cct_->get()) {} + : Dispatcher(cct_->get()), + cct_deleter{cct, [](CephContext *p) {p->put();}} +{ + auto& conf = cct->_conf; + conf.add_observer(this); + rados_mon_op_timeout = conf.get_val<std::chrono::seconds>("rados_mon_op_timeout"); +} int64_t librados::RadosClient::lookup_pool(const char *name) { @@ -451,6 +457,7 @@ int librados::RadosClient::get_min_compatible_client(int8_t* min_compat_client, librados::RadosClient::~RadosClient() { + cct->_conf.remove_observer(this); if (messenger) delete messenger; if (objecter) diff --git a/src/librados/RadosClient.h b/src/librados/RadosClient.h index 10d3baea13d..0db094b1800 100644 --- a/src/librados/RadosClient.h +++ b/src/librados/RadosClient.h @@ -50,8 +50,7 @@ public: using Dispatcher::cct; private: std::unique_ptr<CephContext, - std::function<void(CephContext*)> > cct_deleter{ - cct, [](CephContext *p) {p->put();}}; + std::function<void(CephContext*)>> cct_deleter; public: const ConfigProxy& conf{cct->_conf}; diff --git a/src/librados/librados.map b/src/librados/librados.map index b7552be6111..279a0ba0691 100644 --- a/src/librados/librados.map +++ b/src/librados/librados.map @@ -1,4 +1,14 @@ LIBRADOS_PRIVATE { + global: + extern "C++" { + "guard variable for boost::asio::detail::call_stack<boost::asio::detail::strand_executor_service::strand_impl, unsigned char>::top_"; + "guard variable for 
boost::asio::detail::call_stack<boost::asio::detail::strand_service::strand_impl, unsigned char>::top_"; + "guard variable for boost::asio::detail::call_stack<boost::asio::detail::thread_context, boost::asio::detail::thread_info_base>::top_"; + "boost::asio::detail::call_stack<boost::asio::detail::strand_executor_service::strand_impl, unsigned char>::top_"; + "boost::asio::detail::call_stack<boost::asio::detail::strand_service::strand_impl, unsigned char>::top_"; + "boost::asio::detail::call_stack<boost::asio::detail::thread_context, boost::asio::detail::thread_info_base>::top_"; + + }; local: *; }; diff --git a/src/librbd/CMakeLists.txt b/src/librbd/CMakeLists.txt index 7fbadfd49ba..dfd7940a4e9 100644 --- a/src/librbd/CMakeLists.txt +++ b/src/librbd/CMakeLists.txt @@ -1,3 +1,7 @@ +if(Boost_VERSION VERSION_GREATER_EQUAL 1.74) + add_definitions(-DBOOST_ASIO_USE_TS_EXECUTOR_AS_DEFAULT) +endif() + set(librbd_types_srcs journal/Types.cc mirroring_watcher/Types.cc @@ -5,10 +9,6 @@ set(librbd_types_srcs watcher/Types.cc WatchNotifyTypes.cc) -if(WITH_RBD_RWL OR WITH_RBD_SSD_CACHE) - list(APPEND librbd_types_srcs cache/pwl/Types.cc) -endif() - add_library(rbd_types STATIC ${librbd_types_srcs}) @@ -49,10 +49,6 @@ set(librbd_internal_srcs cache/ImageWriteback.cc cache/ObjectCacherObjectDispatch.cc cache/ObjectCacherWriteback.cc - cache/pwl/DiscardRequest.cc - cache/pwl/InitRequest.cc - cache/pwl/ShutdownRequest.cc - cache/Utils.cc cache/WriteAroundObjectDispatch.cc crypto/BlockCrypto.cc crypto/CryptoContextPool.cc @@ -127,11 +123,16 @@ set(librbd_internal_srcs managed_lock/ReleaseRequest.cc managed_lock/Utils.cc migration/FileStream.cc + migration/HttpClient.cc + migration/HttpStream.cc migration/ImageDispatch.cc migration/NativeFormat.cc migration/OpenSourceImageRequest.cc migration/RawFormat.cc + migration/RawSnapshot.cc + migration/S3Stream.cc migration/SourceSpecBuilder.cc + migration/Utils.cc mirror/DemoteRequest.cc mirror/DisableRequest.cc mirror/EnableRequest.cc @@ 
-197,28 +198,11 @@ if(WITH_EVENTTRACE) list(APPEND librbd_internal_srcs ../common/EventTrace.cc) endif() -if(WITH_RBD_RWL OR WITH_RBD_SSD_CACHE) - set(librbd_internal_srcs - ${librbd_internal_srcs} - cache/pwl/ImageCacheState.cc - cache/pwl/LogEntry.cc - cache/pwl/LogMap.cc - cache/pwl/LogOperation.cc - cache/pwl/ReadRequest.cc - cache/pwl/Request.cc - cache/pwl/SyncPoint.cc - cache/pwl/AbstractWriteLog.cc - cache/WriteLogImageDispatch.cc) - if(WITH_RBD_RWL) - set(librbd_internal_srcs - ${librbd_internal_srcs} - cache/pwl/ReplicatedWriteLog.cc) - endif() - if(WITH_RBD_SSD_CACHE) - set(librbd_internal_srcs - ${librbd_internal_srcs} - cache/pwl/SSDWriteLog.cc) - endif() +if(LINUX AND HAVE_LIBCRYPTSETUP) + list(APPEND librbd_internal_srcs + crypto/luks/Header.cc + crypto/luks/FormatRequest.cc + crypto/luks/LoadRequest.cc) endif() add_library(rbd_api STATIC librbd.cc) @@ -235,19 +219,14 @@ if(WITH_EVENTTRACE) add_dependencies(rbd_internal eventtrace_tp) endif() target_link_libraries(rbd_internal PRIVATE - osdc rbd_types) + osdc rbd_types + OpenSSL::SSL) target_include_directories(rbd_internal PRIVATE ${OPENSSL_INCLUDE_DIR}) - -if(WITH_RBD_RWL OR WITH_RBD_SSD_CACHE) - target_link_libraries(rbd_internal - PUBLIC blk) - target_link_libraries(rbd_internal PRIVATE - StdFilesystem::filesystem) -endif() -if(WITH_RBD_RWL) - target_link_libraries(rbd_types - PUBLIC blk) +if(LINUX AND HAVE_LIBCRYPTSETUP) + target_include_directories(rbd_internal PRIVATE ${LIBCRYPTSETUP_INCLUDE_DIR}) + target_link_libraries(rbd_internal PRIVATE ${LIBCRYPTSETUP_LIBRARIES}) endif() + add_custom_target(librbd_plugins) set(librbd_plugins_dir ${CEPH_INSTALL_PKGLIBDIR}/librbd) @@ -267,6 +246,56 @@ set_target_properties(librbd_plugin_parent_cache PROPERTIES install(TARGETS librbd_plugin_parent_cache DESTINATION ${librbd_plugins_dir}) add_dependencies(librbd_plugins librbd_plugin_parent_cache) +if(WITH_RBD_RWL OR WITH_RBD_SSD_CACHE) + set(rbd_plugin_pwl_srcs + cache/WriteLogImageDispatch.cc + 
cache/pwl/AbstractWriteLog.cc + cache/pwl/DiscardRequest.cc + cache/pwl/ImageCacheState.cc + cache/pwl/InitRequest.cc + cache/pwl/LogEntry.cc + cache/pwl/LogMap.cc + cache/pwl/LogOperation.cc + cache/pwl/ReadRequest.cc + cache/pwl/Request.cc + cache/pwl/ShutdownRequest.cc + cache/pwl/SyncPoint.cc + cache/pwl/Types.cc + plugin/WriteLogImageCache.cc) + + if(WITH_RBD_SSD_CACHE) + set(rbd_plugin_pwl_srcs + ${rbd_plugin_pwl_srcs} + cache/pwl/SSDWriteLog.cc) + endif() + if(WITH_RBD_RWL) + set(rbd_plugin_pwl_srcs + ${rbd_plugin_pwl_srcs} + cache/pwl/ReplicatedWriteLog.cc) + endif() + + add_library(librbd_plugin_pwl_cache SHARED + ${rbd_plugin_pwl_srcs}) + target_link_libraries(librbd_plugin_pwl_cache PRIVATE + blk + ceph-common + cls_rbd_client + StdFilesystem::filesystem) + + if(WITH_RBD_RWL) + target_link_libraries(librbd_plugin_pwl_cache + PUBLIC pmem::pmemobj + PRIVATE pmem::pmem) + endif() + + set_target_properties(librbd_plugin_pwl_cache PROPERTIES + OUTPUT_NAME ceph_librbd_pwl_cache + VERSION 1.0.0 + SOVERSION 1) + install(TARGETS librbd_plugin_pwl_cache DESTINATION ${librbd_plugins_dir}) + add_dependencies(librbd_plugins librbd_plugin_pwl_cache) +endif() + add_library(librbd ${CEPH_SHARED} librbd.cc) if(WITH_LTTNG) diff --git a/src/librbd/ImageState.cc b/src/librbd/ImageState.cc index ab63b69bdda..a81a8373dbd 100644 --- a/src/librbd/ImageState.cc +++ b/src/librbd/ImageState.cc @@ -808,6 +808,11 @@ void ImageState<I>::complete_action_unlock(State next_state, int r) { TaskFinisherSingleton::get_singleton(m_image_ctx->cct).queue(ctx, r); } else { for (auto ctx : action_contexts.second) { + if (next_state == STATE_OPEN) { + // we couldn't originally wrap the open callback w/ an async wrapper in + // case the image failed to open + ctx = create_async_context_callback(*m_image_ctx, ctx); + } ctx->complete(r); } @@ -828,9 +833,8 @@ void ImageState<I>::send_open_unlock() { m_state = STATE_OPENING; - Context *ctx = create_async_context_callback( - *m_image_ctx, 
create_context_callback< - ImageState<I>, &ImageState<I>::handle_open>(this)); + Context *ctx = create_context_callback< + ImageState<I>, &ImageState<I>::handle_open>(this); image::OpenRequest<I> *req = image::OpenRequest<I>::create( m_image_ctx, m_open_flags, ctx); diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index 13ab9d4541e..b2f9d610d7d 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -6,7 +6,6 @@ #include "librbd/ImageCtx.h" #include "librbd/ImageState.h" #include "librbd/internal.h" -#include "librbd/Operations.h" #include "librbd/TaskFinisher.h" #include "librbd/Types.h" #include "librbd/Utils.h" @@ -89,6 +88,9 @@ void ImageWatcher<I>::unregister_watch(Context *on_finish) { cancel_async_requests(); + // flush the task finisher queue before completing + on_finish = create_async_context_callback(m_task_finisher, on_finish); + on_finish = new LambdaContext([this, on_finish](int r) { cancel_quiesce_requests(); m_task_finisher->cancel_all(); @@ -201,7 +203,7 @@ void ImageWatcher<I>::notify_resize(uint64_t request_id, uint64_t size, AsyncRequestId async_request_id(get_client_id(), request_id); notify_async_request(async_request_id, - new ResizePayload(size, allow_shrink, async_request_id), + new ResizePayload(async_request_id, size, allow_shrink), prog_ctx, on_finish); } @@ -225,51 +227,68 @@ void ImageWatcher<I>::notify_snap_create(uint64_t request_id, } template <typename I> -void ImageWatcher<I>::notify_snap_rename(const snapid_t &src_snap_id, +void ImageWatcher<I>::notify_snap_rename(uint64_t request_id, + const snapid_t &src_snap_id, const std::string &dst_snap_name, Context *on_finish) { ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock)); ceph_assert(m_image_ctx.exclusive_lock && !m_image_ctx.exclusive_lock->is_lock_owner()); - notify_lock_owner(new SnapRenamePayload(src_snap_id, dst_snap_name), - on_finish); + AsyncRequestId async_request_id(get_client_id(), request_id); + + notify_async_request( + 
async_request_id, + new SnapRenamePayload(async_request_id, src_snap_id, dst_snap_name), + m_no_op_prog_ctx, on_finish); } template <typename I> -void ImageWatcher<I>::notify_snap_remove(const cls::rbd::SnapshotNamespace &snap_namespace, - const std::string &snap_name, - Context *on_finish) { +void ImageWatcher<I>::notify_snap_remove( + uint64_t request_id, const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name, Context *on_finish) { ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock)); ceph_assert(m_image_ctx.exclusive_lock && !m_image_ctx.exclusive_lock->is_lock_owner()); - notify_lock_owner(new SnapRemovePayload(snap_namespace, snap_name), - on_finish); + AsyncRequestId async_request_id(get_client_id(), request_id); + + notify_async_request( + async_request_id, + new SnapRemovePayload(async_request_id, snap_namespace, snap_name), + m_no_op_prog_ctx, on_finish); } template <typename I> -void ImageWatcher<I>::notify_snap_protect(const cls::rbd::SnapshotNamespace &snap_namespace, - const std::string &snap_name, - Context *on_finish) { +void ImageWatcher<I>::notify_snap_protect( + uint64_t request_id, const cls::rbd::SnapshotNamespace &snap_namespace, + const std::string &snap_name, Context *on_finish) { ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock)); ceph_assert(m_image_ctx.exclusive_lock && !m_image_ctx.exclusive_lock->is_lock_owner()); - notify_lock_owner(new SnapProtectPayload(snap_namespace, snap_name), - on_finish); + AsyncRequestId async_request_id(get_client_id(), request_id); + + notify_async_request( + async_request_id, + new SnapProtectPayload(async_request_id, snap_namespace, snap_name), + m_no_op_prog_ctx, on_finish); } template <typename I> -void ImageWatcher<I>::notify_snap_unprotect(const cls::rbd::SnapshotNamespace &snap_namespace, - const std::string &snap_name, - Context *on_finish) { +void ImageWatcher<I>::notify_snap_unprotect( + uint64_t request_id, const cls::rbd::SnapshotNamespace &snap_namespace, + 
const std::string &snap_name, Context *on_finish) { ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock)); ceph_assert(m_image_ctx.exclusive_lock && !m_image_ctx.exclusive_lock->is_lock_owner()); - notify_lock_owner(new SnapUnprotectPayload(snap_namespace, snap_name), - on_finish); + AsyncRequestId async_request_id(get_client_id(), request_id); + + notify_async_request( + async_request_id, + new SnapUnprotectPayload(async_request_id, snap_namespace, snap_name), + m_no_op_prog_ctx, on_finish); } template <typename I> @@ -288,23 +307,33 @@ void ImageWatcher<I>::notify_rebuild_object_map(uint64_t request_id, } template <typename I> -void ImageWatcher<I>::notify_rename(const std::string &image_name, +void ImageWatcher<I>::notify_rename(uint64_t request_id, + const std::string &image_name, Context *on_finish) { ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock)); ceph_assert(m_image_ctx.exclusive_lock && !m_image_ctx.exclusive_lock->is_lock_owner()); - notify_lock_owner(new RenamePayload(image_name), on_finish); + AsyncRequestId async_request_id(get_client_id(), request_id); + + notify_async_request(async_request_id, + new RenamePayload(async_request_id, image_name), + m_no_op_prog_ctx, on_finish); } template <typename I> -void ImageWatcher<I>::notify_update_features(uint64_t features, bool enabled, +void ImageWatcher<I>::notify_update_features(uint64_t request_id, + uint64_t features, bool enabled, Context *on_finish) { ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock)); ceph_assert(m_image_ctx.exclusive_lock && !m_image_ctx.exclusive_lock->is_lock_owner()); - notify_lock_owner(new UpdateFeaturesPayload(features, enabled), on_finish); + AsyncRequestId async_request_id(get_client_id(), request_id); + + notify_async_request(async_request_id, + new UpdateFeaturesPayload(async_request_id, features, enabled), + m_no_op_prog_ctx, on_finish); } template <typename I> @@ -357,7 +386,7 @@ template <typename I> void ImageWatcher<I>::notify_quiesce(uint64_t 
*request_id, ProgressContext &prog_ctx, Context *on_finish) { - *request_id = m_image_ctx.operations->reserve_async_request_id(); + *request_id = util::reserve_async_request_id(); ldout(m_image_ctx.cct, 10) << this << " " << __func__ << ": request_id=" << request_id << dendl; @@ -443,27 +472,37 @@ void ImageWatcher<I>::notify_unquiesce(uint64_t request_id, Context *on_finish) } template <typename I> -void ImageWatcher<I>::notify_metadata_set(const std::string &key, +void ImageWatcher<I>::notify_metadata_set(uint64_t request_id, + const std::string &key, const std::string &value, Context *on_finish) { ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock)); ceph_assert(m_image_ctx.exclusive_lock && !m_image_ctx.exclusive_lock->is_lock_owner()); - notify_lock_owner(new MetadataUpdatePayload(key, - std::optional<std::string>{value}), - on_finish); + AsyncRequestId async_request_id(get_client_id(), request_id); + + notify_async_request( + async_request_id, + new MetadataUpdatePayload(async_request_id, key, + std::optional<std::string>{value}), + m_no_op_prog_ctx, on_finish); } template <typename I> -void ImageWatcher<I>::notify_metadata_remove(const std::string &key, +void ImageWatcher<I>::notify_metadata_remove(uint64_t request_id, + const std::string &key, Context *on_finish) { ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock)); ceph_assert(m_image_ctx.exclusive_lock && !m_image_ctx.exclusive_lock->is_lock_owner()); - notify_lock_owner(new MetadataUpdatePayload(key, std::nullopt), - on_finish); + AsyncRequestId async_request_id(get_client_id(), request_id); + + notify_async_request( + async_request_id, + new MetadataUpdatePayload(async_request_id, key, std::nullopt), + m_no_op_prog_ctx, on_finish); } template <typename I> @@ -877,6 +916,57 @@ void ImageWatcher<I>::cancel_quiesce_requests() { } template <typename I> +bool ImageWatcher<I>::handle_operation_request( + const AsyncRequestId& async_request_id, + exclusive_lock::OperationRequestType request_type, 
Operation operation, + std::function<void(ProgressContext &prog_ctx, Context*)> execute, + C_NotifyAck *ack_ctx) { + std::shared_lock owner_locker{m_image_ctx.owner_lock}; + + if (m_image_ctx.exclusive_lock != nullptr) { + int r = 0; + if (m_image_ctx.exclusive_lock->accept_request(request_type, &r)) { + bool new_request; + Context *ctx; + ProgressContext *prog_ctx; + bool complete; + if (async_request_id) { + r = prepare_async_request(async_request_id, &new_request, &ctx, + &prog_ctx); + encode(ResponseMessage(r), ack_ctx->out); + complete = true; + } else { + new_request = true; + ctx = new C_ResponseMessage(ack_ctx); + prog_ctx = &m_no_op_prog_ctx; + complete = false; + } + if (r == 0 && new_request) { + ctx = new LambdaContext( + [this, operation, ctx](int r) { + m_image_ctx.operations->finish_op(operation, r); + ctx->complete(r); + }); + ctx = new LambdaContext( + [this, execute, prog_ctx, ctx](int r) { + if (r < 0) { + ctx->complete(r); + return; + } + std::shared_lock l{m_image_ctx.owner_lock}; + execute(*prog_ctx, ctx); + }); + m_image_ctx.operations->start_op(operation, ctx); + } + return complete; + } else if (r < 0) { + encode(ResponseMessage(r), ack_ctx->out); + } + } + return true; +} + +template <typename I> bool ImageWatcher<I>::handle_payload(const HeaderUpdatePayload &payload, C_NotifyAck *ack_ctx) { ldout(m_image_ctx.cct, 10) << this << " image header updated" << dendl; @@ -1018,334 +1108,232 @@ bool ImageWatcher<I>::handle_payload(const AsyncCompletePayload &payload, template <typename I> bool ImageWatcher<I>::handle_payload(const FlattenPayload &payload, C_NotifyAck *ack_ctx) { + ldout(m_image_ctx.cct, 10) << this << " remote flatten request: " + << payload.async_request_id << dendl; - std::shared_lock l{m_image_ctx.owner_lock}; - if (m_image_ctx.exclusive_lock != nullptr) { - int r; - if (m_image_ctx.exclusive_lock->accept_request( - exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, &r)) { - bool new_request; - Context *ctx; - ProgressContext 
*prog_ctx; - r = prepare_async_request(payload.async_request_id, &new_request, - &ctx, &prog_ctx); - if (r == 0 && new_request) { - ldout(m_image_ctx.cct, 10) << this << " remote flatten request: " - << payload.async_request_id << dendl; - m_image_ctx.operations->execute_flatten(*prog_ctx, ctx); - } - - encode(ResponseMessage(r), ack_ctx->out); - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } - } - return true; + return handle_operation_request( + payload.async_request_id, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, + OPERATION_FLATTEN, std::bind(&Operations<I>::execute_flatten, + m_image_ctx.operations, + std::placeholders::_1, + std::placeholders::_2), + ack_ctx); } template <typename I> bool ImageWatcher<I>::handle_payload(const ResizePayload &payload, C_NotifyAck *ack_ctx) { - std::shared_lock l{m_image_ctx.owner_lock}; - if (m_image_ctx.exclusive_lock != nullptr) { - int r; - if (m_image_ctx.exclusive_lock->accept_request( - exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, &r)) { - bool new_request; - Context *ctx; - ProgressContext *prog_ctx; - r = prepare_async_request(payload.async_request_id, &new_request, - &ctx, &prog_ctx); - if (r == 0 && new_request) { - ldout(m_image_ctx.cct, 10) << this << " remote resize request: " - << payload.async_request_id << " " - << payload.size << " " - << payload.allow_shrink << dendl; - m_image_ctx.operations->execute_resize(payload.size, payload.allow_shrink, *prog_ctx, ctx, 0); - } + ldout(m_image_ctx.cct, 10) << this << " remote resize request: " + << payload.async_request_id << " " + << payload.size << " " + << payload.allow_shrink << dendl; - encode(ResponseMessage(r), ack_ctx->out); - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } - } - return true; + return handle_operation_request( + payload.async_request_id, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, + OPERATION_RESIZE, std::bind(&Operations<I>::execute_resize, + m_image_ctx.operations, payload.size, + 
payload.allow_shrink, std::placeholders::_1, + std::placeholders::_2, 0), ack_ctx); } template <typename I> bool ImageWatcher<I>::handle_payload(const SnapCreatePayload &payload, C_NotifyAck *ack_ctx) { - std::shared_lock l{m_image_ctx.owner_lock}; - if (m_image_ctx.exclusive_lock != nullptr) { - int r; - auto request_type = exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL; - - // rbd-mirror needs to accept forced promotion orphan snap create requests - auto mirror_ns = boost::get<cls::rbd::MirrorSnapshotNamespace>( - &payload.snap_namespace); - if (mirror_ns != nullptr && mirror_ns->is_orphan()) { - request_type = exclusive_lock::OPERATION_REQUEST_TYPE_FORCE_PROMOTION; - } - - if (m_image_ctx.exclusive_lock->accept_request(request_type, &r)) { - bool new_request; - Context *ctx; - ProgressContext *prog_ctx; - bool complete; - if (payload.async_request_id) { - r = prepare_async_request(payload.async_request_id, &new_request, - &ctx, &prog_ctx); - encode(ResponseMessage(r), ack_ctx->out); - complete = true; - } else { - new_request = true; - prog_ctx = new NoOpProgressContext(); - ctx = new LambdaContext( - [prog_ctx, on_finish=new C_ResponseMessage(ack_ctx)](int r) { - delete prog_ctx; - on_finish->complete(r); - }); - complete = false; - } - if (r == 0 && new_request) { - ldout(m_image_ctx.cct, 10) << this << " remote snap_create request: " - << payload.async_request_id << " " - << payload.snap_namespace << " " - << payload.snap_name << " " - << payload.flags << dendl; - - m_image_ctx.operations->execute_snap_create(payload.snap_namespace, - payload.snap_name, - ctx, 0, payload.flags, - *prog_ctx); - } - return complete; - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } + ldout(m_image_ctx.cct, 10) << this << " remote snap_create request: " + << payload.async_request_id << " " + << payload.snap_namespace << " " + << payload.snap_name << " " + << payload.flags << dendl; + + auto request_type = exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL; + + // 
rbd-mirror needs to accept forced promotion orphan snap create requests + auto mirror_ns = boost::get<cls::rbd::MirrorSnapshotNamespace>( + &payload.snap_namespace); + if (mirror_ns != nullptr && mirror_ns->is_orphan()) { + request_type = exclusive_lock::OPERATION_REQUEST_TYPE_FORCE_PROMOTION; } - return true; + + return handle_operation_request( + payload.async_request_id, request_type, + OPERATION_SNAP_CREATE, std::bind(&Operations<I>::execute_snap_create, + m_image_ctx.operations, + payload.snap_namespace, + payload.snap_name, std::placeholders::_2, + 0, payload.flags, std::placeholders::_1), + ack_ctx); } template <typename I> bool ImageWatcher<I>::handle_payload(const SnapRenamePayload &payload, C_NotifyAck *ack_ctx) { - std::shared_lock l{m_image_ctx.owner_lock}; - if (m_image_ctx.exclusive_lock != nullptr) { - int r; - if (m_image_ctx.exclusive_lock->accept_request( - exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, &r)) { - ldout(m_image_ctx.cct, 10) << this << " remote snap_rename request: " - << payload.snap_id << " to " - << payload.snap_name << dendl; - - m_image_ctx.operations->execute_snap_rename(payload.snap_id, - payload.snap_name, - new C_ResponseMessage(ack_ctx)); - return false; - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } - } - return true; + ldout(m_image_ctx.cct, 10) << this << " remote snap_rename request: " + << payload.async_request_id << " " + << payload.snap_id << " to " + << payload.snap_name << dendl; + + return handle_operation_request( + payload.async_request_id, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, + OPERATION_SNAP_RENAME, std::bind(&Operations<I>::execute_snap_rename, + m_image_ctx.operations, payload.snap_id, + payload.snap_name, + std::placeholders::_2), ack_ctx); } template <typename I> bool ImageWatcher<I>::handle_payload(const SnapRemovePayload &payload, C_NotifyAck *ack_ctx) { - std::shared_lock l{m_image_ctx.owner_lock}; - if (m_image_ctx.exclusive_lock != nullptr) { - auto request_type = 
exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL; - if (cls::rbd::get_snap_namespace_type(payload.snap_namespace) == - cls::rbd::SNAPSHOT_NAMESPACE_TYPE_TRASH) { - request_type = exclusive_lock::OPERATION_REQUEST_TYPE_TRASH_SNAP_REMOVE; - } - int r; - if (m_image_ctx.exclusive_lock->accept_request(request_type, &r)) { - ldout(m_image_ctx.cct, 10) << this << " remote snap_remove request: " - << payload.snap_name << dendl; + ldout(m_image_ctx.cct, 10) << this << " remote snap_remove request: " + << payload.snap_name << dendl; - m_image_ctx.operations->execute_snap_remove(payload.snap_namespace, - payload.snap_name, - new C_ResponseMessage(ack_ctx)); - return false; - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } + auto request_type = exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL; + if (cls::rbd::get_snap_namespace_type(payload.snap_namespace) == + cls::rbd::SNAPSHOT_NAMESPACE_TYPE_TRASH) { + request_type = exclusive_lock::OPERATION_REQUEST_TYPE_TRASH_SNAP_REMOVE; } - return true; + + return handle_operation_request( + payload.async_request_id, request_type, OPERATION_SNAP_REMOVE, + std::bind(&Operations<I>::execute_snap_remove, m_image_ctx.operations, + payload.snap_namespace, payload.snap_name, + std::placeholders::_2), ack_ctx); } template <typename I> bool ImageWatcher<I>::handle_payload(const SnapProtectPayload& payload, C_NotifyAck *ack_ctx) { - std::shared_lock owner_locker{m_image_ctx.owner_lock}; - if (m_image_ctx.exclusive_lock != nullptr) { - int r; - if (m_image_ctx.exclusive_lock->accept_request( - exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, &r)) { - ldout(m_image_ctx.cct, 10) << this << " remote snap_protect request: " - << payload.snap_name << dendl; - - m_image_ctx.operations->execute_snap_protect(payload.snap_namespace, - payload.snap_name, - new C_ResponseMessage(ack_ctx)); - return false; - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } - } - return true; + ldout(m_image_ctx.cct, 10) << this << " remote 
snap_protect request: " + << payload.async_request_id << " " + << payload.snap_name << dendl; + + return handle_operation_request( + payload.async_request_id, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, + OPERATION_SNAP_PROTECT, std::bind(&Operations<I>::execute_snap_protect, + m_image_ctx.operations, + payload.snap_namespace, + payload.snap_name, + std::placeholders::_2), ack_ctx); } template <typename I> bool ImageWatcher<I>::handle_payload(const SnapUnprotectPayload& payload, C_NotifyAck *ack_ctx) { - std::shared_lock owner_locker{m_image_ctx.owner_lock}; - if (m_image_ctx.exclusive_lock != nullptr) { - int r; - if (m_image_ctx.exclusive_lock->accept_request( - exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, &r)) { - ldout(m_image_ctx.cct, 10) << this << " remote snap_unprotect request: " - << payload.snap_name << dendl; - - m_image_ctx.operations->execute_snap_unprotect(payload.snap_namespace, - payload.snap_name, - new C_ResponseMessage(ack_ctx)); - return false; - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } - } - return true; + ldout(m_image_ctx.cct, 10) << this << " remote snap_unprotect request: " + << payload.async_request_id << " " + << payload.snap_name << dendl; + + return handle_operation_request( + payload.async_request_id, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, + OPERATION_SNAP_UNPROTECT, std::bind(&Operations<I>::execute_snap_unprotect, + m_image_ctx.operations, + payload.snap_namespace, + payload.snap_name, + std::placeholders::_2), ack_ctx); } template <typename I> bool ImageWatcher<I>::handle_payload(const RebuildObjectMapPayload& payload, C_NotifyAck *ack_ctx) { - std::shared_lock l{m_image_ctx.owner_lock}; - if (m_image_ctx.exclusive_lock != nullptr) { - int r; - if (m_image_ctx.exclusive_lock->accept_request( - exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, &r)) { - bool new_request; - Context *ctx; - ProgressContext *prog_ctx; - r = prepare_async_request(payload.async_request_id, &new_request, - &ctx, 
&prog_ctx); - if (r == 0 && new_request) { - ldout(m_image_ctx.cct, 10) << this - << " remote rebuild object map request: " - << payload.async_request_id << dendl; - m_image_ctx.operations->execute_rebuild_object_map(*prog_ctx, ctx); - } + ldout(m_image_ctx.cct, 10) << this << " remote rebuild object map request: " + << payload.async_request_id << dendl; - encode(ResponseMessage(r), ack_ctx->out); - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } - } - return true; + return handle_operation_request( + payload.async_request_id, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, + OPERATION_REBUILD_OBJECT_MAP, + std::bind(&Operations<I>::execute_rebuild_object_map, + m_image_ctx.operations, std::placeholders::_1, + std::placeholders::_2), ack_ctx); } template <typename I> bool ImageWatcher<I>::handle_payload(const RenamePayload& payload, C_NotifyAck *ack_ctx) { - std::shared_lock owner_locker{m_image_ctx.owner_lock}; - if (m_image_ctx.exclusive_lock != nullptr) { - int r; - if (m_image_ctx.exclusive_lock->accept_request( - exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, &r)) { - ldout(m_image_ctx.cct, 10) << this << " remote rename request: " - << payload.image_name << dendl; + ldout(m_image_ctx.cct, 10) << this << " remote rename request: " + << payload.async_request_id << " " + << payload.image_name << dendl; - m_image_ctx.operations->execute_rename(payload.image_name, - new C_ResponseMessage(ack_ctx)); - return false; - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } - } - return true; + return handle_operation_request( + payload.async_request_id, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, + OPERATION_RENAME, std::bind(&Operations<I>::execute_rename, + m_image_ctx.operations, payload.image_name, + std::placeholders::_2), ack_ctx); } template <typename I> bool ImageWatcher<I>::handle_payload(const UpdateFeaturesPayload& payload, C_NotifyAck *ack_ctx) { - std::shared_lock owner_locker{m_image_ctx.owner_lock}; - if 
(m_image_ctx.exclusive_lock != nullptr) { - int r; - if (m_image_ctx.exclusive_lock->accept_request( - exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, &r)) { - ldout(m_image_ctx.cct, 10) << this << " remote update_features request: " - << payload.features << " " - << (payload.enabled ? "enabled" : "disabled") - << dendl; + ldout(m_image_ctx.cct, 10) << this << " remote update_features request: " + << payload.async_request_id << " " + << payload.features << " " + << (payload.enabled ? "enabled" : "disabled") + << dendl; - m_image_ctx.operations->execute_update_features( - payload.features, payload.enabled, new C_ResponseMessage(ack_ctx), 0); - return false; - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } - } - return true; + return handle_operation_request( + payload.async_request_id, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, + OPERATION_UPDATE_FEATURES, + std::bind(&Operations<I>::execute_update_features, m_image_ctx.operations, + payload.features, payload.enabled, std::placeholders::_2, 0), + ack_ctx); } template <typename I> bool ImageWatcher<I>::handle_payload(const MigratePayload &payload, C_NotifyAck *ack_ctx) { + ldout(m_image_ctx.cct, 10) << this << " remote migrate request: " + << payload.async_request_id << dendl; - std::shared_lock l{m_image_ctx.owner_lock}; - if (m_image_ctx.exclusive_lock != nullptr) { - int r; - if (m_image_ctx.exclusive_lock->accept_request( - exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, &r)) { - bool new_request; - Context *ctx; - ProgressContext *prog_ctx; - r = prepare_async_request(payload.async_request_id, &new_request, - &ctx, &prog_ctx); - if (r == 0 && new_request) { - ldout(m_image_ctx.cct, 10) << this << " remote migrate request: " - << payload.async_request_id << dendl; - m_image_ctx.operations->execute_migrate(*prog_ctx, ctx); - } - - encode(ResponseMessage(r), ack_ctx->out); - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } - } - return true; + return 
handle_operation_request( + payload.async_request_id, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, + OPERATION_MIGRATE, std::bind(&Operations<I>::execute_migrate, + m_image_ctx.operations, + std::placeholders::_1, + std::placeholders::_2), ack_ctx); } template <typename I> bool ImageWatcher<I>::handle_payload(const SparsifyPayload &payload, C_NotifyAck *ack_ctx) { - std::shared_lock l{m_image_ctx.owner_lock}; - if (m_image_ctx.exclusive_lock != nullptr) { - int r; - if (m_image_ctx.exclusive_lock->accept_request( - exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, &r)) { - bool new_request; - Context *ctx; - ProgressContext *prog_ctx; - r = prepare_async_request(payload.async_request_id, &new_request, - &ctx, &prog_ctx); - if (r == 0 && new_request) { - ldout(m_image_ctx.cct, 10) << this << " remote sparsify request: " - << payload.async_request_id << dendl; - m_image_ctx.operations->execute_sparsify(payload.sparse_size, *prog_ctx, - ctx); - } + ldout(m_image_ctx.cct, 10) << this << " remote sparsify request: " + << payload.async_request_id << dendl; - encode(ResponseMessage(r), ack_ctx->out); - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } + return handle_operation_request( + payload.async_request_id, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, + OPERATION_SPARSIFY, std::bind(&Operations<I>::execute_sparsify, + m_image_ctx.operations, + payload.sparse_size, std::placeholders::_1, + std::placeholders::_2), ack_ctx); +} + +template <typename I> +bool ImageWatcher<I>::handle_payload(const MetadataUpdatePayload &payload, + C_NotifyAck *ack_ctx) { + if (payload.value) { + ldout(m_image_ctx.cct, 10) << this << " remote metadata_set request: " + << payload.async_request_id << " " + << "key=" << payload.key << ", value=" + << *payload.value << dendl; + + return handle_operation_request( + payload.async_request_id, + exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, + OPERATION_METADATA_UPDATE, + std::bind(&Operations<I>::execute_metadata_set, + 
m_image_ctx.operations, payload.key, *payload.value, + std::placeholders::_2), + ack_ctx); + } else { + ldout(m_image_ctx.cct, 10) << this << " remote metadata_remove request: " + << payload.async_request_id << " " + << "key=" << payload.key << dendl; + + return handle_operation_request( + payload.async_request_id, + exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, + OPERATION_METADATA_UPDATE, + std::bind(&Operations<I>::execute_metadata_remove, + m_image_ctx.operations, payload.key, std::placeholders::_2), + ack_ctx); } - return true; } template <typename I> @@ -1375,37 +1363,6 @@ bool ImageWatcher<I>::handle_payload(const UnquiescePayload &payload, } template <typename I> -bool ImageWatcher<I>::handle_payload(const MetadataUpdatePayload &payload, - C_NotifyAck *ack_ctx) { - std::shared_lock l{m_image_ctx.owner_lock}; - if (m_image_ctx.exclusive_lock != nullptr) { - int r; - if (m_image_ctx.exclusive_lock->accept_request( - exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, &r)) { - if (payload.value) { - ldout(m_image_ctx.cct, 10) << this << " remote metadata_set request: key=" - << payload.key << ", value=" - << *payload.value << dendl; - - m_image_ctx.operations->execute_metadata_set(payload.key, *payload.value, - new C_ResponseMessage(ack_ctx)); - return false; - } else { - ldout(m_image_ctx.cct, 10) << this << " remote metadata_remove request: key=" - << payload.key << dendl; - - m_image_ctx.operations->execute_metadata_remove(payload.key, - new C_ResponseMessage(ack_ctx)); - return false; - } - } else if (r < 0) { - encode(ResponseMessage(r), ack_ctx->out); - } - } - return true; -} - -template <typename I> bool ImageWatcher<I>::handle_payload(const UnknownPayload &payload, C_NotifyAck *ack_ctx) { std::shared_lock l{m_image_ctx.owner_lock}; diff --git a/src/librbd/ImageWatcher.h b/src/librbd/ImageWatcher.h index c445974c79a..cda9a246e0e 100644 --- a/src/librbd/ImageWatcher.h +++ b/src/librbd/ImageWatcher.h @@ -9,8 +9,12 @@ #include "common/ceph_mutex.h" #include 
"include/Context.h" #include "include/rbd/librbd.hpp" +#include "librbd/Operations.h" #include "librbd/Watcher.h" #include "librbd/WatchNotifyTypes.h" +#include "librbd/exclusive_lock/Policy.h" +#include "librbd/internal.h" +#include <functional> #include <set> #include <string> #include <utility> @@ -41,23 +45,29 @@ public: uint64_t flags, ProgressContext &prog_ctx, Context *on_finish); - void notify_snap_rename(const snapid_t &src_snap_id, + void notify_snap_rename(uint64_t request_id, + const snapid_t &src_snap_id, const std::string &dst_snap_name, Context *on_finish); - void notify_snap_remove(const cls::rbd::SnapshotNamespace &snap_namespace, + void notify_snap_remove(uint64_t request_id, + const cls::rbd::SnapshotNamespace &snap_namespace, const std::string &snap_name, Context *on_finish); - void notify_snap_protect(const cls::rbd::SnapshotNamespace &snap_namespace, + void notify_snap_protect(uint64_t request_id, + const cls::rbd::SnapshotNamespace &snap_namespace, const std::string &snap_name, Context *on_finish); - void notify_snap_unprotect(const cls::rbd::SnapshotNamespace &snap_namespace, + void notify_snap_unprotect(uint64_t request_id, + const cls::rbd::SnapshotNamespace &snap_namespace, const std::string &snap_name, Context *on_finish); void notify_rebuild_object_map(uint64_t request_id, ProgressContext &prog_ctx, Context *on_finish); - void notify_rename(const std::string &image_name, Context *on_finish); + void notify_rename(uint64_t request_id, + const std::string &image_name, Context *on_finish); - void notify_update_features(uint64_t features, bool enabled, + void notify_update_features(uint64_t request_id, + uint64_t features, bool enabled, Context *on_finish); void notify_migrate(uint64_t request_id, ProgressContext &prog_ctx, @@ -78,9 +88,11 @@ public: Context *on_finish); void notify_unquiesce(uint64_t request_id, Context *on_finish); - void notify_metadata_set(const std::string &key, const std::string &value, + void 
notify_metadata_set(uint64_t request_id, + const std::string &key, const std::string &value, Context *on_finish); - void notify_metadata_remove(const std::string &key, Context *on_finish); + void notify_metadata_remove(uint64_t request_id, + const std::string &key, Context *on_finish); private: enum TaskCode { @@ -182,6 +194,8 @@ private: AsyncOpTracker m_async_op_tracker; + NoOpProgressContext m_no_op_prog_ctx; + void handle_register_watch(int r); void schedule_cancel_async_requests(); @@ -230,6 +244,12 @@ private: size_t attempts, ProgressContext &prog_ctx, Context *on_finish); + bool handle_operation_request( + const watch_notify::AsyncRequestId& async_request_id, + exclusive_lock::OperationRequestType request_type, Operation operation, + std::function<void(ProgressContext &prog_ctx, Context*)> execute, + C_NotifyAck *ack_ctx); + bool handle_payload(const watch_notify::HeaderUpdatePayload& payload, C_NotifyAck *ctx); bool handle_payload(const watch_notify::AcquiredLockPayload& payload, diff --git a/src/librbd/Operations.cc b/src/librbd/Operations.cc index 7db2c0361d3..cd5506cf757 100644 --- a/src/librbd/Operations.cc +++ b/src/librbd/Operations.cc @@ -51,6 +51,60 @@ using namespace boost::placeholders; namespace { +std::ostream &operator<<(std::ostream &out, const Operation &op) { + switch (op) { + case OPERATION_CHECK_OBJECT_MAP: + out << "check object map"; + break; + case OPERATION_FLATTEN: + out << "flatten"; + break; + case OPERATION_METADATA_UPDATE: + out << "metadata update"; + break; + case OPERATION_MIGRATE: + out << "migrate"; + break; + case OPERATION_REBUILD_OBJECT_MAP: + out << "rebuild object map"; + break; + case OPERATION_RENAME: + out << "rename"; + break; + case OPERATION_RESIZE: + out << "resize"; + break; + case OPERATION_SNAP_CREATE: + out << "snap create"; + break; + case OPERATION_SNAP_PROTECT: + out << "snap protect"; + break; + case OPERATION_SNAP_REMOVE: + out << "snap remove"; + break; + case OPERATION_SNAP_RENAME: + out << "snap 
rename"; + break; + case OPERATION_SNAP_ROLLBACK: + out << "snap rollback"; + break; + case OPERATION_SNAP_UNPROTECT: + out << "snap unprotect"; + break; + case OPERATION_SPARSIFY: + out << "sparsify"; + break; + case OPERATION_UPDATE_FEATURES: + out << "update features"; + break; + default: + ceph_abort(); + break; + } + return out; +} + template <typename I> struct C_NotifyUpdate : public Context { I &image_ctx; @@ -125,7 +179,7 @@ struct C_InvokeAsyncRequest : public Context { */ I &image_ctx; - std::string name; + Operation operation; exclusive_lock::OperationRequestType request_type; bool permit_snapshot; boost::function<void(Context*)> local; @@ -134,14 +188,14 @@ struct C_InvokeAsyncRequest : public Context { Context *on_finish; bool request_lock = false; - C_InvokeAsyncRequest(I &image_ctx, const std::string& name, + C_InvokeAsyncRequest(I &image_ctx, Operation operation, exclusive_lock::OperationRequestType request_type, bool permit_snapshot, const boost::function<void(Context*)>& local, const boost::function<void(Context*)>& remote, const std::set<int> &filter_error_codes, Context *on_finish) - : image_ctx(image_ctx), name(name), request_type(request_type), + : image_ctx(image_ctx), operation(operation), request_type(request_type), permit_snapshot(permit_snapshot), local(local), remote(remote), filter_error_codes(filter_error_codes), on_finish(on_finish) { } @@ -270,7 +324,8 @@ struct C_InvokeAsyncRequest : public Context { ldout(cct, 20) << __func__ << ": r=" << r << dendl; if (r == -EOPNOTSUPP) { - ldout(cct, 5) << name << " not supported by current lock owner" << dendl; + ldout(cct, 5) << operation << " not supported by current lock owner" + << dendl; request_lock = true; send_refresh_image(); return; @@ -281,12 +336,26 @@ struct C_InvokeAsyncRequest : public Context { return; } - ldout(cct, 5) << name << " timed out notifying lock owner" << dendl; + ldout(cct, 5) << operation << " timed out notifying lock owner" << dendl; send_refresh_image(); } void 
send_local_request() { - ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock)); + auto ctx = new LambdaContext( + [this](int r) { + if (r == -ERESTART) { + image_ctx.operations->finish_op(operation, r); + send_refresh_image(); + return; + } + execute_local_request(); + }); + + image_ctx.operations->start_op(operation, ctx); + } + + void execute_local_request() { + std::shared_lock owner_locker{image_ctx.owner_lock}; CephContext *cct = image_ctx.cct; ldout(cct, 20) << __func__ << dendl; @@ -302,6 +371,8 @@ struct C_InvokeAsyncRequest : public Context { CephContext *cct = image_ctx.cct; ldout(cct, 20) << __func__ << ": r=" << r << dendl; + image_ctx.operations->finish_op(operation, r); + if (r == -ERESTART) { send_refresh_image(); return; @@ -333,7 +404,70 @@ bool needs_invalidate(I& image_ctx, uint64_t object_no, template <typename I> Operations<I>::Operations(I &image_ctx) - : m_image_ctx(image_ctx), m_async_request_seq(0) { + : m_image_ctx(image_ctx), + m_queue_lock(ceph::make_mutex( + util::unique_lock_name("librbd::Operations::m_queue_lock", + this))) { +} + +template <typename I> +void Operations<I>::start_op(Operation op, Context *ctx) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << __func__ << ": " << op << " " << ctx << dendl; + + ceph_assert(ceph_mutex_is_locked(m_image_ctx.owner_lock)); + bool requires_lock = m_image_ctx.exclusive_lock != nullptr; + + ctx = util::create_async_context_callback( + m_image_ctx, new LambdaContext( + [this, op, requires_lock, ctx](int r) { + Context *finish_op_ctx = nullptr; + if (requires_lock && r == 0) { + std::shared_lock owner_locker{m_image_ctx.owner_lock}; + std::shared_lock image_locker{m_image_ctx.image_lock}; + auto exclusive_lock = m_image_ctx.exclusive_lock; + + if (exclusive_lock == nullptr || + (finish_op_ctx = exclusive_lock->start_op(&r)) == nullptr) { + ldout(m_image_ctx.cct, 20) << "lock owner lost, restarting" + << dendl; + r = -ERESTART; + } + } + + ldout(m_image_ctx.cct, 20) << "start " << op 
<< " " << ctx << dendl; + ctx->complete(r); + if (finish_op_ctx != nullptr) { + finish_op_ctx->complete(0); + } + })); + + std::unique_lock locker{m_queue_lock}; + if (!m_in_flight_ops.insert(op).second) { + ldout(cct, 20) << __func__ << ": " << op << " in flight" << dendl; + m_queued_ops[op].push_back(ctx); + return; + } + + ctx->complete(0); +} + +template <typename I> +void Operations<I>::finish_op(Operation op, int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << __func__ << ": " << op << " r=" << r << dendl; + + std::unique_lock locker{m_queue_lock}; + auto &queue = m_queued_ops[op]; + if (queue.empty()) { + m_in_flight_ops.erase(op); + return; + } + + auto ctx = queue.front(); + queue.pop_front(); + // propagate -ERESTART through all the queue + ctx->complete(r == -ERESTART ? r : 0); } template <typename I> @@ -358,8 +492,8 @@ int Operations<I>::flatten(ProgressContext &prog_ctx) { } } - uint64_t request_id = ++m_async_request_seq; - r = invoke_async_request("flatten", + uint64_t request_id = util::reserve_async_request_id(); + r = invoke_async_request(OPERATION_FLATTEN, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, false, boost::bind(&Operations<I>::execute_flatten, this, @@ -432,8 +566,8 @@ int Operations<I>::rebuild_object_map(ProgressContext &prog_ctx) { return r; } - uint64_t request_id = ++m_async_request_seq; - r = invoke_async_request("rebuild object map", + uint64_t request_id = util::reserve_async_request_id(); + r = invoke_async_request(OPERATION_REBUILD_OBJECT_MAP, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true, boost::bind(&Operations<I>::execute_rebuild_object_map, this, boost::ref(prog_ctx), _1), @@ -484,7 +618,7 @@ int Operations<I>::check_object_map(ProgressContext &prog_ctx) { return r; } - r = invoke_async_request("check object map", + r = invoke_async_request(OPERATION_CHECK_OBJECT_MAP, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true, boost::bind(&Operations<I>::check_object_map, this, boost::ref(prog_ctx), _1), 
@@ -538,14 +672,15 @@ int Operations<I>::rename(const char *dstname) { } if (m_image_ctx.test_features(RBD_FEATURE_JOURNALING)) { - r = invoke_async_request("rename", + uint64_t request_id = util::reserve_async_request_id(); + r = invoke_async_request(OPERATION_RENAME, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true, boost::bind(&Operations<I>::execute_rename, this, dstname, _1), boost::bind(&ImageWatcher<I>::notify_rename, - m_image_ctx.image_watcher, dstname, - _1)); + m_image_ctx.image_watcher, request_id, + dstname, _1)); if (r < 0 && r != -EEXIST) { return r; } @@ -636,8 +771,8 @@ int Operations<I>::resize(uint64_t size, bool allow_shrink, ProgressContext& pro return -EINVAL; } - uint64_t request_id = ++m_async_request_seq; - r = invoke_async_request("resize", + uint64_t request_id = util::reserve_async_request_id(); + r = invoke_async_request(OPERATION_RESIZE, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, false, boost::bind(&Operations<I>::execute_resize, this, @@ -731,10 +866,10 @@ void Operations<I>::snap_create(const cls::rbd::SnapshotNamespace &snap_namespac } m_image_ctx.image_lock.unlock_shared(); - uint64_t request_id = ++m_async_request_seq; + uint64_t request_id = util::reserve_async_request_id(); C_InvokeAsyncRequest<I> *req = new C_InvokeAsyncRequest<I>( - m_image_ctx, "snap_create", exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, - true, + m_image_ctx, OPERATION_SNAP_CREATE, + exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true, boost::bind(&Operations<I>::execute_snap_create, this, snap_namespace, snap_name, _1, 0, flags, boost::ref(prog_ctx)), boost::bind(&ImageWatcher<I>::notify_snap_create, m_image_ctx.image_watcher, @@ -818,7 +953,22 @@ int Operations<I>::snap_rollback(const cls::rbd::SnapshotNamespace& snap_namespa return r; } - execute_snap_rollback(snap_namespace, snap_name, prog_ctx, &cond_ctx); + Context *ctx = new LambdaContext( + [this, ctx=&cond_ctx](int r) { + m_image_ctx.operations->finish_op(OPERATION_SNAP_ROLLBACK, r); + 
ctx->complete(r); + }); + ctx = new LambdaContext( + [this, snap_namespace, snap_name, &prog_ctx, ctx](int r) { + if (r < 0) { + ctx->complete(r); + return; + } + std::shared_lock l{m_image_ctx.owner_lock}; + execute_snap_rollback(snap_namespace, snap_name, prog_ctx, ctx); + }); + + m_image_ctx.operations->start_op(OPERATION_SNAP_ROLLBACK, ctx); } r = cond_ctx.wait(); @@ -915,16 +1065,19 @@ void Operations<I>::snap_remove(const cls::rbd::SnapshotNamespace& snap_namespac m_image_ctx.image_lock.unlock_shared(); if (proxy_op) { + uint64_t request_id = util::reserve_async_request_id(); auto request_type = exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL; if (cls::rbd::get_snap_namespace_type(snap_namespace) == cls::rbd::SNAPSHOT_NAMESPACE_TYPE_TRASH) { request_type = exclusive_lock::OPERATION_REQUEST_TYPE_TRASH_SNAP_REMOVE; } C_InvokeAsyncRequest<I> *req = new C_InvokeAsyncRequest<I>( - m_image_ctx, "snap_remove", request_type, true, - boost::bind(&Operations<I>::execute_snap_remove, this, snap_namespace, snap_name, _1), - boost::bind(&ImageWatcher<I>::notify_snap_remove, m_image_ctx.image_watcher, - snap_namespace, snap_name, _1), + m_image_ctx, OPERATION_SNAP_REMOVE, request_type, true, + boost::bind(&Operations<I>::execute_snap_remove, this, snap_namespace, + snap_name, _1), + boost::bind(&ImageWatcher<I>::notify_snap_remove, + m_image_ctx.image_watcher, request_id, snap_namespace, + snap_name, _1), {-ENOENT}, on_finish); req->send(); } else { @@ -1012,14 +1165,15 @@ int Operations<I>::snap_rename(const char *srcname, const char *dstname) { } if (m_image_ctx.test_features(RBD_FEATURE_JOURNALING)) { - r = invoke_async_request("snap_rename", + uint64_t request_id = util::reserve_async_request_id(); + r = invoke_async_request(OPERATION_SNAP_RENAME, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true, boost::bind(&Operations<I>::execute_snap_rename, this, snap_id, dstname, _1), boost::bind(&ImageWatcher<I>::notify_snap_rename, - m_image_ctx.image_watcher, snap_id, - 
dstname, _1)); + m_image_ctx.image_watcher, request_id, + snap_id, dstname, _1)); if (r < 0 && r != -EEXIST) { return r; } @@ -1113,13 +1267,14 @@ int Operations<I>::snap_protect(const cls::rbd::SnapshotNamespace& snap_namespac } if (m_image_ctx.test_features(RBD_FEATURE_JOURNALING)) { - r = invoke_async_request("snap_protect", + uint64_t request_id = util::reserve_async_request_id(); + r = invoke_async_request(OPERATION_SNAP_PROTECT, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true, boost::bind(&Operations<I>::execute_snap_protect, this, snap_namespace, snap_name, _1), boost::bind(&ImageWatcher<I>::notify_snap_protect, - m_image_ctx.image_watcher, + m_image_ctx.image_watcher, request_id, snap_namespace, snap_name, _1)); if (r < 0 && r != -EBUSY) { return r; @@ -1210,13 +1365,14 @@ int Operations<I>::snap_unprotect(const cls::rbd::SnapshotNamespace& snap_namesp } if (m_image_ctx.test_features(RBD_FEATURE_JOURNALING)) { - r = invoke_async_request("snap_unprotect", + uint64_t request_id = util::reserve_async_request_id(); + r = invoke_async_request(OPERATION_SNAP_UNPROTECT, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, true, boost::bind(&Operations<I>::execute_snap_unprotect, this, snap_namespace, snap_name, _1), boost::bind(&ImageWatcher<I>::notify_snap_unprotect, - m_image_ctx.image_watcher, + m_image_ctx.image_watcher, request_id, snap_namespace, snap_name, _1)); if (r < 0 && r != -EINVAL) { return r; @@ -1410,14 +1566,15 @@ int Operations<I>::update_features(uint64_t features, bool enabled) { r = cond_ctx.wait(); } else { - r = invoke_async_request("update_features", + uint64_t request_id = util::reserve_async_request_id(); + r = invoke_async_request(OPERATION_UPDATE_FEATURES, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, false, boost::bind(&Operations<I>::execute_update_features, this, features, enabled, _1, 0), boost::bind(&ImageWatcher<I>::notify_update_features, - m_image_ctx.image_watcher, features, - enabled, _1)); + m_image_ctx.image_watcher, 
request_id, + features, enabled, _1)); } ldout(cct, 2) << "update_features finished" << dendl; return r; @@ -1484,13 +1641,14 @@ int Operations<I>::metadata_set(const std::string &key, return -EROFS; } - r = invoke_async_request("metadata_set", + uint64_t request_id = util::reserve_async_request_id(); + r = invoke_async_request(OPERATION_METADATA_UPDATE, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, false, boost::bind(&Operations<I>::execute_metadata_set, this, key, value, _1), boost::bind(&ImageWatcher<I>::notify_metadata_set, - m_image_ctx.image_watcher, + m_image_ctx.image_watcher, request_id, key, value, _1)); if (config_override && r >= 0) { @@ -1543,13 +1701,15 @@ int Operations<I>::metadata_remove(const std::string &key) { if(r < 0) return r; - r = invoke_async_request("metadata_remove", + uint64_t request_id = util::reserve_async_request_id(); + r = invoke_async_request(OPERATION_METADATA_UPDATE, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, false, boost::bind(&Operations<I>::execute_metadata_remove, this, key, _1), boost::bind(&ImageWatcher<I>::notify_metadata_remove, - m_image_ctx.image_watcher, key, _1)); + m_image_ctx.image_watcher, request_id, + key, _1)); if (r == -ENOENT) { r = 0; } @@ -1606,8 +1766,8 @@ int Operations<I>::migrate(ProgressContext &prog_ctx) { } } - uint64_t request_id = ++m_async_request_seq; - r = invoke_async_request("migrate", + uint64_t request_id = util::reserve_async_request_id(); + r = invoke_async_request(OPERATION_MIGRATE, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, false, boost::bind(&Operations<I>::execute_migrate, this, @@ -1672,8 +1832,8 @@ int Operations<I>::sparsify(size_t sparse_size, ProgressContext &prog_ctx) { return -EINVAL; } - uint64_t request_id = ++m_async_request_seq; - int r = invoke_async_request("sparsify", + uint64_t request_id = util::reserve_async_request_id(); + int r = invoke_async_request(OPERATION_SPARSIFY, exclusive_lock::OPERATION_REQUEST_TYPE_GENERAL, false, 
boost::bind(&Operations<I>::execute_sparsify, @@ -1766,11 +1926,11 @@ int Operations<I>::prepare_image_update( template <typename I> int Operations<I>::invoke_async_request( - const std::string& name, exclusive_lock::OperationRequestType request_type, + Operation op, exclusive_lock::OperationRequestType request_type, bool permit_snapshot, const boost::function<void(Context*)>& local_request, const boost::function<void(Context*)>& remote_request) { C_SaferCond ctx; - C_InvokeAsyncRequest<I> *req = new C_InvokeAsyncRequest<I>(m_image_ctx, name, + C_InvokeAsyncRequest<I> *req = new C_InvokeAsyncRequest<I>(m_image_ctx, op, request_type, permit_snapshot, local_request, diff --git a/src/librbd/Operations.h b/src/librbd/Operations.h index fec7e16da41..52d1484e7ed 100644 --- a/src/librbd/Operations.h +++ b/src/librbd/Operations.h @@ -10,6 +10,9 @@ #include "librbd/operation/ObjectMapIterate.h" #include <atomic> #include <string> +#include <list> +#include <map> +#include <set> #include <boost/function.hpp> class Context; @@ -19,14 +22,31 @@ namespace librbd { class ImageCtx; class ProgressContext; +enum Operation { + OPERATION_CHECK_OBJECT_MAP, + OPERATION_FLATTEN, + OPERATION_METADATA_UPDATE, + OPERATION_MIGRATE, + OPERATION_REBUILD_OBJECT_MAP, + OPERATION_RENAME, + OPERATION_RESIZE, + OPERATION_SNAP_CREATE, + OPERATION_SNAP_PROTECT, + OPERATION_SNAP_REMOVE, + OPERATION_SNAP_RENAME, + OPERATION_SNAP_ROLLBACK, + OPERATION_SNAP_UNPROTECT, + OPERATION_SPARSIFY, + OPERATION_UPDATE_FEATURES, +}; + template <typename ImageCtxT = ImageCtx> class Operations { public: Operations(ImageCtxT &image_ctx); - uint64_t reserve_async_request_id() { - return ++m_async_request_seq; - } + void start_op(enum Operation op, Context *ctx); + void finish_op(enum Operation op, int r); int flatten(ProgressContext &prog_ctx); void execute_flatten(ProgressContext &prog_ctx, Context *on_finish); @@ -119,9 +139,12 @@ public: private: ImageCtxT &m_image_ctx; - std::atomic<uint64_t> m_async_request_seq; 
- int invoke_async_request(const std::string& name, + mutable ceph::mutex m_queue_lock; + std::set<Operation> m_in_flight_ops; + std::map<Operation, std::list<Context *>> m_queued_ops; + + int invoke_async_request(Operation op, exclusive_lock::OperationRequestType request_type, bool permit_snapshot, const boost::function<void(Context*)>& local, diff --git a/src/librbd/PluginRegistry.cc b/src/librbd/PluginRegistry.cc index a4dabb7d33e..6ddf0a414f8 100644 --- a/src/librbd/PluginRegistry.cc +++ b/src/librbd/PluginRegistry.cc @@ -4,6 +4,7 @@ #include "librbd/PluginRegistry.h" #include "include/Context.h" #include "common/dout.h" +#include "librbd/cache/ImageWriteback.h" #include "librbd/ImageCtx.h" #include "librbd/plugin/Api.h" #include <boost/tokenizer.hpp> @@ -17,7 +18,8 @@ namespace librbd { template <typename I> PluginRegistry<I>::PluginRegistry(I* image_ctx) - : m_image_ctx(image_ctx), m_plugin_api(std::make_unique<plugin::Api<I>>()) { + : m_image_ctx(image_ctx), m_plugin_api(std::make_unique<plugin::Api<I>>()), + m_image_writeback(std::make_unique<cache::ImageWriteback<I>>(*image_ctx)) { } template <typename I> @@ -45,14 +47,55 @@ void PluginRegistry<I>::init(const std::string& plugins, Context* on_finish) { break; } - m_plugin_hook_points.emplace_back(); - auto hook_points = &m_plugin_hook_points.back(); - plugin->init(m_image_ctx, *m_plugin_api, hook_points, ctx); + plugin->init( + m_image_ctx, *m_plugin_api, *m_image_writeback, m_plugin_hook_points, ctx); } gather_ctx->activate(); } +template <typename I> +void PluginRegistry<I>::acquired_exclusive_lock(Context* on_finish) { + auto cct = m_image_ctx->cct; + ldout(cct, 20) << dendl; + + auto gather_ctx = new C_Gather(cct, on_finish); + + for (auto &hook : m_plugin_hook_points) { + auto ctx = gather_ctx->new_sub(); + hook->acquired_exclusive_lock(ctx); + } + gather_ctx->activate(); +} + +template <typename I> +void PluginRegistry<I>::prerelease_exclusive_lock(Context* on_finish) { + auto cct = m_image_ctx->cct; 
+ ldout(cct, 20) << dendl; + + auto gather_ctx = new C_Gather(cct, on_finish); + + for (auto &hook : m_plugin_hook_points) { + auto ctx = gather_ctx->new_sub(); + hook->prerelease_exclusive_lock(ctx); + } + gather_ctx->activate(); +} + +template <typename I> +void PluginRegistry<I>::discard(Context* on_finish) { + auto cct = m_image_ctx->cct; + ldout(cct, 20) << dendl; + + auto gather_ctx = new C_Gather(cct, on_finish); + + for (auto &hook : m_plugin_hook_points) { + auto ctx = gather_ctx->new_sub(); + hook->discard(ctx); + } + gather_ctx->activate(); +} + } // namespace librbd template class librbd::PluginRegistry<librbd::ImageCtx>; diff --git a/src/librbd/PluginRegistry.h b/src/librbd/PluginRegistry.h index ca343fdd814..92e183ce10a 100644 --- a/src/librbd/PluginRegistry.h +++ b/src/librbd/PluginRegistry.h @@ -15,6 +15,10 @@ namespace librbd { struct ImageCtx; +namespace cache { +class ImageWritebackInterface; +} + namespace plugin { template <typename> struct Api; } template <typename ImageCtxT> @@ -25,15 +29,18 @@ public: void init(const std::string& plugins, Context* on_finish); -private: - typedef std::list<plugin::HookPoints> PluginHookPoints; + void acquired_exclusive_lock(Context* on_finish); + void prerelease_exclusive_lock(Context* on_finish); + void discard(Context* on_finish); +private: ImageCtxT* m_image_ctx; std::unique_ptr<plugin::Api<ImageCtxT>> m_plugin_api; + std::unique_ptr<cache::ImageWritebackInterface> m_image_writeback; std::string m_plugins; - PluginHookPoints m_plugin_hook_points; + plugin::PluginHookPoints m_plugin_hook_points; }; diff --git a/src/librbd/Utils.cc b/src/librbd/Utils.cc index 36e8fa353a6..af2b1655797 100644 --- a/src/librbd/Utils.cc +++ b/src/librbd/Utils.cc @@ -194,5 +194,11 @@ SnapContext get_snap_context( return snapc; } +uint64_t reserve_async_request_id() { + static std::atomic<uint64_t> async_request_seq = 0; + + return ++async_request_seq; +} + } // namespace util } // namespace librbd diff --git a/src/librbd/Utils.h 
b/src/librbd/Utils.h index 134447f7cd8..736a5063ac8 100644 --- a/src/librbd/Utils.h +++ b/src/librbd/Utils.h @@ -274,6 +274,8 @@ SnapContext get_snap_context( std::pair<std::uint64_t, std::vector<std::uint64_t>>>& write_snap_context); +uint64_t reserve_async_request_id(); + } // namespace util } // namespace librbd diff --git a/src/librbd/WatchNotifyTypes.cc b/src/librbd/WatchNotifyTypes.cc index 2f145955977..413983f3e63 100644 --- a/src/librbd/WatchNotifyTypes.cc +++ b/src/librbd/WatchNotifyTypes.cc @@ -168,15 +168,16 @@ void ResizePayload::decode(__u8 version, bufferlist::const_iterator &iter) { } void ResizePayload::dump(Formatter *f) const { + AsyncRequestPayloadBase::dump(f); f->dump_unsigned("size", size); f->dump_bool("allow_shrink", allow_shrink); - AsyncRequestPayloadBase::dump(f); } void SnapPayloadBase::encode(bufferlist &bl) const { using ceph::encode; encode(snap_name, bl); encode(snap_namespace, bl); + encode(async_request_id, bl); } void SnapPayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { @@ -185,9 +186,13 @@ void SnapPayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { if (version >= 6) { decode(snap_namespace, iter); } + if (version >= 7) { + decode(async_request_id, iter); + } } void SnapPayloadBase::dump(Formatter *f) const { + AsyncRequestPayloadBase::dump(f); f->dump_string("snap_name", snap_name); snap_namespace.dump(f); } @@ -195,7 +200,6 @@ void SnapPayloadBase::dump(Formatter *f) const { void SnapCreatePayload::encode(bufferlist &bl) const { using ceph::encode; SnapPayloadBase::encode(bl); - encode(async_request_id, bl); encode(flags, bl); } @@ -206,15 +210,11 @@ void SnapCreatePayload::decode(__u8 version, bufferlist::const_iterator &iter) { decode(snap_namespace, iter); } if (version >= 7) { - decode(async_request_id, iter); decode(flags, iter); } } void SnapCreatePayload::dump(Formatter *f) const { - f->open_object_section("async_request_id"); - async_request_id.dump(f); - f->close_section(); 
SnapPayloadBase::dump(f); f->dump_unsigned("flags", flags); } @@ -232,21 +232,26 @@ void SnapRenamePayload::decode(__u8 version, bufferlist::const_iterator &iter) { } void SnapRenamePayload::dump(Formatter *f) const { - f->dump_unsigned("src_snap_id", snap_id); SnapPayloadBase::dump(f); + f->dump_unsigned("src_snap_id", snap_id); } void RenamePayload::encode(bufferlist &bl) const { using ceph::encode; encode(image_name, bl); + encode(async_request_id, bl); } void RenamePayload::decode(__u8 version, bufferlist::const_iterator &iter) { using ceph::decode; decode(image_name, iter); + if (version >= 7) { + decode(async_request_id, iter); + } } void RenamePayload::dump(Formatter *f) const { + AsyncRequestPayloadBase::dump(f); f->dump_string("image_name", image_name); } @@ -254,15 +259,20 @@ void UpdateFeaturesPayload::encode(bufferlist &bl) const { using ceph::encode; encode(features, bl); encode(enabled, bl); + encode(async_request_id, bl); } void UpdateFeaturesPayload::decode(__u8 version, bufferlist::const_iterator &iter) { using ceph::decode; decode(features, iter); decode(enabled, iter); + if (version >= 7) { + decode(async_request_id, iter); + } } void UpdateFeaturesPayload::dump(Formatter *f) const { + AsyncRequestPayloadBase::dump(f); f->dump_unsigned("features", features); f->dump_bool("enabled", enabled); } @@ -288,15 +298,20 @@ void MetadataUpdatePayload::encode(bufferlist &bl) const { using ceph::encode; encode(key, bl); encode(value, bl); + encode(async_request_id, bl); } void MetadataUpdatePayload::decode(__u8 version, bufferlist::const_iterator &iter) { using ceph::decode; decode(key, iter); decode(value, iter); + if (version >= 7) { + decode(async_request_id, iter); + } } void MetadataUpdatePayload::dump(Formatter *f) const { + AsyncRequestPayloadBase::dump(f); f->dump_string("key", key); f->dump_string("value", *value); } @@ -415,21 +430,26 @@ void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) { o.push_back(new NotifyMessage(new 
AsyncProgressPayload(AsyncRequestId(ClientId(0, 1), 2), 3, 4))); o.push_back(new NotifyMessage(new AsyncCompletePayload(AsyncRequestId(ClientId(0, 1), 2), 3))); o.push_back(new NotifyMessage(new FlattenPayload(AsyncRequestId(ClientId(0, 1), 2)))); - o.push_back(new NotifyMessage(new ResizePayload(123, true, AsyncRequestId(ClientId(0, 1), 2)))); + o.push_back(new NotifyMessage(new ResizePayload(AsyncRequestId(ClientId(0, 1), 2), 123, true))); o.push_back(new NotifyMessage(new SnapCreatePayload(AsyncRequestId(ClientId(0, 1), 2), cls::rbd::UserSnapshotNamespace(), "foo", 1))); - o.push_back(new NotifyMessage(new SnapRemovePayload(cls::rbd::UserSnapshotNamespace(), "foo"))); - o.push_back(new NotifyMessage(new SnapProtectPayload(cls::rbd::UserSnapshotNamespace(), "foo"))); - o.push_back(new NotifyMessage(new SnapUnprotectPayload(cls::rbd::UserSnapshotNamespace(), "foo"))); + o.push_back(new NotifyMessage(new SnapRemovePayload(AsyncRequestId(ClientId(0, 1), 2), + cls::rbd::UserSnapshotNamespace(), "foo"))); + o.push_back(new NotifyMessage(new SnapProtectPayload(AsyncRequestId(ClientId(0, 1), 2), + cls::rbd::UserSnapshotNamespace(), "foo"))); + o.push_back(new NotifyMessage(new SnapUnprotectPayload(AsyncRequestId(ClientId(0, 1), 2), + cls::rbd::UserSnapshotNamespace(), "foo"))); o.push_back(new NotifyMessage(new RebuildObjectMapPayload(AsyncRequestId(ClientId(0, 1), 2)))); - o.push_back(new NotifyMessage(new RenamePayload("foo"))); - o.push_back(new NotifyMessage(new UpdateFeaturesPayload(1, true))); + o.push_back(new NotifyMessage(new RenamePayload(AsyncRequestId(ClientId(0, 1), 2), "foo"))); + o.push_back(new NotifyMessage(new UpdateFeaturesPayload(AsyncRequestId(ClientId(0, 1), 2), + 1, true))); o.push_back(new NotifyMessage(new MigratePayload(AsyncRequestId(ClientId(0, 1), 2)))); o.push_back(new NotifyMessage(new SparsifyPayload(AsyncRequestId(ClientId(0, 1), 2), 1))); o.push_back(new NotifyMessage(new QuiescePayload(AsyncRequestId(ClientId(0, 1), 2)))); 
o.push_back(new NotifyMessage(new UnquiescePayload(AsyncRequestId(ClientId(0, 1), 2)))); - o.push_back(new NotifyMessage(new MetadataUpdatePayload("foo", std::optional<std::string>{"xyz"}))); + o.push_back(new NotifyMessage(new MetadataUpdatePayload(AsyncRequestId(ClientId(0, 1), 2), + "foo", std::optional<std::string>{"xyz"}))); } void ResponseMessage::encode(bufferlist& bl) const { diff --git a/src/librbd/WatchNotifyTypes.h b/src/librbd/WatchNotifyTypes.h index b8101f45413..ca0b40f28f0 100644 --- a/src/librbd/WatchNotifyTypes.h +++ b/src/librbd/WatchNotifyTypes.h @@ -226,7 +226,7 @@ struct ResizePayload : public AsyncRequestPayloadBase { bool allow_shrink = true; ResizePayload() {} - ResizePayload(uint64_t size, bool allow_shrink, const AsyncRequestId &id) + ResizePayload(const AsyncRequestId &id, uint64_t size, bool allow_shrink) : AsyncRequestPayloadBase(id), size(size), allow_shrink(allow_shrink) {} NotifyOp get_notify_op() const override { @@ -241,7 +241,7 @@ struct ResizePayload : public AsyncRequestPayloadBase { void dump(Formatter *f) const override; }; -struct SnapPayloadBase : public Payload { +struct SnapPayloadBase : public AsyncRequestPayloadBase { public: cls::rbd::SnapshotNamespace snap_namespace; std::string snap_name; @@ -256,21 +256,22 @@ public: protected: SnapPayloadBase() {} - SnapPayloadBase(const cls::rbd::SnapshotNamespace& snap_namespace, + SnapPayloadBase(const AsyncRequestId &id, + const cls::rbd::SnapshotNamespace& snap_namespace, const std::string &name) - : snap_namespace(snap_namespace), snap_name(name) {} + : AsyncRequestPayloadBase(id), snap_namespace(snap_namespace), + snap_name(name) { + } }; struct SnapCreatePayload : public SnapPayloadBase { - AsyncRequestId async_request_id; uint64_t flags = 0; SnapCreatePayload() {} SnapCreatePayload(const AsyncRequestId &id, const cls::rbd::SnapshotNamespace &snap_namespace, const std::string &name, uint64_t flags) - : SnapPayloadBase(snap_namespace, name), async_request_id(id), - 
flags(flags) { + : SnapPayloadBase(id, snap_namespace, name), flags(flags) { } NotifyOp get_notify_op() const override { @@ -286,9 +287,12 @@ struct SnapRenamePayload : public SnapPayloadBase { uint64_t snap_id = 0; SnapRenamePayload() {} - SnapRenamePayload(const uint64_t &src_snap_id, + SnapRenamePayload(const AsyncRequestId &id, + const uint64_t &src_snap_id, const std::string &dst_name) - : SnapPayloadBase(cls::rbd::UserSnapshotNamespace(), dst_name), snap_id(src_snap_id) {} + : SnapPayloadBase(id, cls::rbd::UserSnapshotNamespace(), dst_name), + snap_id(src_snap_id) { + } NotifyOp get_notify_op() const override { return NOTIFY_OP_SNAP_RENAME; @@ -301,9 +305,11 @@ struct SnapRenamePayload : public SnapPayloadBase { struct SnapRemovePayload : public SnapPayloadBase { SnapRemovePayload() {} - SnapRemovePayload(const cls::rbd::SnapshotNamespace& snap_namespace, + SnapRemovePayload(const AsyncRequestId &id, + const cls::rbd::SnapshotNamespace& snap_namespace, const std::string &name) - : SnapPayloadBase(snap_namespace, name) {} + : SnapPayloadBase(id, snap_namespace, name) { + } NotifyOp get_notify_op() const override { return NOTIFY_OP_SNAP_REMOVE; @@ -312,9 +318,11 @@ struct SnapRemovePayload : public SnapPayloadBase { struct SnapProtectPayload : public SnapPayloadBase { SnapProtectPayload() {} - SnapProtectPayload(const cls::rbd::SnapshotNamespace& snap_namespace, + SnapProtectPayload(const AsyncRequestId &id, + const cls::rbd::SnapshotNamespace& snap_namespace, const std::string &name) - : SnapPayloadBase(snap_namespace, name) {} + : SnapPayloadBase(id, snap_namespace, name) { + } NotifyOp get_notify_op() const override { return NOTIFY_OP_SNAP_PROTECT; @@ -323,9 +331,11 @@ struct SnapProtectPayload : public SnapPayloadBase { struct SnapUnprotectPayload : public SnapPayloadBase { SnapUnprotectPayload() {} - SnapUnprotectPayload(const cls::rbd::SnapshotNamespace& snap_namespace, + SnapUnprotectPayload(const AsyncRequestId &id, + const cls::rbd::SnapshotNamespace& 
snap_namespace, const std::string &name) - : SnapPayloadBase(snap_namespace, name) {} + : SnapPayloadBase(id, snap_namespace, name) { + } NotifyOp get_notify_op() const override { return NOTIFY_OP_SNAP_UNPROTECT; @@ -345,11 +355,13 @@ struct RebuildObjectMapPayload : public AsyncRequestPayloadBase { } }; -struct RenamePayload : public Payload { +struct RenamePayload : public AsyncRequestPayloadBase { std::string image_name; RenamePayload() {} - RenamePayload(const std::string _image_name) : image_name(_image_name) {} + RenamePayload(const AsyncRequestId &id, const std::string _image_name) + : AsyncRequestPayloadBase(id), image_name(_image_name) { + } NotifyOp get_notify_op() const override { return NOTIFY_OP_RENAME; @@ -363,13 +375,15 @@ struct RenamePayload : public Payload { void dump(Formatter *f) const; }; -struct UpdateFeaturesPayload : public Payload { +struct UpdateFeaturesPayload : public AsyncRequestPayloadBase { uint64_t features = 0; bool enabled = false; UpdateFeaturesPayload() {} - UpdateFeaturesPayload(uint64_t features, bool enabled) - : features(features), enabled(enabled) {} + UpdateFeaturesPayload(const AsyncRequestId &id, uint64_t features, + bool enabled) + : AsyncRequestPayloadBase(id), features(features), enabled(enabled) { + } NotifyOp get_notify_op() const override { return NOTIFY_OP_UPDATE_FEATURES; @@ -439,12 +453,14 @@ struct UnquiescePayload : public AsyncRequestPayloadBase { } }; -struct MetadataUpdatePayload : public Payload { +struct MetadataUpdatePayload : public AsyncRequestPayloadBase { std::string key; std::optional<std::string> value; MetadataUpdatePayload() {} - MetadataUpdatePayload(std::string key, std::optional<std::string> value) - : key(key), value(value) {} + MetadataUpdatePayload(const AsyncRequestId &id, std::string key, + std::optional<std::string> value) + : AsyncRequestPayloadBase(id), key(key), value(value) { + } NotifyOp get_notify_op() const override { return NOTIFY_OP_METADATA_UPDATE; diff --git 
a/src/librbd/api/Migration.cc b/src/librbd/api/Migration.cc index 51d5602b73f..d9d2518992d 100644 --- a/src/librbd/api/Migration.cc +++ b/src/librbd/api/Migration.cc @@ -35,8 +35,8 @@ #include "librbd/image/Types.h" #include "librbd/internal.h" #include "librbd/migration/FormatInterface.h" +#include "librbd/migration/OpenSourceImageRequest.h" #include "librbd/migration/NativeFormat.h" -#include "librbd/migration/SourceSpecBuilder.h" #include "librbd/mirror/DisableRequest.h" #include "librbd/mirror/EnableRequest.h" @@ -269,6 +269,7 @@ int open_images(librados::IoCtx& io_ctx, const std::string &image_name, ldout(cct, 10) << "re-opening the destination image" << dendl; r = image_ctx->state->open(0); if (r < 0) { + image_ctx = nullptr; lderr(cct) << "failed to re-open destination image: " << cpp_strerror(r) << dendl; return r; @@ -531,38 +532,20 @@ int Migration<I>::prepare_import( << dest_io_ctx.get_pool_name() << "/" << dest_image_name << ", opts=" << opts << dendl; - auto src_image_ctx = I::create("", "", nullptr, dest_io_ctx, true); - auto asio_engine = src_image_ctx->asio_engine; - - migration::SourceSpecBuilder<I> source_spec_builder(src_image_ctx); - json_spirit::mObject source_spec_object; - int r = source_spec_builder.parse_source_spec(source_spec, - &source_spec_object); - if (r < 0) { - lderr(cct) << "failed to parse source spec: " << cpp_strerror(r) - << dendl; - src_image_ctx->state->close(); - return r; - } - - std::unique_ptr<migration::FormatInterface> format; - r = source_spec_builder.build_format(source_spec_object, true, &format); - if (r < 0) { - lderr(cct) << "failed to build migration format handler: " - << cpp_strerror(r) << dendl; - src_image_ctx->state->close(); - return r; - } - + I* src_image_ctx = nullptr; C_SaferCond open_ctx; - format->open(&open_ctx); - r = open_ctx.wait(); + auto req = migration::OpenSourceImageRequest<I>::create( + dest_io_ctx, nullptr, CEPH_NOSNAP, + {-1, "", "", "", source_spec, {}, 0, false}, &src_image_ctx, 
&open_ctx); + req->send(); + + int r = open_ctx.wait(); if (r < 0) { - lderr(cct) << "failed to open migration source: " << cpp_strerror(r) - << dendl; + lderr(cct) << "failed to open source image: " << cpp_strerror(r) << dendl; return r; } + auto asio_engine = src_image_ctx->asio_engine; BOOST_SCOPE_EXIT_TPL(src_image_ctx) { src_image_ctx->state->close(); } BOOST_SCOPE_EXIT_END; @@ -583,6 +566,18 @@ int Migration<I>::prepare_import( ldout(cct, 20) << "updated opts=" << opts << dendl; + // use json-spirit to clean-up json formatting + json_spirit::mObject source_spec_object; + json_spirit::mValue json_root; + if(json_spirit::read(source_spec, json_root)) { + try { + source_spec_object = json_root.get_obj(); + } catch (std::runtime_error&) { + lderr(cct) << "failed to clean source spec" << dendl; + return -EINVAL; + } + } + auto dst_image_ctx = I::create( dest_image_name, util::generate_image_id(dest_io_ctx), nullptr, dest_io_ctx, false); @@ -817,19 +812,38 @@ int Migration<I>::get_source_spec(I* image_ctx, std::string* source_spec) { auto cct = image_ctx->cct; ldout(cct, 10) << dendl; - if (image_ctx->migration_info.empty()) { - return -ENOENT; + image_ctx->image_lock.lock_shared(); + auto migration_info = image_ctx->migration_info; + image_ctx->image_lock.unlock_shared(); + + if (migration_info.empty()) { + // attempt to directly read the spec in case the state is EXECUTED + cls::rbd::MigrationSpec migration_spec; + int r = cls_client::migration_get(&image_ctx->md_ctx, image_ctx->header_oid, + &migration_spec); + if (r == -ENOENT) { + return r; + } else if (r < 0) { + lderr(cct) << "failed retrieving migration header: " << cpp_strerror(r) + << dendl; + return r; + } + + migration_info = { + migration_spec.pool_id, migration_spec.pool_namespace, + migration_spec.image_name, migration_spec.image_id, + migration_spec.source_spec, {}, 0, false}; } - if (!image_ctx->migration_info.source_spec.empty()) { - *source_spec = image_ctx->migration_info.source_spec; + if 
(!migration_info.source_spec.empty()) { + *source_spec = migration_info.source_spec; } else { // legacy migration source *source_spec = migration::NativeFormat<I>::build_source_spec( - image_ctx->migration_info.pool_id, - image_ctx->migration_info.pool_namespace, - image_ctx->migration_info.image_name, - image_ctx->migration_info.image_id); + migration_info.pool_id, + migration_info.pool_namespace, + migration_info.image_name, + migration_info.image_id); } return 0; diff --git a/src/librbd/cache/Utils.cc b/src/librbd/cache/Utils.cc deleted file mode 100644 index c65c57551a5..00000000000 --- a/src/librbd/cache/Utils.cc +++ /dev/null @@ -1,24 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include "librbd/cache/pwl/DiscardRequest.h" -#include "librbd/cache/Utils.h" -#include "include/Context.h" - -namespace librbd { -namespace cache { -namespace util { - -template <typename I> -void discard_cache(I &image_ctx, Context *ctx) { - cache::pwl::DiscardRequest<I> *req = cache::pwl::DiscardRequest<I>::create( - image_ctx, ctx); - req->send(); -} - -} // namespace util -} // namespace cache -} // namespace librbd - -template void librbd::cache::util::discard_cache( - librbd::ImageCtx &image_ctx, Context *ctx); diff --git a/src/librbd/cache/Utils.h b/src/librbd/cache/Utils.h index 67c0b6fbc73..e338899c09e 100644 --- a/src/librbd/cache/Utils.h +++ b/src/librbd/cache/Utils.h @@ -24,9 +24,6 @@ bool is_pwl_enabled(T& image_ctx) { #endif // WITH_RBD_RWL } -template <typename T = librbd::ImageCtx> -void discard_cache(T &image_ctx, Context *ctx); - } // namespace util } // namespace cache } // namespace librbd diff --git a/src/librbd/cache/WriteLogImageDispatch.cc b/src/librbd/cache/WriteLogImageDispatch.cc index ce93d7ee83c..6cb8738e7f9 100644 --- a/src/librbd/cache/WriteLogImageDispatch.cc +++ b/src/librbd/cache/WriteLogImageDispatch.cc @@ -18,16 +18,6 @@ namespace librbd { namespace cache { -namespace { - -void 
start_in_flight_io(io::AioCompletion* aio_comp) { - if (!aio_comp->async_op.started()) { - aio_comp->start_op(); - } -} - -} // anonymous namespace - template <typename I> void WriteLogImageDispatch<I>::shut_down(Context* on_finish) { ceph_assert(m_image_cache != nullptr); @@ -39,7 +29,7 @@ void WriteLogImageDispatch<I>::shut_down(Context* on_finish) { }); cache::pwl::ShutdownRequest<I> *req = cache::pwl::ShutdownRequest<I>::create( - *m_image_ctx, m_image_cache, ctx); + *m_image_ctx, m_image_cache, m_plugin_api, ctx); req->send(); } @@ -64,14 +54,9 @@ bool WriteLogImageDispatch<I>::read( return true; } - start_in_flight_io(aio_comp); - - aio_comp->set_request_count(1); - aio_comp->read_result = std::move(read_result); - aio_comp->read_result.set_image_extents(image_extents); + m_plugin_api.update_aio_comp(aio_comp, 1, read_result, image_extents); - auto *req_comp = new io::ReadResult::C_ImageReadRequest( - aio_comp, image_extents); + auto *req_comp = m_plugin_api.create_image_read_request(aio_comp, 0, image_extents); m_image_cache->read(std::move(image_extents), &req_comp->bl, op_flags, @@ -94,10 +79,8 @@ bool WriteLogImageDispatch<I>::write( return true; } - start_in_flight_io(aio_comp); - - aio_comp->set_request_count(1); - io::C_AioRequest *req_comp = new io::C_AioRequest(aio_comp); + m_plugin_api.update_aio_comp(aio_comp, 1); + io::C_AioRequest *req_comp = m_plugin_api.create_aio_request(aio_comp); m_image_cache->write(std::move(image_extents), std::move(bl), op_flags, req_comp); return true; @@ -119,11 +102,9 @@ bool WriteLogImageDispatch<I>::discard( return true; } - start_in_flight_io(aio_comp); - - aio_comp->set_request_count(image_extents.size()); + m_plugin_api.update_aio_comp(aio_comp, image_extents.size()); for (auto &extent : image_extents) { - io::C_AioRequest *req_comp = new io::C_AioRequest(aio_comp); + io::C_AioRequest *req_comp = m_plugin_api.create_aio_request(aio_comp); m_image_cache->discard(extent.first, extent.second, 
discard_granularity_bytes, req_comp); @@ -147,11 +128,9 @@ bool WriteLogImageDispatch<I>::write_same( return true; } - start_in_flight_io(aio_comp); - - aio_comp->set_request_count(image_extents.size()); + m_plugin_api.update_aio_comp(aio_comp, image_extents.size()); for (auto &extent : image_extents) { - io::C_AioRequest *req_comp = new io::C_AioRequest(aio_comp); + io::C_AioRequest *req_comp = m_plugin_api.create_aio_request(aio_comp); m_image_cache->writesame(extent.first, extent.second, std::move(bl), op_flags, req_comp); @@ -175,10 +154,8 @@ bool WriteLogImageDispatch<I>::compare_and_write( return true; } - start_in_flight_io(aio_comp); - - aio_comp->set_request_count(1); - io::C_AioRequest *req_comp = new io::C_AioRequest(aio_comp); + m_plugin_api.update_aio_comp(aio_comp, 1); + io::C_AioRequest *req_comp = m_plugin_api.create_aio_request(aio_comp); m_image_cache->compare_and_write( std::move(image_extents), std::move(cmp_bl), std::move(bl), mismatch_offset, op_flags, req_comp); @@ -195,12 +172,11 @@ bool WriteLogImageDispatch<I>::flush( auto cct = m_image_ctx->cct; ldout(cct, 20) << "tid=" << tid << dendl; - start_in_flight_io(aio_comp); - *dispatch_result = io::DISPATCH_RESULT_COMPLETE; - aio_comp->set_request_count(1); - io::C_AioRequest *req_comp = new io::C_AioRequest(aio_comp); + m_plugin_api.update_aio_comp(aio_comp, 1); + + io::C_AioRequest *req_comp = m_plugin_api.create_aio_request(aio_comp); m_image_cache->flush(flush_source, req_comp); return true; @@ -223,7 +199,7 @@ bool WriteLogImageDispatch<I>::preprocess_length( io::AioCompletion* aio_comp, io::Extents &image_extents) const { auto total_bytes = io::util::get_extents_length(image_extents); if (total_bytes == 0) { - aio_comp->set_request_count(0); + m_plugin_api.update_aio_comp(aio_comp, 0); return true; } return false; diff --git a/src/librbd/cache/WriteLogImageDispatch.h b/src/librbd/cache/WriteLogImageDispatch.h index d0fb106e3a5..9344916237e 100644 --- 
a/src/librbd/cache/WriteLogImageDispatch.h +++ b/src/librbd/cache/WriteLogImageDispatch.h @@ -10,6 +10,7 @@ #include "common/zipkin_trace.h" #include "librbd/io/ReadResult.h" #include "librbd/io/Types.h" +#include "librbd/plugin/Api.h" struct Context; @@ -25,8 +26,10 @@ template <typename ImageCtxT> class WriteLogImageDispatch : public io::ImageDispatchInterface { public: WriteLogImageDispatch(ImageCtxT* image_ctx, - pwl::AbstractWriteLog<ImageCtx> *image_cache) : - m_image_ctx(image_ctx), m_image_cache(image_cache) { + pwl::AbstractWriteLog<ImageCtx> *image_cache, + plugin::Api<ImageCtxT>& plugin_api) : + m_image_ctx(image_ctx), m_image_cache(image_cache), + m_plugin_api(plugin_api) { } io::ImageDispatchLayer get_dispatch_layer() const override { @@ -91,6 +94,7 @@ public: private: ImageCtxT* m_image_ctx; pwl::AbstractWriteLog<ImageCtx> *m_image_cache; + plugin::Api<ImageCtxT>& m_plugin_api; bool preprocess_length( io::AioCompletion* aio_comp, io::Extents &image_extents) const; diff --git a/src/librbd/cache/pwl/AbstractWriteLog.cc b/src/librbd/cache/pwl/AbstractWriteLog.cc index fba6300c142..d5bac1b5093 100644 --- a/src/librbd/cache/pwl/AbstractWriteLog.cc +++ b/src/librbd/cache/pwl/AbstractWriteLog.cc @@ -18,6 +18,7 @@ #include "librbd/cache/pwl/ImageCacheState.h" #include "librbd/cache/pwl/LogEntry.h" #include "librbd/cache/pwl/ReadRequest.h" +#include "librbd/plugin/Api.h" #include <map> #include <vector> @@ -37,24 +38,27 @@ typedef AbstractWriteLog<ImageCtx>::Extent Extent; typedef AbstractWriteLog<ImageCtx>::Extents Extents; template <typename I> -AbstractWriteLog<I>::AbstractWriteLog(I &image_ctx, librbd::cache::pwl::ImageCacheState<I>* cache_state) +AbstractWriteLog<I>::AbstractWriteLog(I &image_ctx, librbd::cache::pwl::ImageCacheState<I>* cache_state, + cache::ImageWritebackInterface& image_writeback, + plugin::Api<I>& plugin_api) : m_write_log_guard(image_ctx.cct), - m_deferred_dispatch_lock(ceph::make_mutex(util::unique_lock_name( + 
m_deferred_dispatch_lock(ceph::make_mutex(pwl::unique_lock_name( "librbd::cache::pwl::AbstractWriteLog::m_deferred_dispatch_lock", this))), - m_blockguard_lock(ceph::make_mutex(util::unique_lock_name( + m_blockguard_lock(ceph::make_mutex(pwl::unique_lock_name( "librbd::cache::pwl::AbstractWriteLog::m_blockguard_lock", this))), m_thread_pool( image_ctx.cct, "librbd::cache::pwl::AbstractWriteLog::thread_pool", "tp_pwl", 4, ""), m_cache_state(cache_state), m_image_ctx(image_ctx), m_log_pool_config_size(DEFAULT_POOL_SIZE), - m_image_writeback(image_ctx), - m_log_retire_lock(ceph::make_mutex(util::unique_lock_name( + m_image_writeback(image_writeback), + m_plugin_api(plugin_api), + m_log_retire_lock(ceph::make_mutex(pwl::unique_lock_name( "librbd::cache::pwl::AbstractWriteLog::m_log_retire_lock", this))), m_entry_reader_lock("librbd::cache::pwl::AbstractWriteLog::m_entry_reader_lock"), - m_log_append_lock(ceph::make_mutex(util::unique_lock_name( + m_log_append_lock(ceph::make_mutex(pwl::unique_lock_name( "librbd::cache::pwl::AbstractWriteLog::m_log_append_lock", this))), - m_lock(ceph::make_mutex(util::unique_lock_name( + m_lock(ceph::make_mutex(pwl::unique_lock_name( "librbd::cache::pwl::AbstractWriteLog::m_lock", this))), m_blocks_to_log_entries(image_ctx.cct), m_work_queue("librbd::cache::pwl::ReplicatedWriteLog::work_queue", @@ -64,7 +68,7 @@ AbstractWriteLog<I>::AbstractWriteLog(I &image_ctx, librbd::cache::pwl::ImageCac &m_thread_pool) { CephContext *cct = m_image_ctx.cct; - ImageCtx::get_timer_instance(cct, &m_timer, &m_timer_lock); + m_plugin_api.get_image_timer_instance(cct, &m_timer, &m_timer_lock); } template <typename I> diff --git a/src/librbd/cache/pwl/AbstractWriteLog.h b/src/librbd/cache/pwl/AbstractWriteLog.h index e22bbcb6c8a..c96eb326869 100644 --- a/src/librbd/cache/pwl/AbstractWriteLog.h +++ b/src/librbd/cache/pwl/AbstractWriteLog.h @@ -24,6 +24,8 @@ namespace librbd { struct ImageCtx; +namespace plugin { template <typename> struct Api; } + 
namespace cache { namespace pwl { @@ -64,7 +66,9 @@ public: typedef io::Extent Extent; typedef io::Extents Extents; - AbstractWriteLog(ImageCtxT &image_ctx, librbd::cache::pwl::ImageCacheState<ImageCtxT>* cache_state); + AbstractWriteLog(ImageCtxT &image_ctx, librbd::cache::pwl::ImageCacheState<ImageCtxT>* cache_state, + cache::ImageWritebackInterface& image_writeback, + plugin::Api<ImageCtxT>& plugin_api); virtual ~AbstractWriteLog(); AbstractWriteLog(const AbstractWriteLog&) = delete; AbstractWriteLog &operator=(const AbstractWriteLog&) = delete; @@ -253,7 +257,9 @@ protected: std::atomic<bool> m_alloc_failed_since_retire = {false}; - ImageWriteback<ImageCtxT> m_image_writeback; + cache::ImageWritebackInterface& m_image_writeback; + plugin::Api<ImageCtxT>& m_plugin_api; + /* * When m_first_free_entry == m_first_valid_entry, the log is * empty. There is always at least one free entry, which can't be diff --git a/src/librbd/cache/pwl/DiscardRequest.cc b/src/librbd/cache/pwl/DiscardRequest.cc index 80ccd5abca0..9f66e077b1f 100644 --- a/src/librbd/cache/pwl/DiscardRequest.cc +++ b/src/librbd/cache/pwl/DiscardRequest.cc @@ -7,7 +7,6 @@ #include "librbd/asio/ContextWQ.h" #include "librbd/cache/pwl/DiscardRequest.h" -#if defined(WITH_RBD_RWL) #if __has_include(<filesystem>) #include <filesystem> namespace fs = std::filesystem; @@ -17,7 +16,6 @@ namespace fs = std::experimental::filesystem; #endif #include "librbd/cache/pwl/ImageCacheState.h" -#endif // WITH_RBD_RWL #include "librbd/cache/Types.h" #include "librbd/io/ImageDispatcherInterface.h" @@ -40,35 +38,33 @@ using librbd::util::create_context_callback; template <typename I> DiscardRequest<I>* DiscardRequest<I>::create( I &image_ctx, + plugin::Api<I>& plugin_api, Context *on_finish) { - return new DiscardRequest(image_ctx, on_finish); + return new DiscardRequest(image_ctx, plugin_api, on_finish); } template <typename I> DiscardRequest<I>::DiscardRequest( I &image_ctx, + plugin::Api<I>& plugin_api, Context 
*on_finish) : m_image_ctx(image_ctx), + m_plugin_api(plugin_api), m_on_finish(create_async_context_callback(image_ctx, on_finish)), m_error_result(0) { } template <typename I> void DiscardRequest<I>::send() { -#if defined(WITH_RBD_RWL) delete_image_cache_file(); -#else - finish(); -#endif } -#if defined(WITH_RBD_RWL) template <typename I> void DiscardRequest<I>::delete_image_cache_file() { CephContext *cct = m_image_ctx.cct; ldout(cct, 10) << dendl; - m_cache_state = ImageCacheState<I>::get_image_cache_state(&m_image_ctx); + m_cache_state = ImageCacheState<I>::get_image_cache_state(&m_image_ctx, m_plugin_api); if (!m_cache_state) { remove_feature_bit(); return; @@ -148,16 +144,12 @@ void DiscardRequest<I>::handle_remove_feature_bit(int r) { finish(); } -#endif // WITH_RBD_RWL - template <typename I> void DiscardRequest<I>::finish() { -#if defined(WITH_RBD_RWL) if (m_cache_state) { delete m_cache_state; m_cache_state = nullptr; } -#endif // WITH_RBD_RWL m_on_finish->complete(m_error_result); delete this; diff --git a/src/librbd/cache/pwl/DiscardRequest.h b/src/librbd/cache/pwl/DiscardRequest.h index 6edd194b7d9..c896369fe7f 100644 --- a/src/librbd/cache/pwl/DiscardRequest.h +++ b/src/librbd/cache/pwl/DiscardRequest.h @@ -9,6 +9,7 @@ class Context; namespace librbd { class ImageCtx; +namespace plugin { template <typename> struct Api; } namespace cache { @@ -22,6 +23,7 @@ class DiscardRequest { public: static DiscardRequest* create( ImageCtxT &image_ctx, + plugin::Api<ImageCtxT>& plugin_api, Context *on_finish); void send(); @@ -51,10 +53,12 @@ private: */ DiscardRequest(ImageCtxT &image_ctx, + plugin::Api<ImageCtxT>& plugin_api, Context *on_finish); ImageCtxT &m_image_ctx; ImageCacheState<ImageCtxT>* m_cache_state; + plugin::Api<ImageCtxT>& m_plugin_api; Context *m_on_finish; int m_error_result; diff --git a/src/librbd/cache/pwl/ImageCacheState.cc b/src/librbd/cache/pwl/ImageCacheState.cc index dba7b971d22..09ebd15b841 100644 --- 
a/src/librbd/cache/pwl/ImageCacheState.cc +++ b/src/librbd/cache/pwl/ImageCacheState.cc @@ -10,6 +10,7 @@ #include "common/ceph_json.h" #include "common/environment.h" #include "common/hostname.h" +#include "librbd/plugin/Api.h" #undef dout_subsys #define dout_subsys ceph_subsys_rbd_pwl @@ -33,7 +34,8 @@ bool get_json_format(const std::string& s, JSONFormattable *f) { } // namespace template <typename I> -ImageCacheState<I>::ImageCacheState(I *image_ctx) : m_image_ctx(image_ctx) { +ImageCacheState<I>::ImageCacheState(I *image_ctx, plugin::Api<I>& plugin_api) : + m_image_ctx(image_ctx), m_plugin_api(plugin_api) { ldout(image_ctx->cct, 20) << "Initialize RWL cache state with config data. " << dendl; @@ -43,7 +45,8 @@ ImageCacheState<I>::ImageCacheState(I *image_ctx) : m_image_ctx(image_ctx) { template <typename I> ImageCacheState<I>::ImageCacheState( - I *image_ctx, JSONFormattable &f) : m_image_ctx(image_ctx) { + I *image_ctx, JSONFormattable &f, plugin::Api<I>& plugin_api) : + m_image_ctx(image_ctx), m_plugin_api(plugin_api) { ldout(image_ctx->cct, 20) << "Initialize RWL cache state with data from " << "server side"<< dendl; @@ -73,15 +76,16 @@ void ImageCacheState<I>::write_image_cache_state(Context *on_finish) { ldout(m_image_ctx->cct, 20) << __func__ << " Store state: " << image_state_json << dendl; - m_image_ctx->operations->execute_metadata_set(IMAGE_CACHE_STATE, - image_state_json, on_finish); + m_plugin_api.execute_image_metadata_set(m_image_ctx, IMAGE_CACHE_STATE, + image_state_json, on_finish); } template <typename I> void ImageCacheState<I>::clear_image_cache_state(Context *on_finish) { std::shared_lock owner_lock{m_image_ctx->owner_lock}; ldout(m_image_ctx->cct, 20) << __func__ << " Remove state: " << dendl; - m_image_ctx->operations->execute_metadata_remove(IMAGE_CACHE_STATE, on_finish); + m_plugin_api.execute_image_metadata_remove( + m_image_ctx, IMAGE_CACHE_STATE, on_finish); } template <typename I> @@ -97,13 +101,13 @@ void 
ImageCacheState<I>::dump(ceph::Formatter *f) const { template <typename I> ImageCacheState<I>* ImageCacheState<I>::create_image_cache_state( - I* image_ctx, int &r) { + I* image_ctx, plugin::Api<I>& plugin_api, int &r) { std::string cache_state_str; ImageCacheState<I>* cache_state = nullptr; ldout(image_ctx->cct, 20) << "image_cache_state:" << cache_state_str << dendl; r = 0; - bool dirty_cache = image_ctx->test_features(RBD_FEATURE_DIRTY_CACHE); + bool dirty_cache = plugin_api.test_image_features(image_ctx, RBD_FEATURE_DIRTY_CACHE); if (dirty_cache) { cls_client::metadata_get(&image_ctx->md_ctx, image_ctx->header_oid, IMAGE_CACHE_STATE, &cache_state_str); @@ -112,8 +116,8 @@ ImageCacheState<I>* ImageCacheState<I>::create_image_cache_state( bool pwl_enabled = cache::util::is_pwl_enabled(*image_ctx); bool cache_desired = pwl_enabled; cache_desired &= !image_ctx->read_only; - cache_desired &= !image_ctx->test_features(RBD_FEATURE_MIGRATING); - cache_desired &= !image_ctx->test_features(RBD_FEATURE_JOURNALING); + cache_desired &= !plugin_api.test_image_features(image_ctx, RBD_FEATURE_MIGRATING); + cache_desired &= !plugin_api.test_image_features(image_ctx, RBD_FEATURE_JOURNALING); cache_desired &= !image_ctx->old_format; if (!dirty_cache && !cache_desired) { @@ -123,7 +127,7 @@ ImageCacheState<I>* ImageCacheState<I>::create_image_cache_state( << dendl; r = -EINVAL; }else if ((!dirty_cache || cache_state_str.empty()) && cache_desired) { - cache_state = new ImageCacheState<I>(image_ctx); + cache_state = new ImageCacheState<I>(image_ctx, plugin_api); } else { ceph_assert(!cache_state_str.empty()); JSONFormattable f; @@ -141,9 +145,9 @@ ImageCacheState<I>* ImageCacheState<I>::create_image_cache_state( switch (cache_type) { case IMAGE_CACHE_TYPE_RWL: if (!cache_exists) { - cache_state = new ImageCacheState<I>(image_ctx); + cache_state = new ImageCacheState<I>(image_ctx, plugin_api); } else { - cache_state = new ImageCacheState<I>(image_ctx, f); + cache_state = new 
ImageCacheState<I>(image_ctx, f, plugin_api); } break; default: @@ -154,7 +158,8 @@ ImageCacheState<I>* ImageCacheState<I>::create_image_cache_state( } template <typename I> -ImageCacheState<I>* ImageCacheState<I>::get_image_cache_state(I* image_ctx) { +ImageCacheState<I>* ImageCacheState<I>::get_image_cache_state( + I* image_ctx, plugin::Api<I>& plugin_api) { ImageCacheState<I>* cache_state = nullptr; string cache_state_str; cls_client::metadata_get(&image_ctx->md_ctx, image_ctx->header_oid, @@ -163,9 +168,9 @@ ImageCacheState<I>* ImageCacheState<I>::get_image_cache_state(I* image_ctx) { JSONFormattable f; bool success = get_json_format(cache_state_str, &f); if (!success) { - cache_state = new ImageCacheState<I>(image_ctx); + cache_state = new ImageCacheState<I>(image_ctx, plugin_api); } else { - cache_state = new ImageCacheState<I>(image_ctx, f); + cache_state = new ImageCacheState<I>(image_ctx, f, plugin_api); } } return cache_state; diff --git a/src/librbd/cache/pwl/ImageCacheState.h b/src/librbd/cache/pwl/ImageCacheState.h index 453316e8661..1da4306464f 100644 --- a/src/librbd/cache/pwl/ImageCacheState.h +++ b/src/librbd/cache/pwl/ImageCacheState.h @@ -14,6 +14,9 @@ namespace ceph { } namespace librbd { + +namespace plugin { template <typename> struct Api; } + namespace cache { namespace pwl { @@ -21,6 +24,7 @@ template <typename ImageCtxT = ImageCtx> class ImageCacheState { private: ImageCtxT* m_image_ctx; + plugin::Api<ImageCtxT>& m_plugin_api; public: bool present = false; bool empty = true; @@ -30,9 +34,10 @@ public: uint64_t size = 0; bool log_periodic_stats; - ImageCacheState(ImageCtxT* image_ctx); + ImageCacheState(ImageCtxT* image_ctx, plugin::Api<ImageCtxT>& plugin_api); - ImageCacheState(ImageCtxT* image_ctx, JSONFormattable& f); + ImageCacheState(ImageCtxT* image_ctx, JSONFormattable& f, + plugin::Api<ImageCtxT>& plugin_api); ~ImageCacheState() {} @@ -48,10 +53,10 @@ public: void dump(ceph::Formatter *f) const; static ImageCacheState<ImageCtxT>* 
create_image_cache_state( - ImageCtxT* image_ctx, int &r); + ImageCtxT* image_ctx, plugin::Api<ImageCtxT>& plugin_api, int &r); static ImageCacheState<ImageCtxT>* get_image_cache_state( - ImageCtxT* image_ctx); + ImageCtxT* image_ctx, plugin::Api<ImageCtxT>& plugin_api); bool is_valid(); }; diff --git a/src/librbd/cache/pwl/InitRequest.cc b/src/librbd/cache/pwl/InitRequest.cc index 5f3d87a4c3c..4b0962a8158 100644 --- a/src/librbd/cache/pwl/InitRequest.cc +++ b/src/librbd/cache/pwl/InitRequest.cc @@ -8,19 +8,20 @@ #include "common/errno.h" #include "librbd/asio/ContextWQ.h" -#if defined(WITH_RBD_RWL) || defined(WITH_RBD_SSD_CACHE) #include "librbd/cache/pwl/ImageCacheState.h" #include "librbd/cache/WriteLogImageDispatch.h" -#endif // WITH_RBD_RWL || WITH_RBD_SSD_CACHE +#include "librbd/cache/ImageWriteback.h" #ifdef WITH_RBD_RWL #include "librbd/cache/pwl/ReplicatedWriteLog.h" #endif + #ifdef WITH_RBD_SSD_CACHE #include "librbd/cache/pwl/SSDWriteLog.h" #endif #include "librbd/cache/Utils.h" #include "librbd/ImageCtx.h" +#include "librbd/plugin/Api.h" #define dout_subsys ceph_subsys_rbd_pwl #undef dout_prefix @@ -35,35 +36,40 @@ using librbd::util::create_async_context_callback; using librbd::util::create_context_callback; template <typename I> -InitRequest<I>* InitRequest<I>::create(I &image_ctx, - Context *on_finish) { - return new InitRequest(image_ctx, on_finish); +InitRequest<I>* InitRequest<I>::create( + I &image_ctx, + cache::ImageWritebackInterface& image_writeback, + plugin::Api<I>& plugin_api, + Context *on_finish) { + return new InitRequest(image_ctx, image_writeback, plugin_api, on_finish); } template <typename I> -InitRequest<I>::InitRequest(I &image_ctx, Context *on_finish) +InitRequest<I>::InitRequest( + I &image_ctx, + cache::ImageWritebackInterface& image_writeback, + plugin::Api<I>& plugin_api, + Context *on_finish) : m_image_ctx(image_ctx), + m_image_writeback(image_writeback), + m_plugin_api(plugin_api), 
m_on_finish(create_async_context_callback(image_ctx, on_finish)), m_error_result(0) { } template <typename I> void InitRequest<I>::send() { -#if defined(WITH_RBD_RWL) || defined(WITH_RBD_SSD_CACHE) get_image_cache_state(); -#else - finish(); -#endif // WITH_RBD_RWL } -#if defined(WITH_RBD_RWL) || defined(WITH_RBD_SSD_CACHE) template <typename I> void InitRequest<I>::get_image_cache_state() { CephContext *cct = m_image_ctx.cct; ldout(cct, 10) << dendl; int r; - auto cache_state = ImageCacheState<I>::create_image_cache_state(&m_image_ctx, r); + auto cache_state = ImageCacheState<I>::create_image_cache_state( + &m_image_ctx, m_plugin_api, r); if (r < 0 || !cache_state) { save_result(r); @@ -85,14 +91,18 @@ void InitRequest<I>::get_image_cache_state() { case cache::IMAGE_CACHE_TYPE_RWL: m_image_cache = new librbd::cache::pwl::ReplicatedWriteLog<I>(m_image_ctx, - cache_state); + cache_state, + m_image_writeback, + m_plugin_api); break; #endif #ifdef WITH_RBD_SSD_CACHE case cache::IMAGE_CACHE_TYPE_SSD: m_image_cache = new librbd::cache::pwl::SSDWriteLog<I>(m_image_ctx, - cache_state); + cache_state, + m_image_writeback, + m_plugin_api); break; #endif default: @@ -175,7 +185,8 @@ void InitRequest<I>::handle_set_feature_bit(int r) { } // Register RWL dispatch - auto image_dispatch = new cache::WriteLogImageDispatch<I>(&m_image_ctx, m_image_cache); + auto image_dispatch = new cache::WriteLogImageDispatch<I>( + &m_image_ctx, m_image_cache, m_plugin_api); m_image_ctx.io_image_dispatcher->register_dispatch(image_dispatch); @@ -208,8 +219,6 @@ void InitRequest<I>::handle_shutdown_image_cache(int r) { finish(); } -#endif // WITH_RBD_RWL - template <typename I> void InitRequest<I>::finish() { m_on_finish->complete(m_error_result); diff --git a/src/librbd/cache/pwl/InitRequest.h b/src/librbd/cache/pwl/InitRequest.h index b1bda3eda1c..56e63425e34 100644 --- a/src/librbd/cache/pwl/InitRequest.h +++ b/src/librbd/cache/pwl/InitRequest.h @@ -12,8 +12,12 @@ class ImageCtx; namespace io 
{ class ImageDispatchInterface; } +namespace plugin { template <typename> struct Api; } + namespace cache { +class ImageWritebackInterface; + namespace pwl { template<typename> @@ -25,7 +29,11 @@ class ImageCacheState; template <typename ImageCtxT = ImageCtx> class InitRequest { public: - static InitRequest* create(ImageCtxT &image_ctx, Context *on_finish); + static InitRequest* create( + ImageCtxT &image_ctx, + librbd::cache::ImageWritebackInterface& image_writeback, + plugin::Api<ImageCtxT>& plugin_api, + Context *on_finish); void send(); @@ -53,9 +61,14 @@ private: * @endverbatim */ - InitRequest(ImageCtxT &image_ctx, Context *on_finish); + InitRequest(ImageCtxT &image_ctx, + librbd::cache::ImageWritebackInterface& image_writeback, + plugin::Api<ImageCtxT>& plugin_api, + Context *on_finish); ImageCtxT &m_image_ctx; + librbd::cache::ImageWritebackInterface& m_image_writeback; + plugin::Api<ImageCtxT>& m_plugin_api; AbstractWriteLog<ImageCtxT> *m_image_cache; Context *m_on_finish; diff --git a/src/librbd/cache/pwl/LogEntry.h b/src/librbd/cache/pwl/LogEntry.h index 0edd387f4de..6f477fe83bd 100644 --- a/src/librbd/cache/pwl/LogEntry.h +++ b/src/librbd/cache/pwl/LogEntry.h @@ -157,12 +157,12 @@ public: WriteLogEntry(std::shared_ptr<SyncPointLogEntry> sync_point_entry, const uint64_t image_offset_bytes, const uint64_t write_bytes) : GenericWriteLogEntry(sync_point_entry, image_offset_bytes, write_bytes), - m_entry_bl_lock(ceph::make_mutex(util::unique_lock_name( + m_entry_bl_lock(ceph::make_mutex(pwl::unique_lock_name( "librbd::cache::pwl::WriteLogEntry::m_entry_bl_lock", this))) { } WriteLogEntry(const uint64_t image_offset_bytes, const uint64_t write_bytes) : GenericWriteLogEntry(nullptr, image_offset_bytes, write_bytes), - m_entry_bl_lock(ceph::make_mutex(util::unique_lock_name( + m_entry_bl_lock(ceph::make_mutex(pwl::unique_lock_name( "librbd::cache::pwl::WriteLogEntry::m_entry_bl_lock", this))) { } ~WriteLogEntry() override {}; diff --git 
a/src/librbd/cache/pwl/LogMap.cc b/src/librbd/cache/pwl/LogMap.cc index d05612ac4d3..a2e6d65eb29 100644 --- a/src/librbd/cache/pwl/LogMap.cc +++ b/src/librbd/cache/pwl/LogMap.cc @@ -36,7 +36,7 @@ LogMapEntry<T>::LogMapEntry(std::shared_ptr<T> log_entry) template <typename T> LogMap<T>::LogMap(CephContext *cct) : m_cct(cct), - m_lock(ceph::make_mutex(util::unique_lock_name( + m_lock(ceph::make_mutex(pwl::unique_lock_name( "librbd::cache::pwl::LogMap::m_lock", this))) { } diff --git a/src/librbd/cache/pwl/LogOperation.cc b/src/librbd/cache/pwl/LogOperation.cc index 8125a5d41a7..aca964031e1 100644 --- a/src/librbd/cache/pwl/LogOperation.cc +++ b/src/librbd/cache/pwl/LogOperation.cc @@ -114,7 +114,7 @@ GenericWriteLogOperation::GenericWriteLogOperation(std::shared_ptr<SyncPoint> sy PerfCounters *perfcounter, CephContext *cct) : GenericLogOperation(dispatch_time, perfcounter), - m_lock(ceph::make_mutex(util::unique_lock_name( + m_lock(ceph::make_mutex(pwl::unique_lock_name( "librbd::cache::pwl::GenericWriteLogOperation::m_lock", this))), m_cct(cct), sync_point(sync_point) { diff --git a/src/librbd/cache/pwl/ReplicatedWriteLog.cc b/src/librbd/cache/pwl/ReplicatedWriteLog.cc index c7d1b4a6b5c..200746fccb2 100644 --- a/src/librbd/cache/pwl/ReplicatedWriteLog.cc +++ b/src/librbd/cache/pwl/ReplicatedWriteLog.cc @@ -16,6 +16,7 @@ #include "librbd/asio/ContextWQ.h" #include "librbd/cache/pwl/ImageCacheState.h" #include "librbd/cache/pwl/LogEntry.h" +#include "librbd/plugin/Api.h" #include <map> #include <vector> @@ -35,8 +36,10 @@ const unsigned long int OPS_APPENDED_TOGETHER = MAX_ALLOC_PER_TRANSACTION; template <typename I> ReplicatedWriteLog<I>::ReplicatedWriteLog( - I &image_ctx, librbd::cache::pwl::ImageCacheState<I>* cache_state) -: AbstractWriteLog<I>(image_ctx, cache_state), + I &image_ctx, librbd::cache::pwl::ImageCacheState<I>* cache_state, + ImageWritebackInterface& image_writeback, + plugin::Api<I>& plugin_api) +: AbstractWriteLog<I>(image_ctx, cache_state, 
image_writeback, plugin_api), m_pwl_pool_layout_name(POBJ_LAYOUT_NAME(rbd_pwl)) { } diff --git a/src/librbd/cache/pwl/ReplicatedWriteLog.h b/src/librbd/cache/pwl/ReplicatedWriteLog.h index 2464405de1a..bf4b0bea4e9 100644 --- a/src/librbd/cache/pwl/ReplicatedWriteLog.h +++ b/src/librbd/cache/pwl/ReplicatedWriteLog.h @@ -34,7 +34,9 @@ template <typename ImageCtxT> class ReplicatedWriteLog : public AbstractWriteLog<ImageCtxT> { public: ReplicatedWriteLog( - ImageCtxT &image_ctx, librbd::cache::pwl::ImageCacheState<ImageCtxT>* cache_state); + ImageCtxT &image_ctx, librbd::cache::pwl::ImageCacheState<ImageCtxT>* cache_state, + ImageWritebackInterface& image_writeback, + plugin::Api<ImageCtxT>& plugin_api); ~ReplicatedWriteLog(); ReplicatedWriteLog(const ReplicatedWriteLog&) = delete; ReplicatedWriteLog &operator=(const ReplicatedWriteLog&) = delete; diff --git a/src/librbd/cache/pwl/SSDWriteLog.cc b/src/librbd/cache/pwl/SSDWriteLog.cc index b34d1ce5f96..34c1a53f573 100644 --- a/src/librbd/cache/pwl/SSDWriteLog.cc +++ b/src/librbd/cache/pwl/SSDWriteLog.cc @@ -36,8 +36,10 @@ const unsigned long int ops_appended_together = MAX_WRITES_PER_SYNC_POINT; template <typename I> SSDWriteLog<I>::SSDWriteLog( - I &image_ctx, librbd::cache::pwl::ImageCacheState<I>* cache_state) - : AbstractWriteLog<I>(image_ctx, cache_state) + I &image_ctx, librbd::cache::pwl::ImageCacheState<I>* cache_state, + cache::ImageWritebackInterface& image_writeback, + plugin::Api<I>& plugin_api) + : AbstractWriteLog<I>(image_ctx, cache_state, image_writeback, plugin_api) { } diff --git a/src/librbd/cache/pwl/SSDWriteLog.h b/src/librbd/cache/pwl/SSDWriteLog.h index 0052535273e..ff9330e5461 100644 --- a/src/librbd/cache/pwl/SSDWriteLog.h +++ b/src/librbd/cache/pwl/SSDWriteLog.h @@ -34,7 +34,9 @@ template <typename ImageCtxT> class SSDWriteLog : public AbstractWriteLog<ImageCtxT> { public: SSDWriteLog(ImageCtxT &image_ctx, - librbd::cache::pwl::ImageCacheState<ImageCtxT>* cache_state); + 
librbd::cache::pwl::ImageCacheState<ImageCtxT>* cache_state, + cache::ImageWritebackInterface& image_writeback, + plugin::Api<ImageCtxT>& plugin_api); ~SSDWriteLog() {} SSDWriteLog(const SSDWriteLog&) = delete; SSDWriteLog &operator=(const SSDWriteLog&) = delete; diff --git a/src/librbd/cache/pwl/ShutdownRequest.cc b/src/librbd/cache/pwl/ShutdownRequest.cc index bb3c9520984..4475712dd6b 100644 --- a/src/librbd/cache/pwl/ShutdownRequest.cc +++ b/src/librbd/cache/pwl/ShutdownRequest.cc @@ -10,9 +10,8 @@ #include "librbd/asio/ContextWQ.h" #include "librbd/cache/Types.h" -#if defined(WITH_RBD_RWL) #include "librbd/cache/pwl/AbstractWriteLog.h" -#endif // WITH_RBD_RWL +#include "librbd/plugin/Api.h" #define dout_subsys ceph_subsys_rbd_pwl #undef dout_prefix @@ -30,31 +29,29 @@ template <typename I> ShutdownRequest<I>* ShutdownRequest<I>::create( I &image_ctx, AbstractWriteLog<I> *image_cache, + plugin::Api<I>& plugin_api, Context *on_finish) { - return new ShutdownRequest(image_ctx, image_cache, on_finish); + return new ShutdownRequest(image_ctx, image_cache, plugin_api, on_finish); } template <typename I> ShutdownRequest<I>::ShutdownRequest( I &image_ctx, AbstractWriteLog<I> *image_cache, + plugin::Api<I>& plugin_api, Context *on_finish) : m_image_ctx(image_ctx), m_image_cache(image_cache), + m_plugin_api(plugin_api), m_on_finish(create_async_context_callback(image_ctx, on_finish)), m_error_result(0) { } template <typename I> void ShutdownRequest<I>::send() { -#if defined(WITH_RBD_RWL) send_shutdown_image_cache(); -#else - finish(); -#endif // WITH_RBD_RWL } -#if defined(WITH_RBD_RWL) template <typename I> void ShutdownRequest<I>::send_shutdown_image_cache() { CephContext *cct = m_image_ctx.cct; @@ -135,7 +132,7 @@ void ShutdownRequest<I>::send_remove_image_cache_state() { Context *ctx = create_context_callback<klass, &klass::handle_remove_image_cache_state>( this); std::shared_lock owner_lock{m_image_ctx.owner_lock}; - 
m_image_ctx.operations->execute_metadata_remove(IMAGE_CACHE_STATE, ctx); + m_plugin_api.execute_image_metadata_remove(&m_image_ctx, IMAGE_CACHE_STATE, ctx); } template <typename I> @@ -151,8 +148,6 @@ void ShutdownRequest<I>::handle_remove_image_cache_state(int r) { finish(); } -#endif // WITH_RBD_RWL - template <typename I> void ShutdownRequest<I>::finish() { m_on_finish->complete(m_error_result); diff --git a/src/librbd/cache/pwl/ShutdownRequest.h b/src/librbd/cache/pwl/ShutdownRequest.h index 2ed22f72782..dd2385b7ec8 100644 --- a/src/librbd/cache/pwl/ShutdownRequest.h +++ b/src/librbd/cache/pwl/ShutdownRequest.h @@ -10,6 +10,8 @@ namespace librbd { class ImageCtx; +namespace plugin { template <typename> struct Api; } + namespace cache { namespace pwl { @@ -26,6 +28,7 @@ public: static ShutdownRequest* create( ImageCtxT &image_ctx, AbstractWriteLog<ImageCtxT> *image_cache, + plugin::Api<ImageCtxT>& plugin_api, Context *on_finish); void send(); @@ -56,10 +59,12 @@ private: ShutdownRequest(ImageCtxT &image_ctx, AbstractWriteLog<ImageCtxT> *image_cache, + plugin::Api<ImageCtxT>& plugin_api, Context *on_finish); ImageCtxT &m_image_ctx; AbstractWriteLog<ImageCtxT> *m_image_cache; + plugin::Api<ImageCtxT>& m_plugin_api; Context *m_on_finish; int m_error_result; diff --git a/src/librbd/cache/pwl/Types.cc b/src/librbd/cache/pwl/Types.cc index 25c9dce130e..9962d35df29 100644 --- a/src/librbd/cache/pwl/Types.cc +++ b/src/librbd/cache/pwl/Types.cc @@ -5,6 +5,7 @@ #include "Types.h" #include "common/ceph_context.h" #include "include/Context.h" +#include "include/stringify.h" #define dout_subsys ceph_subsys_rbd_pwl #undef dout_prefix @@ -172,6 +173,10 @@ Context * override_ctx(int r, Context *ctx) { } } +std::string unique_lock_name(const std::string &name, void *address) { + return name + " (" + stringify(address) + ")"; +} + } // namespace pwl } // namespace cache } // namespace librbd diff --git a/src/librbd/cache/pwl/Types.h b/src/librbd/cache/pwl/Types.h index 
ab6c696a132..4bb810b38f3 100644 --- a/src/librbd/cache/pwl/Types.h +++ b/src/librbd/cache/pwl/Types.h @@ -370,6 +370,8 @@ public: : io::Extent(extent), m_bl(bl) { } }; +std::string unique_lock_name(const std::string &name, void *address); + } // namespace pwl } // namespace cache } // namespace librbd diff --git a/src/librbd/crypto/BlockCrypto.cc b/src/librbd/crypto/BlockCrypto.cc index a8203e13647..f37e78f245b 100644 --- a/src/librbd/crypto/BlockCrypto.cc +++ b/src/librbd/crypto/BlockCrypto.cc @@ -12,9 +12,9 @@ namespace crypto { template <typename T> BlockCrypto<T>::BlockCrypto(CephContext* cct, DataCryptor<T>* data_cryptor, - uint64_t block_size) + uint64_t block_size, uint64_t data_offset) : m_cct(cct), m_data_cryptor(data_cryptor), m_block_size(block_size), - m_iv_size(data_cryptor->get_iv_size()) { + m_data_offset(data_offset), m_iv_size(data_cryptor->get_iv_size()) { ceph_assert(isp2(block_size)); ceph_assert((block_size % data_cryptor->get_block_size()) == 0); } @@ -100,3 +100,5 @@ int BlockCrypto<T>::decrypt(ceph::bufferlist* data, uint64_t image_offset) { } // namespace crypto } // namespace librbd + +template class librbd::crypto::BlockCrypto<EVP_CIPHER_CTX>; diff --git a/src/librbd/crypto/BlockCrypto.h b/src/librbd/crypto/BlockCrypto.h index 8dec0c5fd02..a9f0ad3c002 100644 --- a/src/librbd/crypto/BlockCrypto.h +++ b/src/librbd/crypto/BlockCrypto.h @@ -6,7 +6,7 @@ #include "include/Context.h" #include "librbd/crypto/CryptoInterface.h" -#include "librbd/crypto/DataCryptor.h" +#include "librbd/crypto/openssl/DataCryptor.h" namespace librbd { namespace crypto { @@ -15,8 +15,12 @@ template <typename T> class BlockCrypto : public CryptoInterface { public: + static BlockCrypto* create(CephContext* cct, DataCryptor<T>* data_cryptor, + uint32_t block_size, uint64_t data_offset) { + return new BlockCrypto(cct, data_cryptor, block_size, data_offset); + } BlockCrypto(CephContext* cct, DataCryptor<T>* data_cryptor, - uint64_t block_size); + uint64_t block_size, 
uint64_t data_offset); int encrypt(ceph::bufferlist* data, uint64_t image_offset) override; int decrypt(ceph::bufferlist* data, uint64_t image_offset) override; @@ -25,10 +29,15 @@ public: return m_block_size; } + uint64_t get_data_offset() const override { + return m_data_offset; + } + private: CephContext* m_cct; DataCryptor<T>* m_data_cryptor; uint64_t m_block_size; + uint64_t m_data_offset; uint32_t m_iv_size; int crypt(ceph::bufferlist* data, uint64_t image_offset, CipherMode mode); @@ -37,4 +46,6 @@ private: } // namespace crypto } // namespace librbd +extern template class librbd::crypto::BlockCrypto<EVP_CIPHER_CTX>; + #endif //CEPH_LIBRBD_CRYPTO_BLOCK_CRYPTO_H diff --git a/src/librbd/crypto/CryptoInterface.h b/src/librbd/crypto/CryptoInterface.h index cf60874aa31..29e205188fe 100644 --- a/src/librbd/crypto/CryptoInterface.h +++ b/src/librbd/crypto/CryptoInterface.h @@ -18,6 +18,7 @@ public: virtual int encrypt(ceph::bufferlist* data, uint64_t image_offset) = 0; virtual int decrypt(ceph::bufferlist* data, uint64_t image_offset) = 0; virtual uint64_t get_block_size() const = 0; + virtual uint64_t get_data_offset() const = 0; inline std::pair<uint64_t, uint64_t> get_pre_and_post_align( uint64_t off, uint64_t len) { diff --git a/src/librbd/crypto/Types.h b/src/librbd/crypto/Types.h index 93d9c172c06..c7b29d687dd 100644 --- a/src/librbd/crypto/Types.h +++ b/src/librbd/crypto/Types.h @@ -12,6 +12,16 @@ enum CipherMode { CIPHER_MODE_DEC, }; +enum DiskEncryptionFormat { + DISK_ENCRYPTION_FORMAT_LUKS1, + DISK_ENCRYPTION_FORMAT_LUKS2, +}; + +enum CipherAlgorithm { + CIPHER_ALGORITHM_AES128, + CIPHER_ALGORITHM_AES256, +}; + } // namespace crypto } // namespace librbd diff --git a/src/librbd/crypto/luks/FormatRequest.cc b/src/librbd/crypto/luks/FormatRequest.cc new file mode 100644 index 00000000000..7bf77dff886 --- /dev/null +++ b/src/librbd/crypto/luks/FormatRequest.cc @@ -0,0 +1,147 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: 
ts=8 sw=2 smarttab + +#include "FormatRequest.h" + +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/Utils.h" +#include "librbd/crypto/luks/Header.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ImageDispatchSpec.h" +#include "librbd/io/ObjectDispatcherInterface.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::crypto::luks::FormatRequest: " << this \ + << " " << __func__ << ": " + +namespace librbd { +namespace crypto { +namespace luks { + +using librbd::util::create_context_callback; + +template <typename I> +FormatRequest<I>::FormatRequest( + I* image_ctx, DiskEncryptionFormat type, CipherAlgorithm cipher, + std::string&& passphrase, Context* on_finish, + bool insecure_fast_mode) : m_image_ctx(image_ctx), m_type(type), + m_cipher(cipher), m_on_finish(on_finish), + m_insecure_fast_mode(insecure_fast_mode), + m_header(image_ctx->cct), + m_passphrase(std::move(passphrase)) { +} + +template <typename I> +void FormatRequest<I>::send() { + if (m_image_ctx->io_object_dispatcher->exists( + io::OBJECT_DISPATCH_LAYER_CRYPTO)) { + finish(-EEXIST); + return; + } + + const char* type; + size_t sector_size; + switch(m_type) { + case DISK_ENCRYPTION_FORMAT_LUKS1: + type = CRYPT_LUKS1; + sector_size = 512; + break; + case DISK_ENCRYPTION_FORMAT_LUKS2: + type = CRYPT_LUKS2; + sector_size = 4096; + break; + default: + lderr(m_image_ctx->cct) << "unsupported disk encryption type: " << m_type + << dendl; + finish(-EINVAL); + return; + } + + const char* alg; + size_t key_size; + switch (m_cipher) { + case CIPHER_ALGORITHM_AES128: + alg = "aes"; + key_size = 32; + break; + case CIPHER_ALGORITHM_AES256: + alg = "aes"; + key_size = 64; + break; + default: + lderr(m_image_ctx->cct) << "unsupported cipher algorithm: " << m_cipher + << dendl; + finish(-EINVAL); + return; + } + + // setup interface with libcryptsetup + auto r = m_header.init(); + if (r < 0) { + finish(r); + return; + } + + // format 
(create LUKS header) + r = m_header.format(type, alg, key_size, "xts-plain64", sector_size, + m_image_ctx->get_object_size(), m_insecure_fast_mode); + if (r != 0) { + finish(r); + return; + } + + // add keyslot (volume key encrypted with passphrase) + r = m_header.add_keyslot(m_passphrase.c_str(), m_passphrase.size()); + if (r != 0) { + finish(r); + return; + } + + // read header from libcryptsetup interface + ceph::bufferlist bl; + r = m_header.read(&bl); + if (r < 0) { + finish(r); + return; + } + + // write header to offset 0 of the image + auto ctx = create_context_callback< + FormatRequest<I>, &FormatRequest<I>::handle_write_header>(this); + auto aio_comp = io::AioCompletion::create_and_start( + ctx, util::get_image_ctx(m_image_ctx), io::AIO_TYPE_WRITE); + + ZTracer::Trace trace; + auto req = io::ImageDispatchSpec::create_write( + *m_image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, + {{0, bl.length()}}, std::move(bl), + m_image_ctx->get_data_io_context(), 0, trace); + req->send(); +} + +template <typename I> +void FormatRequest<I>::handle_write_header(int r) { + if (r < 0) { + lderr(m_image_ctx->cct) << "error writing header to image: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void FormatRequest<I>::finish(int r) { + ceph_memzero_s(&m_passphrase[0], m_passphrase.capacity(), m_passphrase.size()); + m_on_finish->complete(r); + delete this; +} + +} // namespace luks +} // namespace crypto +} // namespace librbd + +template class librbd::crypto::luks::FormatRequest<librbd::ImageCtx>; diff --git a/src/librbd/crypto/luks/FormatRequest.h b/src/librbd/crypto/luks/FormatRequest.h new file mode 100644 index 00000000000..3574ed39aee --- /dev/null +++ b/src/librbd/crypto/luks/FormatRequest.h @@ -0,0 +1,54 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_CRYPTO_LUKS_FORMAT_REQUEST_H +#define 
CEPH_LIBRBD_CRYPTO_LUKS_FORMAT_REQUEST_H + +#include "librbd/ImageCtx.h" +#include "librbd/crypto/Types.h" +#include "librbd/crypto/luks/Header.h" + +namespace librbd { + +class ImageCtx; + +namespace crypto { +namespace luks { + +template <typename I> +class FormatRequest { +public: + static FormatRequest* create( + I* image_ctx, DiskEncryptionFormat type, CipherAlgorithm cipher, + std::string&& passphrase, Context* on_finish, + bool insecure_fast_mode) { + return new FormatRequest(image_ctx, type, cipher, std::move(passphrase), + on_finish, insecure_fast_mode); + } + + FormatRequest(I* image_ctx, DiskEncryptionFormat type, + CipherAlgorithm cipher, std::string&& passphrase, + Context* on_finish, bool insecure_fast_mode); + void send(); + void finish(int r); + +private: + I* m_image_ctx; + + DiskEncryptionFormat m_type; + CipherAlgorithm m_cipher; + Context* m_on_finish; + bool m_insecure_fast_mode; + Header m_header; + std::string m_passphrase; + + void handle_write_header(int r); +}; + +} // namespace luks +} // namespace crypto +} // namespace librbd + +extern template class librbd::crypto::luks::FormatRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_CRYPTO_LUKS_FORMAT_REQUEST_H diff --git a/src/librbd/crypto/luks/Header.cc b/src/librbd/crypto/luks/Header.cc new file mode 100644 index 00000000000..3dbc7d3b0ad --- /dev/null +++ b/src/librbd/crypto/luks/Header.cc @@ -0,0 +1,248 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Header.h" + +#include <errno.h> +#include <unistd.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include "common/dout.h" +#include "common/errno.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::crypto::luks::Header: " << this << " " \ + << __func__ << ": " + +namespace librbd { +namespace crypto { +namespace luks { + +Header::Header(CephContext* cct) : m_cct(cct), m_fd(-1), m_cd(nullptr) { +} + +Header::~Header() 
{ + if (m_fd != -1) { + close(m_fd); + m_fd = -1; + } + if (m_cd != nullptr) { + crypt_free(m_cd); + m_cd = nullptr; + } +} + +void Header::libcryptsetup_log_wrapper(int level, const char* msg, void* header) { + ((Header*)header)->libcryptsetup_log(level, msg); +} + +void Header::libcryptsetup_log(int level, const char* msg) { + switch (level) { + case CRYPT_LOG_NORMAL: + ldout(m_cct, 5) << "[libcryptsetup] " << msg << dendl; + break; + case CRYPT_LOG_ERROR: + lderr(m_cct) << "[libcryptsetup] " << msg << dendl; + break; + case CRYPT_LOG_VERBOSE: + ldout(m_cct, 10) << "[libcryptsetup] " << msg << dendl; + break; + case CRYPT_LOG_DEBUG: + ldout(m_cct, 20) << "[libcryptsetup] " << msg << dendl; + break; + } +} + +int Header::init() { + // create anonymous file + m_fd = syscall(SYS_memfd_create, "LibcryptsetupInterface", 0); + if (m_fd == -1) { + lderr(m_cct) << "error creating anonymous file: " << cpp_strerror(-errno) + << dendl; + return -errno; + } + std::string path = + "/proc/" + std::to_string(getpid()) + "/fd/" + std::to_string(m_fd); + + if (m_cct->_conf->subsys.should_gather<dout_subsys, 20>()) { + crypt_set_debug_level(CRYPT_DEBUG_ALL); + } + + // init libcryptsetup handle + auto r = crypt_init(&m_cd, path.c_str()); + if (r != 0) { + lderr(m_cct) << "crypt_init failed: " << cpp_strerror(r) << dendl; + return r; + } + + // redirect logging + crypt_set_log_callback(m_cd, &libcryptsetup_log_wrapper, this); + + return 0; +} + +int Header::write(const ceph::bufferlist& bl) { + ceph_assert(m_fd != -1); + + auto r = bl.write_fd(m_fd); + if (r != 0) { + lderr(m_cct) << "error writing header: " << cpp_strerror(r) << dendl; + } + return r; +} + +ssize_t Header::read(ceph::bufferlist* bl) { + ceph_assert(m_fd != -1); + + // get current header size + struct stat st; + ssize_t r = fstat(m_fd, &st); + if (r < 0) { + r = -errno; + lderr(m_cct) << "failed to stat anonymous file: " << cpp_strerror(r) + << dendl; + return r; + } + + r = bl->read_fd(m_fd, st.st_size); + if (r < 
0) { + lderr(m_cct) << "error reading header: " << cpp_strerror(r) << dendl; + } + return r; +} + +int Header::format(const char* type, const char* alg, size_t key_size, + const char* cipher_mode, uint32_t sector_size, + uint32_t data_alignment, bool insecure_fast_mode) { + ceph_assert(m_cd != nullptr); + + // required for passing libcryptsetup device size check + if (ftruncate(m_fd, 4096) != 0) { + lderr(m_cct) << "failed to truncate anonymous file: " + << cpp_strerror(-errno) << dendl; + return -errno; + } + + struct crypt_params_luks1 luks1params; + struct crypt_params_luks2 luks2params; + +#ifdef LIBCRYPTSETUP_LEGACY_DATA_ALIGNMENT + size_t converted_data_alignment = data_alignment / sector_size; +#else + size_t converted_data_alignment = data_alignment / 512; +#endif + + + void* params = nullptr; + if (strcmp(type, CRYPT_LUKS1) == 0) { + memset(&luks1params, 0, sizeof(luks1params)); + luks1params.data_alignment = converted_data_alignment; + params = &luks1params; + } else if (strcmp(type, CRYPT_LUKS2) == 0) { + memset(&luks2params, 0, sizeof(luks2params)); + luks2params.data_alignment = converted_data_alignment; + luks2params.sector_size = sector_size; + params = &luks2params; + } + + // this mode should be used for testing only + if (insecure_fast_mode) { + struct crypt_pbkdf_type pbkdf; + memset(&pbkdf, 0, sizeof(pbkdf)); + pbkdf.type = CRYPT_KDF_PBKDF2; + pbkdf.flags = CRYPT_PBKDF_NO_BENCHMARK; + pbkdf.hash = "sha256"; + pbkdf.iterations = 1000; + pbkdf.time_ms = 1; + auto r = crypt_set_pbkdf_type(m_cd, &pbkdf); + if (r != 0) { + lderr(m_cct) << "crypt_set_pbkdf_type failed: " << cpp_strerror(r) + << dendl; + return r; + } + } + + auto r = crypt_format( + m_cd, type, alg, cipher_mode, NULL, NULL, key_size, params); + if (r != 0) { + lderr(m_cct) << "crypt_format failed: " << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +int Header::add_keyslot(const char* passphrase, size_t passphrase_size) { + ceph_assert(m_cd != nullptr); + + auto r = 
crypt_keyslot_add_by_volume_key( + m_cd, CRYPT_ANY_SLOT, NULL, 0, passphrase, passphrase_size); + if (r != 0) { + lderr(m_cct) << "crypt_keyslot_add_by_volume_key failed: " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +int Header::load() { + ceph_assert(m_cd != nullptr); + + // libcryptsetup checks if device size matches the header and keyslots size + // in LUKS2, 2 X 4MB header + 128MB keyslots + if (ftruncate(m_fd, 136 * 1024 * 1024) != 0) { + lderr(m_cct) << "failed to truncate anonymous file: " + << cpp_strerror(-errno) << dendl; + return -errno; + } + + auto r = crypt_load(m_cd, CRYPT_LUKS, NULL); + if (r != 0) { + lderr(m_cct) << "crypt_load failed: " << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +int Header::read_volume_key(const char* passphrase, size_t passphrase_size, + char* volume_key, size_t* volume_key_size) { + ceph_assert(m_cd != nullptr); + + auto r = crypt_volume_key_get( + m_cd, CRYPT_ANY_SLOT, volume_key, volume_key_size, passphrase, + passphrase_size); + if (r != 0) { + lderr(m_cct) << "crypt_volume_key_get failed: " << cpp_strerror(r) + << dendl; + return r; + } + + return 0; +} + +int Header::get_sector_size() { + ceph_assert(m_cd != nullptr); + return crypt_get_sector_size(m_cd); +} + +uint64_t Header::get_data_offset() { + ceph_assert(m_cd != nullptr); + return crypt_get_data_offset(m_cd) << 9; +} + +const char* Header::get_cipher() { + ceph_assert(m_cd != nullptr); + return crypt_get_cipher(m_cd); +} + +const char* Header::get_cipher_mode() { + ceph_assert(m_cd != nullptr); + return crypt_get_cipher_mode(m_cd); +} + +} // namespace luks +} // namespace crypto +} // namespace librbd diff --git a/src/librbd/crypto/luks/Header.h b/src/librbd/crypto/luks/Header.h new file mode 100644 index 00000000000..6b58f246423 --- /dev/null +++ b/src/librbd/crypto/luks/Header.h @@ -0,0 +1,51 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef 
CEPH_LIBRBD_CRYPTO_LUKS_HEADER_H +#define CEPH_LIBRBD_CRYPTO_LUKS_HEADER_H + +#include <libcryptsetup.h> +#include "common/ceph_context.h" +#include "include/buffer.h" + +namespace librbd { +namespace crypto { +namespace luks { + +class Header { +public: + Header(CephContext* cct); + ~Header(); + int init(); + + int write(const ceph::bufferlist& bl); + ssize_t read(ceph::bufferlist* bl); + + int format(const char* type, const char* alg, size_t key_size, + const char* cipher_mode, uint32_t sector_size, + uint32_t data_alignment, bool insecure_fast_mode); + int add_keyslot(const char* passphrase, size_t passphrase_size); + int load(); + int read_volume_key(const char* passphrase, size_t passphrase_size, + char* volume_key, size_t* volume_key_size); + + int get_sector_size(); + uint64_t get_data_offset(); + const char* get_cipher(); + const char* get_cipher_mode(); + +private: + void libcryptsetup_log(int level, const char* msg); + static void libcryptsetup_log_wrapper(int level, const char* msg, + void* header); + + CephContext* m_cct; + int m_fd; + struct crypt_device *m_cd; +}; + +} // namespace luks +} // namespace crypto +} // namespace librbd + +#endif // CEPH_LIBRBD_CRYPTO_LUKS_HEADER_H diff --git a/src/librbd/crypto/luks/LoadRequest.cc b/src/librbd/crypto/luks/LoadRequest.cc new file mode 100644 index 00000000000..5229b6e5903 --- /dev/null +++ b/src/librbd/crypto/luks/LoadRequest.cc @@ -0,0 +1,214 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "LoadRequest.h" + +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/Utils.h" +#include "librbd/crypto/BlockCrypto.h" +#include "librbd/crypto/openssl/DataCryptor.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ImageDispatchSpec.h" +#include "librbd/io/ObjectDispatcherInterface.h" +#include "librbd/io/ReadResult.h" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << 
"librbd::crypto::luks::LoadRequest: " << this \ + << " " << __func__ << ": " + +namespace librbd { +namespace crypto { +namespace luks { + +using librbd::util::create_context_callback; + +template <typename I> +LoadRequest<I>::LoadRequest( + I* image_ctx, std::string&& passphrase, + ceph::ref_t<CryptoInterface>* result_crypto, + Context* on_finish) : m_image_ctx(image_ctx), m_on_finish(on_finish), + m_result_crypto(result_crypto), + m_initial_read_size(DEFAULT_INITIAL_READ_SIZE), + m_header(image_ctx->cct), m_offset(0), + m_passphrase(std::move(passphrase)) { +} + +template <typename I> +void LoadRequest<I>::set_initial_read_size(uint64_t read_size) { + m_initial_read_size = read_size; +} + +template <typename I> +void LoadRequest<I>::send() { + if (m_image_ctx->io_object_dispatcher->exists( + io::OBJECT_DISPATCH_LAYER_CRYPTO)) { + finish(-EEXIST); + return; + } + + // setup interface with libcryptsetup + auto r = m_header.init(); + if (r < 0) { + finish(r); + return; + } + + auto ctx = create_context_callback< + LoadRequest<I>, &LoadRequest<I>::handle_read_header>(this); + read(m_initial_read_size, ctx); +} + +template <typename I> +void LoadRequest<I>::read(uint64_t end_offset, Context* on_finish) { + auto length = end_offset - m_offset; + auto aio_comp = io::AioCompletion::create_and_start( + on_finish, util::get_image_ctx(m_image_ctx), io::AIO_TYPE_READ); + ZTracer::Trace trace; + auto req = io::ImageDispatchSpec::create_read( + *m_image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, + {{m_offset, length}}, io::ReadResult{&m_bl}, + m_image_ctx->get_data_io_context(), 0, 0, trace); + req->send(); +} + +template <typename I> +bool LoadRequest<I>::handle_read(int r) { + if (r < 0) { + lderr(m_image_ctx->cct) << "error reading from image: " << cpp_strerror(r) + << dendl; + finish(r); + return false; + } + + // write header to libcryptsetup interface + r = m_header.write(m_bl); + if (r < 0) { + finish(r); + return false; + } + + m_offset += m_bl.length(); + 
m_bl.clear(); + return true; +} + +template <typename I> +void LoadRequest<I>::handle_read_header(int r) { + if (!handle_read(r)) { + return; + } + + // parse header via libcryptsetup + r = m_header.load(); + if (r != 0) { + if (m_offset < MAXIMUM_HEADER_SIZE) { + // perhaps we did not feed the entire header to libcryptsetup, retry + auto ctx = create_context_callback< + LoadRequest<I>, &LoadRequest<I>::handle_read_header>(this); + read(MAXIMUM_HEADER_SIZE, ctx); + return; + } + + finish(r); + return; + } + + auto cipher = m_header.get_cipher(); + if (strcmp(cipher, "aes") != 0) { + lderr(m_image_ctx->cct) << "unsupported cipher: " << cipher << dendl; + finish(-ENOTSUP); + return; + } + + auto cipher_mode = m_header.get_cipher_mode(); + if (strcmp(cipher_mode, "xts-plain64") != 0) { + lderr(m_image_ctx->cct) << "unsupported cipher mode: " << cipher_mode + << dendl; + finish(-ENOTSUP); + return; + } + + read_volume_key(); + return; +} + +template <typename I> +void LoadRequest<I>::handle_read_keyslots(int r) { + if (!handle_read(r)) { + return; + } + + read_volume_key(); +} + +template <typename I> +void LoadRequest<I>::read_volume_key() { + char volume_key[64]; + size_t volume_key_size = sizeof(volume_key); + + auto r = m_header.read_volume_key( + m_passphrase.c_str(), m_passphrase.size(), + reinterpret_cast<char*>(volume_key), &volume_key_size); + if (r != 0) { + auto keyslots_end_offset = m_header.get_data_offset(); + if (m_offset < keyslots_end_offset) { + // perhaps we did not feed the the necessary keyslot, retry + auto ctx = create_context_callback< + LoadRequest<I>, &LoadRequest<I>::handle_read_keyslots>(this); + read(keyslots_end_offset, ctx); + return; + } + + finish(r); + return; + } + + const char* cipher_suite; + switch (volume_key_size) { + case 32: + cipher_suite = "aes-128-xts"; + break; + case 64: + cipher_suite = "aes-256-xts"; + break; + default: + lderr(m_image_ctx->cct) << "unsupported volume key size: " + << volume_key_size << dendl; + 
finish(-ENOTSUP); + return; + } + + + auto data_cryptor = new openssl::DataCryptor(m_image_ctx->cct); + r = data_cryptor->init( + cipher_suite, reinterpret_cast<unsigned char*>(volume_key), + volume_key_size); + if (r != 0) { + lderr(m_image_ctx->cct) << "error initializing data cryptor: " << r + << dendl; + delete data_cryptor; + finish(r); + return; + } + + auto sector_size = m_header.get_sector_size(); + auto data_offset = m_header.get_data_offset(); + *m_result_crypto = BlockCrypto<EVP_CIPHER_CTX>::create( + m_image_ctx->cct, data_cryptor, sector_size, data_offset); + finish(0); +} + +template <typename I> +void LoadRequest<I>::finish(int r) { + explicit_bzero(&m_passphrase[0], m_passphrase.size()); + m_on_finish->complete(r); + delete this; +} + +} // namespace luks +} // namespace crypto +} // namespace librbd + +template class librbd::crypto::luks::LoadRequest<librbd::ImageCtx>; diff --git a/src/librbd/crypto/luks/LoadRequest.h b/src/librbd/crypto/luks/LoadRequest.h new file mode 100644 index 00000000000..4c7dca7bc36 --- /dev/null +++ b/src/librbd/crypto/luks/LoadRequest.h @@ -0,0 +1,63 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_CRYPTO_LUKS_LOAD_REQUEST_H +#define CEPH_LIBRBD_CRYPTO_LUKS_LOAD_REQUEST_H + +#include "librbd/ImageCtx.h" +#include "librbd/crypto/CryptoInterface.h" +#include "librbd/crypto/luks/Header.h" + +namespace librbd { + +class ImageCtx; + +namespace crypto { +namespace luks { + +// max header size in LUKS1/2 (excl. 
keyslots) is 4MB +const uint64_t MAXIMUM_HEADER_SIZE = 4 * 1024 * 1024; +// default header size in LUKS2 2 X 16KB + 1 X 256KB keyslot +const uint64_t DEFAULT_INITIAL_READ_SIZE = 288 * 1024; + +template <typename I> +class LoadRequest { +public: + static LoadRequest* create( + I* image_ctx, std::string&& passphrase, + ceph::ref_t<CryptoInterface>* result_crypto, Context* on_finish) { + return new LoadRequest(image_ctx, std::move(passphrase), result_crypto, + on_finish); + } + + LoadRequest(I* image_ctx, std::string&& passphrase, + ceph::ref_t<CryptoInterface>* result_crypto, + Context* on_finish); + void send(); + void finish(int r); + void set_initial_read_size(uint64_t read_size); + +private: + I* m_image_ctx; + Context* m_on_finish; + ceph::bufferlist m_bl; + ceph::ref_t<CryptoInterface>* m_result_crypto; + uint64_t m_initial_read_size; + Header m_header; + uint64_t m_offset; + std::string m_passphrase; + + void read(uint64_t end_offset, Context* on_finish); + bool handle_read(int r); + void handle_read_header(int r); + void handle_read_keyslots(int r); + void read_volume_key(); +}; + +} // namespace luks +} // namespace crypto +} // namespace librbd + +extern template class librbd::crypto::luks::LoadRequest<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_CRYPTO_LUKS_LOAD_REQUEST_H diff --git a/src/librbd/deep_copy/ImageCopyRequest.cc b/src/librbd/deep_copy/ImageCopyRequest.cc index 859b13adb6c..37b3e0fbd4e 100644 --- a/src/librbd/deep_copy/ImageCopyRequest.cc +++ b/src/librbd/deep_copy/ImageCopyRequest.cc @@ -65,7 +65,36 @@ void ImageCopyRequest<I>::cancel() { } template <typename I> +void ImageCopyRequest<I>::map_src_objects(uint64_t dst_object, + std::set<uint64_t> *src_objects) { + std::vector<std::pair<uint64_t, uint64_t>> image_extents; + Striper::extent_to_file(m_cct, &m_dst_image_ctx->layout, dst_object, 0, + m_dst_image_ctx->layout.object_size, image_extents); + + for (auto &e : image_extents) { + std::map<object_t, std::vector<ObjectExtent>> 
src_object_extents; + Striper::file_to_extents(m_cct, m_src_image_ctx->format_string, + &m_src_image_ctx->layout, e.first, e.second, 0, + src_object_extents); + for (auto &p : src_object_extents) { + for (auto &s : p.second) { + src_objects->insert(s.objectno); + } + } + } + + ceph_assert(!src_objects->empty()); + + ldout(m_cct, 20) << dst_object << " -> " << *src_objects << dendl; +} + +template <typename I> void ImageCopyRequest<I>::compute_diff() { + if (m_flatten) { + send_object_copies(); + return; + } + ldout(m_cct, 10) << dendl; auto ctx = create_context_callback< @@ -147,10 +176,24 @@ int ImageCopyRequest<I>::send_next_object_copy() { } uint64_t ono = m_object_no++; - if (ono < m_object_diff_state.size() && - m_object_diff_state[ono] == object_map::DIFF_STATE_NONE) { - ldout(m_cct, 20) << "skipping clean object " << ono << dendl; - return 1; + + if (m_object_diff_state.size() > 0) { + std::set<uint64_t> src_objects; + map_src_objects(ono, &src_objects); + + bool skip = true; + for (auto src_ono : src_objects) { + if (src_ono >= m_object_diff_state.size() || + m_object_diff_state[src_ono] != object_map::DIFF_STATE_NONE) { + skip = false; + break; + } + } + + if (skip) { + ldout(m_cct, 20) << "skipping clean object " << ono << dendl; + return 1; + } } ldout(m_cct, 20) << "object_num=" << ono << dendl; diff --git a/src/librbd/deep_copy/ImageCopyRequest.h b/src/librbd/deep_copy/ImageCopyRequest.h index 63b4bf3daaf..9b7934dd35a 100644 --- a/src/librbd/deep_copy/ImageCopyRequest.h +++ b/src/librbd/deep_copy/ImageCopyRequest.h @@ -14,6 +14,7 @@ #include <functional> #include <map> #include <queue> +#include <set> #include <vector> #include <boost/optional.hpp> @@ -102,6 +103,8 @@ private: BitVector<2> m_object_diff_state; + void map_src_objects(uint64_t dst_object, std::set<uint64_t> *src_objects); + void compute_diff(); void handle_compute_diff(int r); diff --git a/src/librbd/exclusive_lock/PostAcquireRequest.cc b/src/librbd/exclusive_lock/PostAcquireRequest.cc 
index d858e317a1b..4553b21583f 100644 --- a/src/librbd/exclusive_lock/PostAcquireRequest.cc +++ b/src/librbd/exclusive_lock/PostAcquireRequest.cc @@ -7,7 +7,6 @@ #include "common/dout.h" #include "common/errno.h" #include "include/stringify.h" -#include "librbd/cache/pwl/InitRequest.h" #include "librbd/ExclusiveLock.h" #include "librbd/ImageCtx.h" #include "librbd/ImageState.h" @@ -17,6 +16,7 @@ #include "librbd/Utils.h" #include "librbd/image/RefreshRequest.h" #include "librbd/journal/Policy.h" +#include "librbd/PluginRegistry.h" #define dout_subsys ceph_subsys_rbd #undef dout_prefix @@ -115,7 +115,7 @@ void PostAcquireRequest<I>::send_open_journal() { } if (!journal_enabled) { apply(); - send_open_image_cache(); + send_process_plugin_acquire_lock(); return; } @@ -173,33 +173,30 @@ void PostAcquireRequest<I>::handle_allocate_journal_tag(int r) { return; } - send_open_image_cache(); + send_process_plugin_acquire_lock(); } template <typename I> -void PostAcquireRequest<I>::send_open_image_cache() { +void PostAcquireRequest<I>::send_process_plugin_acquire_lock() { CephContext *cct = m_image_ctx.cct; ldout(cct, 10) << dendl; using klass = PostAcquireRequest<I>; - Context *ctx = create_async_context_callback( - m_image_ctx, create_context_callback< - klass, &klass::handle_open_image_cache>(this)); - cache::pwl::InitRequest<I> *req = cache::pwl::InitRequest<I>::create( - m_image_ctx, ctx); - req->send(); + Context *ctx = create_context_callback< + klass, &klass::handle_process_plugin_acquire_lock>(this); + m_image_ctx.plugin_registry->acquired_exclusive_lock(ctx); } template <typename I> -void PostAcquireRequest<I>::handle_open_image_cache(int r) { +void PostAcquireRequest<I>::handle_process_plugin_acquire_lock(int r) { CephContext *cct = m_image_ctx.cct; ldout(cct, 10) << "r=" << r << dendl; save_result(r); if (r < 0) { - lderr(cct) << "failed to open image cache: " << cpp_strerror(r) + lderr(cct) << "failed to process plugins: " << cpp_strerror(r) << dendl; - 
send_close_journal(); + send_process_plugin_release_lock(); return; } @@ -207,6 +204,30 @@ void PostAcquireRequest<I>::handle_open_image_cache(int r) { } template <typename I> +void PostAcquireRequest<I>::send_process_plugin_release_lock() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << dendl; + + using klass = PostAcquireRequest<I>; + Context *ctx = create_context_callback< + klass, &klass::handle_process_plugin_release_lock>(this); + m_image_ctx.plugin_registry->prerelease_exclusive_lock(ctx); +} + +template <typename I> +void PostAcquireRequest<I>::handle_process_plugin_release_lock(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << "r=" << r << dendl; + + save_result(r); + if (r < 0) { + lderr(cct) << "failed to release plugins: " << cpp_strerror(r) + << dendl; + } + send_close_journal(); +} + +template <typename I> void PostAcquireRequest<I>::send_close_journal() { if (m_journal == nullptr) { send_close_object_map(); diff --git a/src/librbd/exclusive_lock/PostAcquireRequest.h b/src/librbd/exclusive_lock/PostAcquireRequest.h index f9086342632..2f7efdf0761 100644 --- a/src/librbd/exclusive_lock/PostAcquireRequest.h +++ b/src/librbd/exclusive_lock/PostAcquireRequest.h @@ -49,10 +49,13 @@ private: * | * * * | * * * v * * - * OPEN_IMAGE_CACHE * * - * | * * * - * | * * * + * PROCESS_PLUGIN_ACQUIRE* + * | * * + * | * * * | v v v + * | PROCESS_PLUGIN_RELEASE + * | | + * | v * | CLOSE_JOURNAL * | | * | v @@ -95,11 +98,11 @@ private: void send_close_object_map(); void handle_close_object_map(int r); - void send_open_image_cache(); - void handle_open_image_cache(int r); + void send_process_plugin_acquire_lock(); + void handle_process_plugin_acquire_lock(int r); - void send_close_image_cache(); - void handle_close_image_cache(int r); + void send_process_plugin_release_lock(); + void handle_process_plugin_release_lock(int r); void apply(); void revert(); diff --git a/src/librbd/exclusive_lock/PreReleaseRequest.cc 
b/src/librbd/exclusive_lock/PreReleaseRequest.cc index 5d926c9059c..3adc3b54a45 100644 --- a/src/librbd/exclusive_lock/PreReleaseRequest.cc +++ b/src/librbd/exclusive_lock/PreReleaseRequest.cc @@ -17,6 +17,7 @@ #include "librbd/io/ImageDispatcherInterface.h" #include "librbd/io/ObjectDispatcherInterface.h" #include "librbd/io/Types.h" +#include "librbd/PluginRegistry.h" #define dout_subsys ceph_subsys_rbd #undef dout_prefix @@ -142,7 +143,7 @@ void PreReleaseRequest<I>::handle_wait_for_ops(int r) { template <typename I> void PreReleaseRequest<I>::send_prepare_lock() { if (m_shutting_down) { - send_shut_down_image_cache(); + send_process_plugin_release_lock(); return; } @@ -160,30 +161,29 @@ void PreReleaseRequest<I>::handle_prepare_lock(int r) { CephContext *cct = m_image_ctx.cct; ldout(cct, 10) << "r=" << r << dendl; - send_shut_down_image_cache(); + send_process_plugin_release_lock(); } template <typename I> -void PreReleaseRequest<I>::send_shut_down_image_cache() { +void PreReleaseRequest<I>::send_process_plugin_release_lock() { CephContext *cct = m_image_ctx.cct; ldout(cct, 10) << dendl; std::shared_lock owner_lock{m_image_ctx.owner_lock}; Context *ctx = create_async_context_callback(m_image_ctx, create_context_callback< PreReleaseRequest<I>, - &PreReleaseRequest<I>::handle_shut_down_image_cache>(this)); - m_image_ctx.io_image_dispatcher->shut_down_dispatch( - io::IMAGE_DISPATCH_LAYER_WRITEBACK_CACHE, ctx); + &PreReleaseRequest<I>::handle_process_plugin_release_lock>(this)); + m_image_ctx.plugin_registry->prerelease_exclusive_lock(ctx); } template <typename I> -void PreReleaseRequest<I>::handle_shut_down_image_cache(int r) { +void PreReleaseRequest<I>::handle_process_plugin_release_lock(int r) { CephContext *cct = m_image_ctx.cct; ldout(cct, 10) << "r=" << r << dendl; if (r < 0) { - lderr(cct) << "failed to shut down image cache: " << cpp_strerror(r) - << dendl; + lderr(cct) << "failed to handle plugins before releasing lock: " + << cpp_strerror(r) << dendl; 
m_image_dispatch->unset_require_lock(io::DIRECTION_BOTH); save_result(r); finish(); diff --git a/src/librbd/exclusive_lock/PreReleaseRequest.h b/src/librbd/exclusive_lock/PreReleaseRequest.h index 8156df3415f..4263379438e 100644 --- a/src/librbd/exclusive_lock/PreReleaseRequest.h +++ b/src/librbd/exclusive_lock/PreReleaseRequest.h @@ -49,6 +49,9 @@ private: * PREPARE_LOCK * | * v + * PROCESS_PLUGIN_RELEASE + * | + * v * SHUT_DOWN_IMAGE_CACHE * | * v @@ -100,8 +103,8 @@ private: void send_prepare_lock(); void handle_prepare_lock(int r); - void send_shut_down_image_cache(); - void handle_shut_down_image_cache(int r); + void send_process_plugin_release_lock(); + void handle_process_plugin_release_lock(int r); void send_invalidate_cache(); void handle_invalidate_cache(int r); diff --git a/src/librbd/image/RefreshParentRequest.cc b/src/librbd/image/RefreshParentRequest.cc index 27cc99f529e..348226c392b 100644 --- a/src/librbd/image/RefreshParentRequest.cc +++ b/src/librbd/image/RefreshParentRequest.cc @@ -119,8 +119,8 @@ void RefreshParentRequest<I>::send_open_parent() { RefreshParentRequest<I>, &RefreshParentRequest<I>::handle_open_parent, false>(this)); auto req = migration::OpenSourceImageRequest<I>::create( - &m_child_image_ctx, m_parent_md.spec.snap_id, m_migration_info, - &m_parent_image_ctx, ctx); + m_child_image_ctx.md_ctx, &m_child_image_ctx, m_parent_md.spec.snap_id, + m_migration_info, &m_parent_image_ctx, ctx); req->send(); return; } diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 1de2e14e9ad..46dabacc890 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -31,6 +31,7 @@ #include "librbd/Journal.h" #include "librbd/ObjectMap.h" #include "librbd/Operations.h" +#include "librbd/PluginRegistry.h" #include "librbd/Types.h" #include "librbd/Utils.h" #include "librbd/api/Config.h" @@ -1629,7 +1630,7 @@ int validate_pool(IoCtx &io_ctx, CephContext *cct) { !ictx->exclusive_lock->is_lock_owner()) && 
ictx->test_features(RBD_FEATURE_DIRTY_CACHE)) { C_SaferCond ctx3; - librbd::cache::util::discard_cache<>(*ictx, &ctx3); + ictx->plugin_registry->discard(&ctx3); r = ctx3.wait(); } return r; diff --git a/src/librbd/io/Dispatcher.h b/src/librbd/io/Dispatcher.h index 7dc9357bac9..cb64e11b27e 100644 --- a/src/librbd/io/Dispatcher.h +++ b/src/librbd/io/Dispatcher.h @@ -68,6 +68,11 @@ public: ceph_assert(result.second); } + bool exists(DispatchLayer dispatch_layer) override { + std::unique_lock locker{m_lock}; + return m_dispatches.find(dispatch_layer) != m_dispatches.end(); + } + void shut_down_dispatch(DispatchLayer dispatch_layer, Context* on_finish) override { auto cct = m_image_ctx->cct; diff --git a/src/librbd/io/DispatcherInterface.h b/src/librbd/io/DispatcherInterface.h index 57a56602dfd..2bac9ee757a 100644 --- a/src/librbd/io/DispatcherInterface.h +++ b/src/librbd/io/DispatcherInterface.h @@ -24,6 +24,7 @@ public: virtual void shut_down(Context* on_finish) = 0; virtual void register_dispatch(Dispatch* dispatch) = 0; + virtual bool exists(DispatchLayer dispatch_layer) = 0; virtual void shut_down_dispatch(DispatchLayer dispatch_layer, Context* on_finish) = 0; diff --git a/src/librbd/io/ReadResult.cc b/src/librbd/io/ReadResult.cc index 21fa29fed5f..d8c03e1da38 100644 --- a/src/librbd/io/ReadResult.cc +++ b/src/librbd/io/ReadResult.cc @@ -142,8 +142,10 @@ struct ReadResult::AssembleResultVisitor : public boost::static_visitor<void> { }; ReadResult::C_ImageReadRequest::C_ImageReadRequest( - AioCompletion *aio_completion, const Extents image_extents) - : aio_completion(aio_completion), image_extents(image_extents) { + AioCompletion *aio_completion, uint64_t buffer_offset, + const Extents image_extents) + : aio_completion(aio_completion), buffer_offset(buffer_offset), + image_extents(image_extents) { aio_completion->add_request(); } @@ -155,7 +157,7 @@ void ReadResult::C_ImageReadRequest::finish(int r) { striper::LightweightBufferExtents buffer_extents; size_t length = 
0; for (auto &image_extent : image_extents) { - buffer_extents.emplace_back(length, image_extent.second); + buffer_extents.emplace_back(buffer_offset + length, image_extent.second); length += image_extent.second; } ceph_assert(length == bl.length()); diff --git a/src/librbd/io/ReadResult.h b/src/librbd/io/ReadResult.h index 69ced0c7b37..1dfd15b6988 100644 --- a/src/librbd/io/ReadResult.h +++ b/src/librbd/io/ReadResult.h @@ -27,10 +27,12 @@ class ReadResult { public: struct C_ImageReadRequest : public Context { AioCompletion *aio_completion; + uint64_t buffer_offset = 0; Extents image_extents; bufferlist bl; C_ImageReadRequest(AioCompletion *aio_completion, + uint64_t buffer_offset, const Extents image_extents); void finish(int r) override; diff --git a/src/librbd/migration/FileStream.cc b/src/librbd/migration/FileStream.cc index 21cc46be14a..8d69b7f63d7 100644 --- a/src/librbd/migration/FileStream.cc +++ b/src/librbd/migration/FileStream.cc @@ -213,13 +213,13 @@ void FileStream<I>::close(Context* on_finish) { } template <typename I> -void FileStream<I>::read(io::Extents&& byte_extents, bufferlist* data, - Context* on_finish) { +void FileStream<I>::get_size(uint64_t* size, Context* on_finish) { on_finish->complete(-EIO); } template <typename I> -void FileStream<I>::get_size(uint64_t* size, Context* on_finish) { +void FileStream<I>::read(io::Extents&& byte_extents, bufferlist* data, + Context* on_finish) { on_finish->complete(-EIO); } diff --git a/src/librbd/migration/FileStream.h b/src/librbd/migration/FileStream.h index 61c6906555c..06ef591b581 100644 --- a/src/librbd/migration/FileStream.h +++ b/src/librbd/migration/FileStream.h @@ -35,13 +35,13 @@ public: FileStream(const FileStream&) = delete; FileStream& operator=(const FileStream&) = delete; - void open(Context* on_finish); - void close(Context* on_finish); + void open(Context* on_finish) override; + void close(Context* on_finish) override; void get_size(uint64_t* size, Context* on_finish) override; void 
read(io::Extents&& byte_extents, bufferlist* data, - Context* on_finish); + Context* on_finish) override; private: CephContext* m_cct; diff --git a/src/librbd/migration/HttpClient.cc b/src/librbd/migration/HttpClient.cc new file mode 100644 index 00000000000..0cc51ddd861 --- /dev/null +++ b/src/librbd/migration/HttpClient.cc @@ -0,0 +1,945 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/migration/HttpClient.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/AsioEngine.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/asio/Utils.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ReadResult.h" +#include "librbd/migration/Utils.h" +#include <boost/asio/buffer.hpp> +#include <boost/asio/post.hpp> +#include <boost/asio/ip/tcp.hpp> +#include <boost/asio/read.hpp> +#include <boost/asio/ssl.hpp> +#include <boost/beast/core.hpp> +#include <boost/beast/http/read.hpp> +#include <boost/lexical_cast.hpp> +#include <deque> + +namespace librbd { +namespace migration { + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::migration::HttpClient::" \ + << "HttpSession " << this << " " << __func__ \ + << ": " + +/** + * boost::beast utilizes non-inheriting template classes for handling plain vs + * encrypted TCP streams. Utilize a base-class for handling the majority of the + * logic for handling connecting, disconnecting, reseting, and sending requests. 
+ */ + +template <typename I> +template <typename D> +class HttpClient<I>::HttpSession : public HttpSessionInterface { +public: + void init(Context* on_finish) override { + ceph_assert(m_http_client->m_strand.running_in_this_thread()); + + auto cct = m_http_client->m_cct; + ldout(cct, 15) << dendl; + + ceph_assert(m_state == STATE_UNINITIALIZED); + m_state = STATE_CONNECTING; + + resolve_host(on_finish); + } + + void shut_down(Context* on_finish) override { + ceph_assert(m_http_client->m_strand.running_in_this_thread()); + + auto cct = m_http_client->m_cct; + ldout(cct, 15) << dendl; + + ceph_assert(on_finish != nullptr); + ceph_assert(m_on_shutdown == nullptr); + m_on_shutdown = on_finish; + + auto current_state = m_state; + if (current_state == STATE_UNINITIALIZED) { + // never initialized or resolve/connect failed + on_finish->complete(0); + return; + } + + m_state = STATE_SHUTTING_DOWN; + if (current_state != STATE_READY) { + // delay shutdown until current state transition completes + return; + } + + disconnect(new LambdaContext([this](int r) { handle_shut_down(r); })); + } + + void issue(std::shared_ptr<Work>&& work) override { + ceph_assert(m_http_client->m_strand.running_in_this_thread()); + + auto cct = m_http_client->m_cct; + ldout(cct, 20) << "work=" << work.get() << dendl; + + if (is_shutdown()) { + lderr(cct) << "cannot issue HTTP request, client is shutdown" + << dendl; + work->complete(-ESHUTDOWN, {}); + return; + } + + bool first_issue = m_issue_queue.empty(); + m_issue_queue.emplace_back(work); + if (m_state == STATE_READY && first_issue) { + ldout(cct, 20) << "sending http request: work=" << work.get() << dendl; + finalize_issue(std::move(work)); + } else if (m_state == STATE_UNINITIALIZED) { + ldout(cct, 20) << "resetting HTTP session: work=" << work.get() << dendl; + m_state = STATE_RESET_CONNECTING; + resolve_host(nullptr); + } else { + ldout(cct, 20) << "queueing HTTP request: work=" << work.get() << dendl; + } + } + + void 
finalize_issue(std::shared_ptr<Work>&& work) { + auto cct = m_http_client->m_cct; + ldout(cct, 20) << "work=" << work.get() << dendl; + + ++m_in_flight_requests; + (*work)(derived().stream()); + } + + void handle_issue(boost::system::error_code ec, + std::shared_ptr<Work>&& work) override { + ceph_assert(m_http_client->m_strand.running_in_this_thread()); + + auto cct = m_http_client->m_cct; + ldout(cct, 20) << "work=" << work.get() << ", r=" << -ec.value() << dendl; + + ceph_assert(m_in_flight_requests > 0); + --m_in_flight_requests; + if (maybe_finalize_reset()) { + // previous request is attempting reset to this request will be resent + return; + } + + ceph_assert(!m_issue_queue.empty()); + m_issue_queue.pop_front(); + + if (is_shutdown()) { + lderr(cct) << "client shutdown during in-flight request" << dendl; + work->complete(-ESHUTDOWN, {}); + + maybe_finalize_shutdown(); + return; + } + + if (ec) { + if (ec == boost::asio::error::bad_descriptor || + ec == boost::asio::error::broken_pipe || + ec == boost::asio::error::connection_reset || + ec == boost::asio::error::operation_aborted || + ec == boost::asio::ssl::error::stream_truncated || + ec == boost::beast::http::error::end_of_stream || + ec == boost::beast::http::error::partial_message) { + ldout(cct, 5) << "remote peer stream closed, retrying request" << dendl; + m_issue_queue.push_front(work); + } else if (ec == boost::beast::error::timeout) { + lderr(cct) << "timed-out while issuing request" << dendl; + work->complete(-ETIMEDOUT, {}); + } else { + lderr(cct) << "failed to issue request: " << ec.message() << dendl; + work->complete(-ec.value(), {}); + } + + // attempt to recover the connection + reset(); + return; + } + + bool first_receive = m_receive_queue.empty(); + m_receive_queue.push_back(work); + if (first_receive) { + receive(std::move(work)); + } + + // TODO disable pipelining for non-idempotent requests + + // pipeline the next request into the stream + if (!m_issue_queue.empty()) { + work = 
m_issue_queue.front(); + ldout(cct, 20) << "sending http request: work=" << work.get() << dendl; + finalize_issue(std::move(work)); + } + } + +protected: + HttpClient* m_http_client; + + HttpSession(HttpClient* http_client) + : m_http_client(http_client), m_resolver(http_client->m_strand) { + } + + virtual void connect(boost::asio::ip::tcp::resolver::results_type results, + Context* on_finish) = 0; + virtual void disconnect(Context* on_finish) = 0; + + void close_socket() { + auto cct = m_http_client->m_cct; + ldout(cct, 15) << dendl; + + boost::system::error_code ec; + boost::beast::get_lowest_layer(derived().stream()).socket().close(ec); + } + +private: + enum State { + STATE_UNINITIALIZED, + STATE_CONNECTING, + STATE_READY, + STATE_RESET_PENDING, + STATE_RESET_DISCONNECTING, + STATE_RESET_CONNECTING, + STATE_SHUTTING_DOWN, + STATE_SHUTDOWN, + }; + + State m_state = STATE_UNINITIALIZED; + boost::asio::ip::tcp::resolver m_resolver; + + Context* m_on_shutdown = nullptr; + + uint64_t m_in_flight_requests = 0; + std::deque<std::shared_ptr<Work>> m_issue_queue; + std::deque<std::shared_ptr<Work>> m_receive_queue; + + boost::beast::flat_buffer m_buffer; + std::optional<boost::beast::http::parser<false, EmptyBody>> m_header_parser; + std::optional<boost::beast::http::parser<false, StringBody>> m_parser; + + D& derived() { + return static_cast<D&>(*this); + } + + void resolve_host(Context* on_finish) { + auto cct = m_http_client->m_cct; + ldout(cct, 15) << dendl; + + shutdown_socket(); + m_resolver.async_resolve( + m_http_client->m_url_spec.host, m_http_client->m_url_spec.port, + [this, on_finish](boost::system::error_code ec, auto results) { + handle_resolve_host(ec, results, on_finish); }); + } + + void handle_resolve_host( + boost::system::error_code ec, + boost::asio::ip::tcp::resolver::results_type results, + Context* on_finish) { + auto cct = m_http_client->m_cct; + int r = -ec.value(); + ldout(cct, 15) << "r=" << r << dendl; + + if (ec) { + if (ec == 
boost::asio::error::host_not_found) { + r = -ENOENT; + } else if (ec == boost::asio::error::host_not_found_try_again) { + // TODO: add retry throttle + r = -EAGAIN; + } + + lderr(cct) << "failed to resolve host '" + << m_http_client->m_url_spec.host << "': " + << cpp_strerror(r) << dendl; + advance_state(STATE_UNINITIALIZED, r, on_finish); + return; + } + + connect(results, new LambdaContext([this, on_finish](int r) { + handle_connect(r, on_finish); })); + } + + void handle_connect(int r, Context* on_finish) { + auto cct = m_http_client->m_cct; + ldout(cct, 15) << "r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to connect to host '" + << m_http_client->m_url_spec.host << "': " + << cpp_strerror(r) << dendl; + advance_state(STATE_UNINITIALIZED, r, on_finish); + return; + } + + advance_state(STATE_READY, 0, on_finish); + } + + void handle_shut_down(int r) { + auto cct = m_http_client->m_cct; + ldout(cct, 15) << "r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to disconnect stream: '" << cpp_strerror(r) + << dendl; + } + + // cancel all in-flight send/receives (if any) + shutdown_socket(); + + maybe_finalize_shutdown(); + } + + void maybe_finalize_shutdown() { + if (m_in_flight_requests > 0) { + return; + } + + // cancel any queued IOs + fail_queued_work(-ESHUTDOWN); + + advance_state(STATE_SHUTDOWN, 0, nullptr); + } + + bool is_shutdown() const { + ceph_assert(m_http_client->m_strand.running_in_this_thread()); + return (m_state == STATE_SHUTTING_DOWN || m_state == STATE_SHUTDOWN); + } + + void reset() { + ceph_assert(m_http_client->m_strand.running_in_this_thread()); + ceph_assert(m_state == STATE_READY); + + auto cct = m_http_client->m_cct; + ldout(cct, 15) << dendl; + + m_state = STATE_RESET_PENDING; + maybe_finalize_reset(); + } + + bool maybe_finalize_reset() { + if (m_state != STATE_RESET_PENDING) { + return false; + } + + if (m_in_flight_requests > 0) { + return true; + } + + 
ceph_assert(m_http_client->m_strand.running_in_this_thread()); + auto cct = m_http_client->m_cct; + ldout(cct, 15) << dendl; + + m_buffer.clear(); + + // move in-flight request back to the front of the issue queue + m_issue_queue.insert(m_issue_queue.begin(), + m_receive_queue.begin(), m_receive_queue.end()); + m_receive_queue.clear(); + + m_state = STATE_RESET_DISCONNECTING; + disconnect(new LambdaContext([this](int r) { handle_reset(r); })); + return true; + } + + void handle_reset(int r) { + auto cct = m_http_client->m_cct; + ldout(cct, 15) << "r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to disconnect stream: '" << cpp_strerror(r) + << dendl; + } + + advance_state(STATE_RESET_CONNECTING, r, nullptr); + } + + int shutdown_socket() { + if (!boost::beast::get_lowest_layer( + derived().stream()).socket().is_open()) { + return 0; + } + + auto cct = m_http_client->m_cct; + ldout(cct, 15) << dendl; + + boost::system::error_code ec; + boost::beast::get_lowest_layer(derived().stream()).socket().shutdown( + boost::asio::ip::tcp::socket::shutdown_both, ec); + + if (ec && ec != boost::beast::errc::not_connected) { + lderr(cct) << "failed to shutdown socket: " << ec.message() << dendl; + return -ec.value(); + } + + close_socket(); + return 0; + } + + void receive(std::shared_ptr<Work>&& work) { + auto cct = m_http_client->m_cct; + ldout(cct, 15) << "work=" << work.get() << dendl; + + ceph_assert(!m_receive_queue.empty()); + ++m_in_flight_requests; + + // receive the response for this request + m_parser.emplace(); + if (work->header_only()) { + // HEAD requests don't trasfer data but the parser still cares about max + // content-length + m_header_parser.emplace(); + m_header_parser->body_limit(std::numeric_limits<uint64_t>::max()); + + boost::beast::http::async_read_header( + derived().stream(), m_buffer, *m_header_parser, + [this, work=std::move(work)] + (boost::beast::error_code ec, std::size_t) mutable { + handle_receive(ec, std::move(work)); + }); + } else 
{ + m_parser->body_limit(1 << 25); // max RBD object size + boost::beast::http::async_read( + derived().stream(), m_buffer, *m_parser, + [this, work=std::move(work)] + (boost::beast::error_code ec, std::size_t) mutable { + handle_receive(ec, std::move(work)); + }); + } + } + + void handle_receive(boost::system::error_code ec, + std::shared_ptr<Work>&& work) { + auto cct = m_http_client->m_cct; + ldout(cct, 15) << "work=" << work.get() << ", r=" << -ec.value() << dendl; + + ceph_assert(m_in_flight_requests > 0); + --m_in_flight_requests; + if (maybe_finalize_reset()) { + // previous request is attempting reset to this request will be resent + return; + } + + ceph_assert(!m_receive_queue.empty()); + m_receive_queue.pop_front(); + + if (is_shutdown()) { + lderr(cct) << "client shutdown with in-flight request" << dendl; + work->complete(-ESHUTDOWN, {}); + + maybe_finalize_shutdown(); + return; + } + + if (ec) { + if (ec == boost::asio::error::bad_descriptor || + ec == boost::asio::error::broken_pipe || + ec == boost::asio::error::connection_reset || + ec == boost::asio::error::operation_aborted || + ec == boost::asio::ssl::error::stream_truncated || + ec == boost::beast::http::error::end_of_stream || + ec == boost::beast::http::error::partial_message) { + ldout(cct, 5) << "remote peer stream closed, retrying request" << dendl; + m_receive_queue.push_front(work); + } else if (ec == boost::beast::error::timeout) { + lderr(cct) << "timed-out while issuing request" << dendl; + work->complete(-ETIMEDOUT, {}); + } else { + lderr(cct) << "failed to issue request: " << ec.message() << dendl; + work->complete(-ec.value(), {}); + } + + reset(); + return; + } + + Response response; + if (work->header_only()) { + m_parser.emplace(std::move(*m_header_parser)); + } + response = m_parser->release(); + + // basic response code handling in a common location + int r = 0; + auto result = response.result(); + if (result == boost::beast::http::status::not_found) { + lderr(cct) << 
"requested resource does not exist" << dendl; + r = -ENOENT; + } else if (result == boost::beast::http::status::forbidden) { + lderr(cct) << "permission denied attempting to access resource" << dendl; + r = -EACCES; + } else if (boost::beast::http::to_status_class(result) != + boost::beast::http::status_class::successful) { + lderr(cct) << "failed to retrieve size: HTTP " << result << dendl; + r = -EIO; + } + + bool need_eof = response.need_eof(); + if (r < 0) { + work->complete(r, {}); + } else { + work->complete(0, std::move(response)); + } + + if (need_eof) { + ldout(cct, 20) << "reset required for non-pipelined response: " + << "work=" << work.get() << dendl; + reset(); + } else if (!m_receive_queue.empty()) { + auto work = m_receive_queue.front(); + receive(std::move(work)); + } + } + + void advance_state(State next_state, int r, Context* on_finish) { + auto cct = m_http_client->m_cct; + auto current_state = m_state; + ldout(cct, 15) << "current_state=" << current_state << ", " + << "next_state=" << next_state << ", " + << "r=" << r << dendl; + + m_state = next_state; + if (current_state == STATE_CONNECTING) { + if (next_state == STATE_UNINITIALIZED) { + shutdown_socket(); + on_finish->complete(r); + return; + } else if (next_state == STATE_READY) { + on_finish->complete(r); + return; + } + } else if (current_state == STATE_SHUTTING_DOWN) { + if (next_state == STATE_READY) { + // shut down requested while connecting/resetting + disconnect(new LambdaContext([this](int r) { handle_shut_down(r); })); + return; + } else if (next_state == STATE_UNINITIALIZED || + next_state == STATE_SHUTDOWN || + next_state == STATE_RESET_CONNECTING) { + ceph_assert(m_on_shutdown != nullptr); + m_on_shutdown->complete(r); + return; + } + } else if (current_state == STATE_RESET_DISCONNECTING) { + // disconnected from peer -- ignore errors and reconnect + ceph_assert(next_state == STATE_RESET_CONNECTING); + ceph_assert(on_finish == nullptr); + shutdown_socket(); + 
resolve_host(nullptr); + return; + } else if (current_state == STATE_RESET_CONNECTING) { + ceph_assert(on_finish == nullptr); + if (next_state == STATE_READY) { + // restart queued IO + if (!m_issue_queue.empty()) { + auto& work = m_issue_queue.front(); + finalize_issue(std::move(work)); + } + return; + } else if (next_state == STATE_UNINITIALIZED) { + shutdown_socket(); + + // fail all queued IO + fail_queued_work(r); + return; + } + } + + lderr(cct) << "unexpected state transition: " + << "current_state=" << current_state << ", " + << "next_state=" << next_state << dendl; + ceph_assert(false); + } + + void complete_work(std::shared_ptr<Work> work, int r, Response&& response) { + auto cct = m_http_client->m_cct; + ldout(cct, 20) << "work=" << work.get() << ", r=" << r << dendl; + + work->complete(r, std::move(response)); + } + + void fail_queued_work(int r) { + auto cct = m_http_client->m_cct; + ldout(cct, 10) << "r=" << r << dendl; + + for (auto& work : m_issue_queue) { + complete_work(work, r, {}); + } + m_issue_queue.clear(); + ceph_assert(m_receive_queue.empty()); + } +}; + +#undef dout_prefix +#define dout_prefix *_dout << "librbd::migration::HttpClient::" \ + << "PlainHttpSession " << this << " " << __func__ \ + << ": " + +template <typename I> +class HttpClient<I>::PlainHttpSession : public HttpSession<PlainHttpSession> { +public: + PlainHttpSession(HttpClient* http_client) + : HttpSession<PlainHttpSession>(http_client), + m_stream(http_client->m_strand) { + } + ~PlainHttpSession() override { + this->close_socket(); + } + + inline boost::beast::tcp_stream& + stream() { + return m_stream; + } + +protected: + void connect(boost::asio::ip::tcp::resolver::results_type results, + Context* on_finish) override { + auto http_client = this->m_http_client; + auto cct = http_client->m_cct; + ldout(cct, 15) << dendl; + + m_stream.async_connect( + results, + asio::util::get_callback_adapter( + [on_finish](int r, auto endpoint) { on_finish->complete(r); })); + } + + void 
disconnect(Context* on_finish) override { + on_finish->complete(0); + } + +private: + boost::beast::tcp_stream m_stream; + +}; + +#undef dout_prefix +#define dout_prefix *_dout << "librbd::migration::HttpClient::" \ + << "SslHttpSession " << this << " " << __func__ \ + << ": " + +template <typename I> +class HttpClient<I>::SslHttpSession : public HttpSession<SslHttpSession> { +public: + SslHttpSession(HttpClient* http_client) + : HttpSession<SslHttpSession>(http_client), + m_stream(http_client->m_strand, http_client->m_ssl_context) { + } + ~SslHttpSession() override { + this->close_socket(); + } + + inline boost::beast::ssl_stream<boost::beast::tcp_stream>& + stream() { + return m_stream; + } + +protected: + void connect(boost::asio::ip::tcp::resolver::results_type results, + Context* on_finish) override { + auto http_client = this->m_http_client; + auto cct = http_client->m_cct; + ldout(cct, 15) << dendl; + + boost::beast::get_lowest_layer(m_stream).async_connect( + results, + asio::util::get_callback_adapter( + [this, on_finish](int r, auto endpoint) { + handle_connect(r, on_finish); })); + } + + void disconnect(Context* on_finish) override { + auto http_client = this->m_http_client; + auto cct = http_client->m_cct; + ldout(cct, 15) << dendl; + + if (!m_ssl_enabled) { + on_finish->complete(0); + return; + } + + m_stream.async_shutdown( + asio::util::get_callback_adapter([this, on_finish](int r) { + shutdown(r, on_finish); })); + } + +private: + boost::beast::ssl_stream<boost::beast::tcp_stream> m_stream; + bool m_ssl_enabled = false; + + void handle_connect(int r, Context* on_finish) { + auto http_client = this->m_http_client; + auto cct = http_client->m_cct; + ldout(cct, 15) << dendl; + + if (r < 0) { + lderr(cct) << "failed to connect to host '" + << http_client->m_url_spec.host << "': " + << cpp_strerror(r) << dendl; + on_finish->complete(r); + return; + } + + handshake(on_finish); + } + + void handshake(Context* on_finish) { + auto http_client = 
this->m_http_client; + auto cct = http_client->m_cct; + ldout(cct, 15) << dendl; + + auto& host = http_client->m_url_spec.host; + m_stream.set_verify_mode( + boost::asio::ssl::verify_peer | + boost::asio::ssl::verify_fail_if_no_peer_cert); + m_stream.set_verify_callback( + [host, next=boost::asio::ssl::host_name_verification(host), + ignore_self_signed=http_client->m_ignore_self_signed_cert] + (bool preverified, boost::asio::ssl::verify_context& ctx) { + if (!preverified && ignore_self_signed) { + auto ec = X509_STORE_CTX_get_error(ctx.native_handle()); + switch (ec) { + case X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT: + case X509_V_ERR_SELF_SIGNED_CERT_IN_CHAIN: + // ignore self-signed cert issues + preverified = true; + break; + default: + break; + } + } + return next(preverified, ctx); + }); + + // Set SNI Hostname (many hosts need this to handshake successfully) + if(!SSL_set_tlsext_host_name(m_stream.native_handle(), + http_client->m_url_spec.host.c_str())) { + int r = -::ERR_get_error(); + lderr(cct) << "failed to initialize SNI hostname: " << cpp_strerror(r) + << dendl; + on_finish->complete(r); + return; + } + + // Perform the SSL/TLS handshake + m_stream.async_handshake( + boost::asio::ssl::stream_base::client, + asio::util::get_callback_adapter( + [this, on_finish](int r) { handle_handshake(r, on_finish); })); + } + + void handle_handshake(int r, Context* on_finish) { + auto http_client = this->m_http_client; + auto cct = http_client->m_cct; + ldout(cct, 15) << "r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to complete handshake: " << cpp_strerror(r) + << dendl; + disconnect(new LambdaContext([r, on_finish](int) { + on_finish->complete(r); })); + return; + } + + m_ssl_enabled = true; + on_finish->complete(0); + } + + void shutdown(int r, Context* on_finish) { + auto http_client = this->m_http_client; + auto cct = http_client->m_cct; + ldout(cct, 15) << "r=" << r << dendl; + + on_finish->complete(r); + } +}; + +#undef dout_prefix +#define 
dout_prefix *_dout << "librbd::migration::HttpClient: " << this \ + << " " << __func__ << ": " + +template <typename I> +HttpClient<I>::HttpClient(I* image_ctx, const std::string& url) + : m_cct(image_ctx->cct), m_image_ctx(image_ctx), + m_asio_engine(image_ctx->asio_engine), m_url(url), m_strand(*m_asio_engine), + m_ssl_context(boost::asio::ssl::context::sslv23_client) { + m_ssl_context.set_default_verify_paths(); +} + +template <typename I> +void HttpClient<I>::open(Context* on_finish) { + ldout(m_cct, 10) << "url=" << m_url << dendl; + + int r = util::parse_url(m_cct, m_url, &m_url_spec); + if (r < 0) { + lderr(m_cct) << "failed to parse url '" << m_url << "': " << cpp_strerror(r) + << dendl; + on_finish->complete(-EINVAL); + return; + } + + boost::asio::post(m_strand, [this, on_finish]() mutable { + create_http_session(on_finish); }); +} + +template <typename I> +void HttpClient<I>::close(Context* on_finish) { + boost::asio::post(m_strand, [this, on_finish]() mutable { + shut_down_http_session(on_finish); }); +} + +template <typename I> +void HttpClient<I>::get_size(uint64_t* size, Context* on_finish) { + ldout(m_cct, 10) << dendl; + + Request req; + req.method(boost::beast::http::verb::head); + + issue( + std::move(req), [this, size, on_finish](int r, Response&& response) { + handle_get_size(r, std::move(response), size, on_finish); + }); +} + +template <typename I> +void HttpClient<I>::handle_get_size(int r, Response&& response, uint64_t* size, + Context* on_finish) { + ldout(m_cct, 10) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to retrieve size: " << cpp_strerror(r) << dendl; + on_finish->complete(r); + return; + } else if (!response.has_content_length()) { + lderr(m_cct) << "failed to retrieve size: missing content-length" << dendl; + on_finish->complete(-EINVAL); + return; + } + + auto content_length = response[boost::beast::http::field::content_length]; + try { + *size = boost::lexical_cast<uint64_t>(content_length); + } catch 
(boost::bad_lexical_cast&) { + lderr(m_cct) << "invalid content-length in response" << dendl; + on_finish->complete(-EBADMSG); + return; + } + + on_finish->complete(0); +} + +template <typename I> +void HttpClient<I>::read(io::Extents&& byte_extents, bufferlist* data, + Context* on_finish) { + ldout(m_cct, 20) << dendl; + + auto aio_comp = io::AioCompletion::create_and_start( + on_finish, librbd::util::get_image_ctx(m_image_ctx), io::AIO_TYPE_READ); + aio_comp->set_request_count(byte_extents.size()); + + // utilize ReadResult to assemble multiple byte extents into a single bl + // since boost::beast doesn't support multipart responses out-of-the-box + io::ReadResult read_result{data}; + aio_comp->read_result = std::move(read_result); + aio_comp->read_result.set_image_extents(byte_extents); + + // issue a range get request for each extent + uint64_t buffer_offset = 0; + for (auto [byte_offset, byte_length] : byte_extents) { + auto ctx = new io::ReadResult::C_ImageReadRequest( + aio_comp, buffer_offset, {{byte_offset, byte_length}}); + buffer_offset += byte_length; + + Request req; + req.method(boost::beast::http::verb::get); + + std::stringstream range; + ceph_assert(byte_length > 0); + range << "bytes=" << byte_offset << "-" << (byte_offset + byte_length - 1); + req.set(boost::beast::http::field::range, range.str()); + + issue( + std::move(req), + [this, byte_offset=byte_offset, byte_length=byte_length, ctx] + (int r, Response&& response) { + handle_read(r, std::move(response), byte_offset, byte_length, &ctx->bl, + ctx); + }); + } +} + +template <typename I> +void HttpClient<I>::handle_read(int r, Response&& response, + uint64_t byte_offset, uint64_t byte_length, + bufferlist* data, Context* on_finish) { + ldout(m_cct, 20) << "bytes=" << byte_offset << "~" << byte_length << ", " + << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to read requested byte range: " + << cpp_strerror(r) << dendl; + on_finish->complete(r); + return; + } else if 
(response.result() != boost::beast::http::status::partial_content) { + lderr(m_cct) << "failed to retrieve requested byte range: HTTP " + << response.result() << dendl; + on_finish->complete(-EIO); + return; + } else if (byte_length != response.body().size()) { + lderr(m_cct) << "unexpected short range read: " + << "wanted=" << byte_length << ", " + << "received=" << response.body().size() << dendl; + on_finish->complete(-EINVAL); + return; + } + + data->clear(); + data->append(response.body()); + on_finish->complete(data->length()); +} + +template <typename I> +void HttpClient<I>::issue(std::shared_ptr<Work>&& work) { + boost::asio::post(m_strand, [this, work=std::move(work)]() mutable { + m_http_session->issue(std::move(work)); }); +} + +template <typename I> +void HttpClient<I>::create_http_session(Context* on_finish) { + ldout(m_cct, 15) << dendl; + + ceph_assert(m_http_session == nullptr); + switch (m_url_spec.scheme) { + case URL_SCHEME_HTTP: + m_http_session = std::make_unique<PlainHttpSession>(this); + break; + case URL_SCHEME_HTTPS: + m_http_session = std::make_unique<SslHttpSession>(this); + break; + default: + ceph_assert(false); + break; + } + + m_http_session->init(on_finish); +} + +template <typename I> +void HttpClient<I>::shut_down_http_session(Context* on_finish) { + ldout(m_cct, 15) << dendl; + + if (m_http_session == nullptr) { + on_finish->complete(0); + return; + } + + m_http_session->shut_down(on_finish); +} + +} // namespace migration +} // namespace librbd + +template class librbd::migration::HttpClient<librbd::ImageCtx>; diff --git a/src/librbd/migration/HttpClient.h b/src/librbd/migration/HttpClient.h new file mode 100644 index 00000000000..bab1a29abca --- /dev/null +++ b/src/librbd/migration/HttpClient.h @@ -0,0 +1,204 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_MIGRATION_HTTP_CLIENT_H +#define CEPH_LIBRBD_MIGRATION_HTTP_CLIENT_H + +#include 
"include/common_fwd.h" +#include "include/int_types.h" +#include "librbd/io/Types.h" +#include "librbd/migration/HttpProcessorInterface.h" +#include "librbd/migration/Types.h" +#include <boost/asio/io_context_strand.hpp> +#include <boost/asio/ip/tcp.hpp> +#include <boost/asio/ssl/context.hpp> +#include <boost/beast/version.hpp> +#include <boost/beast/core/tcp_stream.hpp> +#include <boost/beast/http/empty_body.hpp> +#include <boost/beast/http/message.hpp> +#include <boost/beast/http/string_body.hpp> +#include <boost/beast/http/write.hpp> +#include <boost/beast/ssl/ssl_stream.hpp> +#include <functional> +#include <memory> +#include <string> +#include <utility> + +struct Context; + +namespace librbd { + +struct AsioEngine; +struct ImageCtx; + +namespace migration { + +template <typename ImageCtxT> +class HttpClient { +public: + using EmptyBody = boost::beast::http::empty_body; + using StringBody = boost::beast::http::string_body; + using Request = boost::beast::http::request<EmptyBody>; + using Response = boost::beast::http::response<StringBody>; + + using RequestPreprocessor = std::function<void(Request&)>; + + static HttpClient* create(ImageCtxT* image_ctx, const std::string& url) { + return new HttpClient(image_ctx, url); + } + + HttpClient(ImageCtxT* image_ctx, const std::string& url); + HttpClient(const HttpClient&) = delete; + HttpClient& operator=(const HttpClient&) = delete; + + void open(Context* on_finish); + void close(Context* on_finish); + + void get_size(uint64_t* size, Context* on_finish); + + void read(io::Extents&& byte_extents, bufferlist* data, + Context* on_finish); + + void set_ignore_self_signed_cert(bool ignore) { + m_ignore_self_signed_cert = ignore; + } + + void set_http_processor(HttpProcessorInterface* http_processor) { + m_http_processor = http_processor; + } + + template <class Body, typename Completion> + void issue(boost::beast::http::request<Body>&& request, + Completion&& completion) { + struct WorkImpl : Work { + HttpClient* 
http_client; + boost::beast::http::request<Body> request; + Completion completion; + + WorkImpl(HttpClient* http_client, + boost::beast::http::request<Body>&& request, + Completion&& completion) + : http_client(http_client), request(std::move(request)), + completion(std::move(completion)) { + } + WorkImpl(const WorkImpl&) = delete; + WorkImpl& operator=(const WorkImpl&) = delete; + + bool need_eof() const override { + return request.need_eof(); + } + + bool header_only() const override { + return (request.method() == boost::beast::http::verb::head); + } + + void complete(int r, Response&& response) override { + completion(r, std::move(response)); + } + + void operator()(boost::beast::tcp_stream& stream) override { + preprocess_request(); + + boost::beast::http::async_write( + stream, request, + [http_session=http_client->m_http_session.get(), + work=this->shared_from_this()] + (boost::beast::error_code ec, std::size_t) mutable { + http_session->handle_issue(ec, std::move(work)); + }); + } + + void operator()( + boost::beast::ssl_stream<boost::beast::tcp_stream>& stream) override { + preprocess_request(); + + boost::beast::http::async_write( + stream, request, + [http_session=http_client->m_http_session.get(), + work=this->shared_from_this()] + (boost::beast::error_code ec, std::size_t) mutable { + http_session->handle_issue(ec, std::move(work)); + }); + } + + void preprocess_request() { + if (http_client->m_http_processor) { + http_client->m_http_processor->process_request(request); + } + } + }; + + initialize_default_fields(request); + issue(std::make_shared<WorkImpl>(this, std::move(request), + std::move(completion))); + } + +private: + struct Work; + struct HttpSessionInterface { + virtual ~HttpSessionInterface() {} + + virtual void init(Context* on_finish) = 0; + virtual void shut_down(Context* on_finish) = 0; + + virtual void issue(std::shared_ptr<Work>&& work) = 0; + virtual void handle_issue(boost::system::error_code ec, + std::shared_ptr<Work>&& work) = 0; 
+ }; + + struct Work : public std::enable_shared_from_this<Work> { + virtual ~Work() {} + virtual bool need_eof() const = 0; + virtual bool header_only() const = 0; + virtual void complete(int r, Response&&) = 0; + virtual void operator()(boost::beast::tcp_stream& stream) = 0; + virtual void operator()( + boost::beast::ssl_stream<boost::beast::tcp_stream>& stream) = 0; + }; + + template <typename D> struct HttpSession; + struct PlainHttpSession; + struct SslHttpSession; + + CephContext* m_cct; + ImageCtxT* m_image_ctx; + std::shared_ptr<AsioEngine> m_asio_engine; + std::string m_url; + + UrlSpec m_url_spec; + + bool m_ignore_self_signed_cert = false; + + HttpProcessorInterface* m_http_processor = nullptr; + + boost::asio::io_context::strand m_strand; + + boost::asio::ssl::context m_ssl_context; + std::unique_ptr<HttpSessionInterface> m_http_session; + + template <typename Fields> + void initialize_default_fields(Fields& fields) const { + fields.target(m_url_spec.path); + fields.set(boost::beast::http::field::host, m_url_spec.host); + fields.set(boost::beast::http::field::user_agent, + BOOST_BEAST_VERSION_STRING); + } + + void handle_get_size(int r, Response&& response, uint64_t* size, + Context* on_finish); + + void handle_read(int r, Response&& response, uint64_t byte_offset, + uint64_t byte_length, bufferlist* data, Context* on_finish); + + void issue(std::shared_ptr<Work>&& work); + + void create_http_session(Context* on_finish); + void shut_down_http_session(Context* on_finish); +}; + +} // namespace migration +} // namespace librbd + +extern template class librbd::migration::HttpClient<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_MIGRATION_HTTP_CLIENT_H diff --git a/src/librbd/migration/HttpProcessorInterface.h b/src/librbd/migration/HttpProcessorInterface.h new file mode 100644 index 00000000000..3d9af88bd16 --- /dev/null +++ b/src/librbd/migration/HttpProcessorInterface.h @@ -0,0 +1,27 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
+// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_MIGRATION_HTTP_PROCESSOR_INTERFACE_H +#define CEPH_LIBRBD_MIGRATION_HTTP_PROCESSOR_INTERFACE_H + +#include <boost/beast/http/empty_body.hpp> +#include <boost/beast/http/message.hpp> + +namespace librbd { +namespace migration { + +struct HttpProcessorInterface { + using EmptyBody = boost::beast::http::empty_body; + using EmptyRequest = boost::beast::http::request<EmptyBody>; + + virtual ~HttpProcessorInterface() { + } + + virtual void process_request(EmptyRequest& request) = 0; + +}; + +} // namespace migration +} // namespace librbd + +#endif // CEPH_LIBRBD_MIGRATION_HTTP_PROCESSOR_INTERFACE_H diff --git a/src/librbd/migration/HttpStream.cc b/src/librbd/migration/HttpStream.cc new file mode 100644 index 00000000000..7b8f91a325c --- /dev/null +++ b/src/librbd/migration/HttpStream.cc @@ -0,0 +1,82 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/migration/HttpStream.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/AsioEngine.h" +#include "librbd/ImageCtx.h" +#include "librbd/asio/Utils.h" +#include "librbd/migration/HttpClient.h" +#include <boost/beast/http.hpp> + +namespace librbd { +namespace migration { + +namespace { + +const std::string URL_KEY {"url"}; + +} // anonymous namespace + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::migration::HttpStream: " << this \ + << " " << __func__ << ": " + +template <typename I> +HttpStream<I>::HttpStream(I* image_ctx, const json_spirit::mObject& json_object) + : m_image_ctx(image_ctx), m_cct(image_ctx->cct), + m_asio_engine(image_ctx->asio_engine), m_json_object(json_object) { +} + +template <typename I> +HttpStream<I>::~HttpStream() { +} + +template <typename I> +void HttpStream<I>::open(Context* on_finish) { + auto& url_value = m_json_object[URL_KEY]; + if (url_value.type() != json_spirit::str_type) { + lderr(m_cct) << 
"failed to locate '" << URL_KEY << "' key" << dendl; + on_finish->complete(-EINVAL); + return; + } + + m_url = url_value.get_str(); + ldout(m_cct, 10) << "url=" << m_url << dendl; + + m_http_client.reset(HttpClient<I>::create(m_image_ctx, m_url)); + m_http_client->open(on_finish); +} + +template <typename I> +void HttpStream<I>::close(Context* on_finish) { + ldout(m_cct, 10) << dendl; + + if (!m_http_client) { + on_finish->complete(0); + } + + m_http_client->close(on_finish); +} + +template <typename I> +void HttpStream<I>::get_size(uint64_t* size, Context* on_finish) { + ldout(m_cct, 10) << dendl; + + m_http_client->get_size(size, on_finish); +} + +template <typename I> +void HttpStream<I>::read(io::Extents&& byte_extents, bufferlist* data, + Context* on_finish) { + ldout(m_cct, 20) << "byte_extents=" << byte_extents << dendl; + + m_http_client->read(std::move(byte_extents), data, on_finish); +} + +} // namespace migration +} // namespace librbd + +template class librbd::migration::HttpStream<librbd::ImageCtx>; diff --git a/src/librbd/migration/HttpStream.h b/src/librbd/migration/HttpStream.h new file mode 100644 index 00000000000..01a58371496 --- /dev/null +++ b/src/librbd/migration/HttpStream.h @@ -0,0 +1,68 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_MIGRATION_HTTP_STREAM_H +#define CEPH_LIBRBD_MIGRATION_HTTP_STREAM_H + +#include "include/int_types.h" +#include "librbd/migration/StreamInterface.h" +#include <boost/beast/http/message.hpp> +#include <boost/beast/http/string_body.hpp> +#include <json_spirit/json_spirit.h> +#include <memory> +#include <string> + +struct Context; + +namespace librbd { + +struct AsioEngine; +struct ImageCtx; + +namespace migration { + +template <typename> class HttpClient; + +template <typename ImageCtxT> +class HttpStream : public StreamInterface { +public: + static HttpStream* create(ImageCtxT* image_ctx, + const json_spirit::mObject& json_object) { + 
return new HttpStream(image_ctx, json_object); + } + + HttpStream(ImageCtxT* image_ctx, const json_spirit::mObject& json_object); + ~HttpStream() override; + + HttpStream(const HttpStream&) = delete; + HttpStream& operator=(const HttpStream&) = delete; + + void open(Context* on_finish) override; + void close(Context* on_finish) override; + + void get_size(uint64_t* size, Context* on_finish) override; + + void read(io::Extents&& byte_extents, bufferlist* data, + Context* on_finish) override; + +private: + using HttpResponse = boost::beast::http::response< + boost::beast::http::string_body>; + + ImageCtxT* m_image_ctx; + CephContext* m_cct; + std::shared_ptr<AsioEngine> m_asio_engine; + json_spirit::mObject m_json_object; + + std::string m_url; + + std::unique_ptr<HttpClient<ImageCtxT>> m_http_client; + +}; + +} // namespace migration +} // namespace librbd + +extern template class librbd::migration::HttpStream<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_MIGRATION_HTTP_STREAM_H diff --git a/src/librbd/migration/NativeFormat.cc b/src/librbd/migration/NativeFormat.cc index e15e2820f38..68d1ac864a3 100644 --- a/src/librbd/migration/NativeFormat.cc +++ b/src/librbd/migration/NativeFormat.cc @@ -10,6 +10,7 @@ #include "librbd/asio/ContextWQ.h" #include "librbd/io/ImageDispatchSpec.h" #include "json_spirit/json_spirit.h" +#include "boost/lexical_cast.hpp" #include <sstream> #define dout_subsys ceph_subsys_rbd @@ -24,6 +25,7 @@ namespace { const std::string TYPE_KEY{"type"}; const std::string POOL_ID_KEY{"pool_id"}; +const std::string POOL_NAME_KEY{"pool_name"}; const std::string POOL_NAMESPACE_KEY{"pool_namespace"}; const std::string IMAGE_NAME_KEY{"image_name"}; const std::string IMAGE_ID_KEY{"image_id"}; @@ -57,21 +59,52 @@ void NativeFormat<I>::open(Context* on_finish) { auto cct = m_image_ctx->cct; ldout(cct, 10) << dendl; + auto& pool_name_val = m_json_object[POOL_NAME_KEY]; + if (pool_name_val.type() == json_spirit::str_type) { + librados::Rados 
rados(m_image_ctx->md_ctx); + librados::IoCtx io_ctx; + int r = rados.ioctx_create(pool_name_val.get_str().c_str(), io_ctx); + if (r < 0 ) { + lderr(cct) << "invalid pool name" << dendl; + on_finish->complete(r); + return; + } + + m_pool_id = io_ctx.get_id(); + } else if (pool_name_val.type() != json_spirit::null_type) { + lderr(cct) << "invalid pool name" << dendl; + on_finish->complete(-EINVAL); + return; + } + auto& pool_id_val = m_json_object[POOL_ID_KEY]; - if (pool_id_val.type() != json_spirit::int_type) { + if (m_pool_id != -1 && pool_id_val.type() != json_spirit::null_type) { + lderr(cct) << "cannot specify both pool name and pool id" << dendl; + on_finish->complete(-EINVAL); + return; + } else if (pool_id_val.type() == json_spirit::int_type) { + m_pool_id = pool_id_val.get_int64(); + } else if (pool_id_val.type() == json_spirit::str_type) { + try { + m_pool_id = boost::lexical_cast<int64_t>(pool_id_val.get_str()); + } catch (boost::bad_lexical_cast &) { + } + } + + if (m_pool_id == -1) { lderr(cct) << "missing or invalid pool id" << dendl; on_finish->complete(-EINVAL); return; } - m_pool_id = pool_id_val.get_int64(); auto& pool_namespace_val = m_json_object[POOL_NAMESPACE_KEY]; - if (pool_namespace_val.type() != json_spirit::str_type) { - lderr(cct) << "missing or invalid pool namespace" << dendl; + if (pool_namespace_val.type() == json_spirit::str_type) { + m_pool_namespace = pool_namespace_val.get_str(); + } else if (pool_namespace_val.type() != json_spirit::null_type) { + lderr(cct) << "invalid pool namespace" << dendl; on_finish->complete(-EINVAL); return; } - m_pool_namespace = pool_namespace_val.get_str(); auto& image_name_val = m_json_object[IMAGE_NAME_KEY]; if (image_name_val.type() != json_spirit::str_type) { @@ -120,8 +153,7 @@ void NativeFormat<I>::open(Context* on_finish) { } // open the source RBD image - auto ctx = util::create_async_context_callback(*m_image_ctx, on_finish); - m_image_ctx->state->open(flags, ctx); + 
m_image_ctx->state->open(flags, on_finish); } template <typename I> diff --git a/src/librbd/migration/OpenSourceImageRequest.cc b/src/librbd/migration/OpenSourceImageRequest.cc index 499525395d3..e19739b5e11 100644 --- a/src/librbd/migration/OpenSourceImageRequest.cc +++ b/src/librbd/migration/OpenSourceImageRequest.cc @@ -5,6 +5,7 @@ #include "common/dout.h" #include "common/errno.h" #include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" #include "librbd/Utils.h" #include "librbd/io/ImageDispatcher.h" #include "librbd/migration/ImageDispatch.h" @@ -21,13 +22,13 @@ namespace migration { template <typename I> OpenSourceImageRequest<I>::OpenSourceImageRequest( - I* dst_image_ctx, uint64_t src_snap_id, + librados::IoCtx& io_ctx, I* dst_image_ctx, uint64_t src_snap_id, const MigrationInfo &migration_info, I** src_image_ctx, Context* on_finish) - : m_dst_image_ctx(dst_image_ctx), m_src_snap_id(src_snap_id), + : m_cct(reinterpret_cast<CephContext*>(io_ctx.cct())), m_io_ctx(io_ctx), + m_dst_image_ctx(dst_image_ctx), m_src_snap_id(src_snap_id), m_migration_info(migration_info), m_src_image_ctx(src_image_ctx), m_on_finish(on_finish) { - auto cct = m_dst_image_ctx->cct; - ldout(cct, 10) << dendl; + ldout(m_cct, 10) << dendl; } template <typename I> @@ -37,12 +38,10 @@ void OpenSourceImageRequest<I>::send() { template <typename I> void OpenSourceImageRequest<I>::open_source() { - auto cct = m_dst_image_ctx->cct; - ldout(cct, 10) << dendl; + ldout(m_cct, 10) << dendl; // note that all source image ctx properties are placeholders - *m_src_image_ctx = I::create("", "", m_src_snap_id, m_dst_image_ctx->md_ctx, - true); + *m_src_image_ctx = I::create("", "", m_src_snap_id, m_io_ctx, true); auto src_image_ctx = *m_src_image_ctx; src_image_ctx->child = m_dst_image_ctx; @@ -69,8 +68,9 @@ void OpenSourceImageRequest<I>::open_source() { int r = source_spec_builder.parse_source_spec(source_spec, &source_spec_object); if (r < 0) { - lderr(cct) << "failed to parse migration 
source-spec:" << cpp_strerror(r) - << dendl; + lderr(m_cct) << "failed to parse migration source-spec:" << cpp_strerror(r) + << dendl; + (*m_src_image_ctx)->state->close(); finish(r); return; } @@ -78,8 +78,9 @@ void OpenSourceImageRequest<I>::open_source() { r = source_spec_builder.build_format(source_spec_object, import_only, &m_format); if (r < 0) { - lderr(cct) << "failed to build migration format handler: " - << cpp_strerror(r) << dendl; + lderr(m_cct) << "failed to build migration format handler: " + << cpp_strerror(r) << dendl; + (*m_src_image_ctx)->state->close(); finish(r); return; } @@ -92,33 +93,134 @@ void OpenSourceImageRequest<I>::open_source() { template <typename I> void OpenSourceImageRequest<I>::handle_open_source(int r) { - auto cct = m_dst_image_ctx->cct; - ldout(cct, 10) << "r=" << r << dendl; + ldout(m_cct, 10) << "r=" << r << dendl; if (r < 0) { - lderr(cct) << "failed to open migration source: " << cpp_strerror(r) - << dendl; + lderr(m_cct) << "failed to open migration source: " << cpp_strerror(r) + << dendl; finish(r); return; } + get_image_size(); +} + +template <typename I> +void OpenSourceImageRequest<I>::get_image_size() { + ldout(m_cct, 10) << dendl; + + auto ctx = util::create_context_callback< + OpenSourceImageRequest<I>, + &OpenSourceImageRequest<I>::handle_get_image_size>(this); + m_format->get_image_size(CEPH_NOSNAP, &m_image_size, ctx); +} + +template <typename I> +void OpenSourceImageRequest<I>::handle_get_image_size(int r) { + ldout(m_cct, 10) << "r=" << r << ", " + << "image_size=" << m_image_size << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to retrieve image size: " << cpp_strerror(r) + << dendl; + close_image(r); + return; + } + + auto src_image_ctx = *m_src_image_ctx; + src_image_ctx->image_lock.lock(); + src_image_ctx->size = m_image_size; + src_image_ctx->image_lock.unlock(); + + get_snapshots(); +} + +template <typename I> +void OpenSourceImageRequest<I>::get_snapshots() { + ldout(m_cct, 10) << dendl; + + auto 
ctx = util::create_context_callback< + OpenSourceImageRequest<I>, + &OpenSourceImageRequest<I>::handle_get_snapshots>(this); + m_format->get_snapshots(&m_snap_infos, ctx); +} + +template <typename I> +void OpenSourceImageRequest<I>::handle_get_snapshots(int r) { + ldout(m_cct, 10) << "r=" << r << dendl; + + if (r < 0) { + lderr(m_cct) << "failed to retrieve snapshots: " << cpp_strerror(r) + << dendl; + close_image(r); + return; + } + + // copy snapshot metadata to image ctx + auto src_image_ctx = *m_src_image_ctx; + src_image_ctx->image_lock.lock(); + + src_image_ctx->snaps.clear(); + src_image_ctx->snap_info.clear(); + src_image_ctx->snap_ids.clear(); + + ::SnapContext snapc; + for (auto it = m_snap_infos.rbegin(); it != m_snap_infos.rend(); ++it) { + auto& [snap_id, snap_info] = *it; + snapc.snaps.push_back(snap_id); + + src_image_ctx->add_snap( + snap_info.snap_namespace, snap_info.name, snap_id, + snap_info.size, snap_info.parent, snap_info.protection_status, + snap_info.flags, snap_info.timestamp); + } + if (!snapc.snaps.empty()) { + snapc.seq = snapc.snaps[0]; + } + src_image_ctx->snapc = snapc; + + // ensure data_ctx and data_io_context are pointing to correct snapshot + if (src_image_ctx->open_snap_id != CEPH_NOSNAP) { + int r = src_image_ctx->snap_set(src_image_ctx->open_snap_id); + ceph_assert(r == 0); + src_image_ctx->open_snap_id = CEPH_NOSNAP; + } + + src_image_ctx->image_lock.unlock(); + + finish(0); +} + +template <typename I> +void OpenSourceImageRequest<I>::close_image(int r) { + ldout(m_cct, 10) << "r=" << r << dendl; + + auto ctx = new LambdaContext([this, r](int) { + finish(r); + }); + (*m_src_image_ctx)->state->close(ctx); +} + +template <typename I> +void OpenSourceImageRequest<I>::register_image_dispatch() { + ldout(m_cct, 10) << dendl; + // intercept any IO requests to the source image auto io_image_dispatch = ImageDispatch<I>::create( *m_src_image_ctx, std::move(m_format)); 
(*m_src_image_ctx)->io_image_dispatcher->register_dispatch(io_image_dispatch); - - finish(0); } template <typename I> void OpenSourceImageRequest<I>::finish(int r) { - auto cct = m_dst_image_ctx->cct; - ldout(cct, 10) << "r=" << r << dendl; + ldout(m_cct, 10) << "r=" << r << dendl; if (r < 0) { - delete *m_src_image_ctx; *m_src_image_ctx = nullptr; + } else { + register_image_dispatch(); } + m_on_finish->complete(r); delete this; } diff --git a/src/librbd/migration/OpenSourceImageRequest.h b/src/librbd/migration/OpenSourceImageRequest.h index 1ad247686a1..f0dab3ad99c 100644 --- a/src/librbd/migration/OpenSourceImageRequest.h +++ b/src/librbd/migration/OpenSourceImageRequest.h @@ -4,7 +4,9 @@ #ifndef CEPH_LIBRBD_MIGRATION_OPEN_SOURCE_IMAGE_REQUEST_H #define CEPH_LIBRBD_MIGRATION_OPEN_SOURCE_IMAGE_REQUEST_H +#include "include/rados/librados_fwd.hpp" #include "librbd/Types.h" +#include <map> #include <memory> struct Context; @@ -20,17 +22,19 @@ struct FormatInterface; template <typename ImageCtxT> class OpenSourceImageRequest { public: - static OpenSourceImageRequest* create(ImageCtxT* destination_image_ctx, + static OpenSourceImageRequest* create(librados::IoCtx& io_ctx, + ImageCtxT* destination_image_ctx, uint64_t src_snap_id, const MigrationInfo &migration_info, ImageCtxT** source_image_ctx, Context* on_finish) { - return new OpenSourceImageRequest(destination_image_ctx, src_snap_id, - migration_info, source_image_ctx, - on_finish); + return new OpenSourceImageRequest(io_ctx, destination_image_ctx, + src_snap_id, migration_info, + source_image_ctx, on_finish); } - OpenSourceImageRequest(ImageCtxT* destination_image_ctx, + OpenSourceImageRequest(librados::IoCtx& io_ctx, + ImageCtxT* destination_image_ctx, uint64_t src_snap_id, const MigrationInfo &migration_info, ImageCtxT** source_image_ctx, @@ -48,11 +52,21 @@ private: * OPEN_SOURCE * | * v - * <finish> + * GET_IMAGE_SIZE * * * * * * * + * | * + * v v + * GET_SNAPSHOTS * * * * > CLOSE_IMAGE + * | | + * v | + * 
<finish> <------------------/ * * @endverbatim */ + typedef std::map<uint64_t, SnapInfo> SnapInfos; + + CephContext* m_cct; + librados::IoCtx& m_io_ctx; ImageCtxT* m_dst_image_ctx; uint64_t m_src_snap_id; MigrationInfo m_migration_info; @@ -61,9 +75,22 @@ private: std::unique_ptr<FormatInterface> m_format; + uint64_t m_image_size = 0; + SnapInfos m_snap_infos; + void open_source(); void handle_open_source(int r); + void get_image_size(); + void handle_get_image_size(int r); + + void get_snapshots(); + void handle_get_snapshots(int r); + + void close_image(int r); + + void register_image_dispatch(); + void finish(int r); }; diff --git a/src/librbd/migration/RawFormat.cc b/src/librbd/migration/RawFormat.cc index 582cb664fd8..774762594f4 100644 --- a/src/librbd/migration/RawFormat.cc +++ b/src/librbd/migration/RawFormat.cc @@ -9,101 +9,20 @@ #include "librbd/Utils.h" #include "librbd/io/AioCompletion.h" #include "librbd/io/ReadResult.h" -#include "librbd/migration/FileStream.h" +#include "librbd/migration/SnapshotInterface.h" #include "librbd/migration/SourceSpecBuilder.h" -#include "librbd/migration/StreamInterface.h" - -#define dout_subsys ceph_subsys_rbd -#undef dout_prefix -#define dout_prefix *_dout << "librbd::migration::RawFormat: " << this \ - << " " << __func__ << ": " namespace librbd { namespace migration { -#define dout_subsys ceph_subsys_rbd -#undef dout_prefix -#define dout_prefix *_dout << "librbd::migration::RawFormat::OpenRequest " \ - << this << " " << __func__ << ": " - -template <typename I> -struct RawFormat<I>::OpenRequest { - RawFormat* raw_format; - Context* on_finish; - - uint64_t image_size = 0; +namespace { - OpenRequest(RawFormat* raw_format, Context* on_finish) - : raw_format(raw_format), on_finish(on_finish) { - } +static const std::string SNAPSHOTS_KEY {"snapshots"}; - void send() { - open_stream(); - } - - void open_stream() { - auto cct = raw_format->m_image_ctx->cct; - ldout(cct, 10) << dendl; - - auto ctx = 
util::create_context_callback< - OpenRequest, &OpenRequest::handle_open_stream>(this); - raw_format->m_stream->open(ctx); - } - - void handle_open_stream(int r) { - auto cct = raw_format->m_image_ctx->cct; - ldout(cct, 10) << "r=" << r << dendl; - - if (r < 0) { - lderr(cct) << "failed to open stream: " << cpp_strerror(r) << dendl; - finish(r); - return; - } - - get_image_size(); - } - - void get_image_size() { - auto cct = raw_format->m_image_ctx->cct; - ldout(cct, 10) << dendl; - - auto ctx = util::create_context_callback< - OpenRequest, &OpenRequest::handle_get_image_size>(this); - raw_format->get_image_size(CEPH_NOSNAP, &image_size, ctx); - } - void handle_get_image_size(int r) { - auto cct = raw_format->m_image_ctx->cct; - ldout(cct, 10) << "r=" << r << dendl; - - if (r < 0) { - lderr(cct) << "failed to open stream: " << cpp_strerror(r) << dendl; - finish(r); - return; - } - - raw_format->m_image_ctx->image_lock.lock(); - raw_format->m_image_ctx->size = image_size; - raw_format->m_image_ctx->image_lock.unlock(); - - finish(0); - } - - void finish(int r) { - auto cct = raw_format->m_image_ctx->cct; - ldout(cct, 10) << "r=" << r << dendl; - - if (r < 0) { - raw_format->m_image_ctx->state->close(new LambdaContext( - [r, on_finish=on_finish](int _) { on_finish->complete(r); })); - } else { - on_finish->complete(0); - } - - delete this; - } -}; +} // anonymous namespace +#define dout_subsys ceph_subsys_rbd #undef dout_prefix #define dout_prefix *_dout << "librbd::migration::RawFormat: " << this \ << " " << __func__ << ": " @@ -121,17 +40,81 @@ void RawFormat<I>::open(Context* on_finish) { auto cct = m_image_ctx->cct; ldout(cct, 10) << dendl; - int r = m_source_spec_builder->build_stream(m_json_object, &m_stream); + on_finish = new LambdaContext([this, on_finish](int r) { + handle_open(r, on_finish); }); + + // treat the base image as a HEAD-revision snapshot + Snapshots snapshots; + int r = m_source_spec_builder->build_snapshot(m_json_object, CEPH_NOSNAP, + 
&snapshots[CEPH_NOSNAP]); + if (r < 0) { + lderr(cct) << "failed to build HEAD revision handler: " << cpp_strerror(r) + << dendl; + on_finish->complete(r); + return; + } + + auto& snapshots_val = m_json_object[SNAPSHOTS_KEY]; + if (snapshots_val.type() == json_spirit::array_type) { + auto& snapshots_arr = snapshots_val.get_array(); + for (auto& snapshot_val : snapshots_arr) { + uint64_t index = snapshots.size(); + if (snapshot_val.type() != json_spirit::obj_type) { + lderr(cct) << "invalid snapshot " << index << " JSON: " + << cpp_strerror(r) << dendl; + on_finish->complete(-EINVAL); + return; + } + + auto& snapshot_obj = snapshot_val.get_obj(); + r = m_source_spec_builder->build_snapshot(snapshot_obj, index, + &snapshots[index]); + if (r < 0) { + lderr(cct) << "failed to build snapshot " << index << " handler: " + << cpp_strerror(r) << dendl; + on_finish->complete(r); + return; + } + } + } else if (snapshots_val.type() != json_spirit::null_type) { + lderr(cct) << "invalid snapshots array" << dendl; + on_finish->complete(-EINVAL); + return; + } + + m_snapshots = std::move(snapshots); + + auto gather_ctx = new C_Gather(cct, on_finish); + SnapshotInterface* previous_snapshot = nullptr; + for (auto& [_, snapshot] : m_snapshots) { + snapshot->open(previous_snapshot, gather_ctx->new_sub()); + previous_snapshot = snapshot.get(); + } + gather_ctx->activate(); +} + +template <typename I> +void RawFormat<I>::handle_open(int r, Context* on_finish) { + auto cct = m_image_ctx->cct; + ldout(cct, 10) << "r=" << r << dendl; + if (r < 0) { - lderr(cct) << "failed to build migration stream handler" << cpp_strerror(r) + lderr(cct) << "failed to open raw image: " << cpp_strerror(r) << dendl; - m_image_ctx->state->close( - new LambdaContext([r, on_finish](int _) { on_finish->complete(r); })); + + auto gather_ctx = new C_Gather(cct, on_finish); + for (auto& [_, snapshot] : m_snapshots) { + snapshot->close(gather_ctx->new_sub()); + } + + m_image_ctx->state->close(new LambdaContext( + 
[r, on_finish=gather_ctx->new_sub()](int _) { on_finish->complete(r); })); + + gather_ctx->activate(); return; } - auto req = new OpenRequest(this, on_finish); - req->send(); + on_finish->complete(0); } template <typename I> @@ -139,12 +122,12 @@ void RawFormat<I>::close(Context* on_finish) { auto cct = m_image_ctx->cct; ldout(cct, 10) << dendl; - if (!m_stream) { - on_finish->complete(0); - return; + auto gather_ctx = new C_Gather(cct, on_finish); + for (auto& [snap_id, snapshot] : m_snapshots) { + snapshot->close(gather_ctx->new_sub()); } - m_stream->close(on_finish); + gather_ctx->activate(); } template <typename I> @@ -153,6 +136,12 @@ void RawFormat<I>::get_snapshots(SnapInfos* snap_infos, Context* on_finish) { ldout(cct, 10) << dendl; snap_infos->clear(); + for (auto& [snap_id, snapshot] : m_snapshots) { + if (snap_id == CEPH_NOSNAP) { + continue; + } + snap_infos->emplace(snap_id, snapshot->get_snap_info()); + } on_finish->complete(0); } @@ -162,12 +151,14 @@ void RawFormat<I>::get_image_size(uint64_t snap_id, uint64_t* size, auto cct = m_image_ctx->cct; ldout(cct, 10) << dendl; - if (snap_id != CEPH_NOSNAP) { - on_finish->complete(-EINVAL); + auto snapshot_it = m_snapshots.find(snap_id); + if (snapshot_it == m_snapshots.end()) { + on_finish->complete(-ENOENT); return; } - m_stream->get_size(size, on_finish); + *size = snapshot_it->second->get_snap_info().size; + on_finish->complete(0); } template <typename I> @@ -176,22 +167,18 @@ bool RawFormat<I>::read( io::ReadResult&& read_result, int op_flags, int read_flags, const ZTracer::Trace &parent_trace) { auto cct = m_image_ctx->cct; - ldout(cct, 20) << "image_extents=" << image_extents << dendl; + ldout(cct, 20) << "snap_id=" << snap_id << ", " + << "image_extents=" << image_extents << dendl; - if (snap_id != CEPH_NOSNAP) { - aio_comp->fail(-EINVAL); + auto snapshot_it = m_snapshots.find(snap_id); + if (snapshot_it == m_snapshots.end()) { + aio_comp->fail(-ENOENT); return true; } - aio_comp->read_result = 
std::move(read_result); - aio_comp->read_result.set_image_extents(image_extents); - - aio_comp->set_request_count(1); - auto ctx = new io::ReadResult::C_ImageReadRequest(aio_comp, - image_extents); - - // raw directly maps the image-extent IO down to a byte IO extent - m_stream->read(std::move(image_extents), &ctx->bl, ctx); + snapshot_it->second->read(aio_comp, std::move(image_extents), + std::move(read_result), op_flags, read_flags, + parent_trace); return true; } @@ -201,13 +188,100 @@ void RawFormat<I>::list_snaps(io::Extents&& image_extents, io::SnapshotDelta* snapshot_delta, const ZTracer::Trace &parent_trace, Context* on_finish) { - // raw does support snapshots so list the full IO extent as a delta - auto& snapshot = (*snapshot_delta)[{CEPH_NOSNAP, CEPH_NOSNAP}]; - for (auto& image_extent : image_extents) { - snapshot.insert(image_extent.first, image_extent.second, - {io::SPARSE_EXTENT_STATE_DATA, image_extent.second}); + auto cct = m_image_ctx->cct; + ldout(cct, 20) << "image_extents=" << image_extents << dendl; + + on_finish = new LambdaContext([this, snap_ids=std::move(snap_ids), + snapshot_delta, on_finish](int r) mutable { + handle_list_snaps(r, std::move(snap_ids), snapshot_delta, on_finish); + }); + + auto gather_ctx = new C_Gather(cct, on_finish); + + std::optional<uint64_t> previous_size = std::nullopt; + for (auto& [snap_id, snapshot] : m_snapshots) { + auto& sparse_extents = (*snapshot_delta)[{snap_id, snap_id}]; + + // zero out any space between the previous snapshot end and this + // snapshot's end + auto& snap_info = snapshot->get_snap_info(); + if (previous_size && *previous_size > snap_info.size) { + ldout(cct, 20) << "snapshot resize " << *previous_size << " -> " + << snap_info.size << dendl; + interval_set<uint64_t> zero_interval; + zero_interval.insert(snap_info.size, *previous_size - snap_info.size); + + for (auto& image_extent : image_extents) { + interval_set<uint64_t> image_interval; + image_interval.insert(image_extent.first, 
image_extent.second); + + image_interval.intersection_of(zero_interval); + for (auto [image_offset, image_length] : image_interval) { + ldout(cct, 20) << "zeroing extent " << image_offset << "~" + << image_length << " at snapshot " << snap_id << dendl; + sparse_extents.insert(image_offset, image_length, + {io::SPARSE_EXTENT_STATE_ZEROED, image_length}); + } + } + } + previous_size = snap_info.size; + + // build set of data/zeroed extents for the current snapshot + snapshot->list_snap(io::Extents{image_extents}, list_snaps_flags, + &sparse_extents, parent_trace, gather_ctx->new_sub()); } - on_finish->complete(0); + + gather_ctx->activate(); +} + +template <typename I> +void RawFormat<I>::handle_list_snaps(int r, io::SnapIds&& snap_ids, + io::SnapshotDelta* snapshot_delta, + Context* on_finish) { + auto cct = m_image_ctx->cct; + ldout(cct, 20) << "r=" << r << ", " + << "snapshot_delta=" << snapshot_delta << dendl; + + io::SnapshotDelta orig_snapshot_delta = std::move(*snapshot_delta); + snapshot_delta->clear(); + + auto snap_id_it = snap_ids.begin(); + ceph_assert(snap_id_it != snap_ids.end()); + + // merge any snapshot intervals that were not requested + std::list<io::SparseExtents*> pending_sparse_extents; + for (auto& [snap_key, sparse_extents] : orig_snapshot_delta) { + // advance to next valid requested snap id + while (snap_id_it != snap_ids.end() && *snap_id_it < snap_key.first) { + ++snap_id_it; + } + if (snap_id_it == snap_ids.end()) { + break; + } + + // loop through older write/read snapshot sparse extents to remove any + // overlaps with the current sparse extent + for (auto prev_sparse_extents : pending_sparse_extents) { + for (auto& sparse_extent : sparse_extents) { + prev_sparse_extents->erase(sparse_extent.get_off(), + sparse_extent.get_len()); + } + } + + auto write_read_snap_ids = std::make_pair(*snap_id_it, snap_key.second); + (*snapshot_delta)[write_read_snap_ids] = std::move(sparse_extents); + + if (write_read_snap_ids.first > snap_key.first) { + 
// the current snapshot wasn't requested so it might need to get + // merged with a later snapshot + pending_sparse_extents.push_back(&(*snapshot_delta)[write_read_snap_ids]); + } else { + // we don't merge results past a valid requested snapshot + pending_sparse_extents.clear(); + } + } + + on_finish->complete(r); } } // namespace migration diff --git a/src/librbd/migration/RawFormat.h b/src/librbd/migration/RawFormat.h index 750ef8561cb..a20c0814f74 100644 --- a/src/librbd/migration/RawFormat.h +++ b/src/librbd/migration/RawFormat.h @@ -8,6 +8,7 @@ #include "librbd/Types.h" #include "librbd/migration/FormatInterface.h" #include "json_spirit/json_spirit.h" +#include <map> #include <memory> struct Context; @@ -20,7 +21,7 @@ struct ImageCtx; namespace migration { template <typename> struct SourceSpecBuilder; -struct StreamInterface; +struct SnapshotInterface; template <typename ImageCtxT> class RawFormat : public FormatInterface { @@ -54,14 +55,19 @@ public: Context* on_finish) override; private: - struct OpenRequest; + typedef std::shared_ptr<SnapshotInterface> Snapshot; + typedef std::map<uint64_t, Snapshot> Snapshots; ImageCtxT* m_image_ctx; json_spirit::mObject m_json_object; const SourceSpecBuilder<ImageCtxT>* m_source_spec_builder; - std::unique_ptr<StreamInterface> m_stream; + Snapshots m_snapshots; + void handle_open(int r, Context* on_finish); + + void handle_list_snaps(int r, io::SnapIds&& snap_ids, + io::SnapshotDelta* snapshot_delta, Context* on_finish); }; } // namespace migration diff --git a/src/librbd/migration/RawSnapshot.cc b/src/librbd/migration/RawSnapshot.cc new file mode 100644 index 00000000000..4a83fd8ad97 --- /dev/null +++ b/src/librbd/migration/RawSnapshot.cc @@ -0,0 +1,220 @@ +// -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/migration/RawSnapshot.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include 
"librbd/io/AioCompletion.h" +#include "librbd/io/ReadResult.h" +#include "librbd/migration/SourceSpecBuilder.h" +#include "librbd/migration/StreamInterface.h" + +namespace librbd { +namespace migration { + +namespace { + +const std::string NAME_KEY{"name"}; + +} // anonymous namespace + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::migration::RawSnapshot::OpenRequest " \ + << this << " " << __func__ << ": " + +template <typename I> +struct RawSnapshot<I>::OpenRequest { + RawSnapshot* raw_snapshot; + Context* on_finish; + + OpenRequest(RawSnapshot* raw_snapshot, Context* on_finish) + : raw_snapshot(raw_snapshot), on_finish(on_finish) { + } + + void send() { + open_stream(); + } + + void open_stream() { + auto cct = raw_snapshot->m_image_ctx->cct; + ldout(cct, 10) << dendl; + + auto ctx = util::create_context_callback< + OpenRequest, &OpenRequest::handle_open_stream>(this); + raw_snapshot->m_stream->open(ctx); + } + + void handle_open_stream(int r) { + auto cct = raw_snapshot->m_image_ctx->cct; + ldout(cct, 10) << "r=" << r << dendl; + + if (r < 0) { + lderr(cct) << "failed to open stream: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + get_image_size(); + } + + void get_image_size() { + auto cct = raw_snapshot->m_image_ctx->cct; + ldout(cct, 10) << dendl; + + auto ctx = util::create_context_callback< + OpenRequest, &OpenRequest::handle_get_image_size>(this); + raw_snapshot->m_stream->get_size(&raw_snapshot->m_snap_info.size, ctx); + } + + void handle_get_image_size(int r) { + auto cct = raw_snapshot->m_image_ctx->cct; + ldout(cct, 10) << "r=" << r << ", " + << "image_size=" << raw_snapshot->m_snap_info.size << dendl; + + if (r < 0) { + // the stream was opened by the previous step, so close it before failing + lderr(cct) << "failed to get image size: " << cpp_strerror(r) << dendl; + close_stream(r); + return; + } + + finish(0); + } + + void close_stream(int r) { + auto cct = raw_snapshot->m_image_ctx->cct; + ldout(cct, 10) << dendl; + + auto ctx = new LambdaContext([this, r](int) 
{ + handle_close_stream(r); + }); + raw_snapshot->m_stream->close(ctx); + } + + void handle_close_stream(int r) { + auto cct = raw_snapshot->m_image_ctx->cct; + ldout(cct, 10) << "r=" << r << dendl; + + raw_snapshot->m_stream.reset(); + + finish(r); + } + + void finish(int r) { + auto cct = raw_snapshot->m_image_ctx->cct; + ldout(cct, 10) << "r=" << r << dendl; + + on_finish->complete(r); + delete this; + } +}; + +#undef dout_prefix +#define dout_prefix *_dout << "librbd::migration::RawSnapshot: " << this \ + << " " << __func__ << ": " + +template <typename I> +RawSnapshot<I>::RawSnapshot(I* image_ctx, + const json_spirit::mObject& json_object, + const SourceSpecBuilder<I>* source_spec_builder, + uint64_t index) + : m_image_ctx(image_ctx), m_json_object(json_object), + m_source_spec_builder(source_spec_builder), m_index(index), + m_snap_info({}, {}, 0, {}, 0, 0, {}) { + auto cct = m_image_ctx->cct; + ldout(cct, 10) << dendl; +} + +template <typename I> +void RawSnapshot<I>::open(SnapshotInterface* previous_snapshot, + Context* on_finish) { + auto cct = m_image_ctx->cct; + + // special-case for treating the HEAD revision as a snapshot + if (m_index != CEPH_NOSNAP) { + auto& name_val = m_json_object[NAME_KEY]; + if (name_val.type() == json_spirit::str_type) { + m_snap_info.name = name_val.get_str(); + } else if (name_val.type() == json_spirit::null_type) { + uuid_d uuid_gen; + uuid_gen.generate_random(); + + m_snap_info.name = uuid_gen.to_string(); + } else { + lderr(cct) << "invalid snapshot name" << dendl; + on_finish->complete(-EINVAL); + return; + } + } + + ldout(cct, 10) << "name=" << m_snap_info.name << dendl; + + int r = m_source_spec_builder->build_stream(m_json_object, &m_stream); + if (r < 0) { + // keep the message and the errno text separated in the log output + lderr(cct) << "failed to build migration stream handler: " + << cpp_strerror(r) << dendl; + on_finish->complete(r); + return; + } + + auto req = new OpenRequest(this, on_finish); + req->send(); +} + +template <typename I> +void RawSnapshot<I>::close(Context* 
on_finish) { + auto cct = m_image_ctx->cct; + ldout(cct, 10) << dendl; + + if (!m_stream) { + on_finish->complete(0); + return; + } + + m_stream->close(on_finish); +} + +template <typename I> +void RawSnapshot<I>::read(io::AioCompletion* aio_comp, + io::Extents&& image_extents, + io::ReadResult&& read_result, int op_flags, + int read_flags, + const ZTracer::Trace &parent_trace) { + auto cct = m_image_ctx->cct; + ldout(cct, 20) << "image_extents=" << image_extents << dendl; + + aio_comp->read_result = std::move(read_result); + aio_comp->read_result.set_image_extents(image_extents); + + aio_comp->set_request_count(1); + auto ctx = new io::ReadResult::C_ImageReadRequest(aio_comp, + 0, image_extents); + + // raw directly maps the image-extent IO down to a byte IO extent + m_stream->read(std::move(image_extents), &ctx->bl, ctx); +} + +template <typename I> +void RawSnapshot<I>::list_snap(io::Extents&& image_extents, + int list_snaps_flags, + io::SparseExtents* sparse_extents, + const ZTracer::Trace &parent_trace, + Context* on_finish) { + auto cct = m_image_ctx->cct; + ldout(cct, 20) << "image_extents=" << image_extents << dendl; + + // raw does not support sparse extents so list the full IO extent as a delta + for (auto& [image_offset, image_length] : image_extents) { + sparse_extents->insert(image_offset, image_length, + {io::SPARSE_EXTENT_STATE_DATA, image_length}); + } + + on_finish->complete(0); +} + +} // namespace migration +} // namespace librbd + +template class librbd::migration::RawSnapshot<librbd::ImageCtx>; diff --git a/src/librbd/migration/RawSnapshot.h b/src/librbd/migration/RawSnapshot.h new file mode 100644 index 00000000000..9f76d687824 --- /dev/null +++ b/src/librbd/migration/RawSnapshot.h @@ -0,0 +1,75 @@ +// -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_MIGRATION_RAW_SNAPSHOT_H +#define CEPH_LIBRBD_MIGRATION_RAW_SNAPSHOT_H + +#include "include/buffer_fwd.h" +#include 
"include/int_types.h" +#include "common/zipkin_trace.h" +#include "librbd/Types.h" +#include "librbd/io/Types.h" +#include "librbd/migration/SnapshotInterface.h" +#include "json_spirit/json_spirit.h" +#include <memory> + +namespace librbd { + +struct ImageCtx; + +namespace migration { + +template <typename> struct SourceSpecBuilder; +struct StreamInterface; + +template <typename ImageCtxT> +class RawSnapshot : public SnapshotInterface { +public: + static RawSnapshot* create( + ImageCtx* image_ctx, const json_spirit::mObject& json_object, + const SourceSpecBuilder<ImageCtxT>* source_spec_builder, uint64_t index) { + return new RawSnapshot(image_ctx, json_object, source_spec_builder, index); + } + + RawSnapshot(ImageCtxT* image_ctx, const json_spirit::mObject& json_object, + const SourceSpecBuilder<ImageCtxT>* source_spec_builder, + uint64_t index); + RawSnapshot(const RawSnapshot&) = delete; + RawSnapshot& operator=(const RawSnapshot&) = delete; + + void open(SnapshotInterface* previous_snapshot, Context* on_finish) override; + void close(Context* on_finish) override; + + const SnapInfo& get_snap_info() const override { + return m_snap_info; + } + + void read(io::AioCompletion* aio_comp, io::Extents&& image_extents, + io::ReadResult&& read_result, int op_flags, int read_flags, + const ZTracer::Trace &parent_trace) override; + + void list_snap(io::Extents&& image_extents, int list_snaps_flags, + io::SparseExtents* sparse_extents, + const ZTracer::Trace &parent_trace, + Context* on_finish) override; + +private: + struct OpenRequest; + + ImageCtxT* m_image_ctx; + json_spirit::mObject m_json_object; + const SourceSpecBuilder<ImageCtxT>* m_source_spec_builder; + uint64_t m_index = 0; + + SnapInfo m_snap_info; + + std::shared_ptr<StreamInterface> m_stream; + +}; + +} // namespace migration +} // namespace librbd + +extern template class librbd::migration::RawSnapshot<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_MIGRATION_RAW_SNAPSHOT_H diff --git 
a/src/librbd/migration/S3Stream.cc b/src/librbd/migration/S3Stream.cc new file mode 100644 index 00000000000..f812ef0294a --- /dev/null +++ b/src/librbd/migration/S3Stream.cc @@ -0,0 +1,178 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/migration/S3Stream.h" +#include "common/armor.h" +#include "common/ceph_crypto.h" +#include "common/ceph_time.h" +#include "common/dout.h" +#include "common/errno.h" +#include "librbd/AsioEngine.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/asio/Utils.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ReadResult.h" +#include "librbd/migration/HttpClient.h" +#include "librbd/migration/HttpProcessorInterface.h" +#include <boost/beast/http.hpp> + +#undef FMT_HEADER_ONLY +#define FMT_HEADER_ONLY 1 +#include <fmt/chrono.h> +#include <fmt/format.h> + +#include <time.h> + +namespace librbd { +namespace migration { + +using HttpRequest = boost::beast::http::request<boost::beast::http::empty_body>; + +namespace { + +const std::string URL_KEY {"url"}; +const std::string ACCESS_KEY {"access_key"}; +const std::string SECRET_KEY {"secret_key"}; + +} // anonymous namespace + +template <typename I> +struct S3Stream<I>::HttpProcessor : public HttpProcessorInterface { + S3Stream* s3stream; + + HttpProcessor(S3Stream* s3stream) : s3stream(s3stream) { + } + + void process_request(EmptyRequest& request) override { + s3stream->process_request(request); + } +}; + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::migration::S3Stream: " << this \ + << " " << __func__ << ": " + +template <typename I> +S3Stream<I>::S3Stream(I* image_ctx, const json_spirit::mObject& json_object) + : m_image_ctx(image_ctx), m_cct(image_ctx->cct), + m_asio_engine(image_ctx->asio_engine), m_json_object(json_object), + m_http_processor(std::make_unique<HttpProcessor>(this)) { +} + +template <typename I> 
+S3Stream<I>::~S3Stream() { +} + +template <typename I> +void S3Stream<I>::open(Context* on_finish) { + auto& url_value = m_json_object[URL_KEY]; + if (url_value.type() != json_spirit::str_type) { + lderr(m_cct) << "failed to locate '" << URL_KEY << "' key" << dendl; + on_finish->complete(-EINVAL); + return; + } + + auto& access_key = m_json_object[ACCESS_KEY]; + if (access_key.type() != json_spirit::str_type) { + lderr(m_cct) << "failed to locate '" << ACCESS_KEY << "' key" << dendl; + on_finish->complete(-EINVAL); + return; + } + + auto& secret_key = m_json_object[SECRET_KEY]; + if (secret_key.type() != json_spirit::str_type) { + lderr(m_cct) << "failed to locate '" << SECRET_KEY << "' key" << dendl; + on_finish->complete(-EINVAL); + return; + } + + m_url = url_value.get_str(); + m_access_key = access_key.get_str(); + m_secret_key = secret_key.get_str(); + ldout(m_cct, 10) << "url=" << m_url << ", " + << "access_key=" << m_access_key << dendl; + + m_http_client.reset(HttpClient<I>::create(m_image_ctx, m_url)); + m_http_client->set_http_processor(m_http_processor.get()); + m_http_client->open(on_finish); +} + +template <typename I> +void S3Stream<I>::close(Context* on_finish) { + ldout(m_cct, 10) << dendl; + + // open() only creates the HTTP client after validating the source spec, + // so there is nothing to close (and nothing to dereference) without it + if (!m_http_client) { + on_finish->complete(0); + return; + } + + m_http_client->close(on_finish); +} + +template <typename I> +void S3Stream<I>::get_size(uint64_t* size, Context* on_finish) { + ldout(m_cct, 10) << dendl; + + m_http_client->get_size(size, on_finish); +} + +template <typename I> +void S3Stream<I>::read(io::Extents&& byte_extents, bufferlist* data, + Context* on_finish) { + ldout(m_cct, 20) << "byte_extents=" << byte_extents << dendl; + + m_http_client->read(std::move(byte_extents), data, on_finish); +} + +template <typename I> +void S3Stream<I>::process_request(HttpRequest& http_request) { + ldout(m_cct, 20) << dendl; + + // format RFC 1123 date/time + auto time = ceph::real_clock::to_time_t(ceph::real_clock::now()); + struct tm timeInfo; + 
gmtime_r(&time, &timeInfo); + + std::string date = fmt::format("{:%a, %d %b %Y %H:%M:%S %z}", timeInfo); + http_request.set(boost::beast::http::field::date, date); + + // note: we don't support S3 subresources + std::string canonicalized_resource = std::string(http_request.target()); + + std::string string_to_sign = fmt::format( + "{}\n\n\n{}\n{}", + std::string(boost::beast::http::to_string(http_request.method())), + date, canonicalized_resource); + + // create HMAC-SHA1 signature from secret key + string-to-sign + sha1_digest_t digest; + crypto::HMACSHA1 hmac( + reinterpret_cast<const unsigned char*>(m_secret_key.data()), + m_secret_key.size()); + hmac.Update(reinterpret_cast<const unsigned char*>(string_to_sign.data()), + string_to_sign.size()); + hmac.Final(reinterpret_cast<unsigned char*>(digest.v)); + + // base64 encode the result + char buf[64]; + int r = ceph_armor(std::begin(buf), std::begin(buf) + sizeof(buf), + reinterpret_cast<const char *>(digest.v), + reinterpret_cast<const char *>(digest.v + digest.SIZE)); + if (r < 0) { + ceph_abort("ceph_armor failed"); + } + + // store the access-key + signature in the HTTP authorization header + std::string signature = std::string(std::begin(buf), std::begin(buf) + r); + std::string authorization = fmt::format("AWS {}:{}", m_access_key, signature); + http_request.set(boost::beast::http::field::authorization, authorization); + + ldout(m_cct, 20) << "string_to_sign=" << string_to_sign << ", " + << "authorization=" << authorization << dendl; +} + +} // namespace migration +} // namespace librbd + +template class librbd::migration::S3Stream<librbd::ImageCtx>; diff --git a/src/librbd/migration/S3Stream.h b/src/librbd/migration/S3Stream.h new file mode 100644 index 00000000000..586b217878c --- /dev/null +++ b/src/librbd/migration/S3Stream.h @@ -0,0 +1,78 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_MIGRATION_S3_STREAM_H +#define 
CEPH_LIBRBD_MIGRATION_S3_STREAM_H + +#include "include/int_types.h" +#include "librbd/migration/StreamInterface.h" +#include <boost/beast/http/empty_body.hpp> +#include <boost/beast/http/message.hpp> +#include <boost/beast/http/string_body.hpp> +#include <json_spirit/json_spirit.h> +#include <memory> +#include <string> + +struct Context; + +namespace librbd { + +struct AsioEngine; +struct ImageCtx; + +namespace migration { + +template <typename> class HttpClient; + +template <typename ImageCtxT> +class S3Stream : public StreamInterface { +public: + static S3Stream* create(ImageCtxT* image_ctx, + const json_spirit::mObject& json_object) { + return new S3Stream(image_ctx, json_object); + } + + S3Stream(ImageCtxT* image_ctx, const json_spirit::mObject& json_object); + ~S3Stream() override; + + S3Stream(const S3Stream&) = delete; + S3Stream& operator=(const S3Stream&) = delete; + + void open(Context* on_finish) override; + void close(Context* on_finish) override; + + void get_size(uint64_t* size, Context* on_finish) override; + + void read(io::Extents&& byte_extents, bufferlist* data, + Context* on_finish) override; + +private: + using HttpRequest = boost::beast::http::request< + boost::beast::http::empty_body>; + using HttpResponse = boost::beast::http::response< + boost::beast::http::string_body>; + + struct HttpProcessor; + + ImageCtxT* m_image_ctx; + CephContext* m_cct; + std::shared_ptr<AsioEngine> m_asio_engine; + json_spirit::mObject m_json_object; + + std::string m_url; + std::string m_access_key; + std::string m_secret_key; + + std::unique_ptr<HttpProcessor> m_http_processor; + std::unique_ptr<HttpClient<ImageCtxT>> m_http_client; + + void process_request(HttpRequest& http_request); + +}; + +} // namespace migration +} // namespace librbd + +extern template class librbd::migration::S3Stream<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_MIGRATION_S3_STREAM_H diff --git a/src/librbd/migration/SnapshotInterface.h b/src/librbd/migration/SnapshotInterface.h new file 
mode 100644 index 00000000000..9990802c594 --- /dev/null +++ b/src/librbd/migration/SnapshotInterface.h @@ -0,0 +1,48 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_MIGRATION_SNAPSHOT_INTERFACE_H +#define CEPH_LIBRBD_MIGRATION_SNAPSHOT_INTERFACE_H + +#include "include/buffer_fwd.h" +#include "include/int_types.h" +#include "common/zipkin_trace.h" +#include "librbd/Types.h" +#include "librbd/io/Types.h" +#include <string> + +struct Context; + +namespace librbd { + +namespace io { +struct AioCompletion; +struct ReadResult; +} // namespace io + +namespace migration { + +struct SnapshotInterface { + virtual ~SnapshotInterface() { + } + + virtual void open(SnapshotInterface* previous_snapshot, + Context* on_finish) = 0; + virtual void close(Context* on_finish) = 0; + + virtual const SnapInfo& get_snap_info() const = 0; + + virtual void read(io::AioCompletion* aio_comp, io::Extents&& image_extents, + io::ReadResult&& read_result, int op_flags, int read_flags, + const ZTracer::Trace &parent_trace) = 0; + + virtual void list_snap(io::Extents&& image_extents, int list_snaps_flags, + io::SparseExtents* sparse_extents, + const ZTracer::Trace &parent_trace, + Context* on_finish) = 0; +}; + +} // namespace migration +} // namespace librbd + +#endif // CEPH_LIBRBD_MIGRATION_SNAPSHOT_INTERFACE_H diff --git a/src/librbd/migration/SourceSpecBuilder.cc b/src/librbd/migration/SourceSpecBuilder.cc index 99c5e10c0cd..526522fba60 100644 --- a/src/librbd/migration/SourceSpecBuilder.cc +++ b/src/librbd/migration/SourceSpecBuilder.cc @@ -5,8 +5,11 @@ #include "common/dout.h" #include "librbd/ImageCtx.h" #include "librbd/migration/FileStream.h" +#include "librbd/migration/HttpStream.h" +#include "librbd/migration/S3Stream.h" #include "librbd/migration/NativeFormat.h" #include "librbd/migration/RawFormat.h" +#include "librbd/migration/RawSnapshot.h" #define dout_subsys ceph_subsys_rbd #undef dout_prefix @@ -72,9 
+75,35 @@ int SourceSpecBuilder<I>::build_format( } template <typename I> +int SourceSpecBuilder<I>::build_snapshot( + const json_spirit::mObject& source_spec_object, uint64_t index, + std::shared_ptr<SnapshotInterface>* snapshot) const { + auto cct = m_image_ctx->cct; + ldout(cct, 10) << dendl; + + auto type_value_it = source_spec_object.find(TYPE_KEY); + if (type_value_it == source_spec_object.end() || + type_value_it->second.type() != json_spirit::str_type) { + lderr(cct) << "failed to locate snapshot type value" << dendl; + return -EINVAL; + } + + auto& type = type_value_it->second.get_str(); + if (type == "raw") { + snapshot->reset(RawSnapshot<I>::create(m_image_ctx, source_spec_object, + this, index)); + } else { + lderr(cct) << "unknown or unsupported format type '" << type << "'" + << dendl; + return -ENOSYS; + } + return 0; +} + +template <typename I> int SourceSpecBuilder<I>::build_stream( const json_spirit::mObject& source_spec_object, - std::unique_ptr<StreamInterface>* stream) const { + std::shared_ptr<StreamInterface>* stream) const { auto cct = m_image_ctx->cct; ldout(cct, 10) << dendl; @@ -96,6 +125,10 @@ int SourceSpecBuilder<I>::build_stream( auto& type = type_value_it->second.get_str(); if (type == "file") { stream->reset(FileStream<I>::create(m_image_ctx, stream_obj)); + } else if (type == "http") { + stream->reset(HttpStream<I>::create(m_image_ctx, stream_obj)); + } else if (type == "s3") { + stream->reset(S3Stream<I>::create(m_image_ctx, stream_obj)); } else { lderr(cct) << "unknown or unsupported stream type '" << type << "'" << dendl; diff --git a/src/librbd/migration/SourceSpecBuilder.h b/src/librbd/migration/SourceSpecBuilder.h index 3948ca03004..191cb1cbdd3 100644 --- a/src/librbd/migration/SourceSpecBuilder.h +++ b/src/librbd/migration/SourceSpecBuilder.h @@ -19,6 +19,7 @@ struct ImageCtx; namespace migration { struct FormatInterface; +struct SnapshotInterface; struct StreamInterface; template <typename ImageCtxT> @@ -33,8 +34,12 @@ 
public: int build_format(const json_spirit::mObject& format_object, bool import_only, std::unique_ptr<FormatInterface>* format) const; + int build_snapshot(const json_spirit::mObject& source_spec_object, + uint64_t index, + std::shared_ptr<SnapshotInterface>* snapshot) const; + int build_stream(const json_spirit::mObject& source_spec_object, - std::unique_ptr<StreamInterface>* stream) const; + std::shared_ptr<StreamInterface>* stream) const; private: ImageCtxT* m_image_ctx; diff --git a/src/librbd/migration/Types.h b/src/librbd/migration/Types.h new file mode 100644 index 00000000000..244dc28b774 --- /dev/null +++ b/src/librbd/migration/Types.h @@ -0,0 +1,42 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_MIGRATION_TYPES_H +#define CEPH_LIBRBD_MIGRATION_TYPES_H + +#include <string> +#include <utility> + +namespace librbd { +namespace migration { + +enum UrlScheme { + URL_SCHEME_HTTP, + URL_SCHEME_HTTPS, +}; + +struct UrlSpec { + UrlSpec() {} + UrlSpec(UrlScheme scheme, const std::string& host, const std::string& port, + const std::string& path) + : scheme(scheme), host(host), port(port), path(path) { + } + + UrlScheme scheme = URL_SCHEME_HTTP; + std::string host; + std::string port = "80"; + std::string path = "/"; + +}; + +inline bool operator==(const UrlSpec& lhs, const UrlSpec& rhs) { + return (lhs.scheme == rhs.scheme && + lhs.host == rhs.host && + lhs.port == rhs.port && + lhs.path == rhs.path); +} + +} // namespace migration +} // namespace librbd + +#endif // CEPH_LIBRBD_MIGRATION_TYPES_H diff --git a/src/librbd/migration/Utils.cc b/src/librbd/migration/Utils.cc new file mode 100644 index 00000000000..23b803d2762 --- /dev/null +++ b/src/librbd/migration/Utils.cc @@ -0,0 +1,65 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/migration/Utils.h" +#include "common/dout.h" +#include "common/errno.h" +#include 
<boost/lexical_cast.hpp> +#include <regex> + +namespace librbd { +namespace migration { +namespace util { + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::migration::util::" << __func__ << ": " + +int parse_url(CephContext* cct, const std::string& url, UrlSpec* url_spec) { + ldout(cct, 10) << "url=" << url << dendl; + *url_spec = UrlSpec{}; + + // parse the provided URL (scheme, user, password, host, port, path, + // parameters, query, and fragment) + std::regex url_regex( + R"(^(?:([^:/]*)://)?(?:(\w+)(?::(\w+))?@)?([^/;\?:#]+)(?::([^/;\?#]+))?)" + R"((?:/([^;\?#]*))?(?:;([^\?#]+))?(?:\?([^#]+))?(?:#(\w+))?$)"); + std::smatch match; + if(!std::regex_match(url, match, url_regex)) { + lderr(cct) << "invalid url: '" << url << "'" << dendl; + return -EINVAL; + } + + auto& scheme = match[1]; + if (scheme == "http" || scheme == "") { + url_spec->scheme = URL_SCHEME_HTTP; + } else if (scheme == "https") { + url_spec->scheme = URL_SCHEME_HTTPS; + url_spec->port = "443"; + } else { + lderr(cct) << "invalid url scheme: '" << url << "'" << dendl; + return -EINVAL; + } + + url_spec->host = match[4]; + auto& port = match[5]; + if (port.matched) { + try { + boost::lexical_cast<uint16_t>(port); + } catch (boost::bad_lexical_cast&) { + lderr(cct) << "invalid url port: '" << url << "'" << dendl; + return -EINVAL; + } + url_spec->port = port; + } + + auto& path = match[6]; + if (path.matched) { + url_spec->path += path; + } + return 0; +} + +} // namespace util +} // namespace migration +} // namespace librbd diff --git a/src/librbd/migration/Utils.h b/src/librbd/migration/Utils.h new file mode 100644 index 00000000000..a3e2fe0132c --- /dev/null +++ b/src/librbd/migration/Utils.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_MIGRATION_UTILS_H +#define CEPH_LIBRBD_MIGRATION_UTILS_H + +#include "include/common_fwd.h" +#include 
"librbd/migration/Types.h" +#include <string> + +namespace librbd { +namespace migration { +namespace util { + +int parse_url(CephContext* cct, const std::string& url, UrlSpec* url_spec); + +} // namespace util +} // namespace migration +} // namespace librbd + +#endif // CEPH_LIBRBD_MIGRATION_UTILS_H diff --git a/src/librbd/object_map/DiffRequest.cc b/src/librbd/object_map/DiffRequest.cc index bd279de9023..1f80eb155cb 100644 --- a/src/librbd/object_map/DiffRequest.cc +++ b/src/librbd/object_map/DiffRequest.cc @@ -62,7 +62,7 @@ void DiffRequest<I>::load_object_map( } } - if ((m_image_ctx->features & RBD_FEATURE_FAST_DIFF) != 0) { + if ((m_image_ctx->features & RBD_FEATURE_FAST_DIFF) == 0) { image_locker->unlock(); ldout(cct, 10) << "fast-diff feature not enabled" << dendl; diff --git a/src/librbd/plugin/Api.cc b/src/librbd/plugin/Api.cc index 76a9859f79d..67303be3f4e 100644 --- a/src/librbd/plugin/Api.cc +++ b/src/librbd/plugin/Api.cc @@ -1,9 +1,13 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab +#include "common/Timer.h" #include "librbd/plugin/Api.h" #include "librbd/ImageCtx.h" +#include "librbd/io/AioCompletion.h" #include "librbd/io/Utils.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" namespace librbd { namespace plugin { @@ -17,6 +21,71 @@ void Api<I>::read_parent( on_finish); } +template <typename I> +void Api<I>::execute_image_metadata_set( + I *image_ctx, const std::string &key, + const std::string &value, Context *on_finish) { + ImageCtx* ictx = util::get_image_ctx(image_ctx); + ictx->operations->execute_metadata_set(key, value, on_finish); +} + +template <typename I> +void Api<I>::execute_image_metadata_remove( + I *image_ctx, const std::string &key, Context *on_finish) { + ImageCtx* ictx = util::get_image_ctx(image_ctx); + ictx->operations->execute_metadata_remove(key, on_finish); +} + +template <typename I> +void Api<I>::get_image_timer_instance( + CephContext *cct, SafeTimer **timer, 
ceph::mutex **timer_lock) { + ImageCtx::get_timer_instance(cct, timer, timer_lock); +} + +template <typename I> +bool Api<I>::test_image_features(I *image_ctx, uint64_t features) { + return image_ctx->test_features(features); +} + +template <typename I> +void Api<I>::update_aio_comp(io::AioCompletion* aio_comp, + uint32_t request_count, + io::ReadResult &read_result, + io::Extents &image_extents) { + aio_comp->set_request_count(request_count); + aio_comp->read_result = std::move(read_result); + aio_comp->read_result.set_image_extents(image_extents); + start_in_flight_io(aio_comp); +} + +template <typename I> +void Api<I>::update_aio_comp( + io::AioCompletion* aio_comp, uint32_t request_count) { + aio_comp->set_request_count(request_count); + start_in_flight_io(aio_comp); +} + +template <typename I> +io::ReadResult::C_ImageReadRequest* Api<I>::create_image_read_request( + io::AioCompletion* aio_comp, uint64_t buffer_offset, + const Extents& image_extents) { + return new io::ReadResult::C_ImageReadRequest( + aio_comp, buffer_offset, image_extents); +} + +template <typename I> +io::C_AioRequest* Api<I>::create_aio_request(io::AioCompletion* aio_comp) { + io::C_AioRequest *req_comp = new io::C_AioRequest(aio_comp); + return req_comp; +} + +template <typename I> +void Api<I>::start_in_flight_io(io::AioCompletion* aio_comp) { + if (!aio_comp->async_op.started()) { + aio_comp->start_op(); + } +} + } // namespace plugin } // namespace librbd diff --git a/src/librbd/plugin/Api.h b/src/librbd/plugin/Api.h index f2dd5c82249..2d55c17c19f 100644 --- a/src/librbd/plugin/Api.h +++ b/src/librbd/plugin/Api.h @@ -4,15 +4,24 @@ #ifndef CEPH_LIBRBD_PLUGIN_API_H #define CEPH_LIBRBD_PLUGIN_API_H +#include "common/ceph_mutex.h" #include "include/common_fwd.h" #include "include/int_types.h" #include "include/rados/librados.hpp" #include "librbd/io/Types.h" +#include "librbd/io/ReadResult.h" namespace ZTracer { struct Trace; } +class SafeTimer; + namespace librbd { +namespace io { +class 
AioCompletion; +class C_AioRequest; +} + struct ImageCtx; namespace plugin { @@ -29,6 +38,43 @@ struct Api { librados::snap_t snap_id, const ZTracer::Trace &trace, Context* on_finish); + virtual void execute_image_metadata_set( + ImageCtxT *image_ctx, + const std::string &key, + const std::string &value, + Context *on_finish); + + virtual void execute_image_metadata_remove( + ImageCtxT *image_ctx, + const std::string &key, + Context *on_finish); + + virtual void get_image_timer_instance( + CephContext *cct, SafeTimer **timer, + ceph::mutex **timer_lock); + + virtual bool test_image_features( + ImageCtxT *image_ctx, + uint64_t features); + + virtual void update_aio_comp( + io::AioCompletion* aio_comp, + uint32_t request_count, + io::ReadResult& read_result, + io::Extents &image_extents); + + virtual void update_aio_comp( + io::AioCompletion* aio_comp, + uint32_t request_count); + + virtual io::ReadResult::C_ImageReadRequest* create_image_read_request( + io::AioCompletion* aio_comp, uint64_t buffer_offset, + const Extents& image_extents); + + virtual io::C_AioRequest* create_aio_request(io::AioCompletion* aio_comp); + +private: + void start_in_flight_io(io::AioCompletion* aio_comp); }; } // namespace plugin diff --git a/src/librbd/plugin/ParentCache.cc b/src/librbd/plugin/ParentCache.cc index 0c50c8e7197..3eba430abce 100644 --- a/src/librbd/plugin/ParentCache.cc +++ b/src/librbd/plugin/ParentCache.cc @@ -33,22 +33,23 @@ namespace librbd { namespace plugin { template <typename I> -void ParentCache<I>::init(I* image_ctx, Api<I>& api, HookPoints* hook_points, +void ParentCache<I>::init(I* image_ctx, Api<I>& api, + cache::ImageWritebackInterface& image_writeback, + PluginHookPoints& hook_points_list, Context* on_finish) { - m_image_ctx = image_ctx; - bool parent_cache_enabled = m_image_ctx->config.template get_val<bool>( + bool parent_cache_enabled = image_ctx->config.template get_val<bool>( "rbd_parent_cache_enabled"); - if (m_image_ctx->child == nullptr || 
!parent_cache_enabled || - !m_image_ctx->data_ctx.is_valid()) { + if (image_ctx->child == nullptr || !parent_cache_enabled || + !image_ctx->data_ctx.is_valid()) { on_finish->complete(0); return; } - auto cct = m_image_ctx->cct; + auto cct = image_ctx->cct; ldout(cct, 5) << dendl; auto parent_cache = cache::ParentCacheObjectDispatch<I>::create( - m_image_ctx, api); + image_ctx, api); on_finish = new LambdaContext([this, on_finish, parent_cache](int r) { if (r < 0) { // the object dispatcher will handle cleanup if successfully initialized @@ -62,7 +63,6 @@ void ParentCache<I>::init(I* image_ctx, Api<I>& api, HookPoints* hook_points, template <typename I> void ParentCache<I>::handle_init_parent_cache(int r, Context* on_finish) { - auto cct = m_image_ctx->cct; ldout(cct, 5) << "r=" << r << dendl; if (r < 0) { diff --git a/src/librbd/plugin/ParentCache.h b/src/librbd/plugin/ParentCache.h index e456c5ac3d8..1039efff9ae 100644 --- a/src/librbd/plugin/ParentCache.h +++ b/src/librbd/plugin/ParentCache.h @@ -19,13 +19,14 @@ public: ParentCache(CephContext* cct) : Interface<ImageCtxT>(cct) { } - void init(ImageCtxT* image_ctx, Api<ImageCtxT>& api, HookPoints* hook_points, + void init(ImageCtxT* image_ctx, Api<ImageCtxT>& api, + cache::ImageWritebackInterface& image_writeback, + PluginHookPoints& hook_points_list, Context* on_finish) override; private: - ImageCtxT* m_image_ctx = nullptr; - void handle_init_parent_cache(int r, Context* on_finish); + using ceph::Plugin::cct; }; diff --git a/src/librbd/plugin/Types.h b/src/librbd/plugin/Types.h index af60f49ef63..b66d754ac28 100644 --- a/src/librbd/plugin/Types.h +++ b/src/librbd/plugin/Types.h @@ -5,9 +5,9 @@ #define CEPH_LIBRBD_PLUGIN_TYPES_H #include "include/common_fwd.h" +#include "include/Context.h" #include "common/PluginRegistry.h" - -struct Context; +#include "librbd/cache/ImageWriteback.h" namespace librbd { namespace plugin { @@ -15,16 +15,28 @@ namespace plugin { template <typename> struct Api; struct HookPoints { - 
// TODO later commits will add support for exclusive-lock hook points + virtual ~HookPoints() { + } + virtual void acquired_exclusive_lock(Context* on_finish) = 0; + virtual void prerelease_exclusive_lock(Context* on_finish) = 0; + virtual void discard(Context* on_finish) { + on_finish->complete(0); + } }; +typedef std::list<std::unique_ptr<HookPoints>> PluginHookPoints; + template <typename ImageCtxT> struct Interface : public ceph::Plugin { Interface(CephContext* cct) : Plugin(cct) { } + virtual ~Interface() { + } + virtual void init(ImageCtxT* image_ctx, Api<ImageCtxT>& api, - HookPoints* hook_points, Context* on_finish) = 0; + librbd::cache::ImageWritebackInterface& image_writeback, + PluginHookPoints& hook_points_list, Context* on_finish) = 0; }; } // namespace plugin diff --git a/src/librbd/plugin/WriteLogImageCache.cc b/src/librbd/plugin/WriteLogImageCache.cc new file mode 100644 index 00000000000..2a32841f4b4 --- /dev/null +++ b/src/librbd/plugin/WriteLogImageCache.cc @@ -0,0 +1,104 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ceph_ver.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/PluginRegistry.h" +#include "librbd/ImageCtx.h" +#include "librbd/cache/WriteLogImageDispatch.h" +#include "librbd/cache/ImageWriteback.h" +#include "librbd/cache/pwl/DiscardRequest.h" +#include "librbd/cache/pwl/InitRequest.h" +#include "librbd/io/ImageDispatcherInterface.h" +#include "librbd/plugin/WriteLogImageCache.h" + +extern "C" { + +const char *__ceph_plugin_version() { + return CEPH_GIT_NICE_VER; +} + +int __ceph_plugin_init(CephContext *cct, const std::string& type, + const std::string& name) { + auto plugin_registry = cct->get_plugin_registry(); + return plugin_registry->add( + type, name, new librbd::plugin::WriteLogImageCache<librbd::ImageCtx>(cct)); +} + +} // extern "C" + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << 
"librbd::plugin::WriteLogImageCache: " \ + << this << " " << __func__ << ": " + +namespace librbd { +namespace plugin { + +template <typename I> +void WriteLogImageCache<I>::init(I* image_ctx, Api<I>& api, + cache::ImageWritebackInterface& image_writeback, + PluginHookPoints& hook_points_list, + Context* on_finish) { + bool rwl_enabled = image_ctx->config.template get_val<bool>( + "rbd_rwl_enabled"); + if (!rwl_enabled || !image_ctx->data_ctx.is_valid()) { + on_finish->complete(0); + return; + } + + auto cct = image_ctx->cct; + ldout(cct, 5) << dendl; + + auto hook_points = std::make_unique<WriteLogImageCache::HookPoints>( + image_ctx, image_writeback, api); + hook_points_list.emplace_back(std::move(hook_points)); + + on_finish->complete(0); +} + +template <typename I> +WriteLogImageCache<I>::~WriteLogImageCache() { +} + +template <typename I> +WriteLogImageCache<I>::HookPoints::HookPoints( + I* image_ctx, cache::ImageWritebackInterface& image_writeback, + plugin::Api<I>& plugin_api) + : m_image_ctx(image_ctx), m_image_writeback(image_writeback), + m_plugin_api(plugin_api) +{ +} + +template <typename I> +WriteLogImageCache<I>::HookPoints::~HookPoints() { +} + +template <typename I> +void WriteLogImageCache<I>::HookPoints::acquired_exclusive_lock( + Context* on_finish) { + cache::pwl::InitRequest<I> *req = cache::pwl::InitRequest<I>::create( + *m_image_ctx, m_image_writeback, m_plugin_api, on_finish); + req->send(); +} + +template <typename I> +void WriteLogImageCache<I>::HookPoints::prerelease_exclusive_lock( + Context* on_finish) { + m_image_ctx->io_image_dispatcher->shut_down_dispatch( + io::IMAGE_DISPATCH_LAYER_WRITEBACK_CACHE, on_finish); +} + +template <typename I> +void WriteLogImageCache<I>::HookPoints::discard( + Context* on_finish) { + cache::pwl::DiscardRequest<I> *req = cache::pwl::DiscardRequest<I>::create( + *m_image_ctx, m_plugin_api, on_finish); + req->send(); +} + +} // namespace plugin +} // namespace librbd + +template class 
librbd::plugin::WriteLogImageCache<librbd::ImageCtx>; diff --git a/src/librbd/plugin/WriteLogImageCache.h b/src/librbd/plugin/WriteLogImageCache.h new file mode 100644 index 00000000000..2ceb87ec654 --- /dev/null +++ b/src/librbd/plugin/WriteLogImageCache.h @@ -0,0 +1,53 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_PLUGIN_WRITELOG_IMAGE_CACHE_H +#define CEPH_LIBRBD_PLUGIN_WRITELOG_IMAGE_CACHE_H + +#include "librbd/plugin/Types.h" +#include "include/Context.h" + +namespace librbd { + +struct ImageCtx; + +namespace plugin { + +template <typename ImageCtxT> +class WriteLogImageCache : public Interface<ImageCtxT> { +public: + WriteLogImageCache(CephContext* cct) : Interface<ImageCtxT>(cct) { + } + + ~WriteLogImageCache() override; + + void init(ImageCtxT* image_ctx, Api<ImageCtxT>& api, + cache::ImageWritebackInterface& image_writeback, + PluginHookPoints& hook_points_list, + Context* on_finish) override; + + class HookPoints : public plugin::HookPoints { + public: + HookPoints(ImageCtxT* image_ctx, + cache::ImageWritebackInterface& image_writeback, + plugin::Api<ImageCtxT>& plugin_api); + ~HookPoints() override; + + void acquired_exclusive_lock(Context* on_finish) override; + void prerelease_exclusive_lock(Context* on_finish) override; + void discard(Context* on_finish) override; + + private: + ImageCtxT* m_image_ctx; + cache::ImageWritebackInterface& m_image_writeback; + plugin::Api<ImageCtxT>& m_plugin_api; + }; + +}; + +} // namespace plugin +} // namespace librbd + +extern template class librbd::plugin::WriteLogImageCache<librbd::ImageCtx>; + +#endif // CEPH_LIBRBD_PLUGIN_WRITELOG_IMAGE_CACHE_H diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 3a734e989ff..2a8ea6777b0 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -2159,37 +2159,41 @@ public: class C_IO_Dir_Commit_Ops : public Context { public: - C_IO_Dir_Commit_Ops(CDir *d, int pr, bufferlist &&bl, - vector<dentry_key_t> 
&&r, vector<CDir::dentry_commit_item> &&s, + C_IO_Dir_Commit_Ops(CDir *d, int pr, + vector<CDir::dentry_commit_item> &&s, bufferlist &&bl, + vector<dentry_key_t> &&r, mempool::mds_co::compact_set<mempool::mds_co::string> &&stale) : dir(d), op_prio(pr) { + metapool = dir->mdcache->mds->mdsmap->get_metadata_pool(); version = dir->get_version(); is_new = dir->is_new(); + to_set.swap(s); dfts.swap(bl); to_remove.swap(r); - to_set.swap(s); stale_items.swap(stale); } void finish(int r) override { - dir->_omap_commit_ops(r, op_prio, version, is_new, dfts, to_remove, to_set, - stale_items); + dir->_omap_commit_ops(r, op_prio, metapool, version, is_new, to_set, dfts, + to_remove, stale_items); } private: CDir *dir; - version_t version; int op_prio; + int64_t metapool; + version_t version; bool is_new; + vector<CDir::dentry_commit_item> to_set; bufferlist dfts; vector<dentry_key_t> to_remove; - vector<CDir::dentry_commit_item> to_set; mempool::mds_co::compact_set<mempool::mds_co::string> stale_items; }; // This is not locked by mds_lock -void CDir::_omap_commit_ops(int r, int op_prio, version_t version, bool _new, bufferlist &dfts, - vector<dentry_key_t>& to_remove, vector<dentry_commit_item> &to_set, +void CDir::_omap_commit_ops(int r, int op_prio, int64_t metapool, version_t version, bool _new, + vector<dentry_commit_item> &to_set, bufferlist &dfts, + vector<dentry_key_t>& to_remove, mempool::mds_co::compact_set<mempool::mds_co::string> &stales) { dout(10) << __func__ << dendl; @@ -2205,7 +2209,7 @@ void CDir::_omap_commit_ops(int r, int op_prio, version_t version, bool _new, bu SnapContext snapc; object_t oid = get_ondisk_object(); - object_locator_t oloc(mdcache->mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(metapool); map<string, bufferlist> _set; set<string> _rm; @@ -2249,21 +2253,23 @@ void CDir::_omap_commit_ops(int r, int op_prio, version_t version, bool _new, bu }; for (auto &key : stales) { - write_size += key.length(); - _rm.emplace(key); - - if 
(write_size >= max_write_size) + unsigned size = key.length() + sizeof(__u32); + if (write_size + size > max_write_size) commit_one(); + + write_size += size; + _rm.emplace(key); } for (auto &k : to_remove) { string key; k.encode(key); - write_size += key.length(); - _rm.emplace(std::move(key)); - - if (write_size >= max_write_size) + unsigned size = key.length() + sizeof(__u32); + if (write_size + size > max_write_size) commit_one(); + + write_size += size; + _rm.emplace(std::move(key)); } uint64_t off = 0; @@ -2312,10 +2318,12 @@ void CDir::_omap_commit_ops(int r, int op_prio, version_t version, bool _new, bu } off += item.dft_len; - write_size += key.length() + bl.length(); - _set[std::move(key)].swap(bl); - if (write_size >= max_write_size) + unsigned size = key.length() + bl.length() + 2 * sizeof(__u32); + if (write_size + size > max_write_size) commit_one(); + + write_size += size; + _set[std::move(key)].swap(bl); } commit_one(true); @@ -2409,9 +2417,8 @@ void CDir::_omap_commit(int op_prio) } } - auto c = new C_IO_Dir_Commit_Ops(this, op_prio, std::move(dfts), - std::move(to_remove), std::move(to_set), - std::move(stale_items)); + auto c = new C_IO_Dir_Commit_Ops(this, op_prio, std::move(to_set), std::move(dfts), + std::move(to_remove), std::move(stale_items)); stale_items.clear(); mdcache->mds->finisher->queue(c); } diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 6d876975e11..d1fe9c5b913 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -670,8 +670,9 @@ protected: // -- commit -- void _commit(version_t want, int op_prio); - void _omap_commit_ops(int r, int op_prio, version_t version, bool _new, bufferlist &bl, - vector<dentry_key_t> &to_remove, vector<dentry_commit_item> &to_set, + void _omap_commit_ops(int r, int op_prio, int64_t metapool, version_t version, bool _new, + vector<dentry_commit_item> &to_set, bufferlist &dfts, + vector<dentry_key_t> &to_remove, mempool::mds_co::compact_set<mempool::mds_co::string> &_stale); void _omap_commit(int 
op_prio); void _parse_dentry(CDentry *dn, dentry_commit_item &item, diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 12fb2068fb6..b70a727c16d 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -11904,6 +11904,7 @@ void MDCache::_fragment_logged(MDRequestRef& mdr) for (const auto& dir : info.resultfrags) { dout(10) << " storing result frag " << *dir << dendl; + dir->mark_dirty(mdr->ls); dir->mark_new(mdr->ls); // freeze and store them too diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 91e6c3759b9..815c8589e82 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -12,6 +12,7 @@ */ #include "DaemonServer.h" +#include <boost/algorithm/string.hpp> #include "mgr/Mgr.h" #include "include/stringify.h" @@ -863,7 +864,7 @@ void DaemonServer::log_access_denied( << "entity='" << session->entity_name << "' " << "cmd=" << cmdctx->cmd << ": access denied"; ss << "access denied: does your client key have mgr caps? " - "See http://docs.ceph.com/docs/master/mgr/administrator/" + "See http://docs.ceph.com/en/latest/mgr/administrator/" "#client-authentication"; } @@ -889,8 +890,6 @@ bool DaemonServer::_handle_command( session->inst.name = m->get_source(); } - std::string format; - boost::scoped_ptr<Formatter> f; map<string,string> param_str_map; std::stringstream ss; int r = 0; @@ -900,15 +899,20 @@ bool DaemonServer::_handle_command( return true; } - { - cmd_getval(cmdctx->cmdmap, "format", format, string("plain")); - f.reset(Formatter::create(format)); - } - string prefix; cmd_getval(cmdctx->cmdmap, "prefix", prefix); + dout(10) << "decoded-size=" << cmdctx->cmdmap.size() << " prefix=" << prefix << dendl; - dout(10) << "decoded-size=" << cmdctx->cmdmap.size() << " prefix=" << prefix << dendl; + boost::scoped_ptr<Formatter> f; + { + std::string format; + if (boost::algorithm::ends_with(prefix, "_json")) { + format = "json"; + } else { + cmd_getval(cmdctx->cmdmap, "format", format, string("plain")); + } + 
f.reset(Formatter::create(format)); + } // this is just for mgr commands - admin socket commands will fall // through and use the admin socket version of diff --git a/src/mgr/Mgr.cc b/src/mgr/Mgr.cc index 781017a2689..1e9675944f5 100644 --- a/src/mgr/Mgr.cc +++ b/src/mgr/Mgr.cc @@ -308,12 +308,6 @@ void Mgr::init() // Load module KV store auto kv_store = load_store(); - // Migrate config from KV store on luminous->mimic - // drop lock because we do blocking config sets to mon - lock.unlock(); - py_module_registry->upgrade_config(monc, kv_store); - lock.lock(); - // assume finisher already initialized in background_init dout(4) << "starting python modules..." << dendl; py_module_registry->active_start(daemon_state, cluster_state, diff --git a/src/mgr/PyModuleRegistry.cc b/src/mgr/PyModuleRegistry.cc index 2aa556cc9d5..ad461aa47bc 100644 --- a/src/mgr/PyModuleRegistry.cc +++ b/src/mgr/PyModuleRegistry.cc @@ -445,83 +445,3 @@ void PyModuleRegistry::handle_config_notify() active_modules->config_notify(); } } - -void PyModuleRegistry::upgrade_config( - MonClient *monc, - const std::map<std::string, std::string> &old_config) -{ - // Only bother doing anything if we didn't already have - // some new-style config. - if (module_config.config.empty()) { - dout(1) << "Upgrading module configuration for Mimic" << dendl; - // Upgrade luminous->mimic: migrate config-key configuration - // into main configuration store - for (auto &i : old_config) { - auto last_slash = i.first.rfind('/'); - const std::string module_name = i.first.substr(4, i.first.substr(4).find('/')); - const std::string key = i.first.substr(last_slash + 1); - - const auto &value = i.second; - - // Heuristic to skip things that look more like stores - // than configs. 
- bool is_config = true; - for (const auto &c : value) { - if (c == '\n' || c == '\r' || c < 0x20) { - is_config = false; - break; - } - } - - if (value.size() > 256) { - is_config = false; - } - - if (!is_config) { - dout(1) << "Not migrating config module:key " - << module_name << " : " << key << dendl; - continue; - } - - // Check that the named module exists - auto module_iter = modules.find(module_name); - if (module_iter == modules.end()) { - dout(1) << "KV store contains data for unknown module '" - << module_name << "'" << dendl; - continue; - } - PyModuleRef module = module_iter->second; - - // Parse option name out of key - std::string option_name; - auto slash_loc = key.find("/"); - if (slash_loc != std::string::npos) { - if (key.size() > slash_loc + 1) { - // Localized option - option_name = key.substr(slash_loc + 1); - } else { - // Trailing slash: garbage. - derr << "Invalid mgr store key: '" << key << "'" << dendl; - continue; - } - } else { - option_name = key; - } - - // Consult module schema to see if this is really - // a configuration value - if (!option_name.empty() && module->is_option(option_name)) { - module_config.set_config(monc, module_name, key, i.second); - dout(4) << "Rewrote configuration module:key " - << module_name << ":" << key << dendl; - } else { - dout(4) << "Leaving store module:key " << module_name - << ":" << key << " in store, not config" << dendl; - } - } - } else { - dout(10) << "Module configuration contains " - << module_config.config.size() << " keys" << dendl; - } -} - diff --git a/src/mon/AuthMonitor.cc b/src/mon/AuthMonitor.cc index bdc0c64738a..2b27dc6f530 100644 --- a/src/mon/AuthMonitor.cc +++ b/src/mon/AuthMonitor.cc @@ -69,9 +69,9 @@ using ceph::make_message; using ceph::mono_clock; using ceph::mono_time; using ceph::timespan_str; -static ostream& _prefix(std::ostream *_dout, Monitor *mon, version_t v) { - return *_dout << "mon." 
<< mon->name << "@" << mon->rank - << "(" << mon->get_state_name() +static ostream& _prefix(std::ostream *_dout, Monitor &mon, version_t v) { + return *_dout << "mon." << mon.name << "@" << mon.rank + << "(" << mon.get_state_name() << ").auth v" << v << " "; } @@ -84,7 +84,7 @@ bool AuthMonitor::check_rotate() { KeyServerData::Incremental rot_inc; rot_inc.op = KeyServerData::AUTH_INC_SET_ROTATING; - if (!mon->key_server.updated_rotating(rot_inc.rotating_bl, last_rotating_ver)) + if (!mon.key_server.updated_rotating(rot_inc.rotating_bl, last_rotating_ver)) return false; dout(10) << __func__ << " updated rotating" << dendl; push_cephx_inc(rot_inc); @@ -105,23 +105,23 @@ void AuthMonitor::tick() bool propose = false; bool increase; { - std::lock_guard l(mon->auth_lock); + std::lock_guard l(mon.auth_lock); increase = _should_increase_max_global_id(); } if (increase) { - if (mon->is_leader()) { + if (mon.is_leader()) { increase_max_global_id(); propose = true; } else { dout(10) << __func__ << "requesting more ids from leader" << dendl; - int leader = mon->get_leader(); + int leader = mon.get_leader(); MMonGlobalID *req = new MMonGlobalID(); req->old_max_id = max_global_id; - mon->send_mon_message(req, leader); + mon.send_mon_message(req, leader); } } - if (!mon->is_leader()) { + if (!mon.is_leader()) { return; } @@ -138,13 +138,13 @@ void AuthMonitor::on_active() { dout(10) << "AuthMonitor::on_active()" << dendl; - if (!mon->is_leader()) + if (!mon.is_leader()) return; - mon->key_server.start_server(); + mon.key_server.start_server(); bool increase; { - std::lock_guard l(mon->auth_lock); + std::lock_guard l(mon.auth_lock); increase = _should_increase_max_global_id(); } if (is_writeable() && increase) { @@ -166,7 +166,7 @@ void AuthMonitor::get_initial_keyring(KeyRing *keyring) ceph_assert(keyring != nullptr); bufferlist bl; - int ret = mon->store->get("mkfs", "keyring", bl); + int ret = mon.store->get("mkfs", "keyring", bl); if (ret == -ENOENT) { return; } @@ -241,12 
+241,12 @@ void AuthMonitor::create_initial() dout(10) << "create_initial -- creating initial map" << dendl; // initialize rotating keys - mon->key_server.clear_secrets(); + mon.key_server.clear_secrets(); last_rotating_ver = 0; check_rotate(); ceph_assert(pending_auth.size() == 1); - if (mon->is_keyring_required()) { + if (mon.is_keyring_required()) { KeyRing keyring; // attempt to obtain an existing mkfs-time keyring get_initial_keyring(&keyring); @@ -272,7 +272,7 @@ void AuthMonitor::update_from_paxos(bool *need_bootstrap) load_health(); version_t version = get_last_committed(); - version_t keys_ver = mon->key_server.get_ver(); + version_t keys_ver = mon.key_server.get_ver(); if (version == keys_ver) return; ceph_assert(version > keys_ver); @@ -293,12 +293,12 @@ void AuthMonitor::update_from_paxos(bool *need_bootstrap) __u8 struct_v; decode(struct_v, p); decode(max_global_id, p); - decode(mon->key_server, p); - mon->key_server.set_ver(latest_full); + decode(mon.key_server, p); + mon.key_server.set_ver(latest_full); keys_ver = latest_full; } - dout(10) << __func__ << " key server version " << mon->key_server.get_ver() << dendl; + dout(10) << __func__ << " key server version " << mon.key_server.get_ver() << dendl; // walk through incrementals while (version > keys_ver) { @@ -311,7 +311,7 @@ void AuthMonitor::update_from_paxos(bool *need_bootstrap) // keys in here temporarily for bootstrapping that we need to // clear out. 
if (keys_ver == 0) - mon->key_server.clear_secrets(); + mon.key_server.clear_secrets(); dout(20) << __func__ << " walking through version " << (keys_ver+1) << " len " << bl.length() << dendl; @@ -332,24 +332,24 @@ void AuthMonitor::update_from_paxos(bool *need_bootstrap) KeyServerData::Incremental auth_inc; auto iter = inc.auth_data.cbegin(); decode(auth_inc, iter); - mon->key_server.apply_data_incremental(auth_inc); + mon.key_server.apply_data_incremental(auth_inc); break; } } } keys_ver++; - mon->key_server.set_ver(keys_ver); + mon.key_server.set_ver(keys_ver); - if (keys_ver == 1 && mon->is_keyring_required()) { + if (keys_ver == 1 && mon.is_keyring_required()) { auto t(std::make_shared<MonitorDBStore::Transaction>()); t->erase("mkfs", "keyring"); - mon->store->apply_transaction(t); + mon.store->apply_transaction(t); } } { - std::lock_guard l(mon->auth_lock); + std::lock_guard l(mon.auth_lock); if (last_allocated_id == 0) { last_allocated_id = max_global_id; dout(10) << __func__ << " last_allocated_id initialized to " @@ -364,7 +364,7 @@ void AuthMonitor::update_from_paxos(bool *need_bootstrap) bool AuthMonitor::_should_increase_max_global_id() { - ceph_assert(ceph_mutex_is_locked(mon->auth_lock)); + ceph_assert(ceph_mutex_is_locked(mon.auth_lock)); auto num_prealloc = g_conf()->mon_globalid_prealloc; if (max_global_id < num_prealloc || (last_allocated_id + 1) >= max_global_id - num_prealloc / 2) { @@ -375,7 +375,7 @@ bool AuthMonitor::_should_increase_max_global_id() void AuthMonitor::increase_max_global_id() { - ceph_assert(mon->is_leader()); + ceph_assert(mon.is_leader()); Incremental inc; inc.inc_type = GLOBAL_ID; @@ -405,7 +405,7 @@ void AuthMonitor::encode_pending(MonitorDBStore::TransactionRef t) encode(v, bl); vector<Incremental>::iterator p; for (p = pending_auth.begin(); p != pending_auth.end(); ++p) - p->encode(bl, mon->get_quorum_con_features()); + p->encode(bl, mon.get_quorum_con_features()); version_t version = get_last_committed() + 1; 
put_version(t, version, bl); @@ -414,8 +414,8 @@ void AuthMonitor::encode_pending(MonitorDBStore::TransactionRef t) // health health_check_map_t next; map<string,list<string>> bad_detail; // entity -> details - for (auto i = mon->key_server.secrets_begin(); - i != mon->key_server.secrets_end(); + for (auto i = mon.key_server.secrets_begin(); + i != mon.key_server.secrets_end(); ++i) { for (auto& p : i->second.caps) { ostringstream ss; @@ -461,7 +461,7 @@ void AuthMonitor::encode_pending(MonitorDBStore::TransactionRef t) void AuthMonitor::encode_full(MonitorDBStore::TransactionRef t) { - version_t version = mon->key_server.get_ver(); + version_t version = mon.key_server.get_ver(); // do not stash full version 0 as it will never be removed nor read if (version == 0) return; @@ -470,14 +470,14 @@ void AuthMonitor::encode_full(MonitorDBStore::TransactionRef t) ceph_assert(get_last_committed() == version); bufferlist full_bl; - std::scoped_lock l{mon->key_server.get_lock()}; + std::scoped_lock l{mon.key_server.get_lock()}; dout(20) << __func__ << " key server has " - << (mon->key_server.has_secrets() ? "" : "no ") + << (mon.key_server.has_secrets() ? "" : "no ") << "secrets!" 
<< dendl; __u8 v = 1; encode(v, full_bl); encode(max_global_id, full_bl); - encode(mon->key_server, full_bl); + encode(mon.key_server, full_bl); put_version_full(t, version, full_bl); put_version_latest_full(t, version); @@ -487,7 +487,7 @@ version_t AuthMonitor::get_trim_to() const { unsigned max = g_conf()->paxos_max_join_drift * 2; version_t version = get_last_committed(); - if (mon->is_leader() && (version > max)) + if (mon.is_leader() && (version > max)) return version - max; return 0; } @@ -502,7 +502,7 @@ bool AuthMonitor::preprocess_query(MonOpRequestRef op) return preprocess_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } @@ -528,7 +528,7 @@ bool AuthMonitor::prepare_update(MonOpRequestRef op) return prepare_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } case MSG_MON_GLOBAL_ID: @@ -544,14 +544,14 @@ bool AuthMonitor::prepare_update(MonOpRequestRef op) void AuthMonitor::_set_mon_num_rank(int num, int rank) { dout(10) << __func__ << " num " << num << " rank " << rank << dendl; - ceph_assert(ceph_mutex_is_locked(mon->auth_lock)); + ceph_assert(ceph_mutex_is_locked(mon.auth_lock)); mon_num = num; mon_rank = rank; } uint64_t AuthMonitor::_assign_global_id() { - ceph_assert(ceph_mutex_is_locked(mon->auth_lock)); + ceph_assert(ceph_mutex_is_locked(mon.auth_lock)); if (mon_num < 1 || mon_rank < 0) { dout(10) << __func__ << " inactive (num_mon " << mon_num << " rank " << mon_rank << ")" << dendl; @@ -584,13 +584,13 @@ uint64_t AuthMonitor::assign_global_id(bool should_increase_max) { uint64_t id; { - std::lock_guard l(mon->auth_lock); + std::lock_guard l(mon.auth_lock); id =_assign_global_id(); if (should_increase_max) { 
should_increase_max = _should_increase_max_global_id(); } } - if (mon->is_leader() && + if (mon.is_leader() && should_increase_max) { increase_max_global_id(); } @@ -688,11 +688,11 @@ bool AuthMonitor::prep_auth(MonOpRequestRef op, bool paxos_writable) entity_name.get_type() == CEPH_ENTITY_TYPE_OSD || entity_name.get_type() == CEPH_ENTITY_TYPE_MDS || entity_name.get_type() == CEPH_ENTITY_TYPE_MGR) - type = mon->auth_cluster_required.pick(supported); + type = mon.auth_cluster_required.pick(supported); else - type = mon->auth_service_required.pick(supported); + type = mon.auth_service_required.pick(supported); - s->auth_handler = get_auth_service_handler(type, g_ceph_context, &mon->key_server); + s->auth_handler = get_auth_service_handler(type, g_ceph_context, &mon.key_server); if (!s->auth_handler) { dout(1) << "client did not provide supported auth type" << dendl; ret = -ENOTSUP; @@ -716,18 +716,18 @@ bool AuthMonitor::prep_auth(MonOpRequestRef op, bool paxos_writable) delete s->auth_handler; s->auth_handler = NULL; - if (mon->is_leader() && paxos_writable) { + if (mon.is_leader() && paxos_writable) { dout(10) << "increasing global id, waitlisting message" << dendl; wait_for_active(op, new C_RetryMessage(this, op)); goto done; } - if (!mon->is_leader()) { + if (!mon.is_leader()) { dout(10) << "not the leader, requesting more ids from leader" << dendl; - int leader = mon->get_leader(); + int leader = mon.get_leader(); MMonGlobalID *req = new MMonGlobalID(); req->old_max_id = max_global_id; - mon->send_mon_message(req, leader); + mon.send_mon_message(req, leader); wait_for_finished_proposal(op, new C_RetryMessage(this, op)); return true; } @@ -761,7 +761,7 @@ bool AuthMonitor::prep_auth(MonOpRequestRef op, bool paxos_writable) } if (ret > 0) { if (!s->authenticated && - mon->ms_handle_authentication(s->con.get()) > 0) { + mon.ms_handle_authentication(s->con.get()) > 0) { finished = true; } ret = 0; @@ -773,13 +773,13 @@ bool AuthMonitor::prep_auth(MonOpRequestRef op, 
bool paxos_writable) reply: reply = new MAuthReply(proto, &response_bl, ret, s->con->peer_global_id); - mon->send_reply(op, reply); + mon.send_reply(op, reply); if (finished) { // always send the latest monmap. - if (m->monmap_epoch < mon->monmap->get_epoch()) - mon->send_latest_monmap(m->get_connection().get()); + if (m->monmap_epoch < mon.monmap->get_epoch()) + mon.send_latest_monmap(m->get_connection().get()); - mon->configmon()->check_sub(s); + mon.configmon()->check_sub(s); } done: return true; @@ -796,7 +796,7 @@ bool AuthMonitor::preprocess_command(MonOpRequestRef op) if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { // ss has reason for failure string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); return true; } @@ -815,7 +815,7 @@ bool AuthMonitor::preprocess_command(MonOpRequestRef op) MonSession *session = op->get_session(); if (!session) { - mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); + mon.reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); return true; } @@ -825,7 +825,7 @@ bool AuthMonitor::preprocess_command(MonOpRequestRef op) EntityName entity; if (!entity_name.empty() && !entity.from_str(entity_name)) { ss << "invalid entity_auth " << entity_name; - mon->reply_command(op, -EINVAL, ss.str(), get_last_committed()); + mon.reply_command(op, -EINVAL, ss.str(), get_last_committed()); return true; } @@ -863,7 +863,7 @@ bool AuthMonitor::preprocess_command(MonOpRequestRef op) } else if (prefix == "auth get" && !entity_name.empty()) { KeyRing keyring; EntityAuth entity_auth; - if(!mon->key_server.get_auth(entity, entity_auth)) { + if(!mon.key_server.get_auth(entity, entity_auth)) { ss << "failed to find " << entity_name << " in keyring"; r = -ENOENT; } else { @@ -879,7 +879,7 @@ bool AuthMonitor::preprocess_command(MonOpRequestRef op) prefix == "auth print_key" || prefix == "auth get-key") { EntityAuth 
auth; - if (!mon->key_server.get_auth(entity, auth)) { + if (!mon.key_server.get_auth(entity, auth)) { ss << "don't have " << entity; r = -ENOENT; goto done; @@ -893,9 +893,9 @@ bool AuthMonitor::preprocess_command(MonOpRequestRef op) } else if (prefix == "auth list" || prefix == "auth ls") { if (f) { - mon->key_server.encode_formatted("auth", f.get(), rdata); + mon.key_server.encode_formatted("auth", f.get(), rdata); } else { - mon->key_server.encode_plaintext(rdata); + mon.key_server.encode_plaintext(rdata); if (rdata.length() > 0) ss << "installed auth entries:" << std::endl; else @@ -912,13 +912,13 @@ bool AuthMonitor::preprocess_command(MonOpRequestRef op) rdata.append(ds); string rs; getline(ss, rs, '\0'); - mon->reply_command(op, r, rs, rdata, get_last_committed()); + mon.reply_command(op, r, rs, rdata, get_last_committed()); return true; } void AuthMonitor::export_keyring(KeyRing& keyring) { - mon->key_server.export_keyring(keyring); + mon.key_server.export_keyring(keyring); } int AuthMonitor::import_keyring(KeyRing& keyring) @@ -941,7 +941,7 @@ int AuthMonitor::import_keyring(KeyRing& keyring) int AuthMonitor::remove_entity(const EntityName &entity) { dout(10) << __func__ << " " << entity << dendl; - if (!mon->key_server.contains(entity)) + if (!mon.key_server.contains(entity)) return -ENOENT; KeyServerData::Incremental auth_inc; @@ -991,7 +991,7 @@ int AuthMonitor::exists_and_matches_entity( EntityAuth existing_auth; // does entry already exist? - if (mon->key_server.get_auth(name, existing_auth)) { + if (mon.key_server.get_auth(name, existing_auth)) { // key match? 
if (has_secret) { if (existing_auth.key.get_secret().cmp(auth.key.get_secret())) { @@ -1044,7 +1044,7 @@ int AuthMonitor::validate_osd_destroy( EntityName& lockbox_entity, stringstream& ss) { - ceph_assert(paxos->is_plugged()); + ceph_assert(paxos.is_plugged()); dout(10) << __func__ << " id " << id << " uuid " << uuid << dendl; @@ -1065,8 +1065,8 @@ int AuthMonitor::validate_osd_destroy( return -EINVAL; } - if (!mon->key_server.contains(cephx_entity) && - !mon->key_server.contains(lockbox_entity)) { + if (!mon.key_server.contains(cephx_entity) && + !mon.key_server.contains(lockbox_entity)) { return -ENOENT; } @@ -1077,7 +1077,7 @@ int AuthMonitor::do_osd_destroy( const EntityName& cephx_entity, const EntityName& lockbox_entity) { - ceph_assert(paxos->is_plugged()); + ceph_assert(paxos.is_plugged()); dout(10) << __func__ << " cephx " << cephx_entity << " lockbox " << lockbox_entity << dendl; @@ -1232,7 +1232,7 @@ int AuthMonitor::do_osd_new( const auth_entity_t& lockbox_entity, bool has_lockbox) { - ceph_assert(paxos->is_plugged()); + ceph_assert(paxos.is_plugged()); dout(10) << __func__ << " cephx " << cephx_entity.name << " lockbox "; @@ -1246,7 +1246,7 @@ int AuthMonitor::do_osd_new( // we must have validated before reaching this point. // if keys exist, then this means they also match; otherwise we would // have failed before calling this function. 
- bool cephx_exists = mon->key_server.contains(cephx_entity.name); + bool cephx_exists = mon.key_server.contains(cephx_entity.name); if (!cephx_exists) { int err = add_entity(cephx_entity.name, cephx_entity.auth); @@ -1254,7 +1254,7 @@ int AuthMonitor::do_osd_new( } if (has_lockbox && - !mon->key_server.contains(lockbox_entity.name)) { + !mon.key_server.contains(lockbox_entity.name)) { int err = add_entity(lockbox_entity.name, lockbox_entity.auth); ceph_assert(0 == err); } @@ -1334,7 +1334,7 @@ bool AuthMonitor::prepare_command(MonOpRequestRef op) if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { // ss has reason for failure string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); return true; } @@ -1351,7 +1351,7 @@ bool AuthMonitor::prepare_command(MonOpRequestRef op) MonSession *session = op->get_session(); if (!session) { - mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); + mon.reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); return true; } @@ -1375,7 +1375,7 @@ bool AuthMonitor::prepare_command(MonOpRequestRef op) if (bl.length() == 0) { ss << "auth import: no data supplied"; getline(ss, rs); - mon->reply_command(op, -EINVAL, rs, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); return true; } auto iter = bl.cbegin(); @@ -1391,7 +1391,7 @@ bool AuthMonitor::prepare_command(MonOpRequestRef op) if (err < 0) { ss << "auth import: no caps supplied"; getline(ss, rs); - mon->reply_command(op, -EINVAL, rs, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); return true; } ss << "imported keyring"; @@ -1519,7 +1519,7 @@ bool AuthMonitor::prepare_command(MonOpRequestRef op) // do we have it? 
EntityAuth entity_auth; - if (mon->key_server.get_auth(entity, entity_auth)) { + if (mon.key_server.get_auth(entity, entity_auth)) { for (const auto &sys_cap : wanted_caps) { if (entity_auth.caps.count(sys_cap.first) == 0 || !entity_auth.caps[sys_cap.first].contents_equal(sys_cap.second)) { @@ -1607,7 +1607,7 @@ bool AuthMonitor::prepare_command(MonOpRequestRef op) std::shared_ptr<const Filesystem> fs; if (filesystem != "*" && filesystem != "all") { - fs = mon->mdsmon()->get_fsmap().get_filesystem(filesystem); + fs = mon.mdsmon()->get_fsmap().get_filesystem(filesystem); if (fs == nullptr) { ss << "filesystem " << filesystem << " does not exist."; err = -EINVAL; @@ -1691,7 +1691,7 @@ bool AuthMonitor::prepare_command(MonOpRequestRef op) } EntityAuth entity_auth; - if (mon->key_server.get_auth(entity, entity_auth)) { + if (mon.key_server.get_auth(entity, entity_auth)) { for (const auto &sys_cap : wanted_caps) { if (entity_auth.caps.count(sys_cap.first) == 0 || !entity_auth.caps[sys_cap.first].contents_equal(sys_cap.second)) { @@ -1741,7 +1741,7 @@ bool AuthMonitor::prepare_command(MonOpRequestRef op) } else if (prefix == "auth caps" && !entity_name.empty()) { KeyServerData::Incremental auth_inc; auth_inc.name = entity; - if (!mon->key_server.get_auth(auth_inc.name, auth_inc.auth)) { + if (!mon.key_server.get_auth(auth_inc.name, auth_inc.auth)) { ss << "couldn't find entry " << auth_inc.name; err = -ENOENT; goto done; @@ -1770,7 +1770,7 @@ bool AuthMonitor::prepare_command(MonOpRequestRef op) !entity_name.empty()) { KeyServerData::Incremental auth_inc; auth_inc.name = entity; - if (!mon->key_server.contains(auth_inc.name)) { + if (!mon.key_server.contains(auth_inc.name)) { ss << "entity " << entity << " does not exist"; err = 0; goto done; @@ -1787,7 +1787,7 @@ bool AuthMonitor::prepare_command(MonOpRequestRef op) done: rdata.append(ds); getline(ss, rs, '\0'); - mon->reply_command(op, err, rs, rdata, get_last_committed()); + mon.reply_command(op, err, rs, rdata, 
get_last_committed()); return false; } @@ -1806,8 +1806,8 @@ bool AuthMonitor::_upgrade_format_to_dumpling() bool changed = false; map<EntityName, EntityAuth>::iterator p; - for (p = mon->key_server.secrets_begin(); - p != mon->key_server.secrets_end(); + for (p = mon.key_server.secrets_begin(); + p != mon.key_server.secrets_end(); ++p) { // grab mon caps, if any string mon_caps; @@ -1866,8 +1866,8 @@ bool AuthMonitor::_upgrade_format_to_luminous() bool changed = false; map<EntityName, EntityAuth>::iterator p; - for (p = mon->key_server.secrets_begin(); - p != mon->key_server.secrets_end(); + for (p = mon.key_server.secrets_begin(); + p != mon.key_server.secrets_end(); ++p) { string n = p->first.to_str(); @@ -1924,7 +1924,7 @@ bool AuthMonitor::_upgrade_format_to_luminous() EntityName bootstrap_mgr_name; int r = bootstrap_mgr_name.from_str("client.bootstrap-mgr"); ceph_assert(r); - if (!mon->key_server.contains(bootstrap_mgr_name)) { + if (!mon.key_server.contains(bootstrap_mgr_name)) { EntityName name = bootstrap_mgr_name; EntityAuth auth; @@ -1946,7 +1946,7 @@ bool AuthMonitor::_upgrade_format_to_mimic() bool changed = false; for (auto &p : auth_lst) { - if (mon->key_server.contains(p.first)) { + if (mon.key_server.contains(p.first)) { continue; } int err = add_entity(p.first, p.second); @@ -1971,11 +1971,11 @@ void AuthMonitor::upgrade_format() // by N+1. 
unsigned int current = FORMAT_MIMIC; - if (!mon->get_quorum_mon_features().contains_all( + if (!mon.get_quorum_mon_features().contains_all( ceph::features::mon::FEATURE_LUMINOUS)) { // pre-luminous quorum current = FORMAT_DUMPLING; - } else if (!mon->get_quorum_mon_features().contains_all( + } else if (!mon.get_quorum_mon_features().contains_all( ceph::features::mon::FEATURE_MIMIC)) { // pre-mimic quorum current = FORMAT_LUMINOUS; @@ -2016,6 +2016,6 @@ void AuthMonitor::dump_info(Formatter *f) f->open_object_section("auth"); f->dump_unsigned("first_committed", get_first_committed()); f->dump_unsigned("last_committed", get_last_committed()); - f->dump_unsigned("num_secrets", mon->key_server.get_num_secrets()); + f->dump_unsigned("num_secrets", mon.key_server.get_num_secrets()); f->close_section(); } diff --git a/src/mon/AuthMonitor.h b/src/mon/AuthMonitor.h index 96bf0cbc1ea..048fc0c08ef 100644 --- a/src/mon/AuthMonitor.h +++ b/src/mon/AuthMonitor.h @@ -184,7 +184,7 @@ private: const EntityAuth& auth); public: - AuthMonitor(Monitor *mn, Paxos *p, const std::string& service_name) + AuthMonitor(Monitor &mn, Paxos &p, const std::string& service_name) : PaxosService(mn, p, service_name), last_rotating_ver(0), max_global_id(0), diff --git a/src/mon/ConfigKeyService.cc b/src/mon/ConfigKeyService.cc index 5c02b1038a2..a75f0dcbeb8 100644 --- a/src/mon/ConfigKeyService.cc +++ b/src/mon/ConfigKeyService.cc @@ -58,18 +58,28 @@ using ceph::mono_time; using ceph::parse_timespan; using ceph::timespan_str; -static ostream& _prefix(std::ostream *_dout, const Monitor *mon, - const ConfigKeyService *service) { - return *_dout << "mon." << mon->name << "@" << mon->rank - << "(" << mon->get_state_name() << ")." << service->get_name() - << "(" << service->get_epoch() << ") "; +static ostream& _prefix(std::ostream *_dout, const Monitor &mon, + const ConfigKeyService *service) +{ + return *_dout << "mon." 
<< mon.name << "@" << mon.rank + << "(" << mon.get_state_name() << ").config_key"; } const string CONFIG_PREFIX = "mon_config_key"; +ConfigKeyService::ConfigKeyService(Monitor &m, Paxos &p) + : mon(m), + paxos(p) +{} + +bool ConfigKeyService::in_quorum() const +{ + return (mon.is_leader() || mon.is_peon()); +} + int ConfigKeyService::store_get(const string &key, bufferlist &bl) { - return mon->store->get(CONFIG_PREFIX, key, bl); + return mon.store->get(CONFIG_PREFIX, key, bl); } void ConfigKeyService::get_store_prefixes(set<string>& s) const @@ -79,20 +89,20 @@ void ConfigKeyService::get_store_prefixes(set<string>& s) const void ConfigKeyService::store_put(const string &key, bufferlist &bl, Context *cb) { - MonitorDBStore::TransactionRef t = paxos->get_pending_transaction(); + MonitorDBStore::TransactionRef t = paxos.get_pending_transaction(); t->put(CONFIG_PREFIX, key, bl); if (cb) - paxos->queue_pending_finisher(cb); - paxos->trigger_propose(); + paxos.queue_pending_finisher(cb); + paxos.trigger_propose(); } void ConfigKeyService::store_delete(const string &key, Context *cb) { - MonitorDBStore::TransactionRef t = paxos->get_pending_transaction(); + MonitorDBStore::TransactionRef t = paxos.get_pending_transaction(); store_delete(t, key); if (cb) - paxos->queue_pending_finisher(cb); - paxos->trigger_propose(); + paxos.queue_pending_finisher(cb); + paxos.trigger_propose(); } void ConfigKeyService::store_delete( @@ -104,13 +114,13 @@ void ConfigKeyService::store_delete( bool ConfigKeyService::store_exists(const string &key) { - return mon->store->exists(CONFIG_PREFIX, key); + return mon.store->exists(CONFIG_PREFIX, key); } void ConfigKeyService::store_list(stringstream &ss) { KeyValueDB::Iterator iter = - mon->store->get_iterator(CONFIG_PREFIX); + mon.store->get_iterator(CONFIG_PREFIX); JSONFormatter f(true); f.open_array_section("keys"); @@ -127,7 +137,7 @@ void ConfigKeyService::store_list(stringstream &ss) bool ConfigKeyService::store_has_prefix(const string 
&prefix) { KeyValueDB::Iterator iter = - mon->store->get_iterator(CONFIG_PREFIX); + mon.store->get_iterator(CONFIG_PREFIX); while (iter->valid()) { string key(iter->key()); @@ -154,7 +164,7 @@ static bool is_binary_string(const string& s) void ConfigKeyService::store_dump(stringstream &ss, const string& prefix) { KeyValueDB::Iterator iter = - mon->store->get_iterator(CONFIG_PREFIX); + mon.store->get_iterator(CONFIG_PREFIX); dout(10) << __func__ << " prefix '" << prefix << "'" << dendl; if (prefix.size()) { @@ -188,7 +198,7 @@ void ConfigKeyService::store_delete_prefix( const string &prefix) { KeyValueDB::Iterator iter = - mon->store->get_iterator(CONFIG_PREFIX); + mon.store->get_iterator(CONFIG_PREFIX); while (iter->valid()) { string key(iter->key()); @@ -201,7 +211,7 @@ void ConfigKeyService::store_delete_prefix( } } -bool ConfigKeyService::service_dispatch(MonOpRequestRef op) +bool ConfigKeyService::dispatch(MonOpRequestRef op) { Message *m = op->get_req(); ceph_assert(m != NULL); @@ -209,7 +219,7 @@ bool ConfigKeyService::service_dispatch(MonOpRequestRef op) if (!in_quorum()) { dout(1) << __func__ << " not in quorum -- waiting" << dendl; - paxos->wait_for_readable(op, new Monitor::C_RetryMessage(mon, op)); + paxos.wait_for_readable(op, new Monitor::C_RetryMessage(&mon, op)); return false; } @@ -245,8 +255,8 @@ bool ConfigKeyService::service_dispatch(MonOpRequestRef op) } else if (prefix == "config-key put" || prefix == "config-key set") { - if (!mon->is_leader()) { - mon->forward_request_leader(op); + if (!mon.is_leader()) { + mon.forward_request_leader(op); // we forward the message; so return now. 
return true; } @@ -278,8 +288,8 @@ bool ConfigKeyService::service_dispatch(MonOpRequestRef op) } else if (prefix == "config-key del" || prefix == "config-key rm") { - if (!mon->is_leader()) { - mon->forward_request_leader(op); + if (!mon.is_leader()) { + mon.forward_request_leader(op); return true; } @@ -323,7 +333,7 @@ bool ConfigKeyService::service_dispatch(MonOpRequestRef op) out: if (!cmd->get_source().is_mon()) { string rs = ss.str(); - mon->reply_command(op, ret, rs, rdata, 0); + mon.reply_command(op, ret, rs, rdata, 0); } return (ret == 0); @@ -355,12 +365,12 @@ void ConfigKeyService::do_osd_destroy(int32_t id, uuid_d& uuid) string daemon_prefix = "daemon-private/osd." + stringify(id) + "/"; - MonitorDBStore::TransactionRef t = paxos->get_pending_transaction(); + MonitorDBStore::TransactionRef t = paxos.get_pending_transaction(); for (auto p : { dmcrypt_prefix, daemon_prefix }) { store_delete_prefix(t, p); } - paxos->trigger_propose(); + paxos.trigger_propose(); } int ConfigKeyService::validate_osd_new( @@ -394,7 +404,7 @@ void ConfigKeyService::do_osd_new( const uuid_d& uuid, const string& dmcrypt_key) { - ceph_assert(paxos->is_plugged()); + ceph_assert(paxos.is_plugged()); string dmcrypt_key_prefix = _get_dmcrypt_prefix(uuid, "luks"); bufferlist dmcrypt_key_value; diff --git a/src/mon/ConfigKeyService.h b/src/mon/ConfigKeyService.h index f40062046a6..71ee59bd858 100644 --- a/src/mon/ConfigKeyService.h +++ b/src/mon/ConfigKeyService.h @@ -14,15 +14,36 @@ #ifndef CEPH_MON_CONFIG_KEY_SERVICE_H #define CEPH_MON_CONFIG_KEY_SERVICE_H -#include "mon/QuorumService.h" +#include "include/Context.h" +#include "mon/MonOpRequest.h" #include "mon/MonitorDBStore.h" class Paxos; class Monitor; -class ConfigKeyService : public QuorumService +class ConfigKeyService { - Paxos *paxos; +public: + ConfigKeyService(Monitor &m, Paxos &p); + ~ConfigKeyService() {} + + bool dispatch(MonOpRequestRef op); + + int validate_osd_destroy(const int32_t id, const uuid_d& uuid); + void 
do_osd_destroy(int32_t id, uuid_d& uuid); + int validate_osd_new( + const uuid_d& uuid, + const std::string& dmcrypt_key, + std::stringstream& ss); + void do_osd_new(const uuid_d& uuid, const std::string& dmcrypt_key); + + void get_store_prefixes(std::set<std::string>& s) const; + +private: + Monitor &mon; + Paxos &paxos; + + bool in_quorum() const; int store_get(const std::string &key, ceph::buffer::list &bl); void store_put(const std::string &key, ceph::buffer::list &bl, Context *cb = NULL); @@ -37,49 +58,6 @@ class ConfigKeyService : public QuorumService bool store_has_prefix(const std::string &prefix); static const std::string STORE_PREFIX; - -protected: - void service_shutdown() override { } - -public: - ConfigKeyService(Monitor *m, Paxos *p) : - QuorumService(m), - paxos(p) - { } - ~ConfigKeyService() override { } - - - /** - * @defgroup ConfigKeyService_Inherited_h Inherited abstract methods - * @{ - */ - void init() override { } - bool service_dispatch(MonOpRequestRef op) override; - - void start_epoch() override { } - void finish_epoch() override { } - void cleanup() override { } - void service_tick() override { } - - int validate_osd_destroy(const int32_t id, const uuid_d& uuid); - void do_osd_destroy(int32_t id, uuid_d& uuid); - int validate_osd_new( - const uuid_d& uuid, - const std::string& dmcrypt_key, - std::stringstream& ss); - void do_osd_new(const uuid_d& uuid, const std::string& dmcrypt_key); - - int get_type() override { - return QuorumService::SERVICE_CONFIG_KEY; - } - - std::string get_name() const override { - return "config_key"; - } - void get_store_prefixes(std::set<std::string>& s) const; - /** - * @} // ConfigKeyService_Inherited_h - */ }; #endif // CEPH_MON_CONFIG_KEY_SERVICE_H diff --git a/src/mon/ConfigMonitor.cc b/src/mon/ConfigMonitor.cc index d7fe933c8b3..bc8c28133ec 100644 --- a/src/mon/ConfigMonitor.cc +++ b/src/mon/ConfigMonitor.cc @@ -48,16 +48,16 @@ using ceph::JSONFormatter; using ceph::mono_clock; using ceph::mono_time; 
using ceph::timespan_str; -static ostream& _prefix(std::ostream *_dout, const Monitor *mon, +static ostream& _prefix(std::ostream *_dout, const Monitor &mon, const ConfigMonitor *hmon) { - return *_dout << "mon." << mon->name << "@" << mon->rank - << "(" << mon->get_state_name() << ").config "; + return *_dout << "mon." << mon.name << "@" << mon.rank + << "(" << mon.get_state_name() << ").config "; } const string KEY_PREFIX("config/"); const string HISTORY_PREFIX("config-history/"); -ConfigMonitor::ConfigMonitor(Monitor *m, Paxos *p, const string& service_name) +ConfigMonitor::ConfigMonitor(Monitor &m, Paxos &p, const string& service_name) : PaxosService(m, p, service_name) { } @@ -153,7 +153,7 @@ bool ConfigMonitor::preprocess_query(MonOpRequestRef op) return preprocess_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } } @@ -180,7 +180,7 @@ bool ConfigMonitor::preprocess_command(MonOpRequestRef op) cmdmap_t cmdmap; if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); return true; } string format; @@ -197,7 +197,7 @@ bool ConfigMonitor::preprocess_command(MonOpRequestRef op) cmd_getval(cmdmap, "key", name); const Option *opt = g_conf().find_option(name); if (!opt) { - opt = mon->mgrmon()->find_module_option(name); + opt = mon.mgrmon()->find_module_option(name); } if (opt) { if (f) { @@ -227,7 +227,7 @@ bool ConfigMonitor::preprocess_command(MonOpRequestRef op) ss << i.name << "\n"; } } - for (auto& i : mon->mgrmon()->get_mgr_module_options()) { + for (auto& i : mon.mgrmon()->get_mgr_module_options()) { if (f) { f->dump_string("option", i.first); } else { @@ -306,9 +306,9 @@ bool ConfigMonitor::preprocess_command(MonOpRequestRef op) map<string,string> crush_location; 
string device_class; if (entity.is_osd()) { - mon->osdmon()->osdmap.crush->get_full_location(who, &crush_location); + mon.osdmon()->osdmap.crush->get_full_location(who, &crush_location); int id = atoi(entity.get_id().c_str()); - const char *c = mon->osdmon()->osdmap.crush->get_item_class(id); + const char *c = mon.osdmon()->osdmap.crush->get_item_class(id); if (c) { device_class = c; } @@ -320,14 +320,14 @@ bool ConfigMonitor::preprocess_command(MonOpRequestRef op) auto config = config_map.generate_entity_map( entity, crush_location, - mon->osdmon()->osdmap.crush.get(), + mon.osdmon()->osdmap.crush.get(), device_class, &src); if (cmd_getval(cmdmap, "key", name)) { const Option *opt = g_conf().find_option(name); if (!opt) { - opt = mon->mgrmon()->find_module_option(name); + opt = mon.mgrmon()->find_module_option(name); } if (!opt) { err = -ENOENT; @@ -336,7 +336,7 @@ bool ConfigMonitor::preprocess_command(MonOpRequestRef op) if (opt->has_flag(Option::FLAG_NO_MON_UPDATE)) { // handle special options if (name == "fsid") { - odata.append(stringify(mon->monmap->get_fsid())); + odata.append(stringify(mon.monmap->get_fsid())); odata.append("\n"); goto reply; } @@ -426,16 +426,16 @@ bool ConfigMonitor::preprocess_command(MonOpRequestRef op) } } else if (prefix == "config generate-minimal-conf") { ostringstream conf; - conf << "# minimal ceph.conf for " << mon->monmap->get_fsid() << "\n"; + conf << "# minimal ceph.conf for " << mon.monmap->get_fsid() << "\n"; // the basics conf << "[global]\n"; - conf << "\tfsid = " << mon->monmap->get_fsid() << "\n"; + conf << "\tfsid = " << mon.monmap->get_fsid() << "\n"; conf << "\tmon_host = "; - for (auto i = mon->monmap->mon_info.begin(); - i != mon->monmap->mon_info.end(); + for (auto i = mon.monmap->mon_info.begin(); + i != mon.monmap->mon_info.end(); ++i) { - if (i != mon->monmap->mon_info.begin()) { + if (i != mon.monmap->mon_info.begin()) { conf << " "; } if (i->second.public_addrs.size() == 1 && @@ -467,7 +467,7 @@ bool 
ConfigMonitor::preprocess_command(MonOpRequestRef op) } reply: - mon->reply_command(op, err, ss.str(), odata, get_last_committed()); + mon.reply_command(op, err, ss.str(), odata, get_last_committed()); return true; } @@ -476,7 +476,7 @@ void ConfigMonitor::handle_get_config(MonOpRequestRef op) auto m = op->get_req<MGetConfig>(); dout(10) << __func__ << " " << m->name << " host " << m->host << dendl; - const OSDMap& osdmap = mon->osdmon()->osdmap; + const OSDMap& osdmap = mon.osdmon()->osdmap; map<string,string> crush_location; osdmap.crush->get_full_location(m->host, &crush_location); auto out = config_map.generate_entity_map( @@ -499,7 +499,7 @@ bool ConfigMonitor::prepare_update(MonOpRequestRef op) return prepare_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } } @@ -515,7 +515,7 @@ bool ConfigMonitor::prepare_command(MonOpRequestRef op) cmdmap_t cmdmap; if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); return true; } @@ -536,7 +536,7 @@ bool ConfigMonitor::prepare_command(MonOpRequestRef op) if (prefix == "config set" && !force) { const Option *opt = g_conf().find_option(name); if (!opt) { - opt = mon->mgrmon()->find_module_option(name); + opt = mon.mgrmon()->find_module_option(name); } if (!opt) { ss << "unrecognized config option '" << name << "'"; @@ -639,7 +639,7 @@ bool ConfigMonitor::prepare_command(MonOpRequestRef op) // a known and worthy option? 
const Option *o = g_conf().find_option(key); if (!o) { - o = mon->mgrmon()->find_module_option(key); + o = mon.mgrmon()->find_module_option(key); } if (!o || (o->flags & Option::FLAG_NO_MON_UPDATE) || @@ -701,7 +701,7 @@ bool ConfigMonitor::prepare_command(MonOpRequestRef op) } reply: - mon->reply_command(op, err, ss.str(), odata, get_last_committed()); + mon.reply_command(op, err, ss.str(), odata, get_last_committed()); return false; update: @@ -734,7 +734,7 @@ update: void ConfigMonitor::tick() { - if (!is_active() || !mon->is_leader()) { + if (!is_active() || !mon.is_leader()) { return; } dout(10) << __func__ << dendl; @@ -764,7 +764,7 @@ void ConfigMonitor::load_config() }; unsigned num = 0; - KeyValueDB::Iterator it = mon->store->get_iterator(CONFIG_PREFIX); + KeyValueDB::Iterator it = mon.store->get_iterator(CONFIG_PREFIX); it->lower_bound(KEY_PREFIX); config_map.clear(); current.clear(); @@ -793,7 +793,7 @@ void ConfigMonitor::load_config() { auto p = renamed_pacific.find(name); if (p != renamed_pacific.end()) { - if (mon->monmap->min_mon_release >= ceph_release_t::pacific) { + if (mon.monmap->min_mon_release >= ceph_release_t::pacific) { // schedule a cleanup pending_cleanup[key] = boost::none; pending_cleanup[who + "/" + p->second] = it->value(); @@ -805,7 +805,7 @@ void ConfigMonitor::load_config() const Option *opt = g_conf().find_option(name); if (!opt) { - opt = mon->mgrmon()->find_module_option(name); + opt = mon.mgrmon()->find_module_option(name); } if (!opt) { dout(10) << __func__ << " unrecognized option '" << name << "'" << dendl; @@ -864,7 +864,7 @@ void ConfigMonitor::load_config() // refresh our own config { - const OSDMap& osdmap = mon->osdmon()->osdmap; + const OSDMap& osdmap = mon.osdmon()->osdmap; map<string,string> crush_location; osdmap.crush->get_full_location(g_conf()->host, &crush_location); auto out = config_map.generate_entity_map( @@ -880,7 +880,7 @@ void ConfigMonitor::load_changeset(version_t v, ConfigChangeSet *ch) { ch->version 
= v; string prefix = HISTORY_PREFIX + stringify(v) + "/"; - KeyValueDB::Iterator it = mon->store->get_iterator(CONFIG_PREFIX); + KeyValueDB::Iterator it = mon.store->get_iterator(CONFIG_PREFIX); it->lower_bound(prefix); while (it->valid() && it->key().find(prefix) == 0) { if (it->key() == prefix) { @@ -908,7 +908,7 @@ void ConfigMonitor::load_changeset(version_t v, ConfigChangeSet *ch) bool ConfigMonitor::refresh_config(MonSession *s) { - const OSDMap& osdmap = mon->osdmon()->osdmap; + const OSDMap& osdmap = mon.osdmon()->osdmap; map<string,string> crush_location; if (s->remote_host.size()) { osdmap.crush->get_full_location(s->remote_host, &crush_location); @@ -983,7 +983,7 @@ void ConfigMonitor::check_sub(Subscription *sub) if (sub->next <= version) { maybe_send_config(sub->session); if (sub->onetime) { - mon->with_session_map([sub](MonSessionMap& session_map) { + mon.with_session_map([sub](MonSessionMap& session_map) { session_map.remove_sub(sub); }); } else { @@ -995,8 +995,8 @@ void ConfigMonitor::check_sub(Subscription *sub) void ConfigMonitor::check_all_subs() { dout(10) << __func__ << dendl; - auto subs = mon->session_map.subs.find("config"); - if (subs == mon->session_map.subs.end()) { + auto subs = mon.session_map.subs.find("config"); + if (subs == mon.session_map.subs.end()) { return; } int updated = 0, total = 0; diff --git a/src/mon/ConfigMonitor.h b/src/mon/ConfigMonitor.h index 283cd04a022..8d05dc3b46b 100644 --- a/src/mon/ConfigMonitor.h +++ b/src/mon/ConfigMonitor.h @@ -21,7 +21,7 @@ class ConfigMonitor : public PaxosService std::map<std::string,ceph::buffer::list> current; public: - ConfigMonitor(Monitor *m, Paxos *p, const std::string& service_name); + ConfigMonitor(Monitor &m, Paxos &p, const std::string& service_name); void init() override; diff --git a/src/mon/HealthMonitor.cc b/src/mon/HealthMonitor.cc index 0e6bf5be2d4..ac36bb45fc6 100644 --- a/src/mon/HealthMonitor.cc +++ b/src/mon/HealthMonitor.cc @@ -63,13 +63,13 @@ using ceph::mono_clock; 
using ceph::mono_time; using ceph::parse_timespan; using ceph::timespan_str; -static ostream& _prefix(std::ostream *_dout, const Monitor *mon, +static ostream& _prefix(std::ostream *_dout, const Monitor &mon, const HealthMonitor *hmon) { - return *_dout << "mon." << mon->name << "@" << mon->rank - << "(" << mon->get_state_name() << ").health "; + return *_dout << "mon." << mon.name << "@" << mon.rank + << "(" << mon.get_state_name() << ").health "; } -HealthMonitor::HealthMonitor(Monitor *m, Paxos *p, const string& service_name) +HealthMonitor::HealthMonitor(Monitor &m, Paxos &p, const string& service_name) : PaxosService(m, p, service_name) { } @@ -90,7 +90,7 @@ void HealthMonitor::update_from_paxos(bool *need_bootstrap) load_health(); bufferlist qbl; - mon->store->get(service_name, "quorum", qbl); + mon.store->get(service_name, "quorum", qbl); if (qbl.length()) { auto p = qbl.cbegin(); decode(quorum_checks, p); @@ -99,7 +99,7 @@ void HealthMonitor::update_from_paxos(bool *need_bootstrap) } bufferlist lbl; - mon->store->get(service_name, "leader", lbl); + mon.store->get(service_name, "leader", lbl); if (lbl.length()) { auto p = lbl.cbegin(); decode(leader_checks, p); @@ -109,7 +109,7 @@ void HealthMonitor::update_from_paxos(bool *need_bootstrap) { bufferlist bl; - mon->store->get(service_name, "mutes", bl); + mon.store->get(service_name, "mutes", bl); if (bl.length()) { auto p = bl.cbegin(); decode(mutes, p); @@ -163,7 +163,7 @@ void HealthMonitor::encode_pending(MonitorDBStore::TransactionRef t) map<string,set<string>> names; // code -> <mon names> for (auto p : quorum_checks) { for (auto q : p.second.checks) { - names[q.first].insert(mon->monmap->get_name(p.first)); + names[q.first].insert(mon.monmap->get_name(p.first)); } pending_health.merge(p.second); } @@ -207,7 +207,7 @@ bool HealthMonitor::preprocess_query(MonOpRequestRef op) case MSG_MON_HEALTH_CHECKS: return false; default: - mon->no_reply(op); + mon.no_reply(op); derr << "Unhandled message type " << 
m->get_type() << dendl; return true; } @@ -237,13 +237,13 @@ bool HealthMonitor::preprocess_command(MonOpRequestRef op) cmdmap_t cmdmap; if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); return true; } MonSession *session = op->get_session(); if (!session) { - mon->reply_command(op, -EACCES, "access denied", rdata, + mon.reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); return true; } @@ -254,7 +254,7 @@ bool HealthMonitor::preprocess_command(MonOpRequestRef op) string prefix; cmd_getval(cmdmap, "prefix", prefix); } catch (const bad_cmd_get& e) { - mon->reply_command(op, -EINVAL, e.what(), rdata, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), rdata, get_last_committed()); return true; } return false; @@ -270,13 +270,13 @@ bool HealthMonitor::prepare_command(MonOpRequestRef op) cmdmap_t cmdmap; if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); return true; } MonSession *session = op->get_session(); if (!session) { - mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); + mon.reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); return true; } @@ -356,7 +356,7 @@ out: return true; } else { // reply immediately - mon->reply_command(op, r, rs, rdata, get_last_committed()); + mon.reply_command(op, r, rs, rdata, get_last_committed()); return false; } } @@ -379,7 +379,7 @@ void HealthMonitor::tick() if (check_member_health()) { changed = true; } - if (!mon->is_leader()) { + if (!mon.is_leader()) { return; } if (check_leader_health()) { @@ -403,7 +403,7 @@ bool HealthMonitor::check_mutes() while (p != pending_mutes.end()) { if (p->second.ttl != utime_t() && p->second.ttl <= now) { - 
mon->clog->info() << "Health alert mute " << p->first + mon.clog->info() << "Health alert mute " << p->first << " cleared (passed TTL " << p->second.ttl << ")"; p = pending_mutes.erase(p); changed = true; @@ -412,7 +412,7 @@ bool HealthMonitor::check_mutes() if (!p->second.sticky) { auto q = all.checks.find(p->first); if (q == all.checks.end()) { - mon->clog->info() << "Health alert mute " << p->first + mon.clog->info() << "Health alert mute " << p->first << " cleared (health alert cleared)"; p = pending_mutes.erase(p); changed = true; @@ -421,7 +421,7 @@ bool HealthMonitor::check_mutes() if (p->second.count) { // count-based mute if (q->second.count > p->second.count) { - mon->clog->info() << "Health alert mute " << p->first + mon.clog->info() << "Health alert mute " << p->first << " cleared (count increased from " << p->second.count << " to " << q->second.count << ")"; p = pending_mutes.erase(p); @@ -439,7 +439,7 @@ bool HealthMonitor::check_mutes() } else { // summary-based mute if (p->second.summary != q->second.summary) { - mon->clog->info() << "Health alert mute " << p->first + mon.clog->info() << "Health alert mute " << p->first << " cleared (summary changed)"; p = pending_mutes.erase(p); changed = true; @@ -454,7 +454,7 @@ bool HealthMonitor::check_mutes() void HealthMonitor::gather_all_health_checks(health_check_map_t *all) { - for (auto& svc : mon->paxos_service) { + for (auto& svc : mon.paxos_service) { all->merge(svc->get_health_checks()); } } @@ -576,7 +576,7 @@ bool HealthMonitor::check_member_health() DataStats stats; get_fs_stats(stats.fs_stats, g_conf()->mon_data.c_str()); map<string,uint64_t> extra; - uint64_t store_size = mon->store->get_estimated_size(extra); + uint64_t store_size = mon.store->get_estimated_size(extra); ceph_assert(store_size > 0); stats.store_stats.bytes_total = store_size; stats.store_stats.bytes_sst = extra["sst"]; @@ -594,14 +594,14 @@ bool HealthMonitor::check_member_health() stringstream ss, ss2; ss << "mon%plurals% 
%names% %isorare% very low on available space"; auto& d = next.add("MON_DISK_CRIT", HEALTH_ERR, ss.str(), 1); - ss2 << "mon." << mon->name << " has " << stats.fs_stats.avail_percent + ss2 << "mon." << mon.name << " has " << stats.fs_stats.avail_percent << "% avail"; d.detail.push_back(ss2.str()); } else if (stats.fs_stats.avail_percent <= g_conf()->mon_data_avail_warn) { stringstream ss, ss2; ss << "mon%plurals% %names% %isorare% low on available space"; auto& d = next.add("MON_DISK_LOW", HEALTH_WARN, ss.str(), 1); - ss2 << "mon." << mon->name << " has " << stats.fs_stats.avail_percent + ss2 << "mon." << mon.name << " has " << stats.fs_stats.avail_percent << "% avail"; d.detail.push_back(ss2.str()); } @@ -609,7 +609,7 @@ bool HealthMonitor::check_member_health() stringstream ss, ss2; ss << "mon%plurals% %names% %isorare% using a lot of disk space"; auto& d = next.add("MON_DISK_BIG", HEALTH_WARN, ss.str(), 1); - ss2 << "mon." << mon->name << " is " + ss2 << "mon." << mon.name << " is " << byte_u_t(stats.store_stats.bytes_total) << " >= mon_data_size_warn (" << byte_u_t(g_conf()->mon_data_size_warn) << ")"; @@ -635,12 +635,12 @@ bool HealthMonitor::check_member_health() ostringstream ss, ds; ss << "mon%plurals% %names% %hasorhave% mon_osd_down_out_interval set to 0"; auto& d = next.add("OSD_NO_DOWN_OUT_INTERVAL", HEALTH_WARN, ss.str(), 1); - ds << "mon." << mon->name << " has mon_osd_down_out_interval set to 0"; + ds << "mon." 
<< mon.name << " has mon_osd_down_out_interval set to 0"; d.detail.push_back(ds.str()); } } - auto p = quorum_checks.find(mon->rank); + auto p = quorum_checks.find(mon.rank); if (p == quorum_checks.end()) { if (next.empty()) { return false; @@ -651,13 +651,13 @@ bool HealthMonitor::check_member_health() } } - if (mon->is_leader()) { + if (mon.is_leader()) { // prepare to propose - quorum_checks[mon->rank] = next; + quorum_checks[mon.rank] = next; changed = true; } else { // tell the leader - mon->send_mon_message(new MMonHealthChecks(next), mon->get_leader()); + mon.send_mon_message(new MMonHealthChecks(next), mon.get_leader()); } return changed; @@ -670,7 +670,7 @@ bool HealthMonitor::check_leader_health() // prune quorum_health { - auto& qset = mon->get_quorum(); + auto& qset = mon.get_quorum(); auto p = quorum_checks.begin(); while (p != quorum_checks.end()) { if (qset.count(p->first) == 0) { @@ -693,7 +693,7 @@ bool HealthMonitor::check_leader_health() old_version_first_time = now; if ((now - old_version_first_time) > g_conf().get_val<double>("mon_warn_older_version_delay")) { std::map<string, std::list<string> > all_versions; - mon->get_all_versions(all_versions); + mon.get_all_versions(all_versions); if (all_versions.size() > 1) { dout(20) << __func__ << " all_versions=" << all_versions << dendl; // The last entry has the largest version @@ -734,19 +734,19 @@ bool HealthMonitor::check_leader_health() // MON_DOWN { - int max = mon->monmap->size(); - int actual = mon->get_quorum().size(); + int max = mon.monmap->size(); + int actual = mon.get_quorum().size(); if (actual < max) { ostringstream ss; ss << (max-actual) << "/" << max << " mons down, quorum " - << mon->get_quorum_names(); + << mon.get_quorum_names(); auto& d = next.add("MON_DOWN", HEALTH_WARN, ss.str(), max - actual); - set<int> q = mon->get_quorum(); + set<int> q = mon.get_quorum(); for (int i=0; i<max; i++) { if (q.count(i) == 0) { ostringstream ss; - ss << "mon." 
<< mon->monmap->get_name(i) << " (rank " << i - << ") addr " << mon->monmap->get_addrs(i) + ss << "mon." << mon.monmap->get_name(i) << " (rank " << i + << ") addr " << mon.monmap->get_addrs(i) << " is down (out of quorum)"; d.detail.push_back(ss.str()); } @@ -755,15 +755,15 @@ bool HealthMonitor::check_leader_health() } // MON_CLOCK_SKEW - if (!mon->timecheck_skews.empty()) { + if (!mon.timecheck_skews.empty()) { list<string> warns; list<string> details; - for (auto& i : mon->timecheck_skews) { + for (auto& i : mon.timecheck_skews) { double skew = i.second; - double latency = mon->timecheck_latencies[i.first]; - string name = mon->monmap->get_name(i.first); + double latency = mon.timecheck_latencies[i.first]; + string name = mon.monmap->get_name(i.first); ostringstream tcss; - health_status_t tcstatus = mon->timecheck_status(tcss, skew, latency); + health_status_t tcstatus = mon.timecheck_status(tcss, skew, latency); if (tcstatus != HEALTH_OK) { warns.push_back(name); ostringstream tmp_ss; @@ -789,10 +789,10 @@ bool HealthMonitor::check_leader_health() // MON_MSGR2_NOT_ENABLED if (g_conf().get_val<bool>("ms_bind_msgr2") && g_conf().get_val<bool>("mon_warn_on_msgr2_not_enabled") && - mon->monmap->get_required_features().contains_all( + mon.monmap->get_required_features().contains_all( ceph::features::mon::FEATURE_NAUTILUS)) { list<string> details; - for (auto& i : mon->monmap->mon_info) { + for (auto& i : mon.monmap->mon_info) { if (!i.second.public_addrs.has_msgr2()) { ostringstream ds; ds << "mon." 
<< i.first << " is not bound to a msgr2 port, only " diff --git a/src/mon/HealthMonitor.h b/src/mon/HealthMonitor.h index e21ca72bc02..10debe51871 100644 --- a/src/mon/HealthMonitor.h +++ b/src/mon/HealthMonitor.h @@ -26,7 +26,7 @@ class HealthMonitor : public PaxosService std::map<std::string,health_mute_t> pending_mutes; public: - HealthMonitor(Monitor *m, Paxos *p, const std::string& service_name); + HealthMonitor(Monitor &m, Paxos &p, const std::string& service_name); /** * @defgroup HealthMonitor_Inherited_h Inherited abstract methods diff --git a/src/mon/LogMonitor.cc b/src/mon/LogMonitor.cc index 1ce36646c34..88327663a5b 100644 --- a/src/mon/LogMonitor.cc +++ b/src/mon/LogMonitor.cc @@ -166,9 +166,9 @@ ceph::logging::Graylog::Ref LogMonitor::log_channel_info::get_graylog( #undef dout_prefix #define dout_prefix _prefix(_dout, mon, get_last_committed()) -static ostream& _prefix(std::ostream *_dout, Monitor *mon, version_t v) { - return *_dout << "mon." << mon->name << "@" << mon->rank - << "(" << mon->get_state_name() +static ostream& _prefix(std::ostream *_dout, Monitor &mon, version_t v) { + return *_dout << "mon." 
<< mon.name << "@" << mon.rank + << "(" << mon.get_state_name() << ").log v" << v << " "; } @@ -194,12 +194,12 @@ void LogMonitor::create_initial() dout(10) << "create_initial -- creating initial map" << dendl; LogEntry e; e.name = g_conf()->name; - e.rank = entity_name_t::MON(mon->rank); - e.addrs = mon->messenger->get_myaddrs(); + e.rank = entity_name_t::MON(mon.rank); + e.addrs = mon.messenger->get_myaddrs(); e.stamp = ceph_clock_now(); e.prio = CLOG_INFO; std::stringstream ss; - ss << "mkfs " << mon->monmap->get_fsid(); + ss << "mkfs " << mon.monmap->get_fsid(); e.msg = ss.str(); e.seq = 0; pending_log.insert(pair<utime_t,LogEntry>(e.stamp, e)); @@ -353,7 +353,7 @@ void LogMonitor::encode_pending(MonitorDBStore::TransactionRef t) __u8 v = 1; encode(v, bl); for (auto p = pending_log.begin(); p != pending_log.end(); ++p) - p->second.encode(bl, mon->get_quorum_con_features()); + p->second.encode(bl, mon.get_quorum_con_features()); put_version(t, version, bl); put_last_committed(t, version); @@ -365,7 +365,7 @@ void LogMonitor::encode_full(MonitorDBStore::TransactionRef t) ceph_assert(get_last_committed() == summary.version); bufferlist summary_bl; - encode(summary, summary_bl, mon->get_quorum_con_features()); + encode(summary, summary_bl, mon.get_quorum_con_features()); put_version_full(t, summary.version, summary_bl); put_version_latest_full(t, summary.version); @@ -373,7 +373,7 @@ void LogMonitor::encode_full(MonitorDBStore::TransactionRef t) version_t LogMonitor::get_trim_to() const { - if (!mon->is_leader()) + if (!mon.is_leader()) return 0; unsigned max = g_conf()->mon_max_log_epochs; @@ -394,7 +394,7 @@ bool LogMonitor::preprocess_query(MonOpRequestRef op) return preprocess_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } @@ -418,7 +418,7 @@ bool LogMonitor::prepare_update(MonOpRequestRef op) 
return prepare_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } case MSG_LOG: @@ -459,7 +459,7 @@ bool LogMonitor::preprocess_log(MonOpRequestRef op) return false; done: - mon->no_reply(op); + mon.no_reply(op); return true; } @@ -481,8 +481,8 @@ bool LogMonitor::prepare_log(MonOpRequestRef op) auto m = op->get_req<MLog>(); dout(10) << "prepare_log " << *m << " from " << m->get_orig_source() << dendl; - if (m->fsid != mon->monmap->fsid) { - dout(0) << "handle_log on fsid " << m->fsid << " != " << mon->monmap->fsid + if (m->fsid != mon.monmap->fsid) { + dout(0) << "handle_log on fsid " << m->fsid << " != " << mon.monmap->fsid << dendl; return false; } @@ -505,7 +505,7 @@ void LogMonitor::_updated_log(MonOpRequestRef op) { auto m = op->get_req<MLog>(); dout(7) << "_updated_log for " << m->get_orig_source_inst() << dendl; - mon->send_reply(op, new MLogAck(m->fsid, m->entries.rbegin()->seq)); + mon.send_reply(op, new MLogAck(m->fsid, m->entries.rbegin()->seq)); } bool LogMonitor::should_propose(double& delay) @@ -531,12 +531,12 @@ bool LogMonitor::preprocess_command(MonOpRequestRef op) cmdmap_t cmdmap; if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); return true; } MonSession *session = op->get_session(); if (!session) { - mon->reply_command(op, -EACCES, "access denied", get_last_committed()); + mon.reply_command(op, -EACCES, "access denied", get_last_committed()); return true; } @@ -560,7 +560,7 @@ bool LogMonitor::preprocess_command(MonOpRequestRef op) level = LogEntry::str_to_level(level_str); if (level == CLOG_UNKNOWN) { ss << "Invalid severity '" << level_str << "'"; - mon->reply_command(op, -EINVAL, ss.str(), get_last_committed()); + mon.reply_command(op, 
-EINVAL, ss.str(), get_last_committed()); return true; } } else { @@ -668,7 +668,7 @@ bool LogMonitor::preprocess_command(MonOpRequestRef op) string rs; getline(ss, rs); - mon->reply_command(op, r, rs, rdata, get_last_committed()); + mon.reply_command(op, r, rs, rdata, get_last_committed()); return true; } @@ -685,7 +685,7 @@ bool LogMonitor::prepare_command(MonOpRequestRef op) if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { // ss has reason for failure string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); return true; } @@ -694,7 +694,7 @@ bool LogMonitor::prepare_command(MonOpRequestRef op) MonSession *session = op->get_session(); if (!session) { - mon->reply_command(op, -EACCES, "access denied", get_last_committed()); + mon.reply_command(op, -EACCES, "access denied", get_last_committed()); return true; } @@ -721,7 +721,7 @@ bool LogMonitor::prepare_command(MonOpRequestRef op) } getline(ss, rs); - mon->reply_command(op, err, rs, get_last_committed()); + mon.reply_command(op, err, rs, get_last_committed()); return false; } @@ -738,8 +738,8 @@ int LogMonitor::sub_name_to_id(const string& n) void LogMonitor::check_subs() { dout(10) << __func__ << dendl; - for (map<string, xlist<Subscription*>*>::iterator i = mon->session_map.subs.begin(); - i != mon->session_map.subs.end(); + for (map<string, xlist<Subscription*>*>::iterator i = mon.session_map.subs.begin(); + i != mon.session_map.subs.end(); ++i) { for (xlist<Subscription*>::iterator j = i->second->begin(); !j.end(); ++j) { if (sub_name_to_id((*j)->type) >= 0) @@ -764,7 +764,7 @@ void LogMonitor::check_sub(Subscription *s) return; } - MLog *mlog = new MLog(mon->monmap->fsid); + MLog *mlog = new MLog(mon.monmap->fsid); if (s->next == 0) { /* First timer, heh? 
*/ @@ -784,7 +784,7 @@ void LogMonitor::check_sub(Subscription *s) mlog->put(); } if (s->onetime) - mon->session_map.remove_sub(s); + mon.session_map.remove_sub(s); else s->next = summary_version+1; } diff --git a/src/mon/LogMonitor.h b/src/mon/LogMonitor.h index 38018ddcc4c..6d6a0b71c68 100644 --- a/src/mon/LogMonitor.h +++ b/src/mon/LogMonitor.h @@ -143,7 +143,7 @@ private: void _create_sub_incremental(MLog *mlog, int level, version_t sv); public: - LogMonitor(Monitor *mn, Paxos *p, const std::string& service_name) + LogMonitor(Monitor &mn, Paxos &p, const std::string& service_name) : PaxosService(mn, p, service_name) { } void init() override { diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index 2e8d112ffe4..011b61fff9d 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -70,9 +70,9 @@ using ceph::mono_time; #define dout_subsys ceph_subsys_mon #undef dout_prefix #define dout_prefix _prefix(_dout, mon, get_fsmap()) -static ostream& _prefix(std::ostream *_dout, Monitor *mon, const FSMap& fsmap) { - return *_dout << "mon." << mon->name << "@" << mon->rank - << "(" << mon->get_state_name() +static ostream& _prefix(std::ostream *_dout, Monitor &mon, const FSMap& fsmap) { + return *_dout << "mon." 
<< mon.name << "@" << mon.rank + << "(" << mon.get_state_name() << ").mds e" << fsmap.get_epoch() << " "; } @@ -167,8 +167,8 @@ void MDSMonitor::create_pending() { auto &fsmap = PaxosFSMap::create_pending(); - if (mon->osdmon()->is_readable()) { - const auto &osdmap = mon->osdmon()->osdmap; + if (mon.osdmon()->is_readable()) { + const auto &osdmap = mon.osdmon()->osdmap; fsmap.sanitize([&osdmap](int64_t pool){return osdmap.have_pg_pool(pool);}); } @@ -198,7 +198,7 @@ void MDSMonitor::encode_pending(MonitorDBStore::TransactionRef t) // apply to paxos ceph_assert(get_last_committed() + 1 == pending.epoch); bufferlist pending_bl; - pending.encode(pending_bl, mon->get_quorum_con_features()); + pending.encode(pending_bl, mon.get_quorum_con_features()); /* put everything in the transaction */ put_version(t, pending.epoch, pending_bl); @@ -234,7 +234,7 @@ void MDSMonitor::encode_pending(MonitorDBStore::TransactionRef t) health = p->second; } else { bufferlist bl; - mon->store->get(MDS_HEALTH_PREFIX, stringify(gid), bl); + mon.store->get(MDS_HEALTH_PREFIX, stringify(gid), bl); if (!bl.length()) { derr << "Missing health data for MDS " << gid << dendl; continue; @@ -321,7 +321,7 @@ bool MDSMonitor::preprocess_query(MonOpRequestRef op) return preprocess_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } @@ -367,8 +367,8 @@ bool MDSMonitor::preprocess_beacon(MonOpRequestRef op) goto ignore; } - if (m->get_fsid() != mon->monmap->fsid) { - dout(0) << "preprocess_beacon on fsid " << m->get_fsid() << " != " << mon->monmap->fsid << dendl; + if (m->get_fsid() != mon.monmap->fsid) { + dout(0) << "preprocess_beacon on fsid " << m->get_fsid() << " != " << mon.monmap->fsid << dendl; goto ignore; } @@ -409,8 +409,8 @@ bool MDSMonitor::preprocess_beacon(MonOpRequestRef op) MDSMap null_map; null_map.epoch = fsmap.epoch; 
null_map.compat = fsmap.compat; - auto m = make_message<MMDSMap>(mon->monmap->fsid, null_map); - mon->send_reply(op, m.detach()); + auto m = make_message<MMDSMap>(mon.monmap->fsid, null_map); + mon.send_reply(op, m.detach()); return true; } else { return false; // not booted yet. @@ -511,16 +511,16 @@ bool MDSMonitor::preprocess_beacon(MonOpRequestRef op) ceph_assert(effective_epoch > 0); _note_beacon(m); { - auto beacon = make_message<MMDSBeacon>(mon->monmap->fsid, + auto beacon = make_message<MMDSBeacon>(mon.monmap->fsid, m->get_global_id(), m->get_name(), effective_epoch, state, seq, CEPH_FEATURES_SUPPORTED_DEFAULT); - mon->send_reply(op, beacon.detach()); + mon.send_reply(op, beacon.detach()); } return true; ignore: // I won't reply this beacon, drop it. - mon->no_reply(op); + mon.no_reply(op); return true; } @@ -549,7 +549,7 @@ bool MDSMonitor::preprocess_offload_targets(MonOpRequestRef op) return false; ignore: - mon->no_reply(op); + mon.no_reply(op); return true; } @@ -570,7 +570,7 @@ bool MDSMonitor::prepare_update(MonOpRequestRef op) return prepare_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } @@ -626,7 +626,7 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) // Log the disappearance of health messages at INFO for (const auto &old_metric : old_health) { if (new_types.count(old_metric.type) == 0) { - mon->clog->info() << "MDS health message cleared (" + mon.clog->info() << "MDS health message cleared (" << m->get_orig_source() << "): " << old_metric.message; } } @@ -640,19 +640,19 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) if (g_conf()->mds_enforce_unique_name) { bool failed_mds = false; while (mds_gid_t existing = pending.find_mds_gid_by_name(m->get_name())) { - if (!mon->osdmon()->is_writeable()) { - mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); + 
if (!mon.osdmon()->is_writeable()) { + mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); return false; } const MDSMap::mds_info_t &existing_info = pending.get_info_gid(existing); - mon->clog->info() << existing_info.human_name() << " restarted"; + mon.clog->info() << existing_info.human_name() << " restarted"; fail_mds_gid(pending, existing); failed_mds = true; } if (failed_mds) { - ceph_assert(mon->osdmon()->is_writeable()); - request_proposal(mon->osdmon()); + ceph_assert(mon.osdmon()->is_writeable()); + request_proposal(mon.osdmon()); } } @@ -709,8 +709,8 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) MDSMap null_map; null_map.epoch = fsmap.epoch; null_map.compat = fsmap.compat; - auto m = make_message<MMDSMap>(mon->monmap->fsid, null_map); - mon->send_reply(op, m.detach()); + auto m = make_message<MMDSMap>(mon.monmap->fsid, null_map); + mon.send_reply(op, m.detach()); } else { dispatch(op); // try again } @@ -758,7 +758,7 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) const auto fscid = pending.mds_roles.at(gid); const auto &fs = pending.get_filesystem(fscid); - mon->clog->info() << info.human_name() << " finished " + mon.clog->info() << info.human_name() << " finished " << "stopping rank " << info.rank << " in filesystem " << fs->mds_map.fs_name << " (now has " << fs->mds_map.get_num_in_mds() - 1 << " ranks)"; @@ -776,10 +776,10 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) } else if (state == MDSMap::STATE_DAMAGED) { - if (!mon->osdmon()->is_writeable()) { + if (!mon.osdmon()->is_writeable()) { dout(1) << __func__ << ": DAMAGED from rank " << info.rank << " waiting for osdmon writeable to blocklist it" << dendl; - mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); + mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); return false; } @@ -790,34 +790,34 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) utime_t until = ceph_clock_now(); until += 
g_conf().get_val<double>("mon_mds_blocklist_interval"); - const auto blocklist_epoch = mon->osdmon()->blocklist(info.addrs, until); - request_proposal(mon->osdmon()); + const auto blocklist_epoch = mon.osdmon()->blocklist(info.addrs, until); + request_proposal(mon.osdmon()); pending.damaged(gid, blocklist_epoch); last_beacon.erase(gid); // Respond to MDS, so that it knows it can continue to shut down auto beacon = make_message<MMDSBeacon>( - mon->monmap->fsid, m->get_global_id(), + mon.monmap->fsid, m->get_global_id(), m->get_name(), pending.get_epoch(), state, seq, CEPH_FEATURES_SUPPORTED_DEFAULT); - mon->send_reply(op, beacon.detach()); + mon.send_reply(op, beacon.detach()); } else if (state == MDSMap::STATE_DNE) { - if (!mon->osdmon()->is_writeable()) { + if (!mon.osdmon()->is_writeable()) { dout(1) << __func__ << ": DNE from rank " << info.rank << " waiting for osdmon writeable to blocklist it" << dendl; - mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); + mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); return false; } fail_mds_gid(pending, gid); - ceph_assert(mon->osdmon()->is_writeable()); - request_proposal(mon->osdmon()); + ceph_assert(mon.osdmon()->is_writeable()); + request_proposal(mon.osdmon()); // Respond to MDS, so that it knows it can continue to shut down - auto beacon = make_message<MMDSBeacon>(mon->monmap->fsid, + auto beacon = make_message<MMDSBeacon>(mon.monmap->fsid, m->get_global_id(), m->get_name(), pending.get_epoch(), state, seq, CEPH_FEATURES_SUPPORTED_DEFAULT); - mon->send_reply(op, beacon.detach()); + mon.send_reply(op, beacon.detach()); } else if (info.state == MDSMap::STATE_STANDBY && state != info.state) { // Standby daemons should never modify their own // state. Reject any attempts to do so. 
@@ -836,7 +836,7 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) if (info.state != MDSMap::STATE_ACTIVE && state == MDSMap::STATE_ACTIVE) { const auto &fscid = pending.mds_roles.at(gid); const auto &fs = pending.get_filesystem(fscid); - mon->clog->info() << info.human_name() << " is now active in " + mon.clog->info() << info.human_name() << " is now active in " << "filesystem " << fs->mds_map.fs_name << " as rank " << info.rank; } @@ -857,7 +857,7 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) if (r >= 0) _updated(op); // success else if (r == -ECANCELED) { - mon->no_reply(op); + mon.no_reply(op); } else { dispatch(op); // try again } @@ -879,7 +879,7 @@ bool MDSMonitor::prepare_offload_targets(MonOpRequestRef op) } else { dout(10) << "prepare_offload_targets " << gid << " not in map" << dendl; } - mon->no_reply(op); + mon.no_reply(op); return true; } @@ -895,7 +895,7 @@ void MDSMonitor::_updated(MonOpRequestRef op) op->mark_mdsmon_event(__func__); auto m = op->get_req<MMDSBeacon>(); dout(10) << "_updated " << m->get_orig_source() << " " << *m << dendl; - mon->clog->debug() << m->get_orig_source() << " " + mon.clog->debug() << m->get_orig_source() << " " << m->get_orig_source_addrs() << " " << ceph_mds_state_name(m->get_state()); @@ -904,13 +904,13 @@ void MDSMonitor::_updated(MonOpRequestRef op) MDSMap null_map; null_map.epoch = fsmap.epoch; null_map.compat = fsmap.compat; - auto m = make_message<MMDSMap>(mon->monmap->fsid, null_map); - mon->send_reply(op, m.detach()); + auto m = make_message<MMDSMap>(mon.monmap->fsid, null_map); + mon.send_reply(op, m.detach()); } else { - auto beacon = make_message<MMDSBeacon>(mon->monmap->fsid, + auto beacon = make_message<MMDSBeacon>(mon.monmap->fsid, m->get_global_id(), m->get_name(), fsmap.get_epoch(), m->get_state(), m->get_seq(), CEPH_FEATURES_SUPPORTED_DEFAULT); - mon->send_reply(op, beacon.detach()); + mon.send_reply(op, beacon.detach()); } } @@ -919,7 +919,7 @@ void MDSMonitor::on_active() tick(); if 
(is_leader()) { - mon->clog->debug() << "fsmap " << get_fsmap(); + mon.clog->debug() << "fsmap " << get_fsmap(); } } @@ -945,7 +945,7 @@ bool MDSMonitor::preprocess_command(MonOpRequestRef op) if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { // ss has reason for failure string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); return true; } @@ -957,7 +957,7 @@ bool MDSMonitor::preprocess_command(MonOpRequestRef op) MonSession *session = op->get_session(); if (!session) { - mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); + mon.reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); return true; } @@ -1157,7 +1157,7 @@ bool MDSMonitor::preprocess_command(MonOpRequestRef op) f->dump_string("name", mds_map.fs_name); /* Output both the names and IDs of pools, for use by * humans and machines respectively */ - f->dump_string("metadata_pool", mon->osdmon()->osdmap.get_pool_name( + f->dump_string("metadata_pool", mon.osdmon()->osdmap.get_pool_name( mds_map.metadata_pool)); f->dump_int("metadata_pool_id", mds_map.metadata_pool); f->open_array_section("data_pool_ids"); @@ -1168,7 +1168,7 @@ bool MDSMonitor::preprocess_command(MonOpRequestRef op) f->open_array_section("data_pools"); for (const auto &id : mds_map.data_pools) { - const auto &name = mon->osdmon()->osdmap.get_pool_name(id); + const auto &name = mon.osdmon()->osdmap.get_pool_name(id); f->dump_string("data_pool", name); } f->close_section(); @@ -1181,13 +1181,13 @@ bool MDSMonitor::preprocess_command(MonOpRequestRef op) for (const auto &p : fsmap.filesystems) { const auto &fs = p.second; const MDSMap &mds_map = fs->mds_map; - const string &md_pool_name = mon->osdmon()->osdmap.get_pool_name( + const string &md_pool_name = mon.osdmon()->osdmap.get_pool_name( mds_map.metadata_pool); ds << "name: " << mds_map.fs_name << ", metadata pool: " << md_pool_name << ", data pools: 
["; for (const auto &id : mds_map.data_pools) { - const string &pool_name = mon->osdmon()->osdmap.get_pool_name(id); + const string &pool_name = mon.osdmon()->osdmap.get_pool_name(id); ds << pool_name << " "; } ds << "]" << std::endl; @@ -1222,7 +1222,7 @@ out: rdata.append(ds); string rs; getline(ss, rs); - mon->reply_command(op, r, rs, rdata, get_last_committed()); + mon.reply_command(op, r, rs, rdata, get_last_committed()); return true; } else return false; @@ -1233,13 +1233,13 @@ bool MDSMonitor::fail_mds_gid(FSMap &fsmap, mds_gid_t gid) const auto& info = fsmap.get_info_gid(gid); dout(1) << "fail_mds_gid " << gid << " mds." << info.name << " role " << info.rank << dendl; - ceph_assert(mon->osdmon()->is_writeable()); + ceph_assert(mon.osdmon()->is_writeable()); epoch_t blocklist_epoch = 0; if (info.rank >= 0 && info.state != MDSMap::STATE_STANDBY_REPLAY) { utime_t until = ceph_clock_now(); until += g_conf().get_val<double>("mon_mds_blocklist_interval"); - blocklist_epoch = mon->osdmon()->blocklist(info.addrs, until); + blocklist_epoch = mon.osdmon()->blocklist(info.addrs, until); } fsmap.erase(gid, blocklist_epoch); @@ -1307,7 +1307,7 @@ int MDSMonitor::fail_mds(FSMap &fsmap, std::ostream &ss, if (gid == MDS_GID_NONE) { return 0; } - if (!mon->osdmon()->is_writeable()) { + if (!mon.osdmon()->is_writeable()) { return -EAGAIN; } @@ -1317,8 +1317,8 @@ int MDSMonitor::fail_mds(FSMap &fsmap, std::ostream &ss, fail_mds_gid(fsmap, gid); ss << "failed mds gid " << gid; - ceph_assert(mon->osdmon()->is_writeable()); - request_proposal(mon->osdmon()); + ceph_assert(mon.osdmon()->is_writeable()); + request_proposal(mon.osdmon()); return 0; } @@ -1333,7 +1333,7 @@ bool MDSMonitor::prepare_command(MonOpRequestRef op) cmdmap_t cmdmap; if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); return true; } @@ -1343,7 +1343,7 @@ bool 
MDSMonitor::prepare_command(MonOpRequestRef op) /* Refuse access if message not associated with a valid session */ MonSession *session = op->get_session(); if (!session) { - mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); + mon.reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); return true; } @@ -1362,11 +1362,11 @@ bool MDSMonitor::prepare_command(MonOpRequestRef op) batched_propose = h->batched_propose(); if (batched_propose) { - paxos->plug(); + paxos.plug(); } - r = h->handle(mon, pending, op, cmdmap, ss); + r = h->handle(&mon, pending, op, cmdmap, ss); if (batched_propose) { - paxos->unplug(); + paxos.unplug(); } if (r == -EAGAIN) { @@ -1414,7 +1414,7 @@ out: return true; } else { // reply immediately - mon->reply_command(op, r, rs, rdata, get_last_committed()); + mon.reply_command(op, r, rs, rdata, get_last_committed()); return false; } } @@ -1477,12 +1477,12 @@ int MDSMonitor::filesystem_command( r = fail_mds(fsmap, ss, who, &failed_info); if (r < 0 && r == -EAGAIN) { - mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); + mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); return -EAGAIN; // don't propose yet; wait for message to be retried } else if (r == 0) { // Only log if we really did something (not when was already gone) if (failed_info.global_id != MDS_GID_NONE) { - mon->clog->info() << failed_info.human_name() << " marked failed by " + mon.clog->info() << failed_info.human_name() << " marked failed by " << op->get_session()->entity_name; } } @@ -1663,7 +1663,7 @@ void MDSMonitor::check_subs() } for (const auto &type : types) { - auto& subs = mon->session_map.subs; + auto& subs = mon.session_map.subs; auto subs_it = subs.find(type); if (subs_it == subs.end()) continue; @@ -1690,9 +1690,9 @@ void MDSMonitor::check_sub(Subscription *sub) } if (sub->type == "fsmap") { - sub->session->con->send_message(new MFSMap(mon->monmap->fsid, fsmap)); + 
sub->session->con->send_message(new MFSMap(mon.monmap->fsid, fsmap)); if (sub->onetime) { - mon->session_map.remove_sub(sub); + mon.session_map.remove_sub(sub); } else { sub->next = fsmap.get_epoch() + 1; } @@ -1705,9 +1705,9 @@ void MDSMonitor::check_sub(Subscription *sub) fs_info.cid = p.second->fscid; fs_info.name = p.second->mds_map.fs_name; } - sub->session->con->send_message(new MFSMapUser(mon->monmap->fsid, fsmap_u)); + sub->session->con->send_message(new MFSMapUser(mon.monmap->fsid, fsmap_u)); if (sub->onetime) { - mon->session_map.remove_sub(sub); + mon.session_map.remove_sub(sub); } else { sub->next = fsmap.get_epoch() + 1; } @@ -1791,12 +1791,12 @@ void MDSMonitor::check_sub(Subscription *sub) if (sub->next > mds_map->epoch) { return; } - auto msg = make_message<MMDSMap>(mon->monmap->fsid, *mds_map, + auto msg = make_message<MMDSMap>(mon.monmap->fsid, *mds_map, mds_map->fs_name); sub->session->con->send_message(msg.detach()); if (sub->onetime) { - mon->session_map.remove_sub(sub); + mon.session_map.remove_sub(sub); } else { sub->next = mds_map->get_epoch() + 1; } @@ -1812,11 +1812,11 @@ void MDSMonitor::update_metadata(mds_gid_t gid, } pending_metadata[gid] = metadata; - MonitorDBStore::TransactionRef t = paxos->get_pending_transaction(); + MonitorDBStore::TransactionRef t = paxos.get_pending_transaction(); bufferlist bl; encode(pending_metadata, bl); t->put(MDS_METADATA_PREFIX, "last_metadata", bl); - paxos->trigger_propose(); + paxos.trigger_propose(); } void MDSMonitor::remove_from_metadata(const FSMap &fsmap, MonitorDBStore::TransactionRef t) @@ -1840,7 +1840,7 @@ void MDSMonitor::remove_from_metadata(const FSMap &fsmap, MonitorDBStore::Transa int MDSMonitor::load_metadata(map<mds_gid_t, Metadata>& m) { bufferlist bl; - int r = mon->store->get(MDS_METADATA_PREFIX, "last_metadata", bl); + int r = mon.store->get(MDS_METADATA_PREFIX, "last_metadata", bl); if (r) { dout(5) << "Unable to load 'last_metadata'" << dendl; return r; @@ -1986,7 +1986,7 @@ bool 
MDSMonitor::maybe_resize_cluster(FSMap &fsmap, fs_cluster_id_t fscid) dout(1) << "assigned standby " << info->addrs << " as mds." << mds << dendl; - mon->clog->info() << info->human_name() << " assigned to " + mon.clog->info() << info->human_name() << " assigned to " "filesystem " << mds_map.fs_name << " as rank " << mds << " (now has " << mds_map.get_num_in_mds() + 1 << " ranks)"; @@ -1997,7 +1997,7 @@ bool MDSMonitor::maybe_resize_cluster(FSMap &fsmap, fs_cluster_id_t fscid) const auto &info = mds_map.get_info(target); if (mds_map.is_active(target)) { dout(1) << "stopping " << target << dendl; - mon->clog->info() << "stopping " << info.human_name(); + mon.clog->info() << "stopping " << info.human_name(); auto f = [](auto& info) { info.state = MDSMap::STATE_STOPPING; }; @@ -2048,7 +2048,7 @@ bool MDSMonitor::drop_mds(FSMap &fsmap, mds_gid_t gid, const mds_info_t* rep_inf << " with " << rep_info->global_id << "/" << rep_info->name << " " << rep_info->addrs << dendl; - mon->clog->warn() << "Replacing " << info.human_name() + mon.clog->warn() << "Replacing " << info.human_name() << " as rank " << rank << " with standby " << rep_info->human_name(); @@ -2067,7 +2067,7 @@ bool MDSMonitor::check_health(FSMap& fsmap, bool* propose_osdmap) { bool do_propose = false; const auto now = mono_clock::now(); - const bool osdmap_writeable = mon->osdmon()->is_writeable(); + const bool osdmap_writeable = mon.osdmon()->is_writeable(); const auto mds_beacon_grace = g_conf().get_val<double>("mds_beacon_grace"); const auto mds_beacon_interval = g_conf().get_val<double>("mds_beacon_interval"); @@ -2156,7 +2156,7 @@ bool MDSMonitor::check_health(FSMap& fsmap, bool* propose_osdmap) } bool dropped = drop_mds(fsmap, gid, rep_info, propose_osdmap); if (dropped) { - mon->clog->info() << "MDS " << info.human_name() + mon.clog->info() << "MDS " << info.human_name() << " is removed because it is dead or otherwise unavailable."; do_propose = true; } @@ -2195,13 +2195,13 @@ bool 
MDSMonitor::check_health(FSMap& fsmap, bool* propose_osdmap) } if (better_affinity) { if (state == MDSMap::STATE_STANDBY_REPLAY) { - mon->clog->info() << "Dropping low affinity standby-replay " + mon.clog->info() << "Dropping low affinity standby-replay " << info.human_name() << " in favor of higher affinity standby."; *propose_osdmap |= fail_mds_gid(fsmap, gid); /* Now let maybe_promote_standby do the promotion. */ } else { - mon->clog->info() << "Dropping low affinity active " + mon.clog->info() << "Dropping low affinity active " << info.human_name() << " in favor of higher affinity standby."; do_propose |= drop_mds(fsmap, gid, rep_info, propose_osdmap); @@ -2231,7 +2231,7 @@ bool MDSMonitor::maybe_promote_standby(FSMap &fsmap, Filesystem& fs) if (info) { dout(1) << " taking over failed mds." << rank << " with " << info->global_id << "/" << info->name << " " << info->addrs << dendl; - mon->clog->info() << "Standby " << info->human_name() + mon.clog->info() << "Standby " << info->human_name() << " assigned to filesystem " << fs.mds_map.fs_name << " as rank " << rank; @@ -2292,7 +2292,7 @@ void MDSMonitor::tick() } if (propose_osdmap) { - request_proposal(mon->osdmon()); + request_proposal(mon.osdmon()); } if (do_propose) { @@ -2302,10 +2302,10 @@ void MDSMonitor::tick() last_tick = mono_clock::now(); } -MDSMonitor::MDSMonitor(Monitor *mn, Paxos *p, string service_name) +MDSMonitor::MDSMonitor(Monitor &mn, Paxos &p, string service_name) : PaxosService(mn, p, service_name) { - handlers = FileSystemCommandHandler::load(p); + handlers = FileSystemCommandHandler::load(&p); } void MDSMonitor::on_restart() diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h index f84744e0c73..56723961b95 100644 --- a/src/mon/MDSMonitor.h +++ b/src/mon/MDSMonitor.h @@ -32,7 +32,7 @@ class FileSystemCommandHandler; class MDSMonitor : public PaxosService, public PaxosFSMap, protected CommandHandler { public: - MDSMonitor(Monitor *mn, Paxos *p, std::string service_name); + 
MDSMonitor(Monitor &mn, Paxos &p, std::string service_name); // service methods void create_initial() override; @@ -70,7 +70,7 @@ class MDSMonitor : public PaxosService, public PaxosFSMap, protected CommandHand */ bool fail_mds_gid(FSMap &fsmap, mds_gid_t gid); - bool is_leader() const override { return mon->is_leader(); } + bool is_leader() const override { return mon.is_leader(); } protected: using mds_info_t = MDSMap::mds_info_t; diff --git a/src/mon/MgrMonitor.cc b/src/mon/MgrMonitor.cc index cd5d6305e0e..97c36f2924d 100644 --- a/src/mon/MgrMonitor.cc +++ b/src/mon/MgrMonitor.cc @@ -58,10 +58,10 @@ using ceph::make_message; using ceph::mono_clock; using ceph::mono_time; -static ostream& _prefix(std::ostream *_dout, Monitor *mon, +static ostream& _prefix(std::ostream *_dout, Monitor &mon, const MgrMap& mgrmap) { - return *_dout << "mon." << mon->name << "@" << mon->rank - << "(" << mon->get_state_name() + return *_dout << "mon." << mon.name << "@" << mon.rank + << "(" << mon.get_state_name() << ").mgr e" << mgrmap.get_epoch() << " "; } @@ -210,7 +210,7 @@ void MgrMonitor::update_from_paxos(bool *need_bootstrap) dout(4) << "mkfs or daemon transitioned to available, loading commands" << dendl; bufferlist loaded_commands; - int r = mon->store->get(command_descs_prefix, "", loaded_commands); + int r = mon.store->get(command_descs_prefix, "", loaded_commands); if (r < 0) { derr << "Failed to load mgr commands: " << cpp_strerror(r) << dendl; } else { @@ -269,9 +269,9 @@ void MgrMonitor::update_from_paxos(bool *need_bootstrap) } // force ConfigMonitor to refresh, since it uses const Option * // pointers into our mgr_module_options (which we just rebuilt). 
- mon->configmon()->load_config(); + mon.configmon()->load_config(); - if (!mon->is_init()) { + if (!mon.is_init()) { // feed our pet MgrClient, unless we are in Monitor::[pre]init() prime_mgr_client(); } @@ -280,7 +280,7 @@ void MgrMonitor::update_from_paxos(bool *need_bootstrap) void MgrMonitor::prime_mgr_client() { dout(10) << __func__ << dendl; - mon->mgr_client.ms_dispatch2(make_message<MMgrMap>(map)); + mon.mgr_client.ms_dispatch2(make_message<MMgrMap>(map)); } void MgrMonitor::create_pending() @@ -295,8 +295,8 @@ health_status_t MgrMonitor::should_warn_about_mgr_down() // we warn if we have osds AND we've exceeded the grace period // which means a new mon cluster and be HEALTH_OK indefinitely as long as // no OSDs are ever created. - if (mon->osdmon()->osdmap.get_num_osds() > 0 && - now > mon->monmap->created + g_conf().get_val<int64_t>("mon_mgr_mkfs_grace")) { + if (mon.osdmon()->osdmap.get_num_osds() > 0 && + now > mon.monmap->created + g_conf().get_val<int64_t>("mon_mgr_mkfs_grace")) { health_status_t level = HEALTH_WARN; if (first_seen_inactive != utime_t() && now - first_seen_inactive > g_conf().get_val<int64_t>("mon_mgr_inactive_grace")) { @@ -313,12 +313,12 @@ void MgrMonitor::post_paxos_update() if (digest_event) { bool send = false; if (prev_health_checks.empty()) { - prev_health_checks.resize(mon->paxos_service.size()); + prev_health_checks.resize(mon.paxos_service.size()); send = true; } - ceph_assert(prev_health_checks.size() == mon->paxos_service.size()); + ceph_assert(prev_health_checks.size() == mon.paxos_service.size()); for (auto i = 0u; i < prev_health_checks.size(); i++) { - const auto& curr = mon->paxos_service[i]->get_health_checks(); + const auto& curr = mon.paxos_service[i]->get_health_checks(); if (!send && curr != prev_health_checks[i]) { send = true; } @@ -329,7 +329,7 @@ void MgrMonitor::post_paxos_update() send_digests(); } else { cancel_timer(); - wait_for_active_ctx(new C_MonContext{mon, [this](int) { + wait_for_active_ctx(new 
C_MonContext{&mon, [this](int) { send_digests(); }}); } @@ -341,7 +341,7 @@ void MgrMonitor::encode_pending(MonitorDBStore::TransactionRef t) { dout(10) << __func__ << " " << pending_map << dendl; bufferlist bl; - pending_map.encode(bl, mon->get_quorum_con_features()); + pending_map.encode(bl, mon.get_quorum_con_features()); put_version(t, pending_map.epoch, bl); put_last_committed(t, pending_map.epoch); @@ -393,9 +393,9 @@ bool MgrMonitor::check_caps(MonOpRequestRef op, const uuid_d& fsid) dout(1) << __func__ << " insufficient caps " << session->caps << dendl; return false; } - if (fsid != mon->monmap->fsid) { + if (fsid != mon.monmap->fsid) { dout(1) << __func__ << " op fsid " << fsid - << " != " << mon->monmap->fsid << dendl; + << " != " << mon.monmap->fsid << dendl; return false; } return true; @@ -412,12 +412,12 @@ bool MgrMonitor::preprocess_query(MonOpRequestRef op) return preprocess_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } default: - mon->no_reply(op); + mon.no_reply(op); derr << "Unhandled message type " << m->get_type() << dendl; return true; } @@ -435,12 +435,12 @@ bool MgrMonitor::prepare_update(MonOpRequestRef op) return prepare_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } default: - mon->no_reply(op); + mon.no_reply(op); derr << "Unhandled message type " << m->get_type() << dendl; return true; } @@ -458,7 +458,7 @@ public: if (r >= 0) { // Success } else if (r == -ECANCELED) { - mm->mon->no_reply(op); + mm->mon.no_reply(op); } else { mm->dispatch(op); // try again } @@ -468,7 +468,7 @@ public: bool MgrMonitor::preprocess_beacon(MonOpRequestRef op) { auto m = op->get_req<MMgrBeacon>(); - mon->no_reply(op); // 
we never reply to beacons + mon.no_reply(op); // we never reply to beacons dout(4) << "beacon from " << m->get_gid() << dendl; if (!check_caps(op, m->get_fsid())) { @@ -490,12 +490,12 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) && m->get_gid() != pending_map.active_gid) { dout(4) << "Active daemon restart (mgr." << m->get_name() << ")" << dendl; - mon->clog->info() << "Active manager daemon " << m->get_name() + mon.clog->info() << "Active manager daemon " << m->get_name() << " restarted"; - if (!mon->osdmon()->is_writeable()) { + if (!mon.osdmon()->is_writeable()) { dout(1) << __func__ << ": waiting for osdmon writeable to" " blocklist old instance." << dendl; - mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); + mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); return false; } drop_active(); @@ -506,7 +506,7 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) const MgrMap::StandbyInfo &s = i.second; if (s.name == m->get_name() && s.gid != m->get_gid()) { dout(4) << "Standby daemon restart (mgr." 
<< m->get_name() << ")" << dendl; - mon->clog->debug() << "Standby manager daemon " << m->get_name() + mon.clog->debug() << "Standby manager daemon " << m->get_name() << " restarted"; drop_standby(i.first); break; @@ -536,7 +536,7 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) if (pending_map.get_available() != m->get_available()) { dout(4) << "available " << m->get_gid() << dendl; - mon->clog->info() << "Manager daemon " << pending_map.active_name + mon.clog->info() << "Manager daemon " << pending_map.active_name << " is now available"; // This beacon should include command descriptions @@ -586,7 +586,7 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) encode(m->get_metadata(), pending_metadata[m->get_name()]); pending_metadata_rm.erase(m->get_name()); - mon->clog->info() << "Activating manager daemon " + mon.clog->info() << "Activating manager daemon " << pending_map.active_name; updated = true; @@ -605,7 +605,7 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) } } else { dout(10) << "new standby " << m->get_gid() << dendl; - mon->clog->debug() << "Standby manager daemon " << m->get_name() + mon.clog->debug() << "Standby manager daemon " << m->get_name() << " started"; pending_map.standbys[m->get_gid()] = {m->get_gid(), m->get_name(), m->get_available_modules(), @@ -629,9 +629,9 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) void MgrMonitor::check_subs() { const std::string type = "mgrmap"; - if (mon->session_map.subs.count(type) == 0) + if (mon.session_map.subs.count(type) == 0) return; - for (auto sub : *(mon->session_map.subs[type])) { + for (auto sub : *(mon.session_map.subs[type])) { check_sub(sub); } } @@ -644,7 +644,7 @@ void MgrMonitor::check_sub(Subscription *sub) << " " << sub->session->con->get_peer_addr() << dendl; sub->session->con->send_message2(make_message<MMgrMap>(map)); if (sub->onetime) { - mon->session_map.remove_sub(sub); + mon.session_map.remove_sub(sub); } else { sub->next = map.get_epoch() + 1; } @@ -670,7 +670,7 
@@ void MgrMonitor::send_digests() cancel_timer(); const std::string type = "mgrdigest"; - if (mon->session_map.subs.count(type) == 0) { + if (mon.session_map.subs.count(type) == 0) { prev_health_checks.clear(); return; } @@ -681,17 +681,17 @@ void MgrMonitor::send_digests() } dout(10) << __func__ << dendl; - for (auto sub : *(mon->session_map.subs[type])) { + for (auto sub : *(mon.session_map.subs[type])) { dout(10) << __func__ << " sending digest to subscriber " << sub->session->con << " " << sub->session->con->get_peer_addr() << dendl; auto mdigest = make_message<MMgrDigest>(); JSONFormatter f; - mon->healthmon()->get_health_status(true, &f, nullptr, nullptr, nullptr); + mon.healthmon()->get_health_status(true, &f, nullptr, nullptr, nullptr); f.flush(mdigest->health_json); f.reset(); - mon->get_mon_status(&f); + mon.get_mon_status(&f); f.flush(mdigest->mon_status_json); f.reset(); @@ -699,9 +699,9 @@ void MgrMonitor::send_digests() } timer: - digest_event = mon->timer.add_event_after( + digest_event = mon.timer.add_event_after( g_conf().get_val<int64_t>("mon_mgr_digest_period"), - new C_MonContext{mon, [this](int) { + new C_MonContext{&mon, [this](int) { send_digests(); }}); } @@ -709,18 +709,18 @@ timer: void MgrMonitor::cancel_timer() { if (digest_event) { - mon->timer.cancel_event(digest_event); + mon.timer.cancel_event(digest_event); digest_event = nullptr; } } void MgrMonitor::on_active() { - if (!mon->is_leader()) { + if (!mon.is_leader()) { return; } - mon->clog->debug() << "mgrmap e" << map.epoch << ": " << map; - if (!HAVE_FEATURE(mon->get_quorum_con_features(), SERVER_NAUTILUS)) { + mon.clog->debug() << "mgrmap e" << map.epoch << ": " << map; + if (!HAVE_FEATURE(mon.get_quorum_con_features(), SERVER_NAUTILUS)) { return; } if (pending_map.always_on_modules == always_on_modules) { @@ -735,7 +735,7 @@ void MgrMonitor::on_active() void MgrMonitor::tick() { - if (!is_active() || !mon->is_leader()) + if (!is_active() || !mon.is_leader()) return; const auto 
now = ceph::coarse_mono_clock::now(); @@ -796,25 +796,25 @@ void MgrMonitor::tick() if (pending_map.active_gid != 0 && last_beacon.at(pending_map.active_gid) < cutoff - && mon->osdmon()->is_writeable()) { + && mon.osdmon()->is_writeable()) { const std::string old_active_name = pending_map.active_name; drop_active(); propose = true; dout(4) << "Dropping active" << pending_map.active_gid << dendl; if (promote_standby()) { dout(4) << "Promoted standby " << pending_map.active_gid << dendl; - mon->clog->info() << "Manager daemon " << old_active_name + mon.clog->info() << "Manager daemon " << old_active_name << " is unresponsive, replacing it with standby" << " daemon " << pending_map.active_name; } else { dout(4) << "Active is laggy but have no standbys to replace it" << dendl; - mon->clog->info() << "Manager daemon " << old_active_name + mon.clog->info() << "Manager daemon " << old_active_name << " is unresponsive. No standby daemons available."; } } else if (pending_map.active_gid == 0) { if (promote_standby()) { dout(4) << "Promoted standby " << pending_map.active_gid << dendl; - mon->clog->info() << "Activating manager daemon " + mon.clog->info() << "Activating manager daemon " << pending_map.active_name; propose = true; } @@ -830,7 +830,7 @@ void MgrMonitor::tick() } // obsolete modules? - if (mon->monmap->min_mon_release >= ceph_release_t::octopus && + if (mon.monmap->min_mon_release >= ceph_release_t::octopus && pending_map.module_enabled("orchestrator_cli")) { dout(10) << " disabling obsolete/renamed 'orchestrator_cli'" << dendl; // we don't need to enable 'orchestrator' because it's now always-on @@ -875,7 +875,7 @@ bool MgrMonitor::promote_standby() void MgrMonitor::drop_active() { - ceph_assert(mon->osdmon()->is_writeable()); + ceph_assert(mon.osdmon()->is_writeable()); if (last_beacon.count(pending_map.active_gid) > 0) { last_beacon.erase(pending_map.active_gid); @@ -887,13 +887,13 @@ void MgrMonitor::drop_active() dout(5) << "blocklisting previous mgr." 
<< pending_map.active_name << "." << pending_map.active_gid << " (" << pending_map.active_addrs << ")" << dendl; - auto blocklist_epoch = mon->osdmon()->blocklist(pending_map.active_addrs, until); + auto blocklist_epoch = mon.osdmon()->blocklist(pending_map.active_addrs, until); /* blocklist RADOS clients in use by the mgr */ for (const auto& a : pending_map.clients) { - mon->osdmon()->blocklist(a, until); + mon.osdmon()->blocklist(a, until); } - request_proposal(mon->osdmon()); + request_proposal(mon.osdmon()); pending_metadata_rm.insert(pending_map.active_name); pending_metadata.erase(pending_map.active_name); @@ -933,13 +933,13 @@ bool MgrMonitor::preprocess_command(MonOpRequestRef op) cmdmap_t cmdmap; if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); return true; } MonSession *session = op->get_session(); if (!session) { - mon->reply_command(op, -EACCES, "access denied", rdata, + mon.reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); return true; } @@ -1064,7 +1064,7 @@ bool MgrMonitor::preprocess_command(MonOpRequestRef op) reply: string rs; getline(ss, rs); - mon->reply_command(op, r, rs, rdata, get_last_committed()); + mon.reply_command(op, r, rs, rdata, get_last_committed()); return true; } @@ -1078,13 +1078,13 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op) cmdmap_t cmdmap; if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); return true; } MonSession *session = op->get_session(); if (!session) { - mon->reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); + mon.reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); return true; } @@ -1113,8 +1113,8 @@ bool 
MgrMonitor::prepare_command(MonOpRequestRef op) if (!err.empty()) { // Does not parse as a gid, treat it as a name if (pending_map.active_name == who) { - if (!mon->osdmon()->is_writeable()) { - mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); + if (!mon.osdmon()->is_writeable()) { + mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); return false; } drop_active(); @@ -1136,8 +1136,8 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op) } } else { if (pending_map.active_gid == gid) { - if (!mon->osdmon()->is_writeable()) { - mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); + if (!mon.osdmon()->is_writeable()) { + mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); return false; } drop_active(); @@ -1228,7 +1228,7 @@ out: return true; } else { // reply immediately - mon->reply_command(op, r, rs, rdata, get_last_committed()); + mon.reply_command(op, r, rs, rdata, get_last_committed()); return false; } } @@ -1249,7 +1249,7 @@ int MgrMonitor::load_metadata(const string& name, std::map<string, string>& m, ostream *err) const { bufferlist bl; - int r = mon->store->get(MGR_METADATA_PREFIX, name, bl); + int r = mon.store->get(MGR_METADATA_PREFIX, name, bl); if (r < 0) return r; try { diff --git a/src/mon/MgrMonitor.h b/src/mon/MgrMonitor.h index dabd9386e25..5df70fc3634 100644 --- a/src/mon/MgrMonitor.h +++ b/src/mon/MgrMonitor.h @@ -71,7 +71,7 @@ class MgrMonitor: public PaxosService std::vector<MonCommand> pending_command_descs; public: - MgrMonitor(Monitor *mn, Paxos *p, const std::string& service_name) + MgrMonitor(Monitor &mn, Paxos &p, const std::string& service_name) : PaxosService(mn, p, service_name) {} ~MgrMonitor() override {} diff --git a/src/mon/MgrStatMonitor.cc b/src/mon/MgrStatMonitor.cc index b0172be2b69..40a322d7698 100644 --- a/src/mon/MgrStatMonitor.cc +++ b/src/mon/MgrStatMonitor.cc @@ -42,13 +42,13 @@ using ceph::make_message; using ceph::mono_clock; using ceph::mono_time; 
-static ostream& _prefix(std::ostream *_dout, Monitor *mon) { - return *_dout << "mon." << mon->name << "@" << mon->rank - << "(" << mon->get_state_name() +static ostream& _prefix(std::ostream *_dout, Monitor &mon) { + return *_dout << "mon." << mon.name << "@" << mon.rank + << "(" << mon.get_state_name() << ").mgrstat "; } -MgrStatMonitor::MgrStatMonitor(Monitor *mn, Paxos *p, const string& service_name) +MgrStatMonitor::MgrStatMonitor(Monitor &mn, Paxos &p, const string& service_name) : PaxosService(mn, p, service_name) { } @@ -93,25 +93,25 @@ void MgrStatMonitor::update_from_paxos(bool *need_bootstrap) } check_subs(); update_logger(); - mon->osdmon()->notify_new_pg_digest(); + mon.osdmon()->notify_new_pg_digest(); } void MgrStatMonitor::update_logger() { dout(20) << __func__ << dendl; - mon->cluster_logger->set(l_cluster_osd_bytes, digest.osd_sum.statfs.total); - mon->cluster_logger->set(l_cluster_osd_bytes_used, + mon.cluster_logger->set(l_cluster_osd_bytes, digest.osd_sum.statfs.total); + mon.cluster_logger->set(l_cluster_osd_bytes_used, digest.osd_sum.statfs.get_used_raw()); - mon->cluster_logger->set(l_cluster_osd_bytes_avail, + mon.cluster_logger->set(l_cluster_osd_bytes_avail, digest.osd_sum.statfs.available); - mon->cluster_logger->set(l_cluster_num_pool, digest.pg_pool_sum.size()); + mon.cluster_logger->set(l_cluster_num_pool, digest.pg_pool_sum.size()); uint64_t num_pg = 0; for (auto i : digest.num_pg_by_pool) { num_pg += i.second; } - mon->cluster_logger->set(l_cluster_num_pg, num_pg); + mon.cluster_logger->set(l_cluster_num_pg, num_pg); unsigned active = 0, active_clean = 0, peering = 0; for (auto p = digest.num_pg_by_state.begin(); @@ -125,15 +125,15 @@ void MgrStatMonitor::update_logger() if (p->first & PG_STATE_PEERING) peering += p->second; } - mon->cluster_logger->set(l_cluster_num_pg_active_clean, active_clean); - mon->cluster_logger->set(l_cluster_num_pg_active, active); - mon->cluster_logger->set(l_cluster_num_pg_peering, peering); + 
mon.cluster_logger->set(l_cluster_num_pg_active_clean, active_clean); + mon.cluster_logger->set(l_cluster_num_pg_active, active); + mon.cluster_logger->set(l_cluster_num_pg_peering, peering); - mon->cluster_logger->set(l_cluster_num_object, digest.pg_sum.stats.sum.num_objects); - mon->cluster_logger->set(l_cluster_num_object_degraded, digest.pg_sum.stats.sum.num_objects_degraded); - mon->cluster_logger->set(l_cluster_num_object_misplaced, digest.pg_sum.stats.sum.num_objects_misplaced); - mon->cluster_logger->set(l_cluster_num_object_unfound, digest.pg_sum.stats.sum.num_objects_unfound); - mon->cluster_logger->set(l_cluster_num_bytes, digest.pg_sum.stats.sum.num_bytes); + mon.cluster_logger->set(l_cluster_num_object, digest.pg_sum.stats.sum.num_objects); + mon.cluster_logger->set(l_cluster_num_object_degraded, digest.pg_sum.stats.sum.num_objects_degraded); + mon.cluster_logger->set(l_cluster_num_object_misplaced, digest.pg_sum.stats.sum.num_objects_misplaced); + mon.cluster_logger->set(l_cluster_num_object_unfound, digest.pg_sum.stats.sum.num_objects_unfound); + mon.cluster_logger->set(l_cluster_num_bytes, digest.pg_sum.stats.sum.num_bytes); } @@ -143,7 +143,7 @@ void MgrStatMonitor::create_pending() pending_digest = digest; pending_health_checks = get_health_checks(); pending_service_map_bl.clear(); - encode(service_map, pending_service_map_bl, mon->get_quorum_con_features()); + encode(service_map, pending_service_map_bl, mon.get_quorum_con_features()); } void MgrStatMonitor::encode_pending(MonitorDBStore::TransactionRef t) @@ -151,7 +151,7 @@ void MgrStatMonitor::encode_pending(MonitorDBStore::TransactionRef t) ++version; dout(10) << " " << version << dendl; bufferlist bl; - encode(pending_digest, bl, mon->get_quorum_con_features()); + encode(pending_digest, bl, mon.get_quorum_con_features()); ceph_assert(pending_service_map_bl.length()); bl.append(pending_service_map_bl); encode(pending_progress_events, bl); @@ -190,7 +190,7 @@ bool 
MgrStatMonitor::preprocess_query(MonOpRequestRef op) case MSG_GETPOOLSTATS: return preprocess_getpoolstats(op); default: - mon->no_reply(op); + mon.no_reply(op); derr << "Unhandled message type " << m->get_type() << dendl; return true; } @@ -203,7 +203,7 @@ bool MgrStatMonitor::prepare_update(MonOpRequestRef op) case MSG_MON_MGR_REPORT: return prepare_report(op); default: - mon->no_reply(op); + mon.no_reply(op); derr << "Unhandled message type " << m->get_type() << dendl; return true; } @@ -211,7 +211,7 @@ bool MgrStatMonitor::prepare_update(MonOpRequestRef op) bool MgrStatMonitor::preprocess_report(MonOpRequestRef op) { - mon->no_reply(op); + mon.no_reply(op); return false; } @@ -267,16 +267,16 @@ bool MgrStatMonitor::preprocess_getpoolstats(MonOpRequestRef op) << session->caps << dendl; return true; } - if (m->fsid != mon->monmap->fsid) { + if (m->fsid != mon.monmap->fsid) { dout(0) << __func__ << " on fsid " - << m->fsid << " != " << mon->monmap->fsid << dendl; + << m->fsid << " != " << mon.monmap->fsid << dendl; return true; } epoch_t ver = get_last_committed(); auto reply = new MGetPoolStatsReply(m->fsid, m->get_tid(), ver); reply->per_pool = digest.use_per_pool_stats(); for (const auto& pool_name : m->pools) { - const auto pool_id = mon->osdmon()->osdmap.lookup_pg_pool_name(pool_name); + const auto pool_id = mon.osdmon()->osdmap.lookup_pg_pool_name(pool_name); if (pool_id == -ENOENT) continue; auto pool_stat = get_pool_stat(pool_id); @@ -284,7 +284,7 @@ bool MgrStatMonitor::preprocess_getpoolstats(MonOpRequestRef op) continue; reply->pool_stats[pool_name] = *pool_stat; } - mon->send_reply(op, reply); + mon.send_reply(op, reply); return true; } @@ -301,13 +301,13 @@ bool MgrStatMonitor::preprocess_statfs(MonOpRequestRef op) << session->caps << dendl; return true; } - if (statfs->fsid != mon->monmap->fsid) { + if (statfs->fsid != mon.monmap->fsid) { dout(0) << __func__ << " on fsid " << statfs->fsid - << " != " << mon->monmap->fsid << dendl; + << " != " << 
mon.monmap->fsid << dendl; return true; } const auto& pool = statfs->data_pool; - if (pool && !mon->osdmon()->osdmap.have_pg_pool(*pool)) { + if (pool && !mon.osdmon()->osdmap.have_pg_pool(*pool)) { // There's no error field for MStatfsReply so just ignore the request. // This is known to happen when a client is still accessing a removed fs. dout(1) << __func__ << " on removed pool " << *pool << dendl; @@ -317,14 +317,14 @@ bool MgrStatMonitor::preprocess_statfs(MonOpRequestRef op) << " from " << statfs->get_orig_source() << dendl; epoch_t ver = get_last_committed(); auto reply = new MStatfsReply(statfs->fsid, statfs->get_tid(), ver); - reply->h.st = get_statfs(mon->osdmon()->osdmap, pool); - mon->send_reply(op, reply); + reply->h.st = get_statfs(mon.osdmon()->osdmap, pool); + mon.send_reply(op, reply); return true; } void MgrStatMonitor::check_sub(Subscription *sub) { - const auto epoch = mon->monmap->get_epoch(); + const auto epoch = mon.monmap->get_epoch(); dout(10) << __func__ << " next " << sub->next << " have " << epoch << dendl; @@ -332,7 +332,7 @@ void MgrStatMonitor::check_sub(Subscription *sub) auto m = new MServiceMap(service_map); sub->session->con->send_message(m); if (sub->onetime) { - mon->with_session_map([sub](MonSessionMap& session_map) { + mon.with_session_map([sub](MonSessionMap& session_map) { session_map.remove_sub(sub); }); } else { @@ -347,8 +347,8 @@ void MgrStatMonitor::check_subs() if (!service_map.epoch) { return; } - auto subs = mon->session_map.subs.find("servicemap"); - if (subs == mon->session_map.subs.end()) { + auto subs = mon.session_map.subs.find("servicemap"); + if (subs == mon.session_map.subs.end()) { return; } auto p = subs->second->begin(); diff --git a/src/mon/MgrStatMonitor.h b/src/mon/MgrStatMonitor.h index 500399b3a52..7c31f2c13f6 100644 --- a/src/mon/MgrStatMonitor.h +++ b/src/mon/MgrStatMonitor.h @@ -22,7 +22,7 @@ class MgrStatMonitor : public PaxosService { ceph::buffer::list pending_service_map_bl; public: - 
MgrStatMonitor(Monitor *mn, Paxos *p, const std::string& service_name); + MgrStatMonitor(Monitor &mn, Paxos &p, const std::string& service_name); ~MgrStatMonitor() override; void init() override {} diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 779f76f9c24..83f007477c0 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1090,11 +1090,11 @@ COMMAND("osd pool rename " "rename <srcpool> to <destpool>", "osd", "rw") COMMAND("osd pool get " "name=pool,type=CephPoolname " - "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio", + 
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size", "get pool parameter <var>", "osd", "r") COMMAND("osd pool set " "name=pool,type=CephPoolname " - "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio " + 
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size " "name=val,type=CephString " "name=yes_i_really_mean_it,type=CephBool,req=false", "set pool parameter <var> to <val>", "osd", "rw") diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index dad67ab4d46..0898222ec61 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -85,9 +85,8 @@ #include "MgrMonitor.h" #include "MgrStatMonitor.h" #include "ConfigMonitor.h" -#include "mon/QuorumService.h" -#include "mon/HealthMonitor.h" #include "mon/ConfigKeyService.h" +#include "mon/HealthMonitor.h" #include "common/config.h" #include "common/cmdparse.h" #include "include/ceph_assert.h" @@ -238,19 +237,19 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s, g_conf().get_val<uint64_t>("mon_op_history_slow_op_size"), g_conf().get_val<std::chrono::seconds>("mon_op_history_slow_op_threshold").count()); - paxos = new Paxos(this, "paxos"); + paxos = std::make_unique<Paxos>(*this, "paxos"); - paxos_service[PAXOS_MDSMAP].reset(new MDSMonitor(this, paxos, "mdsmap")); - paxos_service[PAXOS_MONMAP].reset(new 
MonmapMonitor(this, paxos, "monmap")); - paxos_service[PAXOS_OSDMAP].reset(new OSDMonitor(cct, this, paxos, "osdmap")); - paxos_service[PAXOS_LOG].reset(new LogMonitor(this, paxos, "logm")); - paxos_service[PAXOS_AUTH].reset(new AuthMonitor(this, paxos, "auth")); - paxos_service[PAXOS_MGR].reset(new MgrMonitor(this, paxos, "mgr")); - paxos_service[PAXOS_MGRSTAT].reset(new MgrStatMonitor(this, paxos, "mgrstat")); - paxos_service[PAXOS_HEALTH].reset(new HealthMonitor(this, paxos, "health")); - paxos_service[PAXOS_CONFIG].reset(new ConfigMonitor(this, paxos, "config")); + paxos_service[PAXOS_MDSMAP].reset(new MDSMonitor(*this, *paxos, "mdsmap")); + paxos_service[PAXOS_MONMAP].reset(new MonmapMonitor(*this, *paxos, "monmap")); + paxos_service[PAXOS_OSDMAP].reset(new OSDMonitor(cct, *this, *paxos, "osdmap")); + paxos_service[PAXOS_LOG].reset(new LogMonitor(*this, *paxos, "logm")); + paxos_service[PAXOS_AUTH].reset(new AuthMonitor(*this, *paxos, "auth")); + paxos_service[PAXOS_MGR].reset(new MgrMonitor(*this, *paxos, "mgr")); + paxos_service[PAXOS_MGRSTAT].reset(new MgrStatMonitor(*this, *paxos, "mgrstat")); + paxos_service[PAXOS_HEALTH].reset(new HealthMonitor(*this, *paxos, "health")); + paxos_service[PAXOS_CONFIG].reset(new ConfigMonitor(*this, *paxos, "config")); - config_key_service = new ConfigKeyService(this, paxos); + config_key_service = std::make_unique<ConfigKeyService>(*this, *paxos); bool r = mon_caps.parse("allow *", NULL); ceph_assert(r); @@ -285,8 +284,7 @@ Monitor::~Monitor() { op_tracker.on_shutdown(); - delete config_key_service; - delete paxos; + delete logger; ceph_assert(session_map.sessions.empty()); } @@ -656,7 +654,8 @@ void Monitor::handle_conf_change(const ConfigProxy& conf, } if (changed.count("mon_scrub_interval")) { - int scrub_interval = conf->mon_scrub_interval; + auto scrub_interval = + conf.get_val<std::chrono::seconds>("mon_scrub_interval"); finisher.queue(new C_MonContext{this, [this, scrub_interval](int) { std::lock_guard l{lock}; 
scrub_update_interval(scrub_interval); @@ -1082,8 +1081,6 @@ void Monitor::shutdown() if (logger) { cct->get_perfcounters_collection()->remove(logger); - delete logger; - logger = NULL; } if (cluster_logger) { if (cluster_logger_registered) @@ -1371,9 +1368,7 @@ set<string> Monitor::get_sync_targets_names() for (auto& svc : paxos_service) { svc->get_store_prefixes(targets); } - ConfigKeyService *config_key_service_ptr = dynamic_cast<ConfigKeyService*>(config_key_service); - ceph_assert(config_key_service_ptr); - config_key_service_ptr->get_store_prefixes(targets); + config_key_service->get_store_prefixes(targets); return targets; } @@ -5640,14 +5635,14 @@ void Monitor::scrub_reset() scrub_state.reset(); } -inline void Monitor::scrub_update_interval(int secs) +inline void Monitor::scrub_update_interval(ceph::timespan interval) { // we don't care about changes if we are not the leader. // changes will be visible if we become the leader. if (!is_leader()) return; - dout(1) << __func__ << " new interval = " << secs << dendl; + dout(1) << __func__ << " new interval = " << interval << dendl; // if scrub already in progress, all changes will already be visible during // the next round. Nothing to do. 
@@ -5665,15 +5660,17 @@ void Monitor::scrub_event_start() if (scrub_event) scrub_event_cancel(); - if (cct->_conf->mon_scrub_interval <= 0) { + auto scrub_interval = + cct->_conf.get_val<std::chrono::seconds>("mon_scrub_interval"); + if (scrub_interval == std::chrono::seconds::zero()) { dout(1) << __func__ << " scrub event is disabled" - << " (mon_scrub_interval = " << cct->_conf->mon_scrub_interval + << " (mon_scrub_interval = " << scrub_interval << ")" << dendl; return; } scrub_event = timer.add_event_after( - cct->_conf->mon_scrub_interval, + scrub_interval, new C_MonContext{this, [this](int) { scrub_start(); }}); diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 936af1ee91d..01e78da381c 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -98,7 +98,7 @@ enum { l_mon_last, }; -class QuorumService; +class ConfigKeyService; class PaxosService; class AdminSocketHook; @@ -218,7 +218,7 @@ public: // -- elector -- private: - Paxos *paxos; + std::unique_ptr<Paxos> paxos; Elector elector; friend class Elector; @@ -298,7 +298,7 @@ private: void scrub_timeout(); void scrub_finish(); void scrub_reset(); - void scrub_update_interval(int secs); + void scrub_update_interval(ceph::timespan interval); Context *scrub_event; ///< periodic event to trigger scrub (leader) Context *scrub_timeout_event; ///< scrub round timeout (leader) @@ -684,7 +684,7 @@ public: friend class LogMonitor; friend class ConfigKeyService; - QuorumService *config_key_service; + std::unique_ptr<ConfigKeyService> config_key_service; // -- sessions -- MonSessionMap session_map; @@ -842,14 +842,14 @@ public: public: struct C_Command : public C_MonOp { - Monitor *mon; + Monitor &mon; int rc; std::string rs; ceph::buffer::list rdata; version_t version; - C_Command(Monitor *_mm, MonOpRequestRef _op, int r, std::string s, version_t v) : + C_Command(Monitor &_mm, MonOpRequestRef _op, int r, std::string s, version_t v) : C_MonOp(_op), mon(_mm), rc(r), rs(s), version(v){} - C_Command(Monitor *_mm, 
MonOpRequestRef _op, int r, std::string s, ceph::buffer::list rd, version_t v) : + C_Command(Monitor &_mm, MonOpRequestRef _op, int r, std::string s, ceph::buffer::list rd, version_t v) : C_MonOp(_op), mon(_mm), rc(r), rs(s), rdata(rd), version(v){} void _finish(int r) override { @@ -871,13 +871,13 @@ public: } ss << "cmd='" << m->cmd << "': finished"; - mon->audit_clog->info() << ss.str(); - mon->reply_command(op, rc, rs, rdata, version); + mon.audit_clog->info() << ss.str(); + mon.reply_command(op, rc, rs, rdata, version); } else if (r == -ECANCELED) return; else if (r == -EAGAIN) - mon->dispatch_op(op); + mon.dispatch_op(op); else ceph_abort_msg("bad C_Command return value"); } diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc index 4669e5872f6..75652e00a2f 100644 --- a/src/mon/MonmapMonitor.cc +++ b/src/mon/MonmapMonitor.cc @@ -58,16 +58,16 @@ using ceph::make_message; using ceph::mono_clock; using ceph::mono_time; using ceph::timespan_str; -static ostream& _prefix(std::ostream *_dout, Monitor *mon) { - return *_dout << "mon." << mon->name << "@" << mon->rank - << "(" << mon->get_state_name() - << ").monmap v" << mon->monmap->epoch << " "; +static ostream& _prefix(std::ostream *_dout, Monitor &mon) { + return *_dout << "mon." 
<< mon.name << "@" << mon.rank + << "(" << mon.get_state_name() + << ").monmap v" << mon.monmap->epoch << " "; } void MonmapMonitor::create_initial() { dout(10) << __func__ << " using current monmap" << dendl; - pending_map = *mon->monmap; + pending_map = *mon.monmap; pending_map.epoch = 1; if (g_conf()->mon_debug_no_initial_persistent_features) { @@ -83,13 +83,13 @@ void MonmapMonitor::create_initial() void MonmapMonitor::update_from_paxos(bool *need_bootstrap) { version_t version = get_last_committed(); - if (version <= mon->monmap->get_epoch()) + if (version <= mon.monmap->get_epoch()) return; dout(10) << __func__ << " version " << version - << ", my v " << mon->monmap->epoch << dendl; + << ", my v " << mon.monmap->epoch << dendl; - if (need_bootstrap && version != mon->monmap->get_epoch()) { + if (need_bootstrap && version != mon.monmap->get_epoch()) { dout(10) << " signaling that we need a bootstrap" << dendl; *need_bootstrap = true; } @@ -101,32 +101,32 @@ void MonmapMonitor::update_from_paxos(bool *need_bootstrap) ceph_assert(monmap_bl.length()); dout(10) << __func__ << " got " << version << dendl; - mon->monmap->decode(monmap_bl); + mon.monmap->decode(monmap_bl); - if (mon->store->exists("mkfs", "monmap")) { + if (mon.store->exists("mkfs", "monmap")) { auto t(std::make_shared<MonitorDBStore::Transaction>()); t->erase("mkfs", "monmap"); - mon->store->apply_transaction(t); + mon.store->apply_transaction(t); } check_subs(); // make sure we've recorded min_mon_release string val; - if (mon->store->read_meta("min_mon_release", &val) < 0 || + if (mon.store->read_meta("min_mon_release", &val) < 0 || val.size() == 0 || atoi(val.c_str()) != (int)ceph_release()) { dout(10) << __func__ << " updating min_mon_release meta" << dendl; - mon->store->write_meta("min_mon_release", + mon.store->write_meta("min_mon_release", stringify(ceph_release())); } - mon->notify_new_monmap(); + mon.notify_new_monmap(); } void MonmapMonitor::create_pending() { - pending_map = 
*mon->monmap; + pending_map = *mon.monmap; pending_map.epoch++; pending_map.last_changed = ceph_clock_now(); dout(10) << __func__ << " monmap epoch " << pending_map.epoch << dendl; @@ -136,17 +136,17 @@ void MonmapMonitor::encode_pending(MonitorDBStore::TransactionRef t) { dout(10) << __func__ << " epoch " << pending_map.epoch << dendl; - ceph_assert(mon->monmap->epoch + 1 == pending_map.epoch || + ceph_assert(mon.monmap->epoch + 1 == pending_map.epoch || pending_map.epoch == 1); // special case mkfs! bufferlist bl; - pending_map.encode(bl, mon->get_quorum_con_features()); + pending_map.encode(bl, mon.get_quorum_con_features()); put_version(t, pending_map.epoch, bl); put_last_committed(t, pending_map.epoch); // generate a cluster fingerprint, too? if (pending_map.epoch == 1) { - mon->prepare_new_fingerprint(t); + mon.prepare_new_fingerprint(t); } //health @@ -185,7 +185,7 @@ void MonmapMonitor::apply_mon_features(const mon_feature_t& features, } // do nothing here unless we have a full quorum - if (mon->get_quorum().size() < mon->monmap->size()) { + if (mon.get_quorum().size() < mon.monmap->size()) { return; } @@ -226,7 +226,7 @@ void MonmapMonitor::apply_mon_features(const mon_feature_t& features, void MonmapMonitor::on_active() { - if (get_last_committed() >= 1 && !mon->has_ever_joined) { + if (get_last_committed() >= 1 && !mon.has_ever_joined) { // make note of the fact that i was, once, part of the quorum. dout(10) << "noting that i was, once, part of an active quorum." 
<< dendl; @@ -238,16 +238,16 @@ void MonmapMonitor::on_active() */ auto t(std::make_shared<MonitorDBStore::Transaction>()); t->put(Monitor::MONITOR_NAME, "joined", 1); - mon->store->apply_transaction(t); - mon->has_ever_joined = true; + mon.store->apply_transaction(t); + mon.has_ever_joined = true; } - if (mon->is_leader()) { - mon->clog->debug() << "monmap " << *mon->monmap; + if (mon.is_leader()) { + mon.clog->debug() << "monmap " << *mon.monmap; } - apply_mon_features(mon->get_quorum_mon_features(), - mon->quorum_min_mon_release); + apply_mon_features(mon.get_quorum_mon_features(), + mon.quorum_min_mon_release); } bool MonmapMonitor::preprocess_query(MonOpRequestRef op) @@ -261,7 +261,7 @@ bool MonmapMonitor::preprocess_query(MonOpRequestRef op) } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } case MSG_MON_JOIN: @@ -277,10 +277,10 @@ void MonmapMonitor::dump_info(Formatter *f) f->dump_unsigned("monmap_first_committed", get_first_committed()); f->dump_unsigned("monmap_last_committed", get_last_committed()); f->open_object_section("monmap"); - mon->monmap->dump(f); + mon.monmap->dump(f); f->close_section(); f->open_array_section("quorum"); - for (set<int>::iterator q = mon->get_quorum().begin(); q != mon->get_quorum().end(); ++q) + for (set<int>::iterator q = mon.get_quorum().begin(); q != mon.get_quorum().end(); ++q) f->dump_int("mon", *q); f->close_section(); } @@ -295,7 +295,7 @@ bool MonmapMonitor::preprocess_command(MonOpRequestRef op) cmdmap_t cmdmap; if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); return true; } @@ -304,7 +304,7 @@ bool MonmapMonitor::preprocess_command(MonOpRequestRef op) MonSession *session = op->get_session(); if (!session) { - 
mon->reply_command(op, -EACCES, "access denied", get_last_committed()); + mon.reply_command(op, -EACCES, "access denied", get_last_committed()); return true; } @@ -315,11 +315,11 @@ bool MonmapMonitor::preprocess_command(MonOpRequestRef op) if (prefix == "mon stat") { if (f) { f->open_object_section("monmap"); - mon->monmap->dump_summary(f.get()); - f->dump_string("leader", mon->get_leader_name()); + mon.monmap->dump_summary(f.get()); + f->dump_string("leader", mon.get_leader_name()); f->open_array_section("quorum"); - for (auto rank: mon->get_quorum()) { - std::string name = mon->monmap->get_name(rank); + for (auto rank: mon.get_quorum()) { + std::string name = mon.monmap->get_name(rank); f->open_object_section("mon"); f->dump_int("rank", rank); f->dump_string("name", name); @@ -329,11 +329,11 @@ bool MonmapMonitor::preprocess_command(MonOpRequestRef op) f->close_section(); // monmap f->flush(ss); } else { - mon->monmap->print_summary(ss); - ss << ", election epoch " << mon->get_epoch() << ", leader " - << mon->get_leader() << " " << mon->get_leader_name() - << ", quorum " << mon->get_quorum() - << " " << mon->get_quorum_names(); + mon.monmap->print_summary(ss); + ss << ", election epoch " << mon.get_epoch() << ", leader " + << mon.get_leader() << " " << mon.get_leader_name() + << ", quorum " << mon.get_quorum() + << " " << mon.get_quorum_names(); } rdata.append(ss); @@ -348,7 +348,7 @@ bool MonmapMonitor::preprocess_command(MonOpRequestRef op) cmd_getval(cmdmap, "epoch", epochnum, (int64_t)0); epoch = epochnum; - MonMap *p = mon->monmap; + MonMap *p = mon.monmap; if (epoch) { bufferlist bl; r = get_version(epoch, bl); @@ -374,8 +374,8 @@ bool MonmapMonitor::preprocess_command(MonOpRequestRef op) f->open_object_section("monmap"); p->dump(f.get()); f->open_array_section("quorum"); - for (set<int>::iterator q = mon->get_quorum().begin(); - q != mon->get_quorum().end(); ++q) { + for (set<int>::iterator q = mon.get_quorum().begin(); + q != mon.get_quorum().end(); ++q) 
{ f->dump_int("mon", *q); } f->close_section(); @@ -389,7 +389,7 @@ bool MonmapMonitor::preprocess_command(MonOpRequestRef op) rdata.append(ds); ss << "dumped monmap epoch " << p->get_epoch(); } - if (p != mon->monmap) { + if (p != mon.monmap) { delete p; p = nullptr; } @@ -403,7 +403,7 @@ bool MonmapMonitor::preprocess_command(MonOpRequestRef op) list_with_value = true; } - MonMap *p = mon->monmap; + MonMap *p = mon.monmap; // list features mon_feature_t supported = ceph::features::mon::get_supported(); @@ -472,7 +472,7 @@ reply: string rs; getline(ss, rs); - mon->reply_command(op, r, rs, rdata, get_last_committed()); + mon.reply_command(op, r, rs, rdata, get_last_committed()); return true; } else return false; @@ -490,7 +490,7 @@ bool MonmapMonitor::prepare_update(MonOpRequestRef op) return prepare_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } case MSG_MON_JOIN: @@ -512,7 +512,7 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) cmdmap_t cmdmap; if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); return true; } @@ -521,7 +521,7 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) MonSession *session = op->get_session(); if (!session) { - mon->reply_command(op, -EACCES, "access denied", get_last_committed()); + mon.reply_command(op, -EACCES, "access denied", get_last_committed()); return true; } @@ -553,8 +553,8 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) * state, thus we are not bound by it. 
*/ - ceph_assert(mon->monmap); - MonMap &monmap = *mon->monmap; + ceph_assert(mon.monmap); + MonMap &monmap = *mon.monmap; /* Please note: @@ -595,7 +595,7 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) cmd_getval(cmdmap, "location", locationvec); CrushWrapper::parse_loc_map(locationvec, &loc); if (locationvec.size() && - !mon->get_quorum_mon_features().contains_all( + !mon.get_quorum_mon_features().contains_all( ceph::features::mon::FEATURE_PINGING)) { err = -ENOTSUP; ss << "Not all monitors support adding monitors with a location; please upgrade first!"; @@ -797,10 +797,10 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) goto reply; } - if (!mon->get_quorum_mon_features().contains_all(feature)) { + if (!mon.get_quorum_mon_features().contains_all(feature)) { ss << "current quorum does not support feature '" << feature << "'; supported features: " - << mon->get_quorum_mon_features(); + << mon.get_quorum_mon_features(); err = -EINVAL; goto reply; } @@ -916,7 +916,7 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) } err = 0; } else if (prefix == "mon set election_strategy") { - if (!mon->get_quorum_mon_features().contains_all( + if (!mon.get_quorum_mon_features().contains_all( ceph::features::mon::FEATURE_PINGING)) { err = -ENOTSUP; ss << "Not all monitors support changing election strategies; please upgrade first!"; @@ -942,7 +942,7 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) pending_map.strategy = strategy; propose = true; } else if (prefix == "mon add disallowed_leader") { - if (!mon->get_quorum_mon_features().contains_all( + if (!mon.get_quorum_mon_features().contains_all( ceph::features::mon::FEATURE_PINGING)) { err = -ENOTSUP; ss << "Not all monitors support changing election strategies; please upgrade first!"; @@ -978,7 +978,7 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) err = 0; propose = true; } else if (prefix == "mon rm disallowed_leader") { - if (!mon->get_quorum_mon_features().contains_all( + 
if (!mon.get_quorum_mon_features().contains_all( ceph::features::mon::FEATURE_PINGING)) { err = -ENOTSUP; ss << "Not all monitors support changing election strategies; please upgrade first!"; @@ -1009,7 +1009,7 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) err = 0; propose = true; } else if (prefix == "mon set_location") { - if (!mon->get_quorum_mon_features().contains_all( + if (!mon.get_quorum_mon_features().contains_all( ceph::features::mon::FEATURE_PINGING)) { err = -ENOTSUP; ss << "Not all monitors support monitor locations; please upgrade first!"; @@ -1026,11 +1026,11 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) goto reply; } - if (!mon->osdmon()->is_readable()) { - mon->osdmon()->wait_for_readable(op, new Monitor::C_RetryMessage(mon, op)); + if (!mon.osdmon()->is_readable()) { + mon.osdmon()->wait_for_readable(op, new Monitor::C_RetryMessage(&mon, op)); } CrushWrapper crush; - mon->osdmon()->_get_pending_crush(crush); + mon.osdmon()->_get_pending_crush(crush); vector<string> argvec; map<string, string> loc; cmd_getval(cmdmap, "args", argvec); @@ -1050,10 +1050,10 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) err = 0; propose = true; } else if (prefix == "mon enable_stretch_mode") { - if (!mon->osdmon()->is_writeable()) { + if (!mon.osdmon()->is_writeable()) { dout(1) << __func__ << ": waiting for osdmon writeable for stretch mode" << dendl; - mon->osdmon()->wait_for_writeable(op, new Monitor::C_RetryMessage(mon, op)); + mon.osdmon()->wait_for_writeable(op, new Monitor::C_RetryMessage(&mon, op)); return false; } { @@ -1086,20 +1086,20 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) goto reply; } //okay, initial arguments make sense, check pools and cluster state - err = mon->osdmon()->check_cluster_features(CEPH_FEATUREMASK_STRETCH_MODE, ss); + err = mon.osdmon()->check_cluster_features(CEPH_FEATUREMASK_STRETCH_MODE, ss); if (err) goto reply; struct Plugger { - Paxos *p; - Plugger(Paxos *p) : p(p) { 
p->plug(); } - ~Plugger() { p->unplug(); } + Paxos &p; + Plugger(Paxos &p) : p(p) { p.plug(); } + ~Plugger() { p.unplug(); } } plugger(paxos); set<pg_pool_t*> pools; bool okay = false; int errcode = 0; - mon->osdmon()->try_enable_stretch_mode_pools(ss, &okay, &errcode, + mon.osdmon()->try_enable_stretch_mode_pools(ss, &okay, &errcode, &pools, new_crush_rule); if (!okay) { err = errcode; @@ -1111,7 +1111,7 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) err = errcode; goto reply; } - mon->osdmon()->try_enable_stretch_mode(ss, &okay, &errcode, false, + mon.osdmon()->try_enable_stretch_mode(ss, &okay, &errcode, false, dividing_bucket, 2, pools, new_crush_rule); if (!okay) { err = errcode; @@ -1120,13 +1120,13 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) // everything looks good, actually commit the changes! try_enable_stretch_mode(ss, &okay, &errcode, true, tiebreaker_mon, dividing_bucket); - mon->osdmon()->try_enable_stretch_mode(ss, &okay, &errcode, true, + mon.osdmon()->try_enable_stretch_mode(ss, &okay, &errcode, true, dividing_bucket, 2, // right now we only support 2 sites pools, new_crush_rule); ceph_assert(okay == true); } - request_proposal(mon->osdmon()); + request_proposal(mon.osdmon()); err = 0; propose = true; } else { @@ -1136,7 +1136,7 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) reply: getline(ss, rs); - mon->reply_command(op, err, rs, get_last_committed()); + mon.reply_command(op, err, rs, get_last_committed()); // we are returning to the user; do not propose. 
return propose; } @@ -1161,7 +1161,7 @@ void MonmapMonitor::try_enable_stretch_mode(stringstream& ss, bool *okay, return; } map<string,string> buckets; - for (const auto&mii : mon->monmap->mon_info) { + for (const auto&mii : mon.monmap->mon_info) { const auto& mi = mii.second; const auto& bi = mi.crush_loc.find(dividing_bucket); if (bi == mi.crush_loc.end()) { @@ -1288,7 +1288,7 @@ int MonmapMonitor::get_monmap(bufferlist &bl) version_t latest_ver = get_last_committed(); dout(10) << __func__ << " ver " << latest_ver << dendl; - if (!mon->store->exists(get_service_name(), stringify(latest_ver))) + if (!mon.store->exists(get_service_name(), stringify(latest_ver))) return -ENOENT; int err = get_version(latest_ver, bl); @@ -1303,7 +1303,7 @@ int MonmapMonitor::get_monmap(bufferlist &bl) void MonmapMonitor::check_subs() { const string type = "monmap"; - mon->with_session_map([this, &type](const MonSessionMap& session_map) { + mon.with_session_map([this, &type](const MonSessionMap& session_map) { auto subs = session_map.subs.find(type); if (subs == session_map.subs.end()) return; @@ -1315,14 +1315,14 @@ void MonmapMonitor::check_subs() void MonmapMonitor::check_sub(Subscription *sub) { - const auto epoch = mon->monmap->get_epoch(); + const auto epoch = mon.monmap->get_epoch(); dout(10) << __func__ << " monmap next " << sub->next << " have " << epoch << dendl; if (sub->next <= epoch) { - mon->send_latest_monmap(sub->session->con.get()); + mon.send_latest_monmap(sub->session->con.get()); if (sub->onetime) { - mon->with_session_map([sub](MonSessionMap& session_map) { + mon.with_session_map([sub](MonSessionMap& session_map) { session_map.remove_sub(sub); }); } else { @@ -1334,11 +1334,11 @@ void MonmapMonitor::check_sub(Subscription *sub) void MonmapMonitor::tick() { if (!is_active() || - !mon->is_leader()) { + !mon.is_leader()) { return; } - if (mon->monmap->created.is_zero()) { + if (mon.monmap->created.is_zero()) { dout(10) << __func__ << " detected empty created stamp" 
<< dendl; utime_t ctime; for (version_t v = 1; v <= get_last_committed(); v++) { diff --git a/src/mon/MonmapMonitor.h b/src/mon/MonmapMonitor.h index b4cadacfa81..cf22ae9f8e3 100644 --- a/src/mon/MonmapMonitor.h +++ b/src/mon/MonmapMonitor.h @@ -31,7 +31,7 @@ class MonmapMonitor : public PaxosService { public: - MonmapMonitor(Monitor *mn, Paxos *p, const std::string& service_name) + MonmapMonitor(Monitor &mn, Paxos &p, const std::string& service_name) : PaxosService(mn, p, service_name) { } diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 150d9393bf6..e6d09366d04 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -435,23 +435,23 @@ public: #undef dout_prefix #define dout_prefix _prefix(_dout, mon, osdmap) -static ostream& _prefix(std::ostream *_dout, Monitor *mon, const OSDMap& osdmap) { - return *_dout << "mon." << mon->name << "@" << mon->rank - << "(" << mon->get_state_name() +static ostream& _prefix(std::ostream *_dout, Monitor &mon, const OSDMap& osdmap) { + return *_dout << "mon." 
<< mon.name << "@" << mon.rank + << "(" << mon.get_state_name() << ").osd e" << osdmap.get_epoch() << " "; } OSDMonitor::OSDMonitor( CephContext *cct, - Monitor *mn, - Paxos *p, + Monitor &mn, + Paxos &p, const string& service_name) : PaxosService(mn, p, service_name), cct(cct), inc_osd_cache(g_conf()->mon_osd_cache_size), full_osd_cache(g_conf()->mon_osd_cache_size), has_osdmap_manifest(false), - mapper(mn->cct, &mn->cpu_tp) + mapper(mn.cct, &mn.cpu_tp) { inc_cache = std::make_shared<IncCache>(this); full_cache = std::make_shared<FullCache>(this); @@ -629,18 +629,18 @@ void OSDMonitor::_get_pending_crush(CrushWrapper& newcrush) void OSDMonitor::create_initial() { - dout(10) << "create_initial for " << mon->monmap->fsid << dendl; + dout(10) << "create_initial for " << mon.monmap->fsid << dendl; OSDMap newmap; bufferlist bl; - mon->store->get("mkfs", "osdmap", bl); + mon.store->get("mkfs", "osdmap", bl); if (bl.length()) { newmap.decode(bl); - newmap.set_fsid(mon->monmap->fsid); + newmap.set_fsid(mon.monmap->fsid); } else { - newmap.build_simple(cct, 0, mon->monmap->fsid, 0); + newmap.build_simple(cct, 0, mon.monmap->fsid, 0); } newmap.set_epoch(1); newmap.created = newmap.modified = ceph_clock_now(); @@ -755,7 +755,7 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) latest_full = 0; for (version_t v = lc; v >= fc; v--) { string full_key = "full_" + stringify(v); - if (mon->store->exists(get_service_name(), full_key)) { + if (mon.store->exists(get_service_name(), full_key)) { dout(10) << __func__ << " found latest full map v " << v << dendl; latest_full = v; break; @@ -765,7 +765,7 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) ceph_assert(latest_full > 0); auto t(std::make_shared<MonitorDBStore::Transaction>()); put_version_latest_full(t, latest_full); - mon->store->apply_transaction(t); + mon.store->apply_transaction(t); dout(10) << __func__ << " updated the on-disk full map version to " << latest_full << dendl; } @@ -780,7 +780,7 @@ void 
OSDMonitor::update_from_paxos(bool *need_bootstrap) } bufferlist bl; - if (!mon->store->get(OSD_PG_CREATING_PREFIX, "creating", bl)) { + if (!mon.store->get(OSD_PG_CREATING_PREFIX, "creating", bl)) { auto p = bl.cbegin(); std::lock_guard<std::mutex> l(creating_pgs_lock); creating_pgs.decode(p); @@ -827,7 +827,7 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) // encode with all features. uint64_t f = inc.encode_features; if (!f) - f = mon->get_quorum_con_features(); + f = mon.get_quorum_con_features(); if (!f) f = -1; bufferlist full_bl; @@ -882,24 +882,30 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) } if (tx_size > g_conf()->mon_sync_max_payload_size*2) { - mon->store->apply_transaction(t); + mon.store->apply_transaction(t); t = MonitorDBStore::TransactionRef(); tx_size = 0; } - for (const auto &osd_state : inc.new_state) { - if (osd_state.second & CEPH_OSD_UP) { + for (const auto [osd, state] : inc.new_state) { + if (state & CEPH_OSD_UP) { // could be marked up *or* down, but we're too lazy to check which - last_osd_report.erase(osd_state.first); + last_osd_report.erase(osd); } - if (osd_state.second & CEPH_OSD_OUT) { + if (state & CEPH_OSD_OUT) { // could be marked in *or* out, but we can safely drop it - osd_epochs.erase(osd_state.first); + osd_epochs.erase(osd); + } + } + for (const auto [osd, weight] : inc.new_weight) { + if (weight == CEPH_OSD_OUT) { + // manually marked out, so drop it + osd_epochs.erase(osd); } } } if (t) { - mon->store->apply_transaction(t); + mon.store->apply_transaction(t); } bool marked_osd_down = false; @@ -933,17 +939,17 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) // make sure our feature bits reflect the latest map update_msgr_features(); - if (!mon->is_leader()) { + if (!mon.is_leader()) { // will be called by on_active() on the leader, avoid doing so twice start_mapping(); } if (osdmap.stretch_mode_enabled) { dout(20) << "Stretch mode enabled in this map" << dendl; - 
mon->maybe_engage_stretch_mode(); + mon.maybe_engage_stretch_mode(); if (osdmap.degraded_stretch_mode) { dout(20) << "Degraded stretch mode set in this map" << dendl; if (!osdmap.recovering_stretch_mode) { - mon->set_degraded_stretch_mode(); + mon.set_degraded_stretch_mode(); if (prev_num_up_osd < osdmap.num_up_osd && (osdmap.num_up_osd / (double)osdmap.num_osd) > cct->_conf.get_val<double>("mon_stretch_cluster_recovery_ratio")) { @@ -951,14 +957,14 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) // trimmed and everything is "normal" but not if you have a lot of out OSDs // you're ignoring or in some really degenerate failure cases dout(10) << "Enabling recovery stretch mode in this map" << dendl; - mon->go_recovery_stretch_mode(); + mon.go_recovery_stretch_mode(); } } } if (marked_osd_down && (!osdmap.degraded_stretch_mode || osdmap.recovering_stretch_mode)) { dout(20) << "Checking degraded stretch mode due to osd changes" << dendl; - mon->maybe_go_degraded_stretch_mode(); + mon.maybe_go_degraded_stretch_mode(); } if (osdmap.recovering_stretch_mode && stretch_recovery_triggered.is_zero()) { stretch_recovery_triggered = ceph_clock_now(); @@ -989,7 +995,7 @@ int OSDMonitor::register_cache_with_pcm() max = ltarget - base; } - rocksdb_binned_kv_cache = mon->store->get_priority_cache(); + rocksdb_binned_kv_cache = mon.store->get_priority_cache(); if (!rocksdb_binned_kv_cache) { derr << __func__ << " not using rocksdb" << dendl; return -EINVAL; @@ -1064,19 +1070,20 @@ void OSDMonitor::start_mapping() void OSDMonitor::update_msgr_features() { - set<int> types; - types.insert((int)entity_name_t::TYPE_OSD); - types.insert((int)entity_name_t::TYPE_CLIENT); - types.insert((int)entity_name_t::TYPE_MDS); - types.insert((int)entity_name_t::TYPE_MON); - for (set<int>::iterator q = types.begin(); q != types.end(); ++q) { + const int types[] = { + entity_name_t::TYPE_OSD, + entity_name_t::TYPE_CLIENT, + entity_name_t::TYPE_MDS, + entity_name_t::TYPE_MON + }; + for (int 
type : types) { uint64_t mask; - uint64_t features = osdmap.get_features(*q, &mask); - if ((mon->messenger->get_policy(*q).features_required & mask) != features) { + uint64_t features = osdmap.get_features(type, &mask); + if ((mon.messenger->get_policy(type).features_required & mask) != features) { dout(0) << "crush map has features " << features << ", adjusting msgr requires" << dendl; - ceph::net::Policy p = mon->messenger->get_policy(*q); + ceph::net::Policy p = mon.messenger->get_policy(type); p.features_required = (p.features_required & ~mask) | features; - mon->messenger->set_policy(*q, p); + mon.messenger->set_policy(type, p); } } } @@ -1085,8 +1092,8 @@ void OSDMonitor::on_active() { update_logger(); - if (mon->is_leader()) { - mon->clog->debug() << "osdmap " << osdmap; + if (mon.is_leader()) { + mon.clog->debug() << "osdmap " << osdmap; if (!priority_convert) { // Only do this once at start-up convert_pool_priorities(); @@ -1129,16 +1136,16 @@ void OSDMonitor::update_logger() { dout(10) << "update_logger" << dendl; - mon->cluster_logger->set(l_cluster_num_osd, osdmap.get_num_osds()); - mon->cluster_logger->set(l_cluster_num_osd_up, osdmap.get_num_up_osds()); - mon->cluster_logger->set(l_cluster_num_osd_in, osdmap.get_num_in_osds()); - mon->cluster_logger->set(l_cluster_osd_epoch, osdmap.get_epoch()); + mon.cluster_logger->set(l_cluster_num_osd, osdmap.get_num_osds()); + mon.cluster_logger->set(l_cluster_num_osd_up, osdmap.get_num_up_osds()); + mon.cluster_logger->set(l_cluster_num_osd_in, osdmap.get_num_in_osds()); + mon.cluster_logger->set(l_cluster_osd_epoch, osdmap.get_epoch()); } void OSDMonitor::create_pending() { pending_inc = OSDMap::Incremental(osdmap.epoch+1); - pending_inc.fsid = mon->monmap->fsid; + pending_inc.fsid = mon.monmap->fsid; pending_metadata.clear(); pending_metadata_rm.clear(); pending_pseudo_purged_snaps.clear(); @@ -1201,7 +1208,7 @@ void OSDMonitor::create_pending() dout(1) << __func__ << " " << i.first << " -> " << i.second << 
dendl; } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); } } @@ -1303,7 +1310,7 @@ OSDMonitor::update_pending_pgs(const OSDMap::Incremental& inc, dout(10) << __func__ << " queue remaining: " << pending_creatings.queue.size() << " pools" << dendl; - if (mon->monmap->min_mon_release >= ceph_release_t::octopus) { + if (mon.monmap->min_mon_release >= ceph_release_t::octopus) { // walk creating pgs' history and past_intervals forward for (auto& i : pending_creatings.pgs) { // this mirrors PG::start_peering_interval() @@ -1646,7 +1653,7 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) auto pending_creatings = update_pending_pgs(pending_inc, tmp); bufferlist creatings_bl; uint64_t features = CEPH_FEATURES_ALL; - if (mon->monmap->min_mon_release < ceph_release_t::octopus) { + if (mon.monmap->min_mon_release < ceph_release_t::octopus) { dout(20) << __func__ << " encoding pending pgs without octopus features" << dendl; features &= ~CEPH_FEATURE_SERVER_OCTOPUS; @@ -1855,7 +1862,7 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) auto mv = tmp.get_min_compat_client(); dout(1) << __func__ << " setting require_min_compat_client to currently " << "required " << mv << dendl; - mon->clog->info() << "setting require_min_compat_client to currently " + mon.clog->info() << "setting require_min_compat_client to currently " << "required " << mv; pending_inc.new_require_min_compat_client = mv; } @@ -1925,7 +1932,7 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) // prior to this epoch, and store it in the current epoch (i.e., // the last pre-octopus epoch, just prior to the one we're // encoding now). 
- auto it = mon->store->get_iterator(OSD_SNAP_PREFIX); + auto it = mon.store->get_iterator(OSD_SNAP_PREFIX); it->lower_bound("purged_snap_"); map<int64_t,snap_interval_set_t> combined; while (it->valid()) { @@ -2020,7 +2027,7 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) << " features " << features << dendl; // the features should be a subset of the mon quorum's features! - ceph_assert((features & ~mon->get_quorum_con_features()) == 0); + ceph_assert((features & ~mon.get_quorum_con_features()) == 0); bufferlist fullbl; encode(tmp, fullbl, features | CEPH_FEATURE_RESERVED); @@ -2091,7 +2098,7 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) int OSDMonitor::load_metadata(int osd, map<string, string>& m, ostream *err) { bufferlist bl; - int r = mon->store->get(OSD_METADATA_PREFIX, stringify(osd), bl); + int r = mon.store->get(OSD_METADATA_PREFIX, stringify(osd), bl); if (r < 0) return r; try { @@ -2225,7 +2232,7 @@ void OSDMonitor::share_map_with_random_osd() return; } - MonSession *s = mon->session_map.get_random_osd_session(&osdmap); + MonSession *s = mon.session_map.get_random_osd_session(&osdmap); if (!s) { dout(10) << __func__ << " no up osd on our session map" << dendl; return; @@ -2237,7 +2244,7 @@ void OSDMonitor::share_map_with_random_osd() // get feature of the peer // use quorum_con_features, if it's an anonymous connection. uint64_t features = s->con_features ? 
s->con_features : - mon->get_quorum_con_features(); + mon.get_quorum_con_features(); // whatev, they'll request more if they need it MOSDMap *m = build_incremental(osdmap.get_epoch() - 1, osdmap.get_epoch(), features); s->con->send_message(m); @@ -2247,14 +2254,15 @@ void OSDMonitor::share_map_with_random_osd() version_t OSDMonitor::get_trim_to() const { - if (mon->get_quorum().empty()) { - dout(10) << __func__ << ": quorum not formed" << dendl; + if (mon.get_quorum().empty()) { + dout(10) << __func__ << " quorum not formed, trim_to = 0" << dendl; return 0; } { std::lock_guard<std::mutex> l(creating_pgs_lock); if (!creating_pgs.pgs.empty()) { + dout(10) << __func__ << " pgs creating, trim_to = 0" << dendl; return 0; } } @@ -2262,8 +2270,8 @@ version_t OSDMonitor::get_trim_to() const if (g_conf().get_val<bool>("mon_debug_block_osdmap_trim")) { dout(0) << __func__ << " blocking osdmap trim" - " ('mon_debug_block_osdmap_trim' set to 'true')" - << dendl; + << " ('mon_debug_block_osdmap_trim' set to 'true')" + << " trim_to = 0" << dendl; return 0; } @@ -2273,7 +2281,8 @@ version_t OSDMonitor::get_trim_to() const if (g_conf()->mon_osd_force_trim_to > 0 && g_conf()->mon_osd_force_trim_to < (int)get_last_committed()) { floor = g_conf()->mon_osd_force_trim_to; - dout(10) << " explicit mon_osd_force_trim_to = " << floor << dendl; + dout(10) << __func__ + << " explicit mon_osd_force_trim_to = " << floor << dendl; } unsigned min = g_conf()->mon_min_osdmap_epochs; if (floor + min > get_last_committed()) { @@ -2282,9 +2291,12 @@ version_t OSDMonitor::get_trim_to() const else floor = 0; } - if (floor > get_first_committed()) + if (floor > get_first_committed()) { + dout(10) << __func__ << " trim_to = " << floor << dendl; return floor; + } } + dout(10) << __func__ << " trim_to = 0" << dendl; return 0; } @@ -2324,7 +2336,7 @@ void OSDMonitor::encode_trim_extra(MonitorDBStore::TransactionRef tx, void OSDMonitor::load_osdmap_manifest() { bool store_has_manifest = - 
mon->store->exists(get_service_name(), "osdmap_manifest"); + mon.store->exists(get_service_name(), "osdmap_manifest"); if (!store_has_manifest) { if (!has_osdmap_manifest) { @@ -2466,7 +2478,7 @@ void OSDMonitor::prune_init(osdmap_manifest_t& manifest) // the trim that *must* have removed past the last pinned map in a // previous prune). ceph_assert(osdmap_manifest.pinned.empty()); - ceph_assert(!mon->store->exists(get_service_name(), "osdmap_manifest")); + ceph_assert(!mon.store->exists(get_service_name(), "osdmap_manifest")); pin_first = get_first_committed(); } else { @@ -2528,7 +2540,7 @@ bool OSDMonitor::_prune_sanitize_options() const if (txsize < prune_interval - 1) { derr << __func__ - << "'mon_osdmap_full_prune_txsize' (" << txsize + << " 'mon_osdmap_full_prune_txsize' (" << txsize << ") < 'mon_osdmap_full_prune_interval-1' (" << prune_interval - 1 << "); abort." << dendl; r = false; @@ -2541,7 +2553,7 @@ bool OSDMonitor::is_prune_enabled() const { } bool OSDMonitor::is_prune_supported() const { - return mon->get_required_mon_features().contains_any( + return mon.get_required_mon_features().contains_any( ceph::features::mon::FEATURE_OSDMAP_PRUNE); } @@ -2605,8 +2617,8 @@ bool OSDMonitor::do_prune(MonitorDBStore::TransactionRef tx) // hundreds or thousands of maps. 
auto map_exists = [this](version_t v) { - string k = mon->store->combine_strings("full", v); - return mon->store->exists(get_service_name(), k); + string k = mon.store->combine_strings("full", v); + return mon.store->exists(get_service_name(), k); }; // 'interval' represents the number of maps from the last pinned @@ -2659,7 +2671,7 @@ bool OSDMonitor::do_prune(MonitorDBStore::TransactionRef tx) ceph_assert(!manifest.is_pinned(v)); dout(20) << __func__ << " pruning full osdmap e" << v << dendl; - string full_key = mon->store->combine_strings("full", v); + string full_key = mon.store->combine_strings("full", v); tx->erase(get_service_name(), full_key); ++num_pruned; } @@ -2690,7 +2702,7 @@ bool OSDMonitor::preprocess_query(MonOpRequestRef op) return preprocess_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } case CEPH_MSG_MON_GET_OSDMAP: @@ -2767,7 +2779,7 @@ bool OSDMonitor::prepare_update(MonOpRequestRef op) return prepare_command(op); } catch (const bad_cmd_get& e) { bufferlist bl; - mon->reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } @@ -2816,12 +2828,12 @@ bool OSDMonitor::preprocess_get_osdmap(MonOpRequestRef op) op->mark_osdmon_event(__func__); auto m = op->get_req<MMonGetOSDMap>(); - uint64_t features = mon->get_quorum_con_features(); + uint64_t features = mon.get_quorum_con_features(); if (op->get_session() && op->get_session()->con_features) features = op->get_session()->con_features; dout(10) << __func__ << " " << *m << dendl; - MOSDMap *reply = new MOSDMap(mon->monmap->fsid, features); + MOSDMap *reply = new MOSDMap(mon.monmap->fsid, features); epoch_t first = get_first_committed(); epoch_t last = osdmap.get_epoch(); int max = g_conf()->osd_map_message_max; @@ -2844,7 +2856,7 @@ bool 
OSDMonitor::preprocess_get_osdmap(MonOpRequestRef op) } reply->oldest_map = first; reply->newest_map = last; - mon->send_reply(op, reply); + mon.send_reply(op, reply); return true; } @@ -2864,9 +2876,9 @@ bool OSDMonitor::check_source(MonOpRequestRef op, uuid_d fsid) { << session->caps << dendl; return true; } - if (fsid != mon->monmap->fsid) { + if (fsid != mon.monmap->fsid) { dout(0) << "check_source: on fsid " << fsid - << " != " << mon->monmap->fsid << dendl; + << " != " << mon.monmap->fsid << dendl; return true; } return false; @@ -2941,7 +2953,7 @@ bool OSDMonitor::preprocess_failure(MonOpRequestRef op) return false; didit: - mon->no_reply(op); + mon.no_reply(op); return true; } @@ -2956,7 +2968,7 @@ public: void _finish(int r) override { if (r == 0) { auto m = op->get_req<MOSDMarkMeDown>(); - osdmon->mon->send_reply( + osdmon->mon.send_reply( op, new MOSDMarkMeDown( m->fsid, @@ -3022,7 +3034,7 @@ bool OSDMonitor::prepare_mark_me_down(MonOpRequestRef op) ceph_assert(osdmap.is_up(target_osd)); ceph_assert(osdmap.get_addrs(target_osd) == m->target_addrs); - mon->clog->info() << "osd." << target_osd << " marked itself down"; + mon.clog->info() << "osd." << target_osd << " marked itself down"; pending_inc.new_state[target_osd] = CEPH_OSD_UP; if (m->request_ack) wait_for_finished_proposal(op, new C_AckMarkedDown(this, op)); @@ -3037,13 +3049,13 @@ bool OSDMonitor::preprocess_mark_me_dead(MonOpRequestRef op) // check permissions if (check_source(op, m->fsid)) { - mon->no_reply(op); + mon.no_reply(op); return true; } // first, verify the reporting host is valid if (!m->get_orig_source().is_osd()) { - mon->no_reply(op); + mon.no_reply(op); return true; } @@ -3052,7 +3064,7 @@ bool OSDMonitor::preprocess_mark_me_dead(MonOpRequestRef op) dout(5) << __func__ << " from nonexistent or up osd." 
<< from << ", ignoring" << dendl; send_incremental(op, m->get_epoch()+1); - mon->no_reply(op); + mon.no_reply(op); return true; } @@ -3067,7 +3079,7 @@ bool OSDMonitor::prepare_mark_me_dead(MonOpRequestRef op) ceph_assert(osdmap.is_down(target_osd)); - mon->clog->info() << "osd." << target_osd << " marked itself dead as of e" + mon.clog->info() << "osd." << target_osd << " marked itself dead as of e" << m->get_epoch(); if (!pending_inc.new_xinfo.count(target_osd)) { pending_inc.new_xinfo[target_osd] = osdmap.osd_xinfo[target_osd]; @@ -3078,7 +3090,7 @@ bool OSDMonitor::prepare_mark_me_dead(MonOpRequestRef op) new LambdaContext( [op, this] (int r) { if (r >= 0) { - mon->no_reply(op); // ignore on success + mon.no_reply(op); // ignore on success } } )); @@ -3257,7 +3269,7 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi) << " down" << dendl; pending_inc.new_state[target_osd] = CEPH_OSD_UP; - mon->clog->info() << "osd." << target_osd << " failed (" + mon.clog->info() << "osd." << target_osd << " failed (" << osdmap.crush->get_full_location_ordered_string( target_osd) << ") (" @@ -3286,7 +3298,7 @@ void OSDMonitor::force_failure(int target_osd, int by) } pending_inc.new_xinfo[target_osd].dead_epoch = pending_inc.epoch; - mon->clog->info() << "osd." << target_osd << " failed (" + mon.clog->info() << "osd." << target_osd << " failed (" << osdmap.crush->get_full_location_ordered_string(target_osd) << ") (connection refused reported by osd." << by << ")"; return; @@ -3306,7 +3318,7 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op) ceph_assert(osdmap.is_up(target_osd)); ceph_assert(osdmap.get_addrs(target_osd) == m->get_target_addrs()); - mon->no_reply(op); + mon.no_reply(op); if (m->if_osd_failed()) { // calculate failure time @@ -3316,32 +3328,32 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op) // add a report if (m->is_immediate()) { - mon->clog->debug() << "osd." << m->get_target_osd() + mon.clog->debug() << "osd." 
<< m->get_target_osd() << " reported immediately failed by " << m->get_orig_source(); force_failure(target_osd, reporter); return true; } - mon->clog->debug() << "osd." << m->get_target_osd() << " reported failed by " + mon.clog->debug() << "osd." << m->get_target_osd() << " reported failed by " << m->get_orig_source(); failure_info_t& fi = failure_info[target_osd]; MonOpRequestRef old_op = fi.add_report(reporter, failed_since, op); if (old_op) { - mon->no_reply(old_op); + mon.no_reply(old_op); } return check_failure(now, target_osd, fi); } else { // remove the report - mon->clog->debug() << "osd." << m->get_target_osd() + mon.clog->debug() << "osd." << m->get_target_osd() << " failure report canceled by " << m->get_orig_source(); if (failure_info.count(target_osd)) { failure_info_t& fi = failure_info[target_osd]; MonOpRequestRef report_op = fi.cancel_report(reporter); if (report_op) { - mon->no_reply(report_op); + mon.no_reply(report_op); } if (fi.reporters.empty()) { dout(10) << " removing last failure_info for osd." 
<< target_osd @@ -3377,7 +3389,7 @@ void OSDMonitor::process_failures() o->mark_event(__func__); MOSDFailure *m = o->get_req<MOSDFailure>(); send_latest(o, m->get_epoch()); - mon->no_reply(o); + mon.no_reply(o); } ls.pop_front(); } @@ -3449,9 +3461,9 @@ bool OSDMonitor::preprocess_boot(MonOpRequestRef op) goto ignore; } - if (m->sb.cluster_fsid != mon->monmap->fsid) { + if (m->sb.cluster_fsid != mon.monmap->fsid) { dout(0) << "preprocess_boot on fsid " << m->sb.cluster_fsid - << " != " << mon->monmap->fsid << dendl; + << " != " << mon.monmap->fsid << dendl; goto ignore; } @@ -3484,7 +3496,7 @@ bool OSDMonitor::preprocess_boot(MonOpRequestRef op) stringstream ss; copy(begin(missing), end(missing), make_ostream_joiner(ss, ";")); - mon->clog->info() << "disallowing boot of OSD " + mon.clog->info() << "disallowing boot of OSD " << m->get_orig_source_inst() << " because the osd lacks " << ss.str(); goto ignore; @@ -3494,14 +3506,14 @@ bool OSDMonitor::preprocess_boot(MonOpRequestRef op) // make sure osd versions do not span more than 3 releases if (HAVE_FEATURE(m->osd_features, SERVER_OCTOPUS) && osdmap.require_osd_release < ceph_release_t::mimic) { - mon->clog->info() << "disallowing boot of octopus+ OSD " + mon.clog->info() << "disallowing boot of octopus+ OSD " << m->get_orig_source_inst() << " because require_osd_release < mimic"; goto ignore; } if (HAVE_FEATURE(m->osd_features, SERVER_PACIFIC) && osdmap.require_osd_release < ceph_release_t::nautilus) { - mon->clog->info() << "disallowing boot of pacific+ OSD " + mon.clog->info() << "disallowing boot of pacific+ OSD " << m->get_orig_source_inst() << " because require_osd_release < nautilus"; goto ignore; @@ -3512,7 +3524,7 @@ bool OSDMonitor::preprocess_boot(MonOpRequestRef op) if (osdmap.require_osd_release >= ceph_release_t::luminous && osdmap.test_flag(CEPH_OSDMAP_PGLOG_HARDLIMIT) && !(m->osd_features & CEPH_FEATURE_OSD_PGLOG_HARDLIMIT)) { - mon->clog->info() << "disallowing boot of OSD " + mon.clog->info() << 
"disallowing boot of OSD " << m->get_orig_source_inst() << " because 'pglog_hardlimit' osdmap flag is set and OSD lacks the OSD_PGLOG_HARDLIMIT feature"; goto ignore; @@ -3520,7 +3532,7 @@ bool OSDMonitor::preprocess_boot(MonOpRequestRef op) if (osdmap.stretch_mode_enabled && !(m->osd_features & CEPH_FEATUREMASK_STRETCH_MODE)) { - mon->clog->info() << "disallowing boot of OSD " + mon.clog->info() << "disallowing boot of OSD " << m->get_orig_source_inst() << " because stretch mode is on and OSD lacks support"; goto ignore; @@ -3734,7 +3746,7 @@ void OSDMonitor::_booted(MonOpRequestRef op, bool logit) << " w " << m->sb.weight << " from " << m->sb.current_epoch << dendl; if (logit) { - mon->clog->info() << m->get_source() << " " << m->get_orig_source_addrs() + mon.clog->info() << m->get_source() << " " << m->get_orig_source_addrs() << " boot"; } @@ -3883,7 +3895,7 @@ bool OSDMonitor::prepare_alive(MonOpRequestRef op) int from = m->get_orig_source().num(); if (0) { // we probably don't care much about these - mon->clog->debug() << m->get_orig_source_inst() << " alive"; + mon.clog->debug() << m->get_orig_source_inst() << " alive"; } dout(7) << "prepare_alive want up_thru " << m->want << " have " << m->version @@ -3910,7 +3922,7 @@ bool OSDMonitor::preprocess_pg_created(MonOpRequestRef op) auto m = op->get_req<MOSDPGCreated>(); dout(10) << __func__ << " " << *m << dendl; auto session = op->get_session(); - mon->no_reply(op); + mon.no_reply(op); if (!session) { dout(10) << __func__ << ": no monitor session!" << dendl; return true; @@ -3932,8 +3944,8 @@ bool OSDMonitor::prepare_pg_created(MonOpRequestRef op) auto src = m->get_orig_source(); auto from = src.num(); if (!src.is_osd() || - !mon->osdmon()->osdmap.is_up(from) || - !mon->osdmon()->osdmap.get_addrs(from).legacy_equals( + !mon.osdmon()->osdmap.is_up(from) || + !mon.osdmon()->osdmap.get_addrs(from).legacy_equals( m->get_orig_source_addrs())) { dout(1) << __func__ << " ignoring stats from non-active osd." 
<< dendl; return false; @@ -3978,7 +3990,7 @@ bool OSDMonitor::preprocess_pg_ready_to_merge(MonOpRequestRef op) return false; ignore: - mon->no_reply(op); + mon.no_reply(op); return true; } @@ -4025,13 +4037,13 @@ bool OSDMonitor::prepare_pg_ready_to_merge(MonOpRequestRef op) prob > 0 && prob > (double)(rand() % 1000)/1000.0) { derr << __func__ << " injecting pg merge pg_num bounce" << dendl; - auto n = new MMonCommand(mon->monmap->get_fsid()); + auto n = new MMonCommand(mon.monmap->get_fsid()); n->set_connection(m->get_connection()); n->cmd = { "{\"prefix\":\"osd pool set\", \"pool\": \"" + osdmap.get_pool_name(m->pgid.pool()) + "\", \"var\": \"pg_num_actual\", \"val\": \"" + stringify(m->pgid.ps() + 1) + "\"}" }; - MonOpRequestRef nop = mon->op_tracker.create_request<MonOpRequest>(n); + MonOpRequestRef nop = mon.op_tracker.create_request<MonOpRequest>(n); nop->set_type_service(); wait_for_finished_proposal(op, new C_RetryMessage(this, nop)); } else { @@ -4134,7 +4146,7 @@ bool OSDMonitor::preprocess_pgtemp(MonOpRequestRef op) return true; ignore: - mon->no_reply(op); + mon.no_reply(op); return true; } @@ -4198,7 +4210,7 @@ bool OSDMonitor::preprocess_remove_snaps(MonOpRequestRef op) // check privilege, ignore if failed MonSession *session = op->get_session(); - mon->no_reply(op); + mon.no_reply(op); if (!session) goto ignore; if (!session->caps.is_capable( @@ -4233,7 +4245,7 @@ bool OSDMonitor::preprocess_remove_snaps(MonOpRequestRef op) if (HAVE_FEATURE(m->get_connection()->get_features(), SERVER_OCTOPUS)) { auto reply = make_message<MRemoveSnaps>(); reply->snaps = m->snaps; - mon->send_reply(op, reply.detach()); + mon.send_reply(op, reply.detach()); } ignore: @@ -4298,7 +4310,7 @@ bool OSDMonitor::preprocess_get_purged_snaps(MonOpRequestRef op) map<epoch_t,mempool::osdmap::map<int64_t,snap_interval_set_t>> r; string k = make_purged_snap_epoch_key(m->start); - auto it = mon->store->get_iterator(OSD_SNAP_PREFIX); + auto it = 
mon.store->get_iterator(OSD_SNAP_PREFIX); it->upper_bound(k); unsigned long epoch = m->last; while (it->valid()) { @@ -4334,7 +4346,7 @@ bool OSDMonitor::preprocess_get_purged_snaps(MonOpRequestRef op) auto reply = make_message<MMonGetPurgedSnapsReply>(m->start, epoch); reply->purged_snaps.swap(r); - mon->send_reply(op, reply.detach()); + mon.send_reply(op, reply.detach()); return true; } @@ -4345,7 +4357,7 @@ bool OSDMonitor::preprocess_beacon(MonOpRequestRef op) op->mark_osdmon_event(__func__); // check caps auto session = op->get_session(); - mon->no_reply(op); + mon.no_reply(op); if (!session) { dout(10) << __func__ << " no monitor session!" << dendl; return true; @@ -4420,7 +4432,7 @@ void OSDMonitor::send_latest(MonOpRequestRef op, epoch_t start) MOSDMap *OSDMonitor::build_latest_full(uint64_t features) { - MOSDMap *r = new MOSDMap(mon->monmap->fsid, features); + MOSDMap *r = new MOSDMap(mon.monmap->fsid, features); get_version_full(osdmap.get_epoch(), features, r->maps[osdmap.get_epoch()]); r->oldest_map = get_first_committed(); r->newest_map = osdmap.get_epoch(); @@ -4431,7 +4443,7 @@ MOSDMap *OSDMonitor::build_incremental(epoch_t from, epoch_t to, uint64_t featur { dout(10) << "build_incremental [" << from << ".." 
<< to << "] with features " << std::hex << features << std::dec << dendl; - MOSDMap *m = new MOSDMap(mon->monmap->fsid, features); + MOSDMap *m = new MOSDMap(mon.monmap->fsid, features); m->oldest_map = get_first_committed(); m->newest_map = osdmap.get_epoch(); @@ -4465,7 +4477,7 @@ void OSDMonitor::send_full(MonOpRequestRef op) { op->mark_osdmon_event(__func__); dout(5) << "send_full to " << op->get_req()->get_orig_source_inst() << dendl; - mon->send_reply(op, build_latest_full(op->get_session()->con_features)); + mon.send_reply(op, build_latest_full(op->get_session()->con_features)); } void OSDMonitor::send_incremental(MonOpRequestRef op, epoch_t first) @@ -4500,7 +4512,7 @@ void OSDMonitor::send_incremental(epoch_t first, // get feature of the peer // use quorum_con_features, if it's an anonymous connection. uint64_t features = session->con_features ? session->con_features : - mon->get_quorum_con_features(); + mon.get_quorum_con_features(); if (first <= session->osd_epoch) { dout(10) << __func__ << " " << session->name << " should already have epoch " @@ -4523,7 +4535,7 @@ void OSDMonitor::send_incremental(epoch_t first, m->maps[first] = bl; if (req) { - mon->send_reply(req, m); + mon.send_reply(req, m); session->osd_epoch = first; return; } else { @@ -4541,7 +4553,7 @@ void OSDMonitor::send_incremental(epoch_t first, if (req) { // send some maps. it may not be all of them, but it will get them // started. 
- mon->send_reply(req, m); + mon.send_reply(req, m); } else { session->con->send_message(m); first = last + 1; @@ -4554,7 +4566,7 @@ void OSDMonitor::send_incremental(epoch_t first, int OSDMonitor::get_version(version_t ver, bufferlist& bl) { - return get_version(ver, mon->get_quorum_con_features(), bl); + return get_version(ver, mon.get_quorum_con_features(), bl); } void OSDMonitor::reencode_incremental_map(bufferlist& bl, uint64_t features) @@ -4613,7 +4625,7 @@ int OSDMonitor::get_version(version_t ver, uint64_t features, bufferlist& bl) // reencode once and then cache the (identical) result under both // feature masks. if (significant_features != - OSDMap::get_significant_features(mon->get_quorum_con_features())) { + OSDMap::get_significant_features(mon.get_quorum_con_features())) { reencode_incremental_map(bl, features); } inc_osd_cache.add_bytes({ver, significant_features}, bl); @@ -4656,7 +4668,7 @@ int OSDMonitor::get_full_from_pinned_map(version_t ver, bufferlist& bl) bufferlist osdm_bl; bool has_cached_osdmap = false; for (version_t v = ver-1; v >= closest_pinned; --v) { - if (full_osd_cache.lookup({v, mon->get_quorum_con_features()}, + if (full_osd_cache.lookup({v, mon.get_quorum_con_features()}, &osdm_bl)) { dout(10) << __func__ << " found map in cache ver " << v << dendl; closest_pinned = v; @@ -4703,7 +4715,7 @@ int OSDMonitor::get_full_from_pinned_map(version_t ver, bufferlist& bl) uint64_t f = encode_features; if (!f) { - f = (mon->quorum_con_features ? mon->quorum_con_features : -1); + f = (mon.quorum_con_features ? mon.quorum_con_features : -1); } // encode osdmap to force calculating crcs @@ -4730,7 +4742,7 @@ int OSDMonitor::get_full_from_pinned_map(version_t ver, bufferlist& bl) << " last incremental map didn't have features;" << " defaulting to quorum's or all" << dendl; encode_features = - (mon->quorum_con_features ? mon->quorum_con_features : -1); + (mon.quorum_con_features ? 
mon.quorum_con_features : -1); } osdm.encode(bl, encode_features | CEPH_FEATURE_RESERVED); @@ -4739,7 +4751,7 @@ int OSDMonitor::get_full_from_pinned_map(version_t ver, bufferlist& bl) int OSDMonitor::get_version_full(version_t ver, bufferlist& bl) { - return get_version_full(ver, mon->get_quorum_con_features(), bl); + return get_version_full(ver, mon.get_quorum_con_features(), bl); } int OSDMonitor::get_version_full(version_t ver, uint64_t features, @@ -4762,7 +4774,7 @@ int OSDMonitor::get_version_full(version_t ver, uint64_t features, // reencode once and then cache the (identical) result under both // feature masks. if (significant_features != - OSDMap::get_significant_features(mon->get_quorum_con_features())) { + OSDMap::get_significant_features(mon.get_quorum_con_features())) { reencode_full_map(bl, features); } full_osd_cache.add_bytes({ver, significant_features}, bl); @@ -4802,8 +4814,8 @@ void OSDMonitor::check_osdmap_subs() if (!osdmap.get_epoch()) { return; } - auto osdmap_subs = mon->session_map.subs.find("osdmap"); - if (osdmap_subs == mon->session_map.subs.end()) { + auto osdmap_subs = mon.session_map.subs.find("osdmap"); + if (osdmap_subs == mon.session_map.subs.end()) { return; } auto p = osdmap_subs->second->begin(); @@ -4824,7 +4836,7 @@ void OSDMonitor::check_osdmap_sub(Subscription *sub) else sub->session->con->send_message(build_latest_full(sub->session->con_features)); if (sub->onetime) - mon->session_map.remove_sub(sub); + mon.session_map.remove_sub(sub); else sub->next = osdmap.get_epoch() + 1; } @@ -4836,7 +4848,7 @@ void OSDMonitor::check_pg_creates_subs() return; } ceph_assert(osdmap.get_up_osd_features() & CEPH_FEATURE_MON_STATEFUL_SUB); - mon->with_session_map([this](const MonSessionMap& session_map) { + mon.with_session_map([this](const MonSessionMap& session_map) { auto pg_creates_subs = session_map.subs.find("osd_pg_creates"); if (pg_creates_subs == session_map.subs.end()) { return; @@ -4854,7 +4866,7 @@ void 
OSDMonitor::check_pg_creates_sub(Subscription *sub) // only send these if the OSD is up. we will check_subs() when they do // come up so they will get the creates then. if (sub->session->name.is_osd() && - mon->osdmon()->osdmap.is_up(sub->session->name.num())) { + mon.osdmon()->osdmap.is_up(sub->session->name.num())) { sub->next = send_pg_creates(sub->session->name.num(), sub->session->con.get(), sub->next); @@ -4867,7 +4879,7 @@ void OSDMonitor::do_application_enable(int64_t pool_id, const std::string &app_value, bool force) { - ceph_assert(paxos->is_plugged() && is_writeable()); + ceph_assert(paxos.is_plugged() && is_writeable()); dout(20) << __func__ << ": pool_id=" << pool_id << ", app_name=" << app_name << dendl; @@ -5098,7 +5110,7 @@ void OSDMonitor::tick() } } - if (!mon->is_leader()) return; + if (!mon.is_leader()) return; bool do_propose = false; utime_t now = ceph_clock_now(); @@ -5197,7 +5209,7 @@ void OSDMonitor::tick() do_propose = true; - mon->clog->info() << "Marking osd." << o << " out (has been down for " + mon.clog->info() << "Marking osd." << o << " out (has been down for " << int(down.sec()) << " seconds)"; } else continue; @@ -5260,7 +5272,7 @@ bool OSDMonitor::handle_osd_timeouts(const utime_t &now, std::map<int, std::pair<utime_t, int>> &last_osd_report) { utime_t timeo(g_conf()->mon_osd_report_timeout, 0); - if (now - mon->get_leader_since() < timeo) { + if (now - mon.get_leader_since() < timeo) { // We haven't been the leader for long enough to consider OSD timeouts return false; } @@ -5288,7 +5300,7 @@ bool OSDMonitor::handle_osd_timeouts(const utime_t &now, int mon_osd_report_timeout = g_conf()->mon_osd_report_timeout; utime_t max_timeout(std::max(mon_osd_report_timeout, 2 * t->second.second), 0); if (diff > max_timeout) { - mon->clog->info() << "osd." << i << " marked down after no beacon for " + mon.clog->info() << "osd." << i << " marked down after no beacon for " << diff << " seconds"; derr << "no beacon from osd." 
<< i << " since " << t->second.first << ", " << diff << " seconds ago. marking down" << dendl; @@ -5385,7 +5397,8 @@ namespace { COMPRESSION_MAX_BLOB_SIZE, COMPRESSION_MIN_BLOB_SIZE, CSUM_TYPE, CSUM_MAX_BLOCK, CSUM_MIN_BLOCK, FINGERPRINT_ALGORITHM, PG_AUTOSCALE_MODE, PG_NUM_MIN, TARGET_SIZE_BYTES, TARGET_SIZE_RATIO, - PG_AUTOSCALE_BIAS }; + PG_AUTOSCALE_BIAS, DEDUP_TIER, DEDUP_CHUNK_ALGORITHM, + DEDUP_CDC_CHUNK_SIZE }; std::set<osd_pool_get_choices> subtract_second_from_first(const std::set<osd_pool_get_choices>& first, @@ -5411,14 +5424,14 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) cmdmap_t cmdmap; if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); return true; } MonSession *session = op->get_session(); if (!session) { derr << __func__ << " no session" << dendl; - mon->reply_command(op, -EACCES, "access denied", get_last_committed()); + mon.reply_command(op, -EACCES, "access denied", get_last_committed()); return true; } @@ -5607,7 +5620,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) rdata.append(osdmap_bl); ss << "got osdmap epoch " << p->get_epoch(); } else if (prefix == "osd getcrushmap") { - p->crush->encode(rdata, mon->get_quorum_con_features()); + p->crush->encode(rdata, mon.get_quorum_con_features()); ss << p->get_crush_version(); } else if (prefix == "osd ls-tree") { string bucket_name; @@ -6120,6 +6133,9 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) {"target_size_bytes", TARGET_SIZE_BYTES}, {"target_size_ratio", TARGET_SIZE_RATIO}, {"pg_autoscale_bias", PG_AUTOSCALE_BIAS}, + {"dedup_tier", DEDUP_TIER}, + {"dedup_chunk_algorithm", DEDUP_CHUNK_ALGORITHM}, + {"dedup_cdc_chunk_size", DEDUP_CDC_CHUNK_SIZE}, }; typedef std::set<osd_pool_get_choices> choices_set_t; @@ -6336,6 +6352,9 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) case TARGET_SIZE_BYTES: case 
TARGET_SIZE_RATIO: case PG_AUTOSCALE_BIAS: + case DEDUP_TIER: + case DEDUP_CHUNK_ALGORITHM: + case DEDUP_CDC_CHUNK_SIZE: pool_opts_t::key_t key = pool_opts_t::get_opt_desc(i->first).key; if (p->opts.is_set(key)) { if(*it == CSUM_TYPE) { @@ -6493,6 +6512,9 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) case TARGET_SIZE_BYTES: case TARGET_SIZE_RATIO: case PG_AUTOSCALE_BIAS: + case DEDUP_TIER: + case DEDUP_CHUNK_ALGORITHM: + case DEDUP_CDC_CHUNK_SIZE: for (i = ALL_CHOICES.begin(); i != ALL_CHOICES.end(); ++i) { if (i->second == *it) break; @@ -6529,7 +6551,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) goto reply; } const pg_pool_t *p = osdmap.get_pg_pool(poolid); - const pool_stat_t* pstat = mon->mgrstatmon()->get_pool_stat(poolid); + const pool_stat_t* pstat = mon.mgrstatmon()->get_pool_stat(poolid); const object_stat_sum_t& sum = pstat->stats.sum; if (f) { f->open_object_section("pool_quotas"); @@ -6962,7 +6984,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) reply: string rs; getline(ss, rs); - mon->reply_command(op, r, rs, rdata, get_last_committed()); + mon.reply_command(op, r, rs, rdata, get_last_committed()); return true; } @@ -7015,7 +7037,7 @@ int OSDMonitor::lookup_purged_snap( snapid_t *begin, snapid_t *end) { string k = make_purged_snap_key(pool, snap); - auto it = mon->store->get_iterator(OSD_SNAP_PREFIX); + auto it = mon.store->get_iterator(OSD_SNAP_PREFIX); it->lower_bound(k); if (!it->valid()) { dout(20) << __func__ @@ -7118,7 +7140,7 @@ void OSDMonitor::insert_purged_snap_update( bool OSDMonitor::try_prune_purged_snaps() { - if (!mon->mgrstatmon()->is_readable()) { + if (!mon.mgrstatmon()->is_readable()) { return false; } if (!pending_inc.new_purged_snaps.empty()) { @@ -7133,7 +7155,7 @@ bool OSDMonitor::try_prune_purged_snaps() dout(10) << __func__ << " max_prune " << max_prune << dendl; unsigned actually_pruned = 0; - auto& purged_snaps = mon->mgrstatmon()->get_digest().purged_snaps; + auto& purged_snaps = 
mon.mgrstatmon()->get_digest().purged_snaps; for (auto& p : osdmap.get_pools()) { auto q = purged_snaps.find(p.first); if (q == purged_snaps.end()) { @@ -7197,14 +7219,14 @@ bool OSDMonitor::try_prune_purged_snaps() bool OSDMonitor::update_pools_status() { - if (!mon->mgrstatmon()->is_readable()) + if (!mon.mgrstatmon()->is_readable()) return false; bool ret = false; auto& pools = osdmap.get_pools(); for (auto it = pools.begin(); it != pools.end(); ++it) { - const pool_stat_t *pstat = mon->mgrstatmon()->get_pool_stat(it->first); + const pool_stat_t *pstat = mon.mgrstatmon()->get_pool_stat(it->first); if (!pstat) continue; const object_stat_sum_t& sum = pstat->stats.sum; @@ -7219,7 +7241,7 @@ bool OSDMonitor::update_pools_status() if (pool_is_full) continue; - mon->clog->info() << "pool '" << pool_name + mon.clog->info() << "pool '" << pool_name << "' no longer out of quota; removing NO_QUOTA flag"; // below we cancel FLAG_FULL too, we'll set it again in // OSDMonitor::encode_pending if it still fails the osd-full checking. 
@@ -7232,13 +7254,13 @@ bool OSDMonitor::update_pools_status() if (pool.quota_max_bytes > 0 && (uint64_t)sum.num_bytes >= pool.quota_max_bytes) { - mon->clog->warn() << "pool '" << pool_name << "' is full" + mon.clog->warn() << "pool '" << pool_name << "' is full" << " (reached quota's max_bytes: " << byte_u_t(pool.quota_max_bytes) << ")"; } if (pool.quota_max_objects > 0 && (uint64_t)sum.num_objects >= pool.quota_max_objects) { - mon->clog->warn() << "pool '" << pool_name << "' is full" + mon.clog->warn() << "pool '" << pool_name << "' is full" << " (reached quota's max_objects: " << pool.quota_max_objects << ")"; } @@ -7307,7 +7329,7 @@ int OSDMonitor::crush_rename_bucket(const string& srcname, return ret; pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); *ss << "renamed bucket " << srcname << " into " << dstname; return 0; } @@ -7414,7 +7436,7 @@ int OSDMonitor::crush_rule_create_erasure(const string &name, return err; *rule = err; pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); return 0; } } @@ -7447,7 +7469,7 @@ int OSDMonitor::check_cluster_features(uint64_t features, { stringstream unsupported_ss; int unsupported_count = 0; - if ((mon->get_quorum_con_features() & features) != features) { + if ((mon.get_quorum_con_features() & features) != features) { unsupported_ss << "the monitor cluster"; ++unsupported_count; } @@ -7490,7 +7512,7 @@ bool OSDMonitor::validate_crush_against_features(const CrushWrapper *newcrush, stringstream& ss) { OSDMap::Incremental new_pending = pending_inc; - encode(*newcrush, new_pending.crush, mon->get_quorum_con_features()); + encode(*newcrush, new_pending.crush, mon.get_quorum_con_features()); OSDMap newmap; newmap.deepish_copy_from(osdmap); newmap.apply_incremental(new_pending); @@ -8660,6 +8682,41 @@ 
int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap, ss << "pg_autoscale_bias must be between 0 and 1000"; return -EINVAL; } + } else if (var == "dedup_tier") { + if (interr.empty()) { + ss << "expecting value 'pool name'"; + return -EINVAL; + } + // Current base tier in dedup does not support ec pool + if (p.is_erasure()) { + ss << "pool '" << poolstr + << "' is an ec pool, which cannot be a base tier"; + return -ENOTSUP; + } + int64_t lowtierpool_id = osdmap.lookup_pg_pool_name(val); + if (lowtierpool_id < 0) { + ss << "unrecognized pool '" << val << "'"; + return -ENOENT; + } + const pg_pool_t *tp = osdmap.get_pg_pool(lowtierpool_id); + ceph_assert(tp); + n = lowtierpool_id; + // The original input is string (pool name), but we convert it to int64_t. + // So, clear interr + interr.clear(); + } else if (var == "dedup_chunk_algorithm") { + if (!unset) { + auto alg = pg_pool_t::get_dedup_chunk_algorithm_from_str(val); + if (!alg) { + ss << "unrecognized fingerprint_algorithm '" << val << "'"; + return -EINVAL; + } + } + } else if (var == "dedup_cdc_chunk_size") { + if (interr.length()) { + ss << "error parsing int value '" << val << "': " << interr; + return -EINVAL; + } } pool_opts_t::opt_desc_t desc = pool_opts_t::get_opt_desc(var); @@ -8931,7 +8988,7 @@ int OSDMonitor::_prepare_command_osd_crush_remove( void OSDMonitor::do_osd_crush_remove(CrushWrapper& newcrush) { pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); } int OSDMonitor::prepare_command_osd_crush_remove( @@ -9062,7 +9119,7 @@ out: dout(20) << __func__ << " set " << name << " device_class " << device_class << dendl; pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); } } else { dout(20) << __func__ << " no device_class" << dendl; @@ -9181,7 +9238,7 @@ int 
OSDMonitor::prepare_command_osd_new( string uuidstr; int64_t id = -1; - ceph_assert(paxos->is_plugged()); + ceph_assert(paxos.is_plugged()); dout(10) << __func__ << " " << op << dendl; @@ -9335,7 +9392,7 @@ int OSDMonitor::prepare_command_osd_new( dout(10) << __func__ << " validate secrets using osd id " << id << dendl; - err = mon->authmon()->validate_osd_new(id, uuid, + err = mon.authmon()->validate_osd_new(id, uuid, cephx_secret, lockbox_secret, cephx_entity, @@ -9352,7 +9409,7 @@ int OSDMonitor::prepare_command_osd_new( } if (has_lockbox) { - svc = (ConfigKeyService*)mon->config_key_service; + svc = mon.config_key_service.get(); err = svc->validate_osd_new(uuid, dmcrypt_key, ss); if (err < 0) { return err; @@ -9390,7 +9447,7 @@ int OSDMonitor::prepare_command_osd_new( ceph_assert((lockbox_secret.empty() && dmcrypt_key.empty()) || (!lockbox_secret.empty() && !dmcrypt_key.empty())); - err = mon->authmon()->do_osd_new(cephx_entity, + err = mon.authmon()->do_osd_new(cephx_entity, lockbox_entity, has_lockbox); ceph_assert(0 == err); @@ -9443,14 +9500,14 @@ bool OSDMonitor::prepare_command(MonOpRequestRef op) cmdmap_t cmdmap; if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); - mon->reply_command(op, -EINVAL, rs, get_last_committed()); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); return true; } MonSession *session = op->get_session(); if (!session) { derr << __func__ << " no session" << dendl; - mon->reply_command(op, -EACCES, "access denied", get_last_committed()); + mon.reply_command(op, -EACCES, "access denied", get_last_committed()); return true; } @@ -9497,7 +9554,7 @@ int OSDMonitor::prepare_command_osd_destroy( int32_t id, stringstream& ss) { - ceph_assert(paxos->is_plugged()); + ceph_assert(paxos.is_plugged()); // we check if the osd exists for the benefit of `osd purge`, which may // have previously removed the osd. 
If the osd does not exist, return @@ -9525,7 +9582,7 @@ int OSDMonitor::prepare_command_osd_destroy( EntityName cephx_entity, lockbox_entity; bool idempotent_auth = false, idempotent_cks = false; - int err = mon->authmon()->validate_osd_destroy(id, uuid, + int err = mon.authmon()->validate_osd_destroy(id, uuid, cephx_entity, lockbox_entity, ss); @@ -9537,7 +9594,7 @@ int OSDMonitor::prepare_command_osd_destroy( } } - ConfigKeyService *svc = (ConfigKeyService*)mon->config_key_service; + auto svc = mon.config_key_service.get(); err = svc->validate_osd_destroy(id, uuid); if (err < 0) { ceph_assert(err == -ENOENT); @@ -9546,7 +9603,7 @@ int OSDMonitor::prepare_command_osd_destroy( } if (!idempotent_auth) { - err = mon->authmon()->do_osd_destroy(cephx_entity, lockbox_entity); + err = mon.authmon()->do_osd_destroy(cephx_entity, lockbox_entity); ceph_assert(0 == err); } @@ -9569,7 +9626,7 @@ int OSDMonitor::prepare_command_osd_purge( int32_t id, stringstream& ss) { - ceph_assert(paxos->is_plugged()); + ceph_assert(paxos.is_plugged()); dout(10) << __func__ << " purging osd." << id << dendl; ceph_assert(!osdmap.is_up(id)); @@ -9723,8 +9780,8 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, // (multiple racing updaters may not both get reliable success) // but we expect crush updaters (via this interface) to be rare-ish. 
bufferlist current, proposed; - osdmap.crush->encode(current, mon->get_quorum_con_features()); - crush.encode(proposed, mon->get_quorum_con_features()); + osdmap.crush->encode(current, mon.get_quorum_con_features()); + crush.encode(proposed, mon.get_quorum_con_features()); if (current.contents_equal(proposed)) { dout(10) << __func__ << " proposed matches current and version equals previous" @@ -9799,7 +9856,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, goto reply; } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, get_last_committed() + 1)); return true; @@ -9877,7 +9934,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, if (!updated.empty()) { pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); ss << "set osd(s) " << updated << " to class '" << device_class << "'"; getline(ss, rs); wait_for_finished_proposal(op, @@ -9939,7 +9996,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, if (!updated.empty()) { pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); ss << "done removing class of osd(s): " << updated; getline(ss, rs); wait_for_finished_proposal(op, @@ -9971,7 +10028,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } int class_id = newcrush.get_or_create_class_id(device_class); pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); ss << "created class " << device_class << " with id " << class_id << " to crush map"; goto update; @@ -10053,7 +10110,7 @@ bool 
OSDMonitor::prepare_command_impl(MonOpRequestRef op, } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); ss << "removed class " << device_class << " with id " << class_id << " from crush map"; goto update; @@ -10086,7 +10143,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); ss << "rename class '" << srcname << "' to '" << dstname << "'"; goto update; } else if (prefix == "osd crush add-bucket") { @@ -10156,7 +10213,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); if (loc.empty()) { ss << "added bucket " << name << " type " << typestr << " to crush map"; @@ -10228,7 +10285,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, goto reply; } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); goto update; } else if (prefix == "osd crush weight-set rm" || @@ -10250,7 +10307,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } newcrush.rm_choose_args(pool); pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); goto update; } else if (prefix == "osd crush weight-set reweight" || @@ -10306,7 +10363,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } err = 0; pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); goto update; } else if 
(osdid_present && (prefix == "osd crush set" || prefix == "osd crush add")) { @@ -10372,7 +10429,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); ss << action << " item id " << osdid << " name '" << osd_name << "' weight " << weight << " at location " << loc << " to crush map"; getline(ss, rs); @@ -10421,7 +10478,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } if (err > 0) { pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); ss << "create-or-move updating item name '" << osd_name << "' weight " << weight << " at location " << loc << " to crush map"; @@ -10464,7 +10521,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, if (err >= 0) { ss << "moved item id " << id << " name '" << name << "' to location " << loc << " in crush map"; pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); getline(ss, rs); wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, get_last_committed() + 1)); @@ -10519,7 +10576,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } ss << "swapped bucket of " << source << " to " << dest; pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, err, ss.str(), get_last_committed() + 1)); @@ -10566,7 +10623,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, ss << "linked item id " << id << " name '" << name << "' to location " << loc << " in crush map"; pending_inc.crush.clear(); - 
newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); } else { ss << "cannot link item id " << id << " name '" << name << "' to location " << loc; @@ -10647,7 +10704,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, newcrush.reweight(cct); pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); ss << "reweighted crush hierarchy"; getline(ss, rs); wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, @@ -10685,7 +10742,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, if (err < 0) goto reply; pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); ss << "reweighted item id " << id << " name '" << name << "' to " << w << " in crush map"; getline(ss, rs); @@ -10724,7 +10781,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, if (err < 0) goto reply; pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); ss << "reweighted subtree id " << id << " name '" << name << "' to " << w << " in crush map"; getline(ss, rs); @@ -10764,7 +10821,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); ss << "adjusted tunables profile to " << profile; getline(ss, rs); wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, @@ -10805,7 +10862,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, 
mon.get_quorum_con_features()); ss << "adjusted tunable " << tunable << " to " << value; getline(ss, rs); wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, @@ -10846,7 +10903,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); } getline(ss, rs); wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, @@ -10886,7 +10943,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); } getline(ss, rs); wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, @@ -11094,7 +11151,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); } getline(ss, rs); wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, @@ -11134,7 +11191,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, goto reply; } pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); getline(ss, rs); wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, get_last_committed() + 1)); @@ -11225,7 +11282,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, cmd_getval(cmdmap, "yes_i_really_mean_it", sure); if (!sure) { FeatureMap m; - mon->get_combined_feature_map(&m); + mon.get_combined_feature_map(&m); uint64_t features = ceph_release_features(to_integer<int>(vno)); bool first = true; bool ok = true; @@ -11381,7 +11438,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, goto reply; } 
if (rel == ceph_release_t::mimic) { - if (!mon->monmap->get_required_features().contains_all( + if (!mon.monmap->get_required_features().contains_all( ceph::features::mon::FEATURE_MIMIC)) { ss << "not all mons are mimic"; err = -EPERM; @@ -11394,7 +11451,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, goto reply; } } else if (rel == ceph_release_t::nautilus) { - if (!mon->monmap->get_required_features().contains_all( + if (!mon.monmap->get_required_features().contains_all( ceph::features::mon::FEATURE_NAUTILUS)) { ss << "not all mons are nautilus"; err = -EPERM; @@ -11407,7 +11464,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, goto reply; } } else if (rel == ceph_release_t::octopus) { - if (!mon->monmap->get_required_features().contains_all( + if (!mon.monmap->get_required_features().contains_all( ceph::features::mon::FEATURE_OCTOPUS)) { ss << "not all mons are octopus"; err = -EPERM; @@ -11420,7 +11477,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, goto reply; } } else if (rel == ceph_release_t::pacific) { - if (!mon->monmap->get_required_features().contains_all( + if (!mon.monmap->get_required_features().contains_all( ceph::features::mon::FEATURE_PACIFIC)) { ss << "not all mons are pacific"; err = -EPERM; @@ -11530,7 +11587,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, << " seconds"; } - mon->clog->info() << msg.str(); + mon.clog->info() << msg.str(); any = true; } } else if (prefix == "osd in") { @@ -12272,10 +12329,10 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, */ // make sure authmon is writeable. 
- if (!mon->authmon()->is_writeable()) { + if (!mon.authmon()->is_writeable()) { dout(10) << __func__ << " waiting for auth mon to be writeable for " << "osd destroy" << dendl; - mon->authmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); + mon.authmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); return false; } @@ -12330,7 +12387,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, bool goto_reply = false; - paxos->plug(); + paxos.plug(); if (is_destroy) { err = prepare_command_osd_destroy(id, ss); // we checked above that it should exist. @@ -12343,7 +12400,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, goto_reply = true; } } - paxos->unplug(); + paxos.unplug(); if (err < 0 || goto_reply) { goto reply; @@ -12364,10 +12421,10 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } else if (prefix == "osd new") { // make sure authmon is writeable. - if (!mon->authmon()->is_writeable()) { + if (!mon.authmon()->is_writeable()) { dout(10) << __func__ << " waiting for auth mon to be writeable for " << "osd new" << dendl; - mon->authmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); + mon.authmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); return false; } @@ -12383,9 +12440,9 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, dout(20) << __func__ << " osd new params " << param_map << dendl; - paxos->plug(); + paxos.plug(); err = prepare_command_osd_new(op, cmdmap, param_map, ss, f.get()); - paxos->unplug(); + paxos.unplug(); if (err < 0) { goto reply; @@ -12958,7 +13015,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, // make sure new tier is empty string force_nonempty; cmd_getval(cmdmap, "force_nonempty", force_nonempty); - const pool_stat_t *pstats = mon->mgrstatmon()->get_pool_stat(tierpool_id); + const pool_stat_t *pstats = mon.mgrstatmon()->get_pool_stat(tierpool_id); if (pstats && pstats->stats.sum.num_objects != 0 && force_nonempty != "--force-nonempty") { ss << 
"tier pool '" << tierpoolstr << "' is not empty; --force-nonempty to force"; @@ -13265,7 +13322,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, mode != pg_pool_t::CACHEMODE_READPROXY))) { const pool_stat_t* pstats = - mon->mgrstatmon()->get_pool_stat(pool_id); + mon.mgrstatmon()->get_pool_stat(pool_id); if (pstats && pstats->stats.sum.num_objects_dirty > 0) { ss << "unable to set cache-mode '" @@ -13333,7 +13390,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } // make sure new tier is empty const pool_stat_t *pstats = - mon->mgrstatmon()->get_pool_stat(tierpool_id); + mon.mgrstatmon()->get_pool_stat(tierpool_id); if (pstats && pstats->stats.sum.num_objects != 0) { ss << "tier pool '" << tierpoolstr << "' is not empty"; err = -ENOTEMPTY; @@ -13525,11 +13582,11 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, err = -EPERM; goto reply; } - mon->go_recovery_stretch_mode(); + mon.go_recovery_stretch_mode(); ss << "Triggering recovery stretch mode"; err = 0; goto reply; -} else { + } else { err = -EINVAL; } @@ -13537,7 +13594,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, getline(ss, rs); if (err < 0 && rs.length() == 0) rs = cpp_strerror(err); - mon->reply_command(op, err, rs, rdata, get_last_committed()); + mon.reply_command(op, err, rs, rdata, get_last_committed()); return ret; update: @@ -13572,7 +13629,7 @@ bool OSDMonitor::enforce_pool_op_caps(MonOpRequestRef op) pool_name = &osdmap.get_pool_name(m->pool); } - if (!is_unmanaged_snap_op_permitted(cct, mon->key_server, + if (!is_unmanaged_snap_op_permitted(cct, mon.key_server, session->entity_name, session->caps, session->get_peer_socket_addr(), pool_name)) { @@ -13607,9 +13664,9 @@ bool OSDMonitor::preprocess_pool_op(MonOpRequestRef op) return true; } - if (m->fsid != mon->monmap->fsid) { + if (m->fsid != mon.monmap->fsid) { dout(0) << __func__ << " drop message on fsid " << m->fsid - << " != " << mon->monmap->fsid << " for " << *m << dendl; + << " != " << 
mon.monmap->fsid << " for " << *m << dendl; _pool_op_reply(op, -EINVAL, osdmap.get_epoch()); return true; } @@ -13905,7 +13962,7 @@ int OSDMonitor::_check_remove_pool(int64_t pool_id, const pg_pool_t& pool, const string& poolstr = osdmap.get_pool_name(pool_id); // If the Pool is in use by CephFS, refuse to delete it - FSMap const &pending_fsmap = mon->mdsmon()->get_pending_fsmap(); + FSMap const &pending_fsmap = mon.mdsmon()->get_pending_fsmap(); if (pending_fsmap.pool_in_use(pool_id)) { *ss << "pool '" << poolstr << "' is in use by CephFS"; return -EBUSY; @@ -13954,7 +14011,7 @@ bool OSDMonitor::_check_become_tier( const std::string &tier_pool_name = osdmap.get_pool_name(tier_pool_id); const std::string &base_pool_name = osdmap.get_pool_name(base_pool_id); - const FSMap &pending_fsmap = mon->mdsmon()->get_pending_fsmap(); + const FSMap &pending_fsmap = mon.mdsmon()->get_pending_fsmap(); if (pending_fsmap.pool_in_use(tier_pool_id)) { *ss << "pool '" << tier_pool_name << "' is in use by CephFS"; *err = -EBUSY; @@ -14014,7 +14071,7 @@ bool OSDMonitor::_check_remove_tier( const std::string &base_pool_name = osdmap.get_pool_name(base_pool_id); // Apply CephFS-specific checks - const FSMap &pending_fsmap = mon->mdsmon()->get_pending_fsmap(); + const FSMap &pending_fsmap = mon.mdsmon()->get_pending_fsmap(); if (pending_fsmap.pool_in_use(base_pool_id)) { if (base_pool->is_erasure() && !base_pool->allows_ecoverwrites()) { // If the underlying pool is erasure coded and does not allow EC @@ -14151,7 +14208,7 @@ int OSDMonitor::_prepare_remove_pool( dout(10) << __func__ << " removing choose_args for pool " << pool << dendl; newcrush.rm_choose_args(pool); pending_inc.crush.clear(); - newcrush.encode(pending_inc.crush, mon->get_quorum_con_features()); + newcrush.encode(pending_inc.crush, mon.get_quorum_con_features()); } return 0; } @@ -14200,7 +14257,7 @@ void OSDMonitor::_pool_op_reply(MonOpRequestRef op, dout(20) << "_pool_op_reply " << ret << dendl; MPoolOpReply *reply = 
new MPoolOpReply(m->fsid, m->get_tid(), ret, epoch, get_last_committed(), blp); - mon->send_reply(op, reply); + mon.send_reply(op, reply); } void OSDMonitor::convert_pool_priorities(void) @@ -14473,9 +14530,9 @@ struct CMonExitRecovery : public Context { void OSDMonitor::try_end_recovery_stretch_mode(bool force) { dout(20) << __func__ << dendl; - if (!mon->is_leader()) return; - if (!mon->is_degraded_stretch_mode()) return; - if (!mon->is_recovering_stretch_mode()) return; + if (!mon.is_leader()) return; + if (!mon.is_degraded_stretch_mode()) return; + if (!mon.is_recovering_stretch_mode()) return; if (!is_readable()) { wait_for_readable_ctx(new CMonExitRecovery(this, force)); return; @@ -14486,16 +14543,16 @@ void OSDMonitor::try_end_recovery_stretch_mode(bool force) ceph_clock_now() - g_conf().get_val<double>("mon_stretch_recovery_min_wait") > stretch_recovery_triggered) || force)) { - if (!mon->mgrstatmon()->is_readable()) { - mon->mgrstatmon()->wait_for_readable_ctx(new CMonExitRecovery(this, force)); + if (!mon.mgrstatmon()->is_readable()) { + mon.mgrstatmon()->wait_for_readable_ctx(new CMonExitRecovery(this, force)); return; } - const PGMapDigest& pgd = mon->mgrstatmon()->get_digest(); + const PGMapDigest& pgd = mon.mgrstatmon()->get_digest(); double misplaced, degraded, inactive, unknown; pgd.get_recovery_stats(&misplaced, &degraded, &inactive, &unknown); if (force || (degraded == 0.0 && inactive == 0.0 && unknown == 0.0)) { // we can exit degraded stretch mode! 
- mon->trigger_healthy_stretch_mode(); + mon.trigger_healthy_stretch_mode(); } } } diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 02f0fa36cce..ea04eb83350 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -677,7 +677,7 @@ protected: void set_default_laggy_params(int target_osd); public: - OSDMonitor(CephContext *cct, Monitor *mn, Paxos *p, const std::string& service_name); + OSDMonitor(CephContext *cct, Monitor &mn, Paxos &p, const std::string& service_name); void tick() override; // check state, take actions diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 176b64e1571..d834a42e1a5 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -3487,7 +3487,7 @@ int process_pg_map_command( string omap_stats_note = "\n* NOTE: Omap statistics are gathered during deep scrub and " "may be inaccurate soon afterwards depending on utilization. See " - "http://docs.ceph.com/docs/master/dev/placement-group/#omap-statistics " + "http://docs.ceph.com/en/latest/dev/placement-group/#omap-statistics " "for further details.\n"; bool omap_stats_note_required = false; @@ -3496,13 +3496,11 @@ int process_pg_map_command( if (prefix == "pg dump_json") { vector<string> v; v.push_back(string("all")); - cmd_putval(g_ceph_context, cmdmap, "format", string("json")); cmd_putval(g_ceph_context, cmdmap, "dumpcontents", v); prefix = "pg dump"; } else if (prefix == "pg dump_pools_json") { vector<string> v; v.push_back(string("pools")); - cmd_putval(g_ceph_context, cmdmap, "format", string("json")); cmd_putval(g_ceph_context, cmdmap, "dumpcontents", v); prefix = "pg dump"; } else if (prefix == "pg ls-by-primary") { diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index 7f64bf7446a..21f244239c5 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -34,13 +34,13 @@ using ceph::to_timespan; #define dout_subsys ceph_subsys_paxos #undef dout_prefix -#define dout_prefix _prefix(_dout, mon, mon->name, mon->rank, paxos_name, state, first_committed, last_committed) -static 
std::ostream& _prefix(std::ostream *_dout, Monitor *mon, const string& name, +#define dout_prefix _prefix(_dout, mon, mon.name, mon.rank, paxos_name, state, first_committed, last_committed) +static std::ostream& _prefix(std::ostream *_dout, Monitor &mon, const string& name, int rank, const string& paxos_name, int state, version_t first_committed, version_t last_committed) { return *_dout << "mon." << name << "@" << rank - << "(" << mon->get_state_name() << ")" + << "(" << mon.get_state_name() << ")" << ".paxos(" << paxos_name << " " << Paxos::get_statename(state) << " c " << first_committed << ".." << last_committed << ") "; @@ -57,7 +57,7 @@ public: MonitorDBStore *Paxos::get_store() { - return mon->store; + return mon.store; } void Paxos::read_and_prepare_transactions(MonitorDBStore::TransactionRef tx, @@ -155,7 +155,7 @@ void Paxos::collect(version_t oldpn) { // we're recoverying, it seems! state = STATE_RECOVERING; - ceph_assert(mon->is_leader()); + ceph_assert(mon.is_leader()); // reset the number of lasts received uncommitted_v = 0; @@ -194,24 +194,24 @@ void Paxos::collect(version_t oldpn) dout(10) << "collect with pn " << accepted_pn << dendl; // send collect - for (auto p = mon->get_quorum().begin(); - p != mon->get_quorum().end(); + for (auto p = mon.get_quorum().begin(); + p != mon.get_quorum().end(); ++p) { - if (*p == mon->rank) continue; + if (*p == mon.rank) continue; - MMonPaxos *collect = new MMonPaxos(mon->get_epoch(), MMonPaxos::OP_COLLECT, + MMonPaxos *collect = new MMonPaxos(mon.get_epoch(), MMonPaxos::OP_COLLECT, ceph_clock_now()); collect->last_committed = last_committed; collect->first_committed = first_committed; collect->pn = accepted_pn; - mon->send_mon_message(collect, *p); + mon.send_mon_message(collect, *p); } // set timeout event - collect_timeout_event = mon->timer.add_event_after( + collect_timeout_event = mon.timer.add_event_after( g_conf()->mon_accept_timeout_factor * g_conf()->mon_lease, - new C_MonContext{mon, [this](int r) { + 
new C_MonContext{&mon, [this](int r) { if (r == -ECANCELED) return; collect_timeout(); @@ -228,7 +228,7 @@ void Paxos::handle_collect(MonOpRequestRef op) auto collect = op->get_req<MMonPaxos>(); dout(10) << "handle_collect " << *collect << dendl; - ceph_assert(mon->is_peon()); // mon epoch filter should catch strays + ceph_assert(mon.is_peon()); // mon epoch filter should catch strays // we're recoverying, it seems! state = STATE_RECOVERING; @@ -242,12 +242,12 @@ void Paxos::handle_collect(MonOpRequestRef op) << " (theirs: " << collect->first_committed << "; ours: " << last_committed << ") -- bootstrap!" << dendl; op->mark_paxos_event("need to bootstrap"); - mon->bootstrap(); + mon.bootstrap(); return; } // reply - MMonPaxos *last = new MMonPaxos(mon->get_epoch(), MMonPaxos::OP_LAST, + MMonPaxos *last = new MMonPaxos(mon.get_epoch(), MMonPaxos::OP_LAST, ceph_clock_now()); last->last_committed = last_committed; last->first_committed = first_committed; @@ -474,7 +474,7 @@ void Paxos::handle_last(MonOpRequestRef op) dout(10) << "handle_last " << *last << dendl; - if (!mon->is_leader()) { + if (!mon.is_leader()) { dout(10) << "not leader, dropping" << dendl; return; } @@ -491,7 +491,7 @@ void Paxos::handle_last(MonOpRequestRef op) << " (theirs: " << last->first_committed << "; ours: " << last_committed << ") -- bootstrap!" << dendl; op->mark_paxos_event("need to bootstrap"); - mon->bootstrap(); + mon.bootstrap(); return; } @@ -513,17 +513,17 @@ void Paxos::handle_last(MonOpRequestRef op) << ") is too low for our first_committed (" << first_committed << ") -- bootstrap!" << dendl; op->mark_paxos_event("need to bootstrap"); - mon->bootstrap(); + mon.bootstrap(); return; } if (p->second < last_committed) { // share committed values dout(10) << " sending commit to mon." 
<< p->first << dendl; - MMonPaxos *commit = new MMonPaxos(mon->get_epoch(), + MMonPaxos *commit = new MMonPaxos(mon.get_epoch(), MMonPaxos::OP_COMMIT, ceph_clock_now()); share_state(commit, peer_first_committed[p->first], p->second); - mon->send_mon_message(commit, p->first); + mon.send_mon_message(commit, p->first); } } @@ -533,7 +533,7 @@ void Paxos::handle_last(MonOpRequestRef op) dout(10) << " they had a higher pn than us, picking a new one." << dendl; // cancel timeout event - mon->timer.cancel_event(collect_timeout_event); + mon.timer.cancel_event(collect_timeout_event); collect_timeout_event = 0; collect(last->pn); @@ -564,9 +564,9 @@ void Paxos::handle_last(MonOpRequestRef op) } // is that everyone? - if (num_last == mon->get_quorum().size()) { + if (num_last == mon.get_quorum().size()) { // cancel timeout event - mon->timer.cancel_event(collect_timeout_event); + mon.timer.cancel_event(collect_timeout_event); collect_timeout_event = 0; peer_first_committed.clear(); peer_last_committed.clear(); @@ -604,8 +604,8 @@ void Paxos::collect_timeout() dout(1) << "collect timeout, calling fresh election" << dendl; collect_timeout_event = 0; logger->inc(l_paxos_collect_timeout); - ceph_assert(mon->is_leader()); - mon->bootstrap(); + ceph_assert(mon.is_leader()); + mon.bootstrap(); } @@ -616,19 +616,19 @@ void Paxos::begin(bufferlist& v) << v.length() << " bytes" << dendl; - ceph_assert(mon->is_leader()); + ceph_assert(mon.is_leader()); ceph_assert(is_updating() || is_updating_previous()); // we must already have a majority for this to work. - ceph_assert(mon->get_quorum().size() == 1 || - num_last > (unsigned)mon->monmap->size()/2); + ceph_assert(mon.get_quorum().size() == 1 || + num_last > (unsigned)mon.monmap->size()/2); // and no value, yet. 
ceph_assert(new_value.length() == 0); // accept it ourselves accepted.clear(); - accepted.insert(mon->rank); + accepted.insert(mon.rank); new_value = v; if (last_committed == 0) { @@ -676,32 +676,32 @@ void Paxos::begin(bufferlist& v) ceph_assert(g_conf()->paxos_kill_at != 3); - if (mon->get_quorum().size() == 1) { + if (mon.get_quorum().size() == 1) { // we're alone, take it easy commit_start(); return; } // ask others to accept it too! - for (auto p = mon->get_quorum().begin(); - p != mon->get_quorum().end(); + for (auto p = mon.get_quorum().begin(); + p != mon.get_quorum().end(); ++p) { - if (*p == mon->rank) continue; + if (*p == mon.rank) continue; dout(10) << " sending begin to mon." << *p << dendl; - MMonPaxos *begin = new MMonPaxos(mon->get_epoch(), MMonPaxos::OP_BEGIN, + MMonPaxos *begin = new MMonPaxos(mon.get_epoch(), MMonPaxos::OP_BEGIN, ceph_clock_now()); begin->values[last_committed+1] = new_value; begin->last_committed = last_committed; begin->pn = accepted_pn; - mon->send_mon_message(begin, *p); + mon.send_mon_message(begin, *p); } // set timeout event - accept_timeout_event = mon->timer.add_event_after( + accept_timeout_event = mon.timer.add_event_after( g_conf()->mon_accept_timeout_factor * g_conf()->mon_lease, - new C_MonContext{mon, [this](int r) { + new C_MonContext{&mon, [this](int r) { if (r == -ECANCELED) return; accept_timeout(); @@ -761,7 +761,7 @@ void Paxos::handle_begin(MonOpRequestRef op) ceph_assert(g_conf()->paxos_kill_at != 5); // reply - MMonPaxos *accept = new MMonPaxos(mon->get_epoch(), MMonPaxos::OP_ACCEPT, + MMonPaxos *accept = new MMonPaxos(mon.get_epoch(), MMonPaxos::OP_ACCEPT, ceph_clock_now()); accept->pn = accepted_pn; accept->last_committed = last_committed; @@ -803,7 +803,7 @@ void Paxos::handle_accept(MonOpRequestRef op) // stale state. // FIXME: we can improve this with an additional lease revocation message // that doesn't block for the persist. 
- if (accepted == mon->get_quorum()) { + if (accepted == mon.get_quorum()) { // yay, commit! dout(10) << " got majority, committing, done with update" << dendl; op->mark_paxos_event("commit_start"); @@ -815,11 +815,11 @@ void Paxos::accept_timeout() { dout(1) << "accept timeout, calling fresh election" << dendl; accept_timeout_event = 0; - ceph_assert(mon->is_leader()); + ceph_assert(mon.is_leader()); ceph_assert(is_updating() || is_updating_previous() || is_writing() || is_writing_previous()); logger->inc(l_paxos_accept_timeout); - mon->bootstrap(); + mon.bootstrap(); } struct C_Committed : public Context { @@ -827,7 +827,7 @@ struct C_Committed : public Context { explicit C_Committed(Paxos *p) : paxos(p) {} void finish(int r) override { ceph_assert(r >= 0); - std::lock_guard l(paxos->mon->lock); + std::lock_guard l(paxos->mon.lock); if (paxos->is_shutdown()) { paxos->abort_commit(); return; @@ -880,9 +880,9 @@ void Paxos::commit_start() ceph_abort(); ++commits_started; - if (mon->get_quorum().size() > 1) { + if (mon.get_quorum().size() > 1) { // cancel timeout event - mon->timer.cancel_event(accept_timeout_event); + mon.timer.cancel_event(accept_timeout_event); accept_timeout_event = 0; } } @@ -909,19 +909,19 @@ void Paxos::commit_finish() _sanity_check_store(); // tell everyone - for (auto p = mon->get_quorum().begin(); - p != mon->get_quorum().end(); + for (auto p = mon.get_quorum().begin(); + p != mon.get_quorum().end(); ++p) { - if (*p == mon->rank) continue; + if (*p == mon.rank) continue; dout(10) << " sending commit to mon." 
<< *p << dendl; - MMonPaxos *commit = new MMonPaxos(mon->get_epoch(), MMonPaxos::OP_COMMIT, + MMonPaxos *commit = new MMonPaxos(mon.get_epoch(), MMonPaxos::OP_COMMIT, ceph_clock_now()); commit->values[last_committed] = new_value; commit->pn = accepted_pn; commit->last_committed = last_committed; - mon->send_mon_message(commit, *p); + mon.send_mon_message(commit, *p); } ceph_assert(g_conf()->paxos_kill_at != 9); @@ -930,7 +930,7 @@ void Paxos::commit_finish() new_value.clear(); // WRITING -> REFRESH - // among other things, this lets do_refresh() -> mon->bootstrap() -> + // among other things, this lets do_refresh() -> mon.bootstrap() -> // wait_for_paxos_write() know that it doesn't need to flush the store // queue. and it should not, as we are in the async completion thread now! ceph_assert(is_writing() || is_writing_previous()); @@ -940,7 +940,7 @@ void Paxos::commit_finish() if (do_refresh()) { commit_proposal(); - if (mon->get_quorum().size() > 1) { + if (mon.get_quorum().size() > 1) { extend_lease(); } @@ -959,7 +959,7 @@ void Paxos::handle_commit(MonOpRequestRef op) logger->inc(l_paxos_commit); - if (!mon->is_peon()) { + if (!mon.is_peon()) { dout(10) << "not a peon, dropping" << dendl; ceph_abort(); return; @@ -973,36 +973,36 @@ void Paxos::handle_commit(MonOpRequestRef op) void Paxos::extend_lease() { - ceph_assert(mon->is_leader()); + ceph_assert(mon.is_leader()); //assert(is_active()); lease_expire = ceph::real_clock::now(); lease_expire += ceph::make_timespan(g_conf()->mon_lease); acked_lease.clear(); - acked_lease.insert(mon->rank); + acked_lease.insert(mon.rank); dout(7) << "extend_lease now+" << g_conf()->mon_lease << " (" << lease_expire << ")" << dendl; // bcast - for (auto p = mon->get_quorum().begin(); - p != mon->get_quorum().end(); ++p) { + for (auto p = mon.get_quorum().begin(); + p != mon.get_quorum().end(); ++p) { - if (*p == mon->rank) continue; - MMonPaxos *lease = new MMonPaxos(mon->get_epoch(), MMonPaxos::OP_LEASE, + if (*p == mon.rank) 
continue; + MMonPaxos *lease = new MMonPaxos(mon.get_epoch(), MMonPaxos::OP_LEASE, ceph_clock_now()); lease->last_committed = last_committed; lease->lease_timestamp = utime_t{lease_expire}; lease->first_committed = first_committed; - mon->send_mon_message(lease, *p); + mon.send_mon_message(lease, *p); } // set timeout event. // if old timeout is still in place, leave it. if (!lease_ack_timeout_event) { - lease_ack_timeout_event = mon->timer.add_event_after( + lease_ack_timeout_event = mon.timer.add_event_after( g_conf()->mon_lease_ack_timeout_factor * g_conf()->mon_lease, - new C_MonContext{mon, [this](int r) { + new C_MonContext{&mon, [this](int r) { if (r == -ECANCELED) return; lease_ack_timeout(); @@ -1014,8 +1014,8 @@ void Paxos::extend_lease() at -= ceph::make_timespan(g_conf()->mon_lease); at += ceph::make_timespan(g_conf()->mon_lease_renew_interval_factor * g_conf()->mon_lease); - lease_renew_event = mon->timer.add_event_at( - at, new C_MonContext{mon, [this](int r) { + lease_renew_event = mon.timer.add_event_at( + at, new C_MonContext{&mon, [this](int r) { if (r == -ECANCELED) return; lease_renew_timeout(); @@ -1031,7 +1031,7 @@ void Paxos::warn_on_future_time(utime_t t, entity_name_t from) utime_t warn_diff = now - last_clock_drift_warn; if (warn_diff > pow(g_conf()->mon_clock_drift_warn_backoff, clock_drift_warned)) { - mon->clog->warn() << "message from " << from << " was stamped " << diff + mon.clog->warn() << "message from " << from << " was stamped " << diff << "s in the future, clocks not synchronized"; last_clock_drift_warn = ceph_clock_now(); ++clock_drift_warned; @@ -1047,7 +1047,7 @@ bool Paxos::do_refresh() // make sure we have the latest state loaded up auto start = ceph::coarse_mono_clock::now(); - mon->refresh_from_paxos(&need_bootstrap); + mon.refresh_from_paxos(&need_bootstrap); auto end = ceph::coarse_mono_clock::now(); logger->inc(l_paxos_refresh); @@ -1055,7 +1055,7 @@ bool Paxos::do_refresh() if (need_bootstrap) { dout(10) << " doing 
requested bootstrap" << dendl; - mon->bootstrap(); + mon.bootstrap(); return false; } @@ -1065,7 +1065,7 @@ bool Paxos::do_refresh() void Paxos::commit_proposal() { dout(10) << __func__ << dendl; - ceph_assert(mon->is_leader()); + ceph_assert(mon.is_leader()); ceph_assert(is_refresh()); finish_contexts(g_ceph_context, committing_finishers); @@ -1074,7 +1074,7 @@ void Paxos::commit_proposal() void Paxos::finish_round() { dout(10) << __func__ << dendl; - ceph_assert(mon->is_leader()); + ceph_assert(mon.is_leader()); // ok, now go active! state = STATE_ACTIVE; @@ -1104,7 +1104,7 @@ void Paxos::handle_lease(MonOpRequestRef op) op->mark_paxos_event("handle_lease"); auto lease = op->get_req<MMonPaxos>(); // sanity - if (!mon->is_peon() || + if (!mon.is_peon() || last_committed != lease->last_committed) { dout(10) << "handle_lease i'm not a peon, or they're not the leader," << " or the last_committed doesn't match, dropping" << dendl; @@ -1132,12 +1132,12 @@ void Paxos::handle_lease(MonOpRequestRef op) << " now " << lease_expire << dendl; // ack - MMonPaxos *ack = new MMonPaxos(mon->get_epoch(), MMonPaxos::OP_LEASE_ACK, + MMonPaxos *ack = new MMonPaxos(mon.get_epoch(), MMonPaxos::OP_LEASE_ACK, ceph_clock_now()); ack->last_committed = last_committed; ack->first_committed = first_committed; ack->lease_timestamp = ceph_clock_now(); - encode(mon->session_map.feature_map, ack->feature_map); + encode(mon.session_map.feature_map, ack->feature_map); lease->get_connection()->send_message(ack); // (re)set timeout event. @@ -1163,21 +1163,21 @@ void Paxos::handle_lease_ack(MonOpRequestRef op) acked_lease.insert(from); if (ack->feature_map.length()) { auto p = ack->feature_map.cbegin(); - FeatureMap& t = mon->quorum_feature_map[from]; + FeatureMap& t = mon.quorum_feature_map[from]; decode(t, p); } - if (acked_lease == mon->get_quorum()) { + if (acked_lease == mon.get_quorum()) { // yay! 
dout(10) << "handle_lease_ack from " << ack->get_source() << " -- got everyone" << dendl; - mon->timer.cancel_event(lease_ack_timeout_event); + mon.timer.cancel_event(lease_ack_timeout_event); lease_ack_timeout_event = 0; } else { dout(10) << "handle_lease_ack from " << ack->get_source() << " -- still need " - << mon->get_quorum().size() - acked_lease.size() + << mon.get_quorum().size() - acked_lease.size() << " more" << dendl; } } else { @@ -1191,21 +1191,21 @@ void Paxos::handle_lease_ack(MonOpRequestRef op) void Paxos::lease_ack_timeout() { dout(1) << "lease_ack_timeout -- calling new election" << dendl; - ceph_assert(mon->is_leader()); + ceph_assert(mon.is_leader()); ceph_assert(is_active()); logger->inc(l_paxos_lease_ack_timeout); lease_ack_timeout_event = 0; - mon->bootstrap(); + mon.bootstrap(); } void Paxos::reset_lease_timeout() { dout(20) << "reset_lease_timeout - setting timeout event" << dendl; if (lease_timeout_event) - mon->timer.cancel_event(lease_timeout_event); - lease_timeout_event = mon->timer.add_event_after( + mon.timer.cancel_event(lease_timeout_event); + lease_timeout_event = mon.timer.add_event_after( g_conf()->mon_lease_ack_timeout_factor * g_conf()->mon_lease, - new C_MonContext{mon, [this](int r) { + new C_MonContext{&mon, [this](int r) { if (r == -ECANCELED) return; lease_timeout(); @@ -1215,10 +1215,10 @@ void Paxos::reset_lease_timeout() void Paxos::lease_timeout() { dout(1) << "lease_timeout -- calling new election" << dendl; - ceph_assert(mon->is_peon()); + ceph_assert(mon.is_peon()); logger->inc(l_paxos_lease_timeout); lease_timeout_event = 0; - mon->bootstrap(); + mon.bootstrap(); } void Paxos::lease_renew_timeout() @@ -1270,7 +1270,7 @@ version_t Paxos::get_new_proposal_number(version_t gt) last_pn /= 100; last_pn++; last_pn *= 100; - last_pn += (version_t)mon->rank; + last_pn += (version_t)mon.rank; // write auto t(std::make_shared<MonitorDBStore::Transaction>()); @@ -1298,23 +1298,23 @@ version_t 
Paxos::get_new_proposal_number(version_t gt) void Paxos::cancel_events() { if (collect_timeout_event) { - mon->timer.cancel_event(collect_timeout_event); + mon.timer.cancel_event(collect_timeout_event); collect_timeout_event = 0; } if (accept_timeout_event) { - mon->timer.cancel_event(accept_timeout_event); + mon.timer.cancel_event(accept_timeout_event); accept_timeout_event = 0; } if (lease_renew_event) { - mon->timer.cancel_event(lease_renew_event); + mon.timer.cancel_event(lease_renew_event); lease_renew_event = 0; } if (lease_ack_timeout_event) { - mon->timer.cancel_event(lease_ack_timeout_event); + mon.timer.cancel_event(lease_ack_timeout_event); lease_ack_timeout_event = 0; } if (lease_timeout_event) { - mon->timer.cancel_event(lease_timeout_event); + mon.timer.cancel_event(lease_timeout_event); lease_timeout_event = 0; } } @@ -1331,7 +1331,7 @@ void Paxos::shutdown() // Let store finish commits in progress // XXX: I assume I can't use finish_contexts() because the store // is going to trigger - unique_lock l{mon->lock, std::adopt_lock}; + unique_lock l{mon.lock, std::adopt_lock}; shutdown_cond.wait(l, [this] { return commits_started <= 0; }); // Monitor::shutdown() will unlock it l.release(); @@ -1343,7 +1343,6 @@ void Paxos::shutdown() finish_contexts(g_ceph_context, committing_finishers, -ECANCELED); if (logger) g_ceph_context->get_perfcounters_collection()->remove(logger); - delete logger; } void Paxos::leader_init() @@ -1358,7 +1357,7 @@ void Paxos::leader_init() logger->inc(l_paxos_start_leader); - if (mon->get_quorum().size() == 1) { + if (mon.get_quorum().size() == 1) { state = STATE_ACTIVE; return; } @@ -1399,9 +1398,9 @@ void Paxos::restart() if (is_writing() || is_writing_previous()) { dout(10) << __func__ << " flushing" << dendl; - mon->lock.unlock(); - mon->store->flush(); - mon->lock.lock(); + mon.lock.unlock(); + mon.store->flush(); + mon.lock.lock(); dout(10) << __func__ << " flushed" << dendl; } state = STATE_RECOVERING; @@ -1435,14 +1434,14 
@@ void Paxos::dispatch(MonOpRequestRef op) auto *req = op->get_req<MMonPaxos>(); // election in progress? - if (!mon->is_leader() && !mon->is_peon()) { + if (!mon.is_leader() && !mon.is_peon()) { dout(5) << "election in progress, dropping " << *req << dendl; return; } // check sanity - ceph_assert(mon->is_leader() || - (mon->is_peon() && req->get_source().num() == mon->get_leader())); + ceph_assert(mon.is_leader() || + (mon.is_peon() && req->get_source().num() == mon.get_leader())); // NOTE: these ops are defined in messages/MMonPaxos.h switch (req->op) { @@ -1486,7 +1485,7 @@ bool Paxos::is_readable(version_t v) ret = false; else ret = - (mon->is_peon() || mon->is_leader()) && + (mon.is_peon() || mon.is_leader()) && (is_active() || is_updating() || is_writing()) && last_committed > 0 && is_lease_valid(); // must have a value alone, or have lease dout(5) << __func__ << " = " << (int)ret @@ -1514,7 +1513,7 @@ version_t Paxos::read_current(bufferlist &bl) bool Paxos::is_lease_valid() { - return ((mon->get_quorum().size() == 1) + return ((mon.get_quorum().size() == 1) || (ceph::real_clock::now() < lease_expire)); } @@ -1523,7 +1522,7 @@ bool Paxos::is_lease_valid() bool Paxos::is_writeable() { return - mon->is_leader() && + mon.is_leader() && is_active() && is_lease_valid(); } @@ -1562,7 +1561,7 @@ void Paxos::queue_pending_finisher(Context *onfinished) MonitorDBStore::TransactionRef Paxos::get_pending_transaction() { - ceph_assert(mon->is_leader()); + ceph_assert(mon.is_leader()); if (!pending_proposal) { pending_proposal.reset(new MonitorDBStore::Transaction); ceph_assert(pending_finishers.empty()); diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h index 56ef403631f..c197f26f7f0 100644 --- a/src/mon/Paxos.h +++ b/src/mon/Paxos.h @@ -179,7 +179,7 @@ class Paxos { /** * The Monitor to which this Paxos class is associated with. 
*/ - Monitor *mon; + Monitor &mon; /// perf counter for internal instrumentations PerfCounters *logger; @@ -1045,7 +1045,7 @@ public: * @param name A name for the paxos service. It serves as the naming space * of the underlying persistent storage for this service. */ - Paxos(Monitor *m, const std::string &name) + Paxos(Monitor &m, const std::string &name) : mon(m), logger(NULL), paxos_name(name), @@ -1065,6 +1065,10 @@ public: clock_drift_warned(0), trimming(false) { } + ~Paxos() { + delete logger; + } + const std::string get_name() const { return paxos_name; } diff --git a/src/mon/PaxosService.cc b/src/mon/PaxosService.cc index 82fd2b93afb..8dff901c067 100644 --- a/src/mon/PaxosService.cc +++ b/src/mon/PaxosService.cc @@ -27,10 +27,10 @@ using ceph::bufferlist; #define dout_subsys ceph_subsys_paxos #undef dout_prefix #define dout_prefix _prefix(_dout, mon, paxos, service_name, get_first_committed(), get_last_committed()) -static ostream& _prefix(std::ostream *_dout, Monitor *mon, Paxos *paxos, string service_name, +static ostream& _prefix(std::ostream *_dout, Monitor &mon, Paxos &paxos, string service_name, version_t fc, version_t lc) { - return *_dout << "mon." << mon->name << "@" << mon->rank - << "(" << mon->get_state_name() + return *_dout << "mon." << mon.name << "@" << mon.rank + << "(" << mon.get_state_name() << ").paxosservice(" << service_name << " " << fc << ".." 
<< lc << ") "; } @@ -44,15 +44,15 @@ bool PaxosService::dispatch(MonOpRequestRef op) << " from " << m->get_orig_source_inst() << " con " << m->get_connection() << dendl; - if (mon->is_shutdown()) { + if (mon.is_shutdown()) { return true; } // make sure this message isn't forwarded from a previous election epoch if (m->rx_election_epoch && - m->rx_election_epoch < mon->get_epoch()) { + m->rx_election_epoch < mon.get_epoch()) { dout(10) << " discarding forwarded message from previous election epoch " - << m->rx_election_epoch << " < " << mon->get_epoch() << dendl; + << m->rx_election_epoch << " < " << mon.get_epoch() << dendl; return true; } @@ -61,7 +61,7 @@ bool PaxosService::dispatch(MonOpRequestRef op) // those. also ignore loopback (e.g., log) messages. if (m->get_connection() && !m->get_connection()->is_connected() && - m->get_connection() != mon->con_self && + m->get_connection() != mon.con_self && m->get_connection()->get_messenger() != NULL) { dout(10) << " discarding message from disconnected client " << m->get_source_inst() << " " << *m << dendl; @@ -80,8 +80,8 @@ bool PaxosService::dispatch(MonOpRequestRef op) return true; // easy! // leader? - if (!mon->is_leader()) { - mon->forward_request_leader(op); + if (!mon.is_leader()) { + mon.forward_request_leader(op); return true; } @@ -122,7 +122,7 @@ bool PaxosService::dispatch(MonOpRequestRef op) * Callback class used to propose the pending value once the proposal_timer * fires up. 
*/ - auto do_propose = new C_MonContext{mon, [this](int r) { + auto do_propose = new C_MonContext{&mon, [this](int r) { proposal_timer = 0; if (r >= 0) { propose_pending(); @@ -134,7 +134,7 @@ bool PaxosService::dispatch(MonOpRequestRef op) }}; dout(10) << " setting proposal_timer " << do_propose << " with delay of " << delay << dendl; - proposal_timer = mon->timer.add_event_after(delay, do_propose); + proposal_timer = mon.timer.add_event_after(delay, do_propose); } else { dout(10) << " proposal_timer already set" << dendl; } @@ -144,8 +144,8 @@ bool PaxosService::dispatch(MonOpRequestRef op) void PaxosService::refresh(bool *need_bootstrap) { // update cached versions - cached_first_committed = mon->store->get(get_service_name(), first_committed_name); - cached_last_committed = mon->store->get(get_service_name(), last_committed_name); + cached_first_committed = mon.store->get(get_service_name(), first_committed_name); + cached_last_committed = mon.store->get(get_service_name(), last_committed_name); version_t new_format = get_value("format_version"); if (new_format != format_version) { @@ -165,7 +165,7 @@ void PaxosService::post_refresh() post_paxos_update(); - if (mon->is_peon() && !waiting_for_finished_proposal.empty()) { + if (mon.is_peon() && !waiting_for_finished_proposal.empty()) { finish_contexts(g_ceph_context, waiting_for_finished_proposal, -EAGAIN); } } @@ -177,10 +177,10 @@ bool PaxosService::should_propose(double& delay) delay = 0.0; } else { utime_t now = ceph_clock_now(); - if ((now - paxos->last_commit_time) > g_conf()->paxos_propose_interval) + if ((now - paxos.last_commit_time) > g_conf()->paxos_propose_interval) delay = (double)g_conf()->paxos_min_wait; else - delay = (double)(g_conf()->paxos_propose_interval + paxos->last_commit_time + delay = (double)(g_conf()->paxos_propose_interval + paxos.last_commit_time - now); } return true; @@ -192,12 +192,12 @@ void PaxosService::propose_pending() dout(10) << __func__ << dendl; ceph_assert(have_pending); 
ceph_assert(!proposing); - ceph_assert(mon->is_leader()); + ceph_assert(mon.is_leader()); ceph_assert(is_active()); if (proposal_timer) { dout(10) << " canceling proposal_timer " << proposal_timer << dendl; - mon->timer.cancel_event(proposal_timer); + mon.timer.cancel_event(proposal_timer); proposal_timer = NULL; } @@ -210,7 +210,7 @@ void PaxosService::propose_pending() * bufferlist, so we can then propose that as a value through * Paxos. */ - MonitorDBStore::TransactionRef t = paxos->get_pending_transaction(); + MonitorDBStore::TransactionRef t = paxos.get_pending_transaction(); if (should_stash_full()) encode_full(t); @@ -248,8 +248,8 @@ void PaxosService::propose_pending() ceph_abort_msg("bad return value for C_Committed"); } }; - paxos->queue_pending_finisher(new C_Committed(this)); - paxos->trigger_propose(); + paxos.queue_pending_finisher(new C_Committed(this)); + paxos.trigger_propose(); } bool PaxosService::should_stash_full() @@ -270,7 +270,7 @@ void PaxosService::restart() dout(10) << __func__ << dendl; if (proposal_timer) { dout(10) << " canceling proposal_timer " << proposal_timer << dendl; - mon->timer.cancel_event(proposal_timer); + mon.timer.cancel_event(proposal_timer); proposal_timer = 0; } @@ -326,7 +326,7 @@ void PaxosService::_active() dout(10) << __func__ << dendl; // create pending state? - if (mon->is_leader()) { + if (mon.is_leader()) { dout(7) << __func__ << " creating new pending" << dendl; if (!have_pending) { create_pending(); @@ -348,7 +348,7 @@ void PaxosService::_active() // on this list; it is on Paxos's. 
finish_contexts(g_ceph_context, waiting_for_finished_proposal, 0); - if (mon->is_leader()) + if (mon.is_leader()) upgrade_format(); // NOTE: it's possible that this will get called twice if we commit @@ -363,7 +363,7 @@ void PaxosService::shutdown() if (proposal_timer) { dout(10) << " canceling proposal_timer " << proposal_timer << dendl; - mon->timer.cancel_event(proposal_timer); + mon.timer.cancel_event(proposal_timer); proposal_timer = 0; } @@ -378,8 +378,11 @@ void PaxosService::maybe_trim() return; version_t trim_to = get_trim_to(); - if (trim_to < get_first_committed()) + if (trim_to < get_first_committed()) { + dout(10) << __func__ << " trim_to " << trim_to << " < first_committed" + << get_first_committed() << dendl; return; + } version_t to_remove = trim_to - get_first_committed(); if (g_conf()->paxos_service_trim_min > 0 && @@ -399,7 +402,7 @@ void PaxosService::maybe_trim() } dout(10) << __func__ << " trimming to " << trim_to << ", " << to_remove << " states" << dendl; - MonitorDBStore::TransactionRef t = paxos->get_pending_transaction(); + MonitorDBStore::TransactionRef t = paxos.get_pending_transaction(); trim(t, get_first_committed(), trim_to); put_first_committed(t, trim_to); cached_first_committed = trim_to; @@ -407,7 +410,7 @@ void PaxosService::maybe_trim() // let the service add any extra stuff encode_trim_extra(t, trim_to); - paxos->trigger_propose(); + paxos.trigger_propose(); } void PaxosService::trim(MonitorDBStore::TransactionRef t, @@ -420,8 +423,8 @@ void PaxosService::trim(MonitorDBStore::TransactionRef t, dout(20) << __func__ << " " << v << dendl; t->erase(get_service_name(), v); - string full_key = mon->store->combine_strings("full", v); - if (mon->store->exists(get_service_name(), full_key)) { + string full_key = mon.store->combine_strings("full", v); + if (mon.store->exists(get_service_name(), full_key)) { dout(20) << __func__ << " " << full_key << dendl; t->erase(get_service_name(), full_key); } @@ -430,15 +433,15 @@ void 
PaxosService::trim(MonitorDBStore::TransactionRef t, dout(20) << " compacting prefix " << get_service_name() << dendl; t->compact_range(get_service_name(), stringify(from - 1), stringify(to)); t->compact_range(get_service_name(), - mon->store->combine_strings(full_prefix_name, from - 1), - mon->store->combine_strings(full_prefix_name, to)); + mon.store->combine_strings(full_prefix_name, from - 1), + mon.store->combine_strings(full_prefix_name, to)); } } void PaxosService::load_health() { bufferlist bl; - mon->store->get("health", service_name, bl); + mon.store->get("health", service_name, bl); if (bl.length()) { auto p = bl.cbegin(); using ceph::decode; diff --git a/src/mon/PaxosService.h b/src/mon/PaxosService.h index 0134f6a7054..93c5e7c81f9 100644 --- a/src/mon/PaxosService.h +++ b/src/mon/PaxosService.h @@ -34,11 +34,11 @@ class PaxosService { /** * The Monitor to which this class is associated with */ - Monitor *mon; + Monitor &mon; /** * The Paxos instance to which this class is associated with */ - Paxos *paxos; + Paxos &paxos; /** * Our name. This will be associated with the class implementing us, and will * be used mainly for store-related operations. @@ -122,7 +122,7 @@ public: }; class C_ReplyOp : public C_MonOp { - Monitor *mon; + Monitor &mon; MonOpRequestRef op; MessageRef reply; public: @@ -130,7 +130,7 @@ public: C_MonOp(o), mon(s->mon), op(o), reply(r) { } void _finish(int r) override { if (r >= 0) { - mon->send_reply(op, reply.detach()); + mon.send_reply(op, reply.detach()); } } }; @@ -144,7 +144,7 @@ public: * @param p A Paxos instance * @param name Our service's name. 
*/ - PaxosService(Monitor *mn, Paxos *p, std::string name) + PaxosService(Monitor &mn, Paxos &p, std::string name) : mon(mn), paxos(p), service_name(name), proposing(false), service_version(0), proposal_timer(0), have_pending(false), @@ -442,7 +442,7 @@ public: ceph::buffer::list bl; encode(next, bl); t->put("health", service_name, bl); - mon->log_health(next, health_checks, t); + mon.log_health(next, health_checks, t); } void load_health(); @@ -507,7 +507,7 @@ public: bool is_active() const { return !is_proposing() && - (paxos->is_active() || paxos->is_updating() || paxos->is_writing()); + (paxos.is_active() || paxos.is_updating() || paxos.is_writing()); } /** @@ -524,7 +524,7 @@ public: */ bool is_readable(version_t ver = 0) const { if (ver > get_last_committed() || - !paxos->is_readable(0) || + !paxos.is_readable(0) || get_last_committed() == 0) return false; return true; @@ -573,7 +573,7 @@ public: op->mark_event(service_name + ":wait_for_active"); if (!is_proposing()) { - paxos->wait_for_active(op, c); + paxos.wait_for_active(op, c); return; } wait_for_finished_proposal(op, c); @@ -607,7 +607,7 @@ public: if (op) op->mark_event(service_name + ":wait_for_readable/paxos"); - paxos->wait_for_readable(op, c); + paxos.wait_for_readable(op, c); } } @@ -630,7 +630,7 @@ public: else if (!is_writeable()) wait_for_active(op, c); else - paxos->wait_for_writeable(op, c); + paxos.wait_for_writeable(op, c); } void wait_for_writeable_ctx(Context *c) { MonOpRequestRef o; @@ -712,7 +712,7 @@ public: * @note This function is a wrapper for Paxos::cancel_events */ void cancel_events() { - paxos->cancel_events(); + paxos.cancel_events(); } /** @@ -767,7 +767,7 @@ public: */ void put_version_full(MonitorDBStore::TransactionRef t, version_t ver, ceph::buffer::list& bl) { - std::string key = mon->store->combine_strings(full_prefix_name, ver); + std::string key = mon.store->combine_strings(full_prefix_name, ver); t->put(get_service_name(), key, bl); } /** @@ -778,7 +778,7 @@ public: * 
@param ver A version number */ void put_version_latest_full(MonitorDBStore::TransactionRef t, version_t ver) { - std::string key = mon->store->combine_strings(full_prefix_name, full_latest_name); + std::string key = mon.store->combine_strings(full_prefix_name, full_latest_name); t->put(get_service_name(), key, ver); } /** @@ -849,7 +849,7 @@ public: * @return 0 on success; <0 otherwise */ virtual int get_version(version_t ver, ceph::buffer::list& bl) { - return mon->store->get(get_service_name(), ver, bl); + return mon.store->get(get_service_name(), ver, bl); } /** * Get the contents of a given full version of this service. @@ -859,8 +859,8 @@ public: * @returns 0 on success; <0 otherwise */ virtual int get_version_full(version_t ver, ceph::buffer::list& bl) { - std::string key = mon->store->combine_strings(full_prefix_name, ver); - return mon->store->get(get_service_name(), key, bl); + std::string key = mon.store->combine_strings(full_prefix_name, ver); + return mon.store->get(get_service_name(), key, bl); } /** * Get the latest full version number @@ -868,8 +868,8 @@ public: * @returns A version number */ version_t get_version_latest_full() { - std::string key = mon->store->combine_strings(full_prefix_name, full_latest_name); - return mon->store->get(get_service_name(), key); + std::string key = mon.store->combine_strings(full_prefix_name, full_latest_name); + return mon.store->get(get_service_name(), key); } /** @@ -879,7 +879,7 @@ public: * @param[out] bl The ceph::buffer::list to be populated with the value */ int get_value(const std::string& key, ceph::buffer::list& bl) { - return mon->store->get(get_service_name(), key, bl); + return mon.store->get(get_service_name(), key, bl); } /** * Get an integer value from a given key. 
@@ -887,7 +887,7 @@ public: * @param[in] key The key */ version_t get_value(const std::string& key) { - return mon->store->get(get_service_name(), key); + return mon.store->get(get_service_name(), key); } /** diff --git a/src/mon/QuorumService.h b/src/mon/QuorumService.h deleted file mode 100644 index c4d30bea30f..00000000000 --- a/src/mon/QuorumService.h +++ /dev/null @@ -1,125 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2013 Inktank, Inc - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ -#ifndef CEPH_MON_QUORUM_SERVICE_H -#define CEPH_MON_QUORUM_SERVICE_H - -#include <errno.h> - -#include "include/types.h" -#include "include/Context.h" -#include "common/RefCountedObj.h" -#include "common/config.h" - -#include "mon/Monitor.h" - -class QuorumService -{ - Context *tick_event = nullptr; - double tick_period; - -public: - enum { - SERVICE_HEALTH = 0x01, - SERVICE_TIMECHECK = 0x02, - SERVICE_CONFIG_KEY = 0x03, - }; - -protected: - Monitor *mon; - epoch_t epoch; - - QuorumService(Monitor *m) : - tick_period(g_conf()->mon_tick_interval), - mon(m), - epoch(0) - { - } - - void cancel_tick() { - if (tick_event) - mon->timer.cancel_event(tick_event); - tick_event = NULL; - } - - void start_tick() { - generic_dout(10) << __func__ << dendl; - - cancel_tick(); - if (tick_period <= 0) - return; - - tick_event = new C_MonContext{mon, [this](int r) { - if (r < 0) - return; - tick(); - }}; - mon->timer.add_event_after(tick_period, tick_event); - } - - void set_update_period(double t) { - tick_period = t; - } - - bool in_quorum() { - return (mon->is_leader() || mon->is_peon()); - } - - virtual bool service_dispatch(MonOpRequestRef op) = 0; - virtual void service_tick() = 0; - 
virtual void service_shutdown() = 0; - - virtual void start_epoch() = 0; - virtual void finish_epoch() = 0; - virtual void cleanup() = 0; - -public: - virtual ~QuorumService() { } - - void start(epoch_t new_epoch) { - epoch = new_epoch; - start_epoch(); - } - - void finish() { - generic_dout(20) << "QuorumService::finish" << dendl; - finish_epoch(); - } - - epoch_t get_epoch() const { - return epoch; - } - - bool dispatch(MonOpRequestRef op) { - return service_dispatch(op); - } - - void tick() { - service_tick(); - start_tick(); - } - - void shutdown() { - generic_dout(0) << "quorum service shutdown" << dendl; - cancel_tick(); - service_shutdown(); - } - - virtual void init() { } - - virtual int get_type() = 0; - virtual std::string get_name() const = 0; - -}; - -#endif /* CEPH_MON_QUORUM_SERVICE_H */ diff --git a/src/msg/async/AsyncConnection.cc b/src/msg/async/AsyncConnection.cc index 8b2a4a2b7b3..5769c580e07 100644 --- a/src/msg/async/AsyncConnection.cc +++ b/src/msg/async/AsyncConnection.cc @@ -384,8 +384,10 @@ void AsyncConnection::process() { // clear timer (if any) since we are connecting/re-connecting if (last_tick_id) { center->delete_time_event(last_tick_id); - last_tick_id = 0; } + last_connect_started = ceph::coarse_mono_clock::now(); + last_tick_id = center->create_time_event( + connect_timeout_us, tick_handler); if (cs) { center->delete_file_event(cs.fd(), EVENT_READABLE | EVENT_WRITABLE); @@ -432,11 +434,6 @@ void AsyncConnection::process() { ldout(async_msgr->cct, 10) << __func__ << " connect successfully, ready to send banner" << dendl; state = STATE_CONNECTION_ESTABLISHED; - ceph_assert(last_tick_id == 0); - // exclude TCP nonblock connect time - last_connect_started = ceph::coarse_mono_clock::now(); - last_tick_id = center->create_time_event( - connect_timeout_us, tick_handler); break; } diff --git a/src/msg/async/PosixStack.h b/src/msg/async/PosixStack.h index 4aed9dd6444..e1d85f1f4b7 100644 --- a/src/msg/async/PosixStack.h +++ 
b/src/msg/async/PosixStack.h @@ -40,6 +40,10 @@ class PosixWorker : public Worker { class PosixNetworkStack : public NetworkStack { std::vector<std::thread> threads; + virtual Worker* create_worker(CephContext *c, unsigned worker_id) override { + return new PosixWorker(c, worker_id); + } + public: explicit PosixNetworkStack(CephContext *c, const std::string &t); diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc index fff0e762062..855006447f7 100644 --- a/src/msg/async/ProtocolV2.cc +++ b/src/msg/async/ProtocolV2.cc @@ -749,7 +749,7 @@ CtPtr ProtocolV2::read(CONTINUATION_RXBPTR_TYPE<ProtocolV2> &next, if (unlikely(pre_auth.enabled) && r >= 0) { pre_auth.rxbuf.append(*next.node); ceph_assert(!cct->_conf->ms_die_on_bug || - pre_auth.rxbuf.length() < 10000000); + pre_auth.rxbuf.length() < 20000000); } next.r = r; run_continuation(next); @@ -759,7 +759,7 @@ CtPtr ProtocolV2::read(CONTINUATION_RXBPTR_TYPE<ProtocolV2> &next, if (unlikely(pre_auth.enabled) && r == 0) { pre_auth.rxbuf.append(*next.node); ceph_assert(!cct->_conf->ms_die_on_bug || - pre_auth.rxbuf.length() < 10000000); + pre_auth.rxbuf.length() < 20000000); } next.r = r; return &next; @@ -791,7 +791,7 @@ CtPtr ProtocolV2::write(const std::string &desc, if (unlikely(pre_auth.enabled)) { pre_auth.txbuf.append(buffer); ceph_assert(!cct->_conf->ms_die_on_bug || - pre_auth.txbuf.length() < 10000000); + pre_auth.txbuf.length() < 20000000); } ssize_t r = diff --git a/src/msg/async/Stack.cc b/src/msg/async/Stack.cc index 6885fd4f9db..bf1cd8203b6 100644 --- a/src/msg/async/Stack.cc +++ b/src/msg/async/Stack.cc @@ -66,47 +66,42 @@ std::function<void ()> NetworkStack::add_thread(unsigned worker_id) std::shared_ptr<NetworkStack> NetworkStack::create(CephContext *c, const std::string &t) { + std::shared_ptr<NetworkStack> stack = nullptr; + if (t == "posix") - return std::make_shared<PosixNetworkStack>(c, t); + stack.reset(new PosixNetworkStack(c, t)); #ifdef HAVE_RDMA else if (t == "rdma") - return 
std::make_shared<RDMAStack>(c, t); + stack.reset(new RDMAStack(c, t)); #endif #ifdef HAVE_DPDK else if (t == "dpdk") - return std::make_shared<DPDKStack>(c, t); + stack.reset(new DPDKStack(c, t)); #endif - lderr(c) << __func__ << " ms_async_transport_type " << t << + if (stack == nullptr) { + lderr(c) << __func__ << " ms_async_transport_type " << t << " is not supported! " << dendl; - ceph_abort(); - return nullptr; -} - -Worker* NetworkStack::create_worker(CephContext *c, const std::string &type, unsigned worker_id) -{ - if (type == "posix") - return new PosixWorker(c, worker_id); -#ifdef HAVE_RDMA - else if (type == "rdma") - return new RDMAWorker(c, worker_id); -#endif -#ifdef HAVE_DPDK - else if (type == "dpdk") - return new DPDKWorker(c, worker_id); -#endif + ceph_abort(); + return nullptr; + } + + const int InitEventNumber = 5000; + for (unsigned worker_id = 0; worker_id < stack->num_workers; ++worker_id) { + Worker *w = stack->create_worker(c, worker_id); + int ret = w->center.init(InitEventNumber, worker_id, t); + if (ret) + throw std::system_error(-ret, std::generic_category()); + stack->workers.push_back(w); + } - lderr(c) << __func__ << " ms_async_transport_type " << type << - " is not supported! 
" << dendl; - ceph_abort(); - return nullptr; + return stack; } NetworkStack::NetworkStack(CephContext *c, const std:: string &t): type(t), started(false), cct(c) { ceph_assert(cct->_conf->ms_async_op_threads > 0); - const int InitEventNumber = 5000; num_workers = cct->_conf->ms_async_op_threads; if (num_workers >= EventCenter::MAX_EVENTCENTER) { ldout(cct, 0) << __func__ << " max thread limit is " @@ -115,14 +110,6 @@ NetworkStack::NetworkStack(CephContext *c, const std:: string &t): type(t), star << dendl; num_workers = EventCenter::MAX_EVENTCENTER; } - - for (unsigned worker_id = 0; worker_id < num_workers; ++worker_id) { - Worker *w = create_worker(cct, type, worker_id); - int ret = w->center.init(InitEventNumber, worker_id, type); - if (ret) - throw std::system_error(-ret, std::generic_category()); - workers.push_back(w); - } } void NetworkStack::start() diff --git a/src/msg/async/Stack.h b/src/msg/async/Stack.h index 7b8b62f36fb..1bf4baa519c 100644 --- a/src/msg/async/Stack.h +++ b/src/msg/async/Stack.h @@ -300,6 +300,8 @@ class NetworkStack { std::function<void ()> add_thread(unsigned i); + virtual Worker* create_worker(CephContext *c, unsigned i) = 0; + protected: CephContext *cct; std::vector<Worker*> workers; @@ -316,8 +318,6 @@ class NetworkStack { static std::shared_ptr<NetworkStack> create( CephContext *c, const std::string &type); - static Worker* create_worker( - CephContext *c, const std::string &t, unsigned i); // backend need to override this method if backend doesn't support shared // listen table. // For example, posix backend has in kernel global listen table. 
If one diff --git a/src/msg/async/dpdk/DPDKStack.h b/src/msg/async/dpdk/DPDKStack.h index 37626bee492..43ae8003c39 100644 --- a/src/msg/async/dpdk/DPDKStack.h +++ b/src/msg/async/dpdk/DPDKStack.h @@ -248,6 +248,11 @@ class DPDKWorker : public Worker { class DPDKStack : public NetworkStack { vector<std::function<void()> > funcs; + + virtual Worker* create_worker(CephContext *c, unsigned worker_id) override { + return new DPDKWorker(c, worker_id); + } + public: explicit DPDKStack(CephContext *cct, const string &t): NetworkStack(cct, t) { funcs.resize(cct->_conf->ms_async_max_op_threads); diff --git a/src/msg/async/rdma/RDMAStack.h b/src/msg/async/rdma/RDMAStack.h index ac33fcae8e7..d2eafb74c14 100644 --- a/src/msg/async/rdma/RDMAStack.h +++ b/src/msg/async/rdma/RDMAStack.h @@ -326,6 +326,10 @@ class RDMAStack : public NetworkStack { std::atomic<bool> fork_finished = {false}; + virtual Worker* create_worker(CephContext *c, unsigned worker_id) override { + return new RDMAWorker(c, worker_id); + } + public: explicit RDMAStack(CephContext *cct, const std::string &t); virtual ~RDMAStack(); diff --git a/src/mypy.ini b/src/mypy.ini index 3d6cb96c0d0..cf8a8b5f4e8 100755 --- a/src/mypy.ini +++ b/src/mypy.ini @@ -22,6 +22,21 @@ ignore_missing_imports = True disallow_untyped_defs = True +[mypy-cephadm.upgrade.*] +disallow_untyped_defs = True + +[mypy-cephadm.serve.*] +disallow_untyped_defs = True + +[mypy-cephadm.inventory] +disallow_untyped_defs = True + +[mypy-cephadm.schedule] +disallow_untyped_defs = True + +[mypy-cephadm.module] +disallow_untyped_defs = True + # Make cephadm and rook happy [mypy-OpenSSL] ignore_missing_imports = True diff --git a/src/nasm-wrapper b/src/nasm-wrapper index 9e480ff6941..9e950ff5861 100755 --- a/src/nasm-wrapper +++ b/src/nasm-wrapper @@ -1,4 +1,6 @@ -#!/bin/bash -e +#!/usr/bin/env bash + +set -e refine_nasm_options="" while [ -n "$*" ]; do diff --git a/src/neorados/RADOS.cc b/src/neorados/RADOS.cc index 4c85d44d834..a0baaab287d 100644 --- 
a/src/neorados/RADOS.cc +++ b/src/neorados/RADOS.cc @@ -899,8 +899,9 @@ void RADOS::lookup_pool(std::string_view name, objecter = impl->objecter] (bs::error_code ec) mutable { int64_t ret = - objecter->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name), - name); + objecter->with_osdmap([&](const OSDMap &osdmap) { + return osdmap.lookup_pg_pool_name(name); + }); if (ret < 0) ca::dispatch(std::move(c), osdc_errc::pool_dne, std::int64_t(0)); diff --git a/src/os/bluestore/Allocator.cc b/src/os/bluestore/Allocator.cc index eaabccfe565..91a001b5c97 100644 --- a/src/os/bluestore/Allocator.cc +++ b/src/os/bluestore/Allocator.cc @@ -69,7 +69,13 @@ public: bufferlist& out) override { int r = 0; if (command == "bluestore allocator dump " + name) { - f->open_array_section("free_regions"); + f->open_object_section("allocator_dump"); + f->dump_unsigned("capacity", alloc->get_capacity()); + f->dump_unsigned("alloc_unit", alloc->get_block_size()); + f->dump_string("alloc_type", alloc->get_type()); + f->dump_string("alloc_name", name); + + f->open_array_section("extents"); auto iterated_allocation = [&](size_t off, size_t len) { ceph_assert(len > 0); f->open_object_section("free"); @@ -83,6 +89,7 @@ public: }; alloc->dump(iterated_allocation); f->close_section(); + f->close_section(); } else if (command == "bluestore allocator score " + name) { f->open_object_section("fragmentation_score"); f->dump_float("fragmentation_rating", alloc->get_fragmentation_score()); @@ -99,7 +106,10 @@ public: } }; -Allocator::Allocator(const std::string& name) +Allocator::Allocator(const std::string& name, + int64_t _capacity, + int64_t _block_size) + : capacity(_capacity), block_size(_block_size) { asok_hook = new SocketHook(this, name); } @@ -119,7 +129,7 @@ Allocator *Allocator::create(CephContext* cct, string type, { Allocator* alloc = nullptr; if (type == "stupid") { - alloc = new StupidAllocator(cct, name, block_size); + alloc = new StupidAllocator(cct, name, size, block_size); } else if 
(type == "bitmap") { alloc = new BitmapAllocator(cct, size, block_size, name); } else if (type == "avl") { diff --git a/src/os/bluestore/Allocator.h b/src/os/bluestore/Allocator.h index 2104c2cc113..177438025d7 100644 --- a/src/os/bluestore/Allocator.h +++ b/src/os/bluestore/Allocator.h @@ -20,25 +20,32 @@ class Allocator { public: - explicit Allocator(const std::string& name); + explicit Allocator(const std::string& name, + int64_t _capacity, + int64_t _block_size); virtual ~Allocator(); /* + * returns allocator type name as per names in config + */ + virtual const char* get_type() const = 0; + + /* * Allocate required number of blocks in n number of extents. * Min and Max number of extents are limited by: * a. alloc unit * b. max_alloc_size. - * as no extent can be lesser than alloc_unit and greater than max_alloc size. + * as no extent can be lesser than block_size and greater than max_alloc size. * Apart from that extents can vary between these lower and higher limits according * to free block search algorithm and availability of contiguous space. */ - virtual int64_t allocate(uint64_t want_size, uint64_t alloc_unit, + virtual int64_t allocate(uint64_t want_size, uint64_t block_size, uint64_t max_alloc_size, int64_t hint, PExtentVector *extents) = 0; - int64_t allocate(uint64_t want_size, uint64_t alloc_unit, + int64_t allocate(uint64_t want_size, uint64_t block_size, int64_t hint, PExtentVector *extents) { - return allocate(want_size, alloc_unit, want_size, hint, extents); + return allocate(want_size, block_size, want_size, hint, extents); } /* Bulk release. 
Implementations may override this method to handle the whole @@ -49,7 +56,11 @@ public: virtual void dump() = 0; virtual void dump(std::function<void(uint64_t offset, uint64_t length)> notify) = 0; - virtual void set_zone_states(std::vector<zone_state_t> &&_zone_states) {} + virtual void zoned_set_zone_states(std::vector<zone_state_t> &&_zone_states) {} + virtual bool zoned_get_zones_to_clean(std::deque<uint64_t> *zones_to_clean) { + return false; + } + virtual void init_add_free(uint64_t offset, uint64_t length) = 0; virtual void init_rm_free(uint64_t offset, uint64_t length) = 0; @@ -64,11 +75,23 @@ public: static Allocator *create(CephContext* cct, std::string type, int64_t size, int64_t block_size, const std::string& name = ""); + const string& get_name() const; + int64_t get_capacity() const + { + return capacity; + } + int64_t get_block_size() const + { + return block_size; + } private: class SocketHook; SocketHook* asok_hook = nullptr; + + int64_t capacity = 0; + int64_t block_size = 0; }; #endif diff --git a/src/os/bluestore/AvlAllocator.cc b/src/os/bluestore/AvlAllocator.cc index e9d05107986..8c43b37dc26 100644 --- a/src/os/bluestore/AvlAllocator.cc +++ b/src/os/bluestore/AvlAllocator.cc @@ -36,8 +36,8 @@ uint64_t AvlAllocator::_block_picker(const Tree& t, uint64_t align) { const auto compare = t.key_comp(); - for (auto rs = t.lower_bound(range_t{*cursor, size}, compare); - rs != t.end(); ++rs) { + auto rs_start = t.lower_bound(range_t{*cursor, size}, compare); + for (auto rs = rs_start; rs != t.end(); ++rs) { uint64_t offset = p2roundup(rs->start, align); if (offset + size <= rs->end) { *cursor = offset + size; @@ -48,7 +48,7 @@ uint64_t AvlAllocator::_block_picker(const Tree& t, * If we know we've searched the whole tree (*cursor == 0), give up. * Otherwise, reset the cursor to the beginning and try again. 
*/ - if (*cursor == 0) { + if (*cursor == 0 || rs_start == t.begin()) { return -1ULL; } *cursor = 0; @@ -240,7 +240,15 @@ int AvlAllocator::_allocate( max_size < range_size_alloc_threshold || free_pct < range_size_alloc_free_pct) { *cursor = 0; - start = _block_picker(range_size_tree, cursor, size, unit); + do { + start = _block_picker(range_size_tree, cursor, size, unit); + if (start != -1ULL || !force_range_size_alloc) { + break; + } + // try to collect smaller extents as we could fail to retrieve + // that large block due to misaligned extents + size = p2align(size >> 1, unit); + } while (size >= unit); } else { start = _block_picker(range_tree, cursor, size, unit); } @@ -289,7 +297,7 @@ AvlAllocator::AvlAllocator(CephContext* cct, int64_t block_size, uint64_t max_mem, const std::string& name) : - Allocator(name), + Allocator(name, device_size, block_size), num_total(device_size), block_size(block_size), range_size_alloc_threshold( @@ -304,7 +312,7 @@ AvlAllocator::AvlAllocator(CephContext* cct, int64_t device_size, int64_t block_size, const std::string& name) : - Allocator(name), + Allocator(name, device_size, block_size), num_total(device_size), block_size(block_size), range_size_alloc_threshold( diff --git a/src/os/bluestore/AvlAllocator.h b/src/os/bluestore/AvlAllocator.h index bcc3f8b051b..426db78ad39 100644 --- a/src/os/bluestore/AvlAllocator.h +++ b/src/os/bluestore/AvlAllocator.h @@ -71,6 +71,10 @@ public: AvlAllocator(CephContext* cct, int64_t device_size, int64_t block_size, const std::string& name); ~AvlAllocator(); + const char* get_type() const override + { + return "avl"; + } int64_t allocate( uint64_t want, uint64_t unit, diff --git a/src/os/bluestore/BitmapAllocator.cc b/src/os/bluestore/BitmapAllocator.cc index c24a333aae3..a744eb17bfe 100644 --- a/src/os/bluestore/BitmapAllocator.cc +++ b/src/os/bluestore/BitmapAllocator.cc @@ -12,7 +12,7 @@ BitmapAllocator::BitmapAllocator(CephContext* _cct, int64_t capacity, int64_t alloc_unit, const 
std::string& name) : - Allocator(name), + Allocator(name, capacity, alloc_unit), cct(_cct) { ldout(cct, 10) << __func__ << " 0x" << std::hex << capacity << "/" diff --git a/src/os/bluestore/BitmapAllocator.h b/src/os/bluestore/BitmapAllocator.h index 51ebaa4208c..5c768a4ac2f 100644 --- a/src/os/bluestore/BitmapAllocator.h +++ b/src/os/bluestore/BitmapAllocator.h @@ -22,6 +22,10 @@ public: { } + const char* get_type() const override + { + return "bitmap"; + } int64_t allocate( uint64_t want_size, uint64_t alloc_unit, uint64_t max_alloc_size, int64_t hint, PExtentVector *extents) override; diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index f958ab8f784..ded9d957a03 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -70,11 +70,14 @@ public: AdminSocket* admin_socket = bluefs->cct->get_admin_socket(); if (admin_socket) { hook = new BlueFS::SocketHook(bluefs); - int r = admin_socket->register_command("bluestore bluefs available " + int r = admin_socket->register_command("bluestore bluefs device info " "name=alloc_size,type=CephInt,req=false", hook, - "Report available space for bluefs. " - "If alloc_size set, make simulation."); + "Shows space report for bluefs devices. " + "This also includes an estimation for space " + "available to bluefs at main device. 
" + "alloc_size, if set, specifies the custom bluefs " + "allocation unit size for the estimation above."); if (r != 0) { ldout(bluefs->cct, 1) << __func__ << " cannot register SocketHook" << dendl; delete hook; @@ -104,7 +107,7 @@ private: Formatter *f, std::ostream& errss, bufferlist& out) override { - if (command == "bluestore bluefs available") { + if (command == "bluestore bluefs device info") { int64_t alloc_size = 0; cmd_getval(cmdmap, "alloc_size", alloc_size); if ((alloc_size & (alloc_size - 1)) != 0) { @@ -112,8 +115,8 @@ private: return -EINVAL; } if (alloc_size == 0) - alloc_size = bluefs->cct->_conf->bluefs_alloc_size; - f->open_object_section("bluefs_available_space"); + alloc_size = bluefs->cct->_conf->bluefs_shared_alloc_size; + f->open_object_section("bluefs_device_info"); for (unsigned dev = BDEV_WAL; dev <= BDEV_SLOW; dev++) { if (bluefs->bdev[dev]) { f->open_object_section("dev"); @@ -126,9 +129,14 @@ private: f->dump_int("total", total); f->dump_int("free", free); f->dump_int("bluefs_used", used); - f->close_section(); - } + if (bluefs->is_shared_alloc(dev)) { + size_t avail = bluefs->probe_alloc_avail(dev, alloc_size); + f->dump_int("bluefs max available", avail); + } + f->close_section(); + } } + f->close_section(); } else if (command == "bluefs stats") { std::stringstream ss; @@ -554,9 +562,10 @@ void BlueFS::_init_alloc() ceph_assert(bdev[id]->get_size()); ceph_assert(alloc_size[id]); if (is_shared_alloc(id)) { - dout(1) << __func__ << " shared, id " << id - << " alloc_size 0x" << std::hex << alloc_size[id] - << " size 0x" << bdev[id]->get_size() << std::dec << dendl; + dout(1) << __func__ << " shared, id " << id << std::hex + << ", capacity 0x" << bdev[id]->get_size() + << ", block size 0x" << alloc_size[id] + << std::dec << dendl; } else { std::string name = "bluefs-"; const char* devnames[] = { "wal","db","slow" }; @@ -564,9 +573,12 @@ void BlueFS::_init_alloc() name += devnames[id]; else name += to_string(uintptr_t(this)); - dout(1) << 
__func__ << " new, id " << id - << " alloc_size 0x" << std::hex << alloc_size[id] - << " size 0x" << bdev[id]->get_size() << std::dec << dendl; + dout(1) << __func__ << " new, id " << id << std::hex + << ", allocator name " << name + << ", allocator type " << cct->_conf->bluefs_allocator + << ", capacity 0x" << bdev[id]->get_size() + << ", block size 0x" << alloc_size[id] + << std::dec << dendl; alloc[id] = Allocator::create(cct, cct->_conf->bluefs_allocator, bdev[id]->get_size(), alloc_size[id], name); @@ -2854,22 +2866,23 @@ int BlueFS::_allocate_without_fallback(uint8_t id, uint64_t len, return -ENOENT; } extents->reserve(4); // 4 should be (more than) enough for most allocations - uint64_t min_alloc_size = alloc_size[id]; - uint64_t left = round_up_to(len, min_alloc_size); - int64_t alloc_len = alloc[id]->allocate(left, min_alloc_size, 0, extents); - if (alloc_len < 0 || alloc_len < (int64_t)left) { + int64_t need = round_up_to(len, alloc_size[id]); + int64_t alloc_len = alloc[id]->allocate(need, alloc_size[id], 0, extents); + if (alloc_len < 0 || alloc_len < need) { if (alloc_len > 0) { alloc[id]->release(*extents); } - if (bdev[id]) - derr << __func__ << " failed to allocate 0x" << std::hex << left - << " on bdev " << (int)id - << ", free 0x" << alloc[id]->get_free() << std::dec << dendl; - else - derr << __func__ << " failed to allocate 0x" << std::hex << left - << " on bdev " << (int)id << ", dne" << std::dec << dendl; - if (alloc[id]) - alloc[id]->dump(); + derr << __func__ << " unable to allocate 0x" << std::hex << need + << " on bdev " << (int)id + << ", allocator name " << alloc[id]->get_name() + << ", allocator type " << alloc[id]->get_type() + << ", capacity 0x" << alloc[id]->get_capacity() + << ", block size 0x" << alloc[id]->get_block_size() + << ", free 0x" << alloc[id]->get_free() + << ", fragmentation " << alloc[id]->get_fragmentation() + << ", allocated 0x" << (alloc_len > 0 ? 
alloc_len : 0) + << std::dec << dendl; + alloc[id]->dump(); return -ENOSPC; } if (is_shared_alloc(id)) { @@ -2888,34 +2901,41 @@ int BlueFS::_allocate(uint8_t id, uint64_t len, int64_t alloc_len = 0; PExtentVector extents; uint64_t hint = 0; + int64_t need = len; if (alloc[id]) { + need = round_up_to(len, alloc_size[id]); if (!node->extents.empty() && node->extents.back().bdev == id) { hint = node->extents.back().end(); } extents.reserve(4); // 4 should be (more than) enough for most allocations - alloc_len = alloc[id]->allocate(round_up_to(len, alloc_size[id]), - alloc_size[id], hint, &extents); + alloc_len = alloc[id]->allocate(need, alloc_size[id], hint, &extents); } - if (!alloc[id] || - alloc_len < 0 || - alloc_len < (int64_t)round_up_to(len, alloc_size[id])) { - if (alloc_len > 0) { - alloc[id]->release(extents); + if (alloc_len < 0 || alloc_len < need) { + if (alloc[id]) { + if (alloc_len > 0) { + alloc[id]->release(extents); + } + dout(1) << __func__ << " unable to allocate 0x" << std::hex << need + << " on bdev " << (int)id + << ", allocator name " << alloc[id]->get_name() + << ", allocator type " << alloc[id]->get_type() + << ", capacity 0x" << alloc[id]->get_capacity() + << ", block size 0x" << alloc[id]->get_block_size() + << ", free 0x" << alloc[id]->get_free() + << ", fragmentation " << alloc[id]->get_fragmentation() + << ", allocated 0x" << (alloc_len > 0 ? 
alloc_len : 0) + << std::dec << dendl; } + if (id != BDEV_SLOW) { - if (bdev[id]) { - dout(1) << __func__ << " failed to allocate 0x" << std::hex << len - << " on bdev " << (int)id - << ", free 0x" << alloc[id]->get_free() - << "; fallback to bdev " << (int)id + 1 - << std::dec << dendl; - } + dout(20) << __func__ << " fallback to bdev " + << (int)id + 1 + << dendl; return _allocate(id + 1, len, node); + } else { + derr << __func__ << " allocation failed, needed 0x" << std::hex << need + << dendl; } - dout(1) << __func__ << " unable to allocate 0x" << std::hex << len - << " on bdev " << (int)id << ", free 0x" - << (alloc[id] ? alloc[id]->get_free() : (uint64_t)-1) - << std::dec << dendl; return -ENOSPC; } else { uint64_t used = _get_used(id); @@ -3559,6 +3579,27 @@ int BlueFS::do_replay_recovery_read(FileReader *log_reader, return 0; } +size_t BlueFS::probe_alloc_avail(int dev, uint64_t alloc_size) +{ + size_t total = 0; + auto iterated_allocation = [&](size_t off, size_t len) { + //only count in size that is alloc_size aligned + size_t dist_to_alignment; + size_t offset_in_block = off & (alloc_size - 1); + if (offset_in_block == 0) + dist_to_alignment = 0; + else + dist_to_alignment = alloc_size - offset_in_block; + if (dist_to_alignment >= len) + return; + len -= dist_to_alignment; + total += p2align(len, alloc_size); + }; + if (alloc[dev]) { + alloc[dev]->dump(iterated_allocation); + } + return total; +} // =============================================== // OriginalVolumeSelector diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index caff6140088..68d37bb33a6 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -607,6 +607,8 @@ public: size_t read_len, bufferlist* bl); + size_t probe_alloc_avail(int dev, uint64_t alloc_size); + /// test purpose methods const PerfCounters* get_perf_counters() const { return logger; diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 3be0dd1660f..e87e8a7bddc 100644 
--- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -3511,7 +3511,8 @@ void BlueStore::Onode::get() { } } void BlueStore::Onode::put() { - if (--nref == 2) { + int n = --nref; + if (n == 2) { c->get_onode_cache()->unpin(this, [&]() { bool was_pinned = pinned; pinned = pinned && nref > 2; // intentionally use > not >= as we have @@ -3519,12 +3520,12 @@ void BlueStore::Onode::put() { bool r = was_pinned && !pinned; // additional decrement for newly unpinned instance if (r) { - --nref; + n = --nref; } return cached && r; }); } - if (nref == 0) { + if (n == 0) { delete this; } } @@ -4408,6 +4409,7 @@ BlueStore::BlueStore(CephContext *cct, finisher(cct, "commit_finisher", "cfin"), kv_sync_thread(this), kv_finalize_thread(this), + zoned_cleaner_thread(this), min_alloc_size(_min_alloc_size), min_alloc_size_order(ctz(_min_alloc_size)), mempool_thread(this) @@ -4915,8 +4917,15 @@ void BlueStore::_init_logger() "Average omap iterator lower_bound call latency"); b.add_time_avg(l_bluestore_omap_next_lat, "omap_next_lat", "Average omap iterator next call latency"); + b.add_time_avg(l_bluestore_omap_get_keys_lat, "omap_get_keys_lat", + "Average omap get_keys call latency"); + b.add_time_avg(l_bluestore_omap_get_values_lat, "omap_get_values_lat", + "Average omap get_values call latency"); b.add_time_avg(l_bluestore_clist_lat, "clist_lat", "Average collection listing latency"); + b.add_time_avg(l_bluestore_remove_lat, "remove_lat", + "Average removal latency"); + logger = b.create_perf_counters(); cct->get_perfcounters_collection()->add(logger); } @@ -5336,7 +5345,7 @@ int BlueStore::_init_alloc() ceph_assert(shared_alloc.a != NULL); if (bdev->is_smr()) { - shared_alloc.a->set_zone_states(fm->get_zone_states(db)); + shared_alloc.a->zoned_set_zone_states(fm->get_zone_states(db)); } uint64_t num = 0, bytes = 0; @@ -5352,10 +5361,15 @@ int BlueStore::_init_alloc() } fm->enumerate_reset(); - dout(1) << __func__ << " loaded " << byte_u_t(bytes) - << " in " << num 
<< " extents" - << " available " << byte_u_t(shared_alloc.a->get_free()) - << dendl; + dout(1) << __func__ + << " loaded " << byte_u_t(bytes) << " in " << num << " extents" + << std::hex + << ", allocator type " << shared_alloc.a->get_type() + << ", capacity 0x" << shared_alloc.a->get_capacity() + << ", block size 0x" << shared_alloc.a->get_block_size() + << ", free 0x" << shared_alloc.a->get_free() + << ", fragmentation " << shared_alloc.a->get_fragmentation() + << std::dec << dendl; return 0; } @@ -6875,6 +6889,10 @@ int BlueStore::_mount() _kv_start(); + if (bdev->is_smr()) { + _zoned_cleaner_start(); + } + r = _deferred_replay(); if (r < 0) goto out_stop; @@ -6904,6 +6922,9 @@ int BlueStore::_mount() return 0; out_stop: + if (bdev->is_smr()) { + _zoned_cleaner_stop(); + } _kv_stop(); out_coll: _shutdown_cache(); @@ -6922,6 +6943,10 @@ int BlueStore::umount() mounted = false; if (!_kv_only) { mempool_thread.shutdown(); + if (bdev->is_smr()) { + dout(20) << __func__ << " stopping zone cleaner thread" << dendl; + _zoned_cleaner_stop(); + } dout(20) << __func__ << " stopping kv thread" << dendl; _kv_stop(); _shutdown_cache(); @@ -10474,6 +10499,7 @@ int BlueStore::omap_get_keys( dout(15) << __func__ << " " << c->get_cid() << " oid " << oid << dendl; if (!c->exists) return -ENOENT; + auto start1 = mono_clock::now(); std::shared_lock l(c->lock); int r = 0; OnodeRef o = c->get_onode(oid, false); @@ -10505,6 +10531,12 @@ int BlueStore::omap_get_keys( } } out: + c->store->log_latency( + __func__, + l_bluestore_omap_get_keys_lat, + mono_clock::now() - start1, + c->store->cct->_conf->bluestore_log_omap_iterator_age); + dout(10) << __func__ << " " << c->get_cid() << " oid " << oid << " = " << r << dendl; return r; @@ -10522,6 +10554,7 @@ int BlueStore::omap_get_values( if (!c->exists) return -ENOENT; std::shared_lock l(c->lock); + auto start1 = mono_clock::now(); int r = 0; string final_key; OnodeRef o = c->get_onode(oid, false); @@ -10549,6 +10582,12 @@ int 
BlueStore::omap_get_values( } } out: + c->store->log_latency( + __func__, + l_bluestore_omap_get_values_lat, + mono_clock::now() - start1, + c->store->cct->_conf->bluestore_log_omap_iterator_age); + dout(10) << __func__ << " " << c->get_cid() << " oid " << oid << " = " << r << dendl; return r; @@ -11691,7 +11730,25 @@ void BlueStore::_kv_sync_thread() ceph_assert(!kv_sync_started); kv_sync_started = true; kv_cond.notify_all(); + + auto t0 = mono_clock::now(); + timespan twait = ceph::make_timespan(0); + size_t kv_submitted = 0; + while (true) { + auto period = cct->_conf->bluestore_kv_sync_util_logging_s; + auto observation_period = + ceph::make_timespan(period); + auto elapsed = mono_clock::now() - t0; + if (period && elapsed >= observation_period) { + dout(5) << __func__ << " utilization: idle " + << twait << " of " << elapsed + << ", submitted: " << kv_submitted + <<dendl; + t0 = mono_clock::now(); + twait = ceph::make_timespan(0); + kv_submitted = 0; + } ceph_assert(kv_committing.empty()); if (kv_queue.empty() && ((deferred_done_queue.empty() && deferred_stable_queue.empty()) || @@ -11699,8 +11756,11 @@ void BlueStore::_kv_sync_thread() if (kv_stop) break; dout(20) << __func__ << " sleep" << dendl; + auto t = mono_clock::now(); kv_sync_in_progress = false; kv_cond.wait(l); + twait += mono_clock::now() - t; + dout(20) << __func__ << " wake" << dendl; } else { deque<TransContext*> kv_submitting; @@ -11794,6 +11854,7 @@ void BlueStore::_kv_sync_thread() for (auto txc : kv_committing) { throttle.log_state_latency(*txc, logger, l_bluestore_state_kv_queued_lat); if (txc->get_state() == TransContext::STATE_KV_QUEUED) { + ++kv_submitted; _txc_apply_kv(txc, false); --txc->osr->kv_committing_serially; } else { @@ -12000,6 +12061,63 @@ void BlueStore::_kv_finalize_thread() kv_finalize_started = false; } +void BlueStore::_zoned_cleaner_start() { + dout(10) << __func__ << dendl; + + zoned_cleaner_thread.create("bstore_zcleaner"); +} + +void BlueStore::_zoned_cleaner_stop() 
{ + dout(10) << __func__ << dendl; + { + std::unique_lock l{zoned_cleaner_lock}; + while (!zoned_cleaner_started) { + zoned_cleaner_cond.wait(l); + } + zoned_cleaner_stop = true; + zoned_cleaner_cond.notify_all(); + } + zoned_cleaner_thread.join(); + { + std::lock_guard l{zoned_cleaner_lock}; + zoned_cleaner_stop = false; + } + dout(10) << __func__ << " done" << dendl; +} + +void BlueStore::_zoned_cleaner_thread() { + dout(10) << __func__ << " start" << dendl; + std::unique_lock l{zoned_cleaner_lock}; + ceph_assert(!zoned_cleaner_started); + zoned_cleaner_started = true; + zoned_cleaner_cond.notify_all(); + std::deque<uint64_t> zones_to_clean; + while (true) { + if (zoned_cleaner_queue.empty()) { + if (zoned_cleaner_stop) { + break; + } + dout(20) << __func__ << " sleep" << dendl; + zoned_cleaner_cond.wait(l); + dout(20) << __func__ << " wake" << dendl; + } else { + zones_to_clean.swap(zoned_cleaner_queue); + l.unlock(); + while (!zones_to_clean.empty()) { + _zoned_clean_zone(zones_to_clean.front()); + zones_to_clean.pop_front(); + } + l.lock(); + } + } + dout(10) << __func__ << " finish" << dendl; + zoned_cleaner_started = false; +} + +void BlueStore::_zoned_clean_zone(uint64_t zone_num) { + dout(10) << __func__ << " cleaning zone " << zone_num << dendl; +} + bluestore_deferred_op_t *BlueStore::_get_deferred_op( TransContext *txc) { @@ -13658,6 +13776,15 @@ int BlueStore::_do_alloc_write( } _collect_allocation_stats(need, min_alloc_size, prealloc.size()); + if (bdev->is_smr()) { + std::deque<uint64_t> zones_to_clean; + if (shared_alloc.a->zoned_get_zones_to_clean(&zones_to_clean)) { + std::lock_guard l{zoned_cleaner_lock}; + zoned_cleaner_queue.swap(zones_to_clean); + zoned_cleaner_cond.notify_one(); + } + } + dout(20) << __func__ << " prealloc " << prealloc << dendl; auto prealloc_pos = prealloc.begin(); @@ -14429,7 +14556,23 @@ int BlueStore::_remove(TransContext *txc, dout(15) << __func__ << " " << c->cid << " " << o->oid << " onode " << o.get() << " txc "<< 
txc << dendl; + + auto start_time = mono_clock::now(); int r = _do_remove(txc, c, o); + log_latency_fn( + __func__, + l_bluestore_remove_lat, + mono_clock::now() - start_time, + cct->_conf->bluestore_log_op_age, + [&](const ceph::timespan& lat) { + ostringstream ostr; + ostr << ", lat = " << timespan_str(lat) + << " cid =" << c->cid + << " oid =" << o->oid; + return ostr.str(); + } + ); + dout(10) << __func__ << " " << c->cid << " " << o->oid << " = " << r << dendl; return r; } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index b84ee546ab8..d9a107218cd 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -140,7 +140,10 @@ enum { l_bluestore_omap_upper_bound_lat, l_bluestore_omap_lower_bound_lat, l_bluestore_omap_next_lat, + l_bluestore_omap_get_keys_lat, + l_bluestore_omap_get_values_lat, l_bluestore_clist_lat, + l_bluestore_remove_lat, l_bluestore_last }; @@ -1991,11 +1994,19 @@ public: struct KVFinalizeThread : public Thread { BlueStore *store; explicit KVFinalizeThread(BlueStore *s) : store(s) {} - void *entry() { + void *entry() override { store->_kv_finalize_thread(); return NULL; } }; + struct ZonedCleanerThread : public Thread { + BlueStore *store; + explicit ZonedCleanerThread(BlueStore *s) : store(s) {} + void *entry() override { + store->_zoned_cleaner_thread(); + return nullptr; + } + }; struct DBHistogram { struct value_dist { @@ -2110,6 +2121,13 @@ private: std::deque<DeferredBatch*> deferred_stable_to_finalize; ///< pending finalization bool kv_finalize_in_progress = false; + ZonedCleanerThread zoned_cleaner_thread; + ceph::mutex zoned_cleaner_lock = ceph::make_mutex("BlueStore::zoned_cleaner_lock"); + ceph::condition_variable zoned_cleaner_cond; + bool zoned_cleaner_started = false; + bool zoned_cleaner_stop = false; + std::deque<uint64_t> zoned_cleaner_queue; + PerfCounters *logger = nullptr; std::list<CollectionRef> removed_collections; @@ -2467,6 +2485,11 @@ private: void _kv_sync_thread(); 
void _kv_finalize_thread(); + void _zoned_cleaner_start(); + void _zoned_cleaner_stop(); + void _zoned_cleaner_thread(); + void _zoned_clean_zone(uint64_t zone_num); + bluestore_deferred_op_t *_get_deferred_op(TransContext *txc); void _deferred_queue(TransContext *txc); public: diff --git a/src/os/bluestore/HybridAllocator.h b/src/os/bluestore/HybridAllocator.h index e8246cf4dfc..92e8b9e80b1 100644 --- a/src/os/bluestore/HybridAllocator.h +++ b/src/os/bluestore/HybridAllocator.h @@ -16,6 +16,10 @@ public: const std::string& name) : AvlAllocator(cct, device_size, _block_size, max_mem, name) { } + const char* get_type() const override + { + return "hybrid"; + } int64_t allocate( uint64_t want, uint64_t unit, diff --git a/src/os/bluestore/StupidAllocator.cc b/src/os/bluestore/StupidAllocator.cc index 2660657d9e9..533f279d780 100644 --- a/src/os/bluestore/StupidAllocator.cc +++ b/src/os/bluestore/StupidAllocator.cc @@ -12,11 +12,13 @@ StupidAllocator::StupidAllocator(CephContext* cct, const std::string& name, + int64_t _size, int64_t _block_size) - : Allocator(name), cct(cct), num_free(0), - block_size(_block_size), + : Allocator(name, _size, _block_size), cct(cct), num_free(0), free(10) { + ceph_assert(cct != nullptr); + bdev_block_size = cct->_conf->bdev_block_size; } StupidAllocator::~StupidAllocator() @@ -25,7 +27,8 @@ StupidAllocator::~StupidAllocator() unsigned StupidAllocator::_choose_bin(uint64_t orig_len) { - uint64_t len = orig_len / cct->_conf->bdev_block_size; + ceph_assert(bdev_block_size > 0); + uint64_t len = orig_len / bdev_block_size; int bin = std::min((int)cbits(len), (int)free.size() - 1); ldout(cct, 30) << __func__ << " len 0x" << std::hex << orig_len << std::dec << " -> " << bin << dendl; @@ -257,13 +260,14 @@ uint64_t StupidAllocator::get_free() double StupidAllocator::get_fragmentation() { - ceph_assert(block_size); + ceph_assert(get_block_size()); double res; uint64_t max_intervals = 0; uint64_t intervals = 0; { std::lock_guard l(lock); - 
max_intervals = p2roundup<uint64_t>(num_free, block_size) / block_size; + max_intervals = p2roundup<uint64_t>(num_free, + get_block_size()) / get_block_size(); for (unsigned bin = 0; bin < free.size(); ++bin) { intervals += free[bin].num_intervals(); } diff --git a/src/os/bluestore/StupidAllocator.h b/src/os/bluestore/StupidAllocator.h index 4139de81b60..bd99a7c835f 100644 --- a/src/os/bluestore/StupidAllocator.h +++ b/src/os/bluestore/StupidAllocator.h @@ -18,7 +18,7 @@ class StupidAllocator : public Allocator { ceph::mutex lock = ceph::make_mutex("StupidAllocator::lock"); int64_t num_free; ///< total bytes in freelist - int64_t block_size; + uint64_t bdev_block_size; template <typename K, typename V> using allocator_t = mempool::bluestore_alloc::pool_allocator<std::pair<const K, V>>; @@ -37,8 +37,15 @@ class StupidAllocator : public Allocator { uint64_t alloc_unit); public: - StupidAllocator(CephContext* cct, const std::string& name, int64_t block_size); + StupidAllocator(CephContext* cct, + const std::string& name, + int64_t size, + int64_t block_size); ~StupidAllocator() override; + const char* get_type() const override + { + return "stupid"; + } int64_t allocate( uint64_t want_size, uint64_t alloc_unit, uint64_t max_alloc_size, diff --git a/src/os/bluestore/ZonedAllocator.cc b/src/os/bluestore/ZonedAllocator.cc index 3a80593f40c..bac6f016997 100644 --- a/src/os/bluestore/ZonedAllocator.cc +++ b/src/os/bluestore/ZonedAllocator.cc @@ -101,7 +101,6 @@ void ZonedAllocator::release(const interval_set<uint64_t>& release_set) { } uint64_t ZonedAllocator::get_free() { - std::lock_guard l(lock); return num_free; } @@ -117,8 +116,7 @@ void ZonedAllocator::dump(std::function<void(uint64_t offset, // This just increments |num_free|. The actual free space is added by // set_zone_states, as it updates the write pointer for each zone. 
void ZonedAllocator::init_add_free(uint64_t offset, uint64_t length) { - std::lock_guard l(lock); - ldout(cct, 10) << __func__ << " " << std::hex + ldout(cct, 40) << __func__ << " " << std::hex << offset << "~" << length << dendl; num_free += length; @@ -126,7 +124,7 @@ void ZonedAllocator::init_add_free(uint64_t offset, uint64_t length) { void ZonedAllocator::init_rm_free(uint64_t offset, uint64_t length) { std::lock_guard l(lock); - ldout(cct, 10) << __func__ << " 0x" << std::hex + ldout(cct, 40) << __func__ << " 0x" << std::hex << offset << "~" << length << dendl; num_free -= length; @@ -140,7 +138,7 @@ void ZonedAllocator::init_rm_free(uint64_t offset, uint64_t length) { ceph_assert(remaining_space <= length); advance_write_pointer(zone_num, remaining_space); - ldout(cct, 10) << __func__ << " set zone 0x" << std::hex + ldout(cct, 40) << __func__ << " set zone 0x" << std::hex << zone_num << " write pointer to 0x" << zone_size << dendl; length -= remaining_space; @@ -148,12 +146,25 @@ void ZonedAllocator::init_rm_free(uint64_t offset, uint64_t length) { for ( ; length; length -= zone_size) { advance_write_pointer(++zone_num, zone_size); - ldout(cct, 10) << __func__ << " set zone 0x" << std::hex + ldout(cct, 40) << __func__ << " set zone 0x" << std::hex << zone_num << " write pointer to 0x" << zone_size << dendl; } } -void ZonedAllocator::set_zone_states(std::vector<zone_state_t> &&_zone_states) { +bool ZonedAllocator::zoned_get_zones_to_clean(std::deque<uint64_t> *zones_to_clean) { + // TODO: make 0.25 tunable + if (static_cast<double>(num_free) / size > 0.25) { + return false; + } + { + std::lock_guard l(lock); + // TODO: populate |zones_to_clean| with the numbers of zones that should be + // cleaned. 
+ } + return true; +} + +void ZonedAllocator::zoned_set_zone_states(std::vector<zone_state_t> &&_zone_states) { std::lock_guard l(lock); ldout(cct, 10) << __func__ << dendl; zone_states = std::move(_zone_states); diff --git a/src/os/bluestore/ZonedAllocator.h b/src/os/bluestore/ZonedAllocator.h index 22b40221f7b..4b03fe5e822 100644 --- a/src/os/bluestore/ZonedAllocator.h +++ b/src/os/bluestore/ZonedAllocator.h @@ -30,7 +30,7 @@ class ZonedAllocator : public Allocator { // atomic_alloc_and_submit_lock will be removed. ceph::mutex lock = ceph::make_mutex("ZonedAllocator::lock"); - int64_t num_free; ///< total bytes in freelist + std::atomic<int64_t> num_free; ///< total bytes in freelist uint64_t size; uint64_t block_size; uint64_t zone_size; @@ -75,7 +75,9 @@ public: void dump(std::function<void(uint64_t offset, uint64_t length)> notify) override; - void set_zone_states(std::vector<zone_state_t> &&_zone_states) override; + void zoned_set_zone_states(std::vector<zone_state_t> &&_zone_states) override; + bool zoned_get_zones_to_clean(std::deque<uint64_t> *zones_to_clean) override; + void init_add_free(uint64_t offset, uint64_t length) override; void init_rm_free(uint64_t offset, uint64_t length) override; diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 3e6e9587a85..b3e030f4eb1 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -1248,6 +1248,7 @@ void ECBackend::handle_sub_read_reply( ceph_assert(rop.in_progress.count(from)); rop.in_progress.erase(from); unsigned is_complete = 0; + bool need_resend = false; // For redundant reads check for completion as each shard comes in, // or in a non-recovery read check for completion once all the shards read. 
if (rop.do_redundant_reads || rop.in_progress.empty()) { @@ -1274,7 +1275,8 @@ void ECBackend::handle_sub_read_reply( if (!rop.do_redundant_reads) { int r = send_all_remaining_reads(iter->first, rop); if (r == 0) { - // We added to in_progress and not incrementing is_complete + // We changed the rop's to_read and not incrementing is_complete + need_resend = true; continue; } // Couldn't read any additional shards so handle as completed with errors @@ -1302,11 +1304,17 @@ void ECBackend::handle_sub_read_reply( rop.complete[iter->first].errors.clear(); } } + // avoid re-read for completed object as we may send remaining reads for uncopmpleted objects + rop.to_read.at(iter->first).need.clear(); + rop.to_read.at(iter->first).want_attrs = false; ++is_complete; } } } - if (rop.in_progress.empty() || is_complete == rop.complete.size()) { + if (need_resend) { + do_read_op(rop); + } else if (rop.in_progress.empty() || + is_complete == rop.complete.size()) { dout(20) << __func__ << " Complete: " << rop << dendl; rop.trace.event("ec read complete"); complete_read_op(rop, m); @@ -2452,7 +2460,6 @@ int ECBackend::send_all_remaining_reads( shards, want_attrs, c))); - do_read_op(rop); return 0; } diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index d13be3f25c6..c39de1bfeeb 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -348,8 +348,8 @@ public: }; struct read_request_t { const std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > to_read; - const std::map<pg_shard_t, std::vector<std::pair<int, int>>> need; - const bool want_attrs; + std::map<pg_shard_t, std::vector<std::pair<int, int>>> need; + bool want_attrs; GenContext<std::pair<RecoveryMessages *, read_result_t& > &> *cb; read_request_t( const std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > &to_read, diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 194a8b6a415..df33e819ff7 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -5955,7 +5955,7 @@ void OSDMap::check_health(CephContext 
*cct, ss << "crush map has legacy tunables (require " << min << ", min is " << cct->_conf->mon_crush_min_required_version << ")"; auto& d = checks->add("OLD_CRUSH_TUNABLES", HEALTH_WARN, ss.str(), 0); - d.detail.push_back("see http://docs.ceph.com/docs/master/rados/operations/crush-map/#tunables"); + d.detail.push_back("see http://docs.ceph.com/en/latest/rados/operations/crush-map/#tunables"); } } @@ -5966,7 +5966,7 @@ void OSDMap::check_health(CephContext *cct, ss << "crush map has straw_calc_version=0"; auto& d = checks->add("OLD_CRUSH_STRAW_CALC_VERSION", HEALTH_WARN, ss.str(), 0); d.detail.push_back( - "see http://docs.ceph.com/docs/master/rados/operations/crush-map/#tunables"); + "see http://docs.ceph.com/en/latest/rados/operations/crush-map/#tunables"); } } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 9bd8aa5f1fe..9b66cb7cd4c 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -3869,7 +3869,8 @@ void PG::C_DeleteMore::complete(int r) { delete this; } -void PG::do_delete_work(ObjectStore::Transaction &t) +ghobject_t PG::do_delete_work(ObjectStore::Transaction &t, + ghobject_t _next) { dout(10) << __func__ << dendl; @@ -3895,25 +3896,45 @@ void PG::do_delete_work(ObjectStore::Transaction &t) osd->sleep_timer.add_event_at(delete_schedule_time, delete_requeue_callback); dout(20) << __func__ << " Delete scheduled at " << delete_schedule_time << dendl; - return; + return _next; } } delete_needs_sleep = true; + ghobject_t next; + vector<ghobject_t> olist; int max = std::min(osd->store->get_ideal_list_max(), (int)cct->_conf->osd_target_transaction_size); - ghobject_t next; + osd->store->collection_list( ch, - next, + _next, ghobject_t::get_max(), max, &olist, &next); dout(20) << __func__ << " " << olist << dendl; + // make sure we've removed everything + // by one more listing from the beginning + if (_next != ghobject_t() && olist.empty()) { + next = ghobject_t(); + osd->store->collection_list( + ch, + next, + ghobject_t::get_max(), + max, + &olist, + &next); + if 
(!olist.empty()) { + dout(0) << __func__ << " additional unexpected onode list" + <<" (new onodes has appeared since PG removal started" + << olist << dendl; + } + } + OSDriver::OSTransaction _t(osdriver.get_transaction(&t)); int64_t num = 0; for (auto& oid : olist) { @@ -3936,7 +3957,6 @@ void PG::do_delete_work(ObjectStore::Transaction &t) Context *fin = new C_DeleteMore(this, get_osdmap_epoch()); t.register_on_commit(fin); } else { - dout(20) << __func__ << " finished" << dendl; if (cct->_conf->osd_inject_failure_on_pg_removal) { _exit(1); } @@ -3971,6 +3991,7 @@ void PG::do_delete_work(ObjectStore::Transaction &t) osd->logger->dec(l_osd_pg_removing); } } + return next; } int PG::pg_stat_adjust(osd_stat_t *ns) diff --git a/src/osd/PG.h b/src/osd/PG.h index 3f7c1cd7c9b..5031861e816 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -457,7 +457,8 @@ public: return std::make_unique<PG::PGLogEntryHandler>(this, &t); } - void do_delete_work(ObjectStore::Transaction &t) override; + ghobject_t do_delete_work(ObjectStore::Transaction &t, + ghobject_t _next) override; void clear_ready_to_merge() override; void set_not_ready_to_merge_target(pg_t pgid, pg_t src) override; diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index c7637c004de..80208f1e772 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -6700,7 +6700,10 @@ PeeringState::Deleting::Deleting(my_context ctx) : my_base(ctx), NamedState(context< PeeringMachine >().state_history, "Started/ToDelete/Deleting") { + start = ceph::mono_clock::now(); + context< PeeringMachine >().log_enter(state_name); + DECLARE_LOCALS; ps->deleting = true; ObjectStore::Transaction &t = context<PeeringMachine>().get_cur_transaction(); @@ -6721,7 +6724,8 @@ boost::statechart::result PeeringState::Deleting::react( const DeleteSome& evt) { DECLARE_LOCALS; - pl->do_delete_work(context<PeeringMachine>().get_cur_transaction()); + next = pl->do_delete_work(context<PeeringMachine>().get_cur_transaction(), + next); 
return discard_event(); } @@ -6731,6 +6735,9 @@ void PeeringState::Deleting::exit() DECLARE_LOCALS; ps->deleting = false; pl->cancel_local_background_io_reservation(); + psdout(20) << "Deleting::" << __func__ << this <<" finished in " + << ceph::mono_clock::now() - start + << dendl; } /*--------GetInfo---------*/ diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index 06954865093..d036c1d00a6 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -377,7 +377,8 @@ public: /// Notification of removal complete, t must be populated to complete removal virtual void on_removal(ObjectStore::Transaction &t) = 0; /// Perform incremental removal work - virtual void do_delete_work(ObjectStore::Transaction &t) = 0; + virtual ghobject_t do_delete_work(ObjectStore::Transaction &t, + ghobject_t _next) = 0; // ======================= PG Merge ========================= virtual void clear_ready_to_merge() = 0; @@ -1242,6 +1243,8 @@ public: boost::statechart::custom_reaction< DeleteSome >, boost::statechart::transition<DeleteInterrupted, WaitDeleteReserved> > reactions; + ghobject_t next; + ceph::mono_clock::time_point start; explicit Deleting(my_context ctx); boost::statechart::result react(const DeleteSome &evt); void exit(); diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 156da3b0a38..e06001401b7 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -71,6 +71,8 @@ #include <errno.h> +#include <common/CDC.h> + MEMPOOL_DEFINE_OBJECT_FACTORY(PrimaryLogPG, replicatedpg, osd); using std::list; @@ -3218,6 +3220,34 @@ struct C_SetManifestRefCountDone : public Context { } pg->manifest_ops.erase(it); cb->complete(r); + cb = nullptr; + } + ~C_SetManifestRefCountDone() { + if (cb) { + delete cb; + } + } +}; + +struct C_SetDedupChunks : public Context { + PrimaryLogPGRef pg; + hobject_t oid; + epoch_t last_peering_reset; + ceph_tid_t tid; + uint64_t offset; + + C_SetDedupChunks(PrimaryLogPG *p, hobject_t o, epoch_t lpr, uint64_t 
offset) + : pg(p), oid(o), last_peering_reset(lpr), + tid(0), offset(offset) + {} + void finish(int r) override { + if (r == -ECANCELED) + return; + std::scoped_lock locker{*pg}; + if (last_peering_reset != pg->get_last_peering_reset()) { + return; + } + pg->finish_set_dedup(oid, r, tid, offset); } }; @@ -3262,17 +3292,18 @@ void PrimaryLogPG::dec_refcount(const hobject_t& soid, const object_ref_delta_t& while (dec_ref_count < 0) { dout(10) << __func__ << ": decrement reference on offset oid: " << p->first << dendl; refcount_manifest(soid, p->first, - refcount_t::DECREMENT_REF, NULL); + refcount_t::DECREMENT_REF, NULL, std::nullopt); dec_ref_count++; } } } -void PrimaryLogPG::get_adjacent_clones(const object_info_t& oi, OpContext* ctx, +void PrimaryLogPG::get_adjacent_clones(ObjectContextRef src_obc, ObjectContextRef& _l, ObjectContextRef& _g) { - const SnapSet& snapset = ctx->obc->ssc->snapset; + const SnapSet& snapset = src_obc->ssc->snapset; + const object_info_t& oi = src_obc->obs.oi; auto get_context = [this, &oi, &snapset](auto iter) -> ObjectContextRef { @@ -3304,7 +3335,7 @@ bool PrimaryLogPG::inc_refcount_by_set(OpContext* ctx, object_manifest_t& set_ch { object_ref_delta_t refs; ObjectContextRef obc_l, obc_g; - get_adjacent_clones(ctx->obs->oi, ctx, obc_l, obc_g); + get_adjacent_clones(ctx->obc, obc_l, obc_g); set_chunk.calc_refs_to_inc_on_set( obc_l ? &(obc_l->obs.oi.manifest) : nullptr, obc_g ? &(obc_g->obs.oi.manifest) : nullptr, @@ -3320,16 +3351,20 @@ bool PrimaryLogPG::inc_refcount_by_set(OpContext* ctx, object_manifest_t& set_ch * the reference the targe object has prior to update object_manifest in object_info_t. * So, call directly refcount_manifest. 
*/ - RefCountCallback *fin = new RefCountCallback(ctx, osd_op); - refcount_manifest(ctx->obs->oi.soid, p->first, - refcount_t::INCREMENT_REF, fin); + C_SetManifestRefCountDone* fin = new C_SetManifestRefCountDone( + new RefCountCallback(ctx, osd_op), + ctx->obs->oi.soid); + ceph_tid_t tid = refcount_manifest(ctx->obs->oi.soid, p->first, + refcount_t::INCREMENT_REF, fin, std::nullopt); + manifest_ops[ctx->obs->oi.soid] = std::make_shared<ManifestOp>(fin->cb, tid); + ctx->obc->start_block(); return true; } else if (inc_ref_count < 0) { hobject_t src = ctx->obs->oi.soid; hobject_t tgt = p->first; ctx->register_on_commit( [src, tgt, this](){ - refcount_manifest(src, tgt, refcount_t::DECREMENT_REF, NULL); + refcount_manifest(src, tgt, refcount_t::DECREMENT_REF, NULL, std::nullopt); }); return false; } @@ -3376,7 +3411,7 @@ void PrimaryLogPG::dec_all_refcount_manifest(const object_info_t& oi, OpContext* if (oi.manifest.is_chunked()) { object_ref_delta_t refs; ObjectContextRef obc_l, obc_g; - get_adjacent_clones(oi, ctx, obc_l, obc_g); + get_adjacent_clones(ctx->obc, obc_l, obc_g); oi.manifest.calc_refs_to_drop_on_removal( obc_l ? &(obc_l->obs.oi.manifest) : nullptr, obc_g ? 
&(obc_g->obs.oi.manifest) : nullptr, @@ -3394,39 +3429,46 @@ void PrimaryLogPG::dec_all_refcount_manifest(const object_info_t& oi, OpContext* ctx->register_on_commit( [oi, this](){ refcount_manifest(oi.soid, oi.manifest.redirect_target, - refcount_t::DECREMENT_REF, NULL); + refcount_t::DECREMENT_REF, NULL, std::nullopt); }); } } -void PrimaryLogPG::refcount_manifest(hobject_t src_soid, hobject_t tgt_soid, refcount_t type, - RefCountCallback* cb) +ceph_tid_t PrimaryLogPG::refcount_manifest(hobject_t src_soid, hobject_t tgt_soid, refcount_t type, + Context *cb, std::optional<bufferlist> chunk) { unsigned flags = CEPH_OSD_FLAG_IGNORE_CACHE | CEPH_OSD_FLAG_IGNORE_OVERLAY | - CEPH_OSD_FLAG_RWORDERED; + CEPH_OSD_FLAG_RWORDERED; + + dout(10) << __func__ << " Start refcount from " << src_soid + << " to " << tgt_soid << dendl; - dout(10) << __func__ << " Start refcount from " << src_soid - << " to " << tgt_soid << dendl; - ObjectOperation obj_op; bufferlist in; - if (type == refcount_t::INCREMENT_REF) { + if (type == refcount_t::INCREMENT_REF) { cls_cas_chunk_get_ref_op call; call.source = src_soid.get_head(); - ::encode(call, in); + ::encode(call, in); obj_op.call("cas", "chunk_get_ref", in); - } else if (type == refcount_t::DECREMENT_REF) { + } else if (type == refcount_t::DECREMENT_REF) { cls_cas_chunk_put_ref_op call; call.source = src_soid.get_head(); - ::encode(call, in); + ::encode(call, in); obj_op.call("cas", "chunk_put_ref", in); - } - + } else if (type == refcount_t::CREATE_OR_GET_REF) { + cls_cas_chunk_create_or_get_ref_op get_call; + get_call.source = src_soid.get_head(); + ceph_assert(chunk); + get_call.data = move(*chunk); + ::encode(get_call, in); + obj_op.call("cas", "chunk_create_or_get_ref", in); + } else { + ceph_assert(0 == "unrecognized type"); + } + Context *c = nullptr; if (cb) { - C_SetManifestRefCountDone *fin = - new C_SetManifestRefCountDone(cb, src_soid); - c = new C_OnFinisher(fin, osd->get_objecter_finisher(get_pg_shard())); + c = new 
C_OnFinisher(cb, osd->get_objecter_finisher(get_pg_shard())); } object_locator_t oloc(tgt_soid); @@ -3436,11 +3478,8 @@ void PrimaryLogPG::refcount_manifest(hobject_t src_soid, hobject_t tgt_soid, ref tgt_soid.oid, oloc, obj_op, SnapContext(), ceph::real_clock::from_ceph_timespec(src_obc->obs.oi.mtime), flags, c); - if (cb) { - manifest_ops[src_soid] = std::make_shared<ManifestOp>(cb, tid); - src_obc->start_block(); - } -} + return tid; +} void PrimaryLogPG::do_proxy_chunked_read(OpRequestRef op, ObjectContextRef obc, int op_index, uint64_t chunk_index, uint64_t req_offset, uint64_t req_length, @@ -6779,9 +6818,13 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) // start ctx->op_finishers[ctx->current_osd_subop_num].reset( new SetManifestFinisher(osd_op)); - RefCountCallback *fin = new RefCountCallback(ctx, osd_op); - refcount_manifest(ctx->obc->obs.oi.soid, target, - refcount_t::INCREMENT_REF, fin); + C_SetManifestRefCountDone* fin = new C_SetManifestRefCountDone( + new RefCountCallback(ctx, osd_op), + soid); + ceph_tid_t tid = refcount_manifest(soid, target, + refcount_t::INCREMENT_REF, fin, std::nullopt); + manifest_ops[soid] = std::make_shared<ManifestOp>(fin->cb, tid); + ctx->obc->start_block(); result = -EINPROGRESS; } else { // finish @@ -6883,6 +6926,10 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) result = -EOPNOTSUPP; break; } + if (pool.info.is_erasure()) { + result = -EOPNOTSUPP; + break; + } for (auto &p : oi.manifest.chunk_map) { interval_set<uint64_t> chunk; @@ -9975,6 +10022,264 @@ struct C_Flush : public Context { } }; +int PrimaryLogPG::start_dedup(OpRequestRef op, ObjectContextRef obc) +{ + const object_info_t& oi = obc->obs.oi; + const hobject_t& soid = oi.soid; + + ceph_assert(obc->is_blocked()); + if (oi.size == 0) { + // evicted + return 0; + } + if (pool.info.get_fingerprint_type() == pg_pool_t::TYPE_FINGERPRINT_NONE) { + dout(0) << " fingerprint algorithm is not set " << dendl; + return -EINVAL; + } 
+ + /* + * The operations to make dedup chunks are tracked by a ManifestOp. + * This op will be finished if all the operations are completed. + */ + ManifestOpRef mop(std::make_shared<ManifestOp>(nullptr, 0)); + + // cdc + std::map<uint64_t, bufferlist> chunks; + int r = do_cdc(oi, mop->new_manifest.chunk_map, chunks); + if (r < 0) { + return r; + } + if (!chunks.size()) { + return 0; + } + + // chunks issued here are different with chunk_map newly generated + // because the same chunks in previous snap will not be issued + // So, we need two data structures; the first is the issued chunk list to track + // issued operations, and the second is the new chunk_map to update chunk_map after + // all operations are finished + object_ref_delta_t refs; + ObjectContextRef obc_l, obc_g; + get_adjacent_clones(obc, obc_l, obc_g); + // skip if the same content exits in prev snap at same offset + mop->new_manifest.calc_refs_to_inc_on_set( + obc_l ? &(obc_l->obs.oi.manifest) : nullptr, + obc_g ? &(obc_g->obs.oi.manifest) : nullptr, + refs); + + for (auto p : chunks) { + hobject_t target = mop->new_manifest.chunk_map[p.first].oid; + if (refs.find(target) == refs.end()) { + continue; + } + C_SetDedupChunks *fin = new C_SetDedupChunks(this, soid, get_last_peering_reset(), p.first); + ceph_tid_t tid = refcount_manifest(soid, target, refcount_t::CREATE_OR_GET_REF, + fin, move(chunks[p.first])); + mop->chunks[target] = make_pair(p.first, p.second.length()); + mop->num_chunks++; + mop->tids[p.first] = tid; + fin->tid = tid; + dout(10) << __func__ << " oid: " << soid << " tid: " << tid + << " target: " << target << " offset: " << p.first + << " length: " << p.second.length() << dendl; + } + + if (mop->tids.size()) { + manifest_ops[soid] = mop; + manifest_ops[soid]->op = op; + } else { + // size == 0 + return 0; + } + + return -EINPROGRESS; +} + +int PrimaryLogPG::do_cdc(const object_info_t& oi, + std::map<uint64_t, chunk_info_t>& chunk_map, + std::map<uint64_t, bufferlist>& chunks) +{ + 
string chunk_algo = pool.info.get_dedup_chunk_algorithm_name(); + int64_t chunk_size = pool.info.get_dedup_cdc_chunk_size(); + uint64_t total_length = 0; + + std::unique_ptr<CDC> cdc = CDC::create(chunk_algo, cbits(chunk_size)-1); + if (!cdc) { + dout(0) << __func__ << " unrecognized chunk-algorithm " << dendl; + return -EINVAL; + } + + bufferlist bl; + /** + * We disable EC pool as a base tier of distributed dedup. + * The reason why we disallow erasure code pool here is that the EC pool does not support objects_read_sync(). + * Therefore, we should change the current implementation totally to make EC pool compatible. + * As s result, we leave this as a future work. + */ + int r = pgbackend->objects_read_sync( + oi.soid, 0, oi.size, 0, &bl); + if (r < 0) { + dout(0) << __func__ << " read fail " << oi.soid + << " len: " << oi.size << " r: " << r << dendl; + return r; + } + if (bl.length() != oi.size) { + dout(0) << __func__ << " bl.length: " << bl.length() << " != oi.size: " + << oi.size << " during chunking " << dendl; + return -EIO; + } + + dout(10) << __func__ << " oid: " << oi.soid << " len: " << bl.length() + << " oi.size: " << oi.size + << " chunk_size: " << chunk_size << dendl; + + vector<pair<uint64_t, uint64_t>> cdc_chunks; + cdc->calc_chunks(bl, &cdc_chunks); + + // get fingerprint + for (auto p : cdc_chunks) { + bufferlist chunk; + chunk.substr_of(bl, p.first, p.second); + hobject_t target = get_fpoid_from_chunk(oi.soid, chunk); + chunks[p.first] = move(chunk); + chunk_map[p.first] = chunk_info_t(0, p.second, target); + total_length += p.second; + } + return total_length; +} + +hobject_t PrimaryLogPG::get_fpoid_from_chunk(const hobject_t soid, bufferlist& chunk) +{ + pg_pool_t::fingerprint_t fp_algo = pool.info.get_fingerprint_type(); + if (fp_algo == pg_pool_t::TYPE_FINGERPRINT_NONE) { + return hobject_t(); + } + object_t fp_oid = [&fp_algo, &chunk]() -> string { + switch (fp_algo) { + case pg_pool_t::TYPE_FINGERPRINT_SHA1: + return 
ceph::crypto::digest<ceph::crypto::SHA1>(chunk).to_str(); + case pg_pool_t::TYPE_FINGERPRINT_SHA256: + return ceph::crypto::digest<ceph::crypto::SHA256>(chunk).to_str(); + case pg_pool_t::TYPE_FINGERPRINT_SHA512: + return ceph::crypto::digest<ceph::crypto::SHA512>(chunk).to_str(); + default: + assert(0 == "unrecognized fingerprint type"); + return {}; + } + }(); + + pg_t raw_pg; + object_locator_t oloc(soid); + oloc.pool = pool.info.get_dedup_tier(); + get_osdmap()->object_locator_to_pg(fp_oid, oloc, raw_pg); + hobject_t target(fp_oid, oloc.key, snapid_t(), + raw_pg.ps(), raw_pg.pool(), + oloc.nspace); + return target; +} + +int PrimaryLogPG::finish_set_dedup(hobject_t oid, int r, ceph_tid_t tid, uint64_t offset) +{ + dout(10) << __func__ << " " << oid << " tid " << tid + << " " << cpp_strerror(r) << dendl; + map<hobject_t,ManifestOpRef>::iterator p = manifest_ops.find(oid); + if (p == manifest_ops.end()) { + dout(10) << __func__ << " no manifest_op found" << dendl; + return -EINVAL; + } + ManifestOpRef mop = p->second; + mop->results[offset] = r; + if (r < 0) { + // if any failure occurs, put a mark on the results to recognize the failure + mop->results[0] = r; + } + if (mop->num_chunks != mop->results.size()) { + // there are on-going works + return -EINPROGRESS; + } + ObjectContextRef obc = get_object_context(oid, false); + if (!obc) { + if (mop->op) + osd->reply_op_error(mop->op, -EINVAL); + return -EINVAL; + } + ceph_assert(obc->is_blocked()); + obc->stop_block(); + kick_object_context_blocked(obc); + if (mop->results[0] < 0) { + // check if the previous op returns fail + ceph_assert(mop->num_chunks == mop->results.size()); + manifest_ops.erase(oid); + osd->reply_op_error(mop->op, mop->results[0]); + return -EIO; + } + + if (mop->chunks.size()) { + OpContextUPtr ctx = simple_opc_create(obc); + ceph_assert(ctx); + if (ctx->lock_manager.get_lock_type( + RWState::RWWRITE, + oid, + obc, + mop->op)) { + dout(20) << __func__ << " took write lock" << dendl; + } else 
if (mop->op) { + dout(10) << __func__ << " waiting on write lock " << mop->op << dendl; + close_op_ctx(ctx.release()); + return -EAGAIN; + } + + ctx->at_version = get_next_version(); + ctx->new_obs = obc->obs; + ctx->new_obs.oi.clear_flag(object_info_t::FLAG_DIRTY); + + /* + * Let's assume that there is a manifest snapshotted object, and we issue tier_flush() to head. + * head: [0, 2) aaa <-- tier_flush() + * 20: [0, 2) ddd, [6, 2) bbb, [8, 2) ccc + * + * In this case, if the new chunk_map is as follows, + * new_chunk_map : [0, 2) ddd, [6, 2) bbb, [8, 2) ccc + * we should drop aaa from head by using calc_refs_to_drop_on_removal(). + * So, the precedure is + * 1. calc_refs_to_drop_on_removal() + * 2. register old references to drop after tier_flush() is committed + * 3. update new chunk_map + */ + + ObjectCleanRegions c_regions = ctx->clean_regions; + ObjectContextRef cobc = get_prev_clone_obc(obc); + c_regions.mark_fully_dirty(); + // CDC was done on entire range of manifest object, + // so the first thing we should do here is to drop the reference to old chunks + ObjectContextRef obc_l, obc_g; + get_adjacent_clones(obc, obc_l, obc_g); + // clear all old references + object_ref_delta_t refs; + ctx->obs->oi.manifest.calc_refs_to_drop_on_removal( + obc_l ? &(obc_l->obs.oi.manifest) : nullptr, + obc_g ? 
&(obc_g->obs.oi.manifest) : nullptr, + refs); + if (!refs.is_empty()) { + ctx->register_on_commit( + [oid, this, refs](){ + dec_refcount(oid, refs); + }); + } + + // set new references + ctx->new_obs.oi.manifest.chunk_map = mop->new_manifest.chunk_map; + + finish_ctx(ctx.get(), pg_log_entry_t::CLEAN); + simple_opc_submit(std::move(ctx)); + } + if (mop->op) + osd->reply_op_error(mop->op, r); + + manifest_ops.erase(oid); + return 0; +} + int PrimaryLogPG::start_flush( OpRequestRef op, ObjectContextRef obc, bool blocking, hobject_t *pmissing, @@ -9991,15 +10296,6 @@ int PrimaryLogPG::start_flush( bool preoctopus_compat = get_osdmap()->require_osd_release < ceph_release_t::octopus; SnapSet snapset; - if (obc->obs.oi.has_manifest() && obc->obs.oi.manifest.is_chunked()) { - /* - * TODO: "flush" for a manifest object means re-running the CDC algorithm on the portions of the - * object that are not currently dedup'd (not in the manifest chunk_map) and re-deduping the resulting - * chunks. Adding support for that operation here is future work. - * - */ - return -EOPNOTSUPP; - } if (preoctopus_compat) { // for pre-octopus compatibility, filter SnapSet::snaps. not // certain we need this, but let's be conservative. @@ -10009,6 +10305,13 @@ int PrimaryLogPG::start_flush( snapset = obc->ssc->snapset; } + if (obc->obs.oi.has_manifest() && obc->obs.oi.manifest.is_chunked()) { + // current dedup tier only supports blocking operation + if (!blocking) { + return -EOPNOTSUPP; + } + } + // verify there are no (older) check for dirty clones { dout(20) << " snapset " << snapset << dendl; @@ -10078,6 +10381,15 @@ int PrimaryLogPG::start_flush( osd->objecter->op_cancel(tids, -ECANCELED); } + if (obc->obs.oi.has_manifest() && obc->obs.oi.manifest.is_chunked()) { + int r = start_dedup(op, obc); + if (r != -EINPROGRESS) { + if (blocking) + obc->stop_block(); + } + return r; + } + /** * In general, we need to send a delete and a copyfrom. 
* Consider snapc 10:[10, 9, 8, 4, 3, 2]:[10(10, 9), 4(4,3,2)] diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 479994d80eb..bc682332dba 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -260,6 +260,13 @@ public: struct ManifestOp { RefCountCallback *cb; ceph_tid_t objecter_tid; + OpRequestRef op; + std::map<uint64_t, int> results; + std::map<uint64_t, ceph_tid_t> tids; + std::map<hobject_t, pair<uint64_t, uint64_t>> chunks; + uint64_t num_chunks = 0; + object_manifest_t new_manifest; + ManifestOp(RefCountCallback* cb, ceph_tid_t tid) : cb(cb), objecter_tid(tid) {} @@ -1482,6 +1489,7 @@ protected: enum class refcount_t { INCREMENT_REF, DECREMENT_REF, + CREATE_OR_GET_REF, }; void do_proxy_chunked_op(OpRequestRef op, const hobject_t& missing_oid, ObjectContextRef obc, bool write_ordered); @@ -1494,21 +1502,27 @@ protected: void finish_promote_manifest(int r, CopyResults *results, ObjectContextRef obc); void cancel_and_requeue_proxy_ops(hobject_t oid); void cancel_manifest_ops(bool requeue, vector<ceph_tid_t> *tids); - void refcount_manifest(hobject_t src_soid, hobject_t tgt_soid, refcount_t type, - RefCountCallback* cb); + ceph_tid_t refcount_manifest(hobject_t src_soid, hobject_t tgt_soid, refcount_t type, + Context *cb, std::optional<bufferlist> chunk); void dec_all_refcount_manifest(const object_info_t& oi, OpContext* ctx); void dec_refcount(const hobject_t& soid, const object_ref_delta_t& refs); void dec_refcount_by_dirty(OpContext* ctx); ObjectContextRef get_prev_clone_obc(ObjectContextRef obc); - void get_adjacent_clones(const object_info_t& oi, OpContext* ctx, + void get_adjacent_clones(ObjectContextRef src_obc, ObjectContextRef& _l, ObjectContextRef& _g); bool inc_refcount_by_set(OpContext* ctx, object_manifest_t& tgt, OSDOp& osd_op); + int do_cdc(const object_info_t& oi, std::map<uint64_t, chunk_info_t>& chunk_map, + std::map<uint64_t, bufferlist>& chunks); + int start_dedup(OpRequestRef op, ObjectContextRef obc); + 
hobject_t get_fpoid_from_chunk(const hobject_t soid, bufferlist& chunk); + int finish_set_dedup(hobject_t oid, int r, ceph_tid_t tid, uint64_t offset); friend struct C_ProxyChunkRead; friend class PromoteManifestCallback; friend struct C_CopyChunk; friend struct RefCountCallback; + friend struct C_SetDedupChunks; public: PrimaryLogPG(OSDService *o, OSDMapRef curmap, diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 30f1c94ff6f..677730ca7b8 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1354,7 +1354,13 @@ static opt_mapping_t opt_mapping = boost::assign::map_list_of ("pg_autoscale_bias", pool_opts_t::opt_desc_t( pool_opts_t::PG_AUTOSCALE_BIAS, pool_opts_t::DOUBLE)) ("read_lease_interval", pool_opts_t::opt_desc_t( - pool_opts_t::READ_LEASE_INTERVAL, pool_opts_t::DOUBLE)); + pool_opts_t::READ_LEASE_INTERVAL, pool_opts_t::DOUBLE)) + ("dedup_tier", pool_opts_t::opt_desc_t( + pool_opts_t::DEDUP_TIER, pool_opts_t::INT)) + ("dedup_chunk_algorithm", pool_opts_t::opt_desc_t( + pool_opts_t::DEDUP_CHUNK_ALGORITHM, pool_opts_t::STR)) + ("dedup_cdc_chunk_size", pool_opts_t::opt_desc_t( + pool_opts_t::DEDUP_CDC_CHUNK_SIZE, pool_opts_t::INT)); bool pool_opts_t::is_opt_name(const std::string& name) { diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 0d61765ba14..6558158843b 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1060,6 +1060,9 @@ public: TARGET_SIZE_RATIO, // fraction of total cluster PG_AUTOSCALE_BIAS, READ_LEASE_INTERVAL, + DEDUP_TIER, + DEDUP_CHUNK_ALGORITHM, + DEDUP_CDC_CHUNK_SIZE, }; enum type_t { @@ -1545,6 +1548,52 @@ public: } } + typedef enum { + TYPE_DEDUP_CHUNK_NONE = 0, + TYPE_DEDUP_CHUNK_FASTCDC = 1, + TYPE_DEDUP_CHUNK_FIXEDCDC = 2, + } dedup_chunk_algo_t; + static dedup_chunk_algo_t get_dedup_chunk_algorithm_from_str(const std::string& s) { + if (s == "none") + return TYPE_DEDUP_CHUNK_NONE; + if (s == "fastcdc") + return TYPE_DEDUP_CHUNK_FASTCDC; + if (s == "fixed") + return TYPE_DEDUP_CHUNK_FIXEDCDC; + 
return (dedup_chunk_algo_t)-1; + } + const dedup_chunk_algo_t get_dedup_chunk_algorithm_type() const { + std::string algo_str; + opts.get(pool_opts_t::DEDUP_CHUNK_ALGORITHM, &algo_str); + return get_dedup_chunk_algorithm_from_str(algo_str); + } + const char *get_dedup_chunk_algorithm_name() const { + std::string dedup_chunk_algo_str; + dedup_chunk_algo_t dedup_chunk_algo_t; + opts.get(pool_opts_t::DEDUP_CHUNK_ALGORITHM, &dedup_chunk_algo_str); + dedup_chunk_algo_t = get_dedup_chunk_algorithm_from_str(dedup_chunk_algo_str); + return get_dedup_chunk_algorithm_name(dedup_chunk_algo_t); + } + static const char *get_dedup_chunk_algorithm_name(dedup_chunk_algo_t m) { + switch (m) { + case TYPE_DEDUP_CHUNK_NONE: return "none"; + case TYPE_DEDUP_CHUNK_FASTCDC: return "fastcdc"; + case TYPE_DEDUP_CHUNK_FIXEDCDC: return "fixed"; + default: return "unknown"; + } + } + + int64_t get_dedup_tier() const { + int64_t tier_id; + opts.get(pool_opts_t::DEDUP_TIER, &tier_id); + return tier_id; + } + int64_t get_dedup_cdc_chunk_size() const { + int64_t chunk_size; + opts.get(pool_opts_t::DEDUP_CDC_CHUNK_SIZE, &chunk_size); + return chunk_size; + } + /// application -> key/value metadata std::map<std::string, std::map<std::string, std::string>> application_metadata; @@ -5487,6 +5536,7 @@ public: auto begin() const { return ref_delta.begin(); } auto end() const { return ref_delta.end(); } + auto find(hobject_t &key) const { return ref_delta.find(key); } bool operator==(const object_ref_delta_t &rhs) const { return ref_delta == rhs.ref_delta; @@ -5516,6 +5566,8 @@ struct chunk_info_t { cflag_t flags; // FLAG_* chunk_info_t() : offset(0), length(0), flags((cflag_t)0) { } + chunk_info_t(uint32_t offset, uint32_t length, hobject_t oid) : + offset(offset), length(length), oid(oid), flags((cflag_t)0) { } static std::string get_flag_string(uint64_t flags) { std::string r; @@ -5629,7 +5681,7 @@ struct object_manifest_t { void calc_refs_to_inc_on_set( const object_manifest_t* g, ///< [in] 
manifest for clone > *this const object_manifest_t* l, ///< [in] manifest for clone < *this - object_ref_delta_t &delta ///< [out] set of refs to drop + object_ref_delta_t &delta ///< [out] set of refs to drop ) const; /** diff --git a/src/pybind/cephfs/c_cephfs.pxd b/src/pybind/cephfs/c_cephfs.pxd new file mode 100644 index 00000000000..4dfcb49d589 --- /dev/null +++ b/src/pybind/cephfs/c_cephfs.pxd @@ -0,0 +1,132 @@ +from libc.stdint cimport * +from types cimport * + +cdef extern from "cephfs/ceph_ll_client.h": + cdef struct statx "ceph_statx": + uint32_t stx_mask + uint32_t stx_blksize + uint32_t stx_nlink + uint32_t stx_uid + uint32_t stx_gid + uint16_t stx_mode + uint64_t stx_ino + uint64_t stx_size + uint64_t stx_blocks + uint64_t stx_dev + uint64_t stx_rdev + timespec stx_atime + timespec stx_ctime + timespec stx_mtime + timespec stx_btime + uint64_t stx_version + +cdef extern from "cephfs/libcephfs.h" nogil: + cdef struct ceph_mount_info: + pass + + cdef struct ceph_dir_result: + pass + + ctypedef void* rados_t + + const char *ceph_version(int *major, int *minor, int *patch) + + int ceph_create(ceph_mount_info **cmount, const char * const id) + int ceph_create_from_rados(ceph_mount_info **cmount, rados_t cluster) + int ceph_init(ceph_mount_info *cmount) + void ceph_shutdown(ceph_mount_info *cmount) + + int ceph_getaddrs(ceph_mount_info* cmount, char** addrs) + int ceph_conf_read_file(ceph_mount_info *cmount, const char *path_list) + int ceph_conf_parse_argv(ceph_mount_info *cmount, int argc, const char **argv) + int ceph_conf_get(ceph_mount_info *cmount, const char *option, char *buf, size_t len) + int ceph_conf_set(ceph_mount_info *cmount, const char *option, const char *value) + + int ceph_mount(ceph_mount_info *cmount, const char *root) + int ceph_select_filesystem(ceph_mount_info *cmount, const char *fs_name) + int ceph_unmount(ceph_mount_info *cmount) + int ceph_abort_conn(ceph_mount_info *cmount) + uint64_t ceph_get_instance_id(ceph_mount_info *cmount) 
+ int ceph_fstatx(ceph_mount_info *cmount, int fd, statx *stx, unsigned want, unsigned flags) + int ceph_statx(ceph_mount_info *cmount, const char *path, statx *stx, unsigned want, unsigned flags) + int ceph_statfs(ceph_mount_info *cmount, const char *path, statvfs *stbuf) + + int ceph_setattrx(ceph_mount_info *cmount, const char *relpath, statx *stx, int mask, int flags) + int ceph_fsetattrx(ceph_mount_info *cmount, int fd, statx *stx, int mask) + int ceph_mds_command(ceph_mount_info *cmount, const char *mds_spec, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) + int ceph_rename(ceph_mount_info *cmount, const char *from_, const char *to) + int ceph_link(ceph_mount_info *cmount, const char *existing, const char *newname) + int ceph_unlink(ceph_mount_info *cmount, const char *path) + int ceph_symlink(ceph_mount_info *cmount, const char *existing, const char *newname) + int ceph_readlink(ceph_mount_info *cmount, const char *path, char *buf, int64_t size) + int ceph_setxattr(ceph_mount_info *cmount, const char *path, const char *name, + const void *value, size_t size, int flags) + int ceph_fsetxattr(ceph_mount_info *cmount, int fd, const char *name, + const void *value, size_t size, int flags) + int ceph_lsetxattr(ceph_mount_info *cmount, const char *path, const char *name, + const void *value, size_t size, int flags) + int ceph_getxattr(ceph_mount_info *cmount, const char *path, const char *name, + void *value, size_t size) + int ceph_fgetxattr(ceph_mount_info *cmount, int fd, const char *name, + void *value, size_t size) + int ceph_lgetxattr(ceph_mount_info *cmount, const char *path, const char *name, + void *value, size_t size) + int ceph_removexattr(ceph_mount_info *cmount, const char *path, const char *name) + int ceph_fremovexattr(ceph_mount_info *cmount, int fd, const char *name) + int ceph_lremovexattr(ceph_mount_info *cmount, const char *path, const char *name) + int 
ceph_listxattr(ceph_mount_info *cmount, const char *path, char *list, size_t size) + int ceph_flistxattr(ceph_mount_info *cmount, int fd, char *list, size_t size) + int ceph_llistxattr(ceph_mount_info *cmount, const char *path, char *list, size_t size) + int ceph_write(ceph_mount_info *cmount, int fd, const char *buf, int64_t size, int64_t offset) + int ceph_pwritev(ceph_mount_info *cmount, int fd, iovec *iov, int iovcnt, int64_t offset) + int ceph_read(ceph_mount_info *cmount, int fd, char *buf, int64_t size, int64_t offset) + int ceph_preadv(ceph_mount_info *cmount, int fd, iovec *iov, int iovcnt, int64_t offset) + int ceph_flock(ceph_mount_info *cmount, int fd, int operation, uint64_t owner) + int ceph_mknod(ceph_mount_info *cmount, const char *path, mode_t mode, dev_t rdev) + int ceph_close(ceph_mount_info *cmount, int fd) + int ceph_open(ceph_mount_info *cmount, const char *path, int flags, mode_t mode) + int ceph_mkdir(ceph_mount_info *cmount, const char *path, mode_t mode) + int ceph_mkdirs(ceph_mount_info *cmount, const char *path, mode_t mode) + int ceph_closedir(ceph_mount_info *cmount, ceph_dir_result *dirp) + int ceph_opendir(ceph_mount_info *cmount, const char *name, ceph_dir_result **dirpp) + void ceph_rewinddir(ceph_mount_info *cmount, ceph_dir_result *dirp) + int64_t ceph_telldir(ceph_mount_info *cmount, ceph_dir_result *dirp) + void ceph_seekdir(ceph_mount_info *cmount, ceph_dir_result *dirp, int64_t offset) + int ceph_chdir(ceph_mount_info *cmount, const char *path) + dirent * ceph_readdir(ceph_mount_info *cmount, ceph_dir_result *dirp) + int ceph_rmdir(ceph_mount_info *cmount, const char *path) + const char* ceph_getcwd(ceph_mount_info *cmount) + int ceph_sync_fs(ceph_mount_info *cmount) + int ceph_fsync(ceph_mount_info *cmount, int fd, int syncdataonly) + int ceph_lazyio(ceph_mount_info *cmount, int fd, int enable) + int ceph_lazyio_propagate(ceph_mount_info *cmount, int fd, int64_t offset, size_t count) + int 
ceph_lazyio_synchronize(ceph_mount_info *cmount, int fd, int64_t offset, size_t count) + int ceph_fallocate(ceph_mount_info *cmount, int fd, int mode, int64_t offset, int64_t length) + int ceph_chmod(ceph_mount_info *cmount, const char *path, mode_t mode) + int ceph_fchmod(ceph_mount_info *cmount, int fd, mode_t mode) + int ceph_chown(ceph_mount_info *cmount, const char *path, int uid, int gid) + int ceph_lchown(ceph_mount_info *cmount, const char *path, int uid, int gid) + int ceph_fchown(ceph_mount_info *cmount, int fd, int uid, int gid) + int64_t ceph_lseek(ceph_mount_info *cmount, int fd, int64_t offset, int whence) + void ceph_buffer_free(char *buf) + mode_t ceph_umask(ceph_mount_info *cmount, mode_t mode) + int ceph_utime(ceph_mount_info *cmount, const char *path, utimbuf *buf) + int ceph_futime(ceph_mount_info *cmount, int fd, utimbuf *buf) + int ceph_utimes(ceph_mount_info *cmount, const char *path, timeval times[2]) + int ceph_lutimes(ceph_mount_info *cmount, const char *path, timeval times[2]) + int ceph_futimes(ceph_mount_info *cmount, int fd, timeval times[2]) + int ceph_futimens(ceph_mount_info *cmount, int fd, timespec times[2]) + int ceph_get_file_replication(ceph_mount_info *cmount, int fh) + int ceph_get_path_replication(ceph_mount_info *cmount, const char *path) + int ceph_get_pool_id(ceph_mount_info *cmount, const char *pool_name) + int ceph_get_pool_replication(ceph_mount_info *cmount, int pool_id) + int ceph_debug_get_fd_caps(ceph_mount_info *cmount, int fd) + int ceph_debug_get_file_caps(ceph_mount_info *cmount, const char *path) + uint32_t ceph_get_cap_return_timeout(ceph_mount_info *cmount) + void ceph_set_uuid(ceph_mount_info *cmount, const char *uuid) + void ceph_set_session_timeout(ceph_mount_info *cmount, unsigned timeout) + int ceph_get_file_layout(ceph_mount_info *cmount, int fh, int *stripe_unit, int *stripe_count, int *object_size, int *pg_pool) + int ceph_get_file_pool_name(ceph_mount_info *cmount, int fh, char *buf, size_t buflen) 
+ int ceph_get_default_data_pool_name(ceph_mount_info *cmount, char *buf, size_t buflen) diff --git a/src/pybind/cephfs/cephfs.pyx b/src/pybind/cephfs/cephfs.pyx index c40c2eb0600..c7342d70fef 100644 --- a/src/pybind/cephfs/cephfs.pyx +++ b/src/pybind/cephfs/cephfs.pyx @@ -7,7 +7,15 @@ from libc cimport errno from libc.stdint cimport * from libc.stdlib cimport malloc, realloc, free -cimport rados +from types cimport * +IF BUILD_DOC: + include "mock_cephfs.pxi" + cdef class Rados: + cdef: + rados_t cluster +ELSE: + from c_cephfs cimport * + from rados cimport Rados from collections import namedtuple from datetime import datetime @@ -58,198 +66,6 @@ cdef extern from "Python.h": void PyEval_InitThreads() -cdef extern from "sys/statvfs.h": - cdef struct statvfs: - unsigned long int f_bsize - unsigned long int f_frsize - unsigned long int f_blocks - unsigned long int f_bfree - unsigned long int f_bavail - unsigned long int f_files - unsigned long int f_ffree - unsigned long int f_favail - unsigned long int f_fsid - unsigned long int f_flag - unsigned long int f_namemax - unsigned long int f_padding[32] - - -IF UNAME_SYSNAME == "FreeBSD" or UNAME_SYSNAME == "Darwin": - cdef extern from "dirent.h": - cdef struct dirent: - long int d_ino - unsigned short int d_reclen - unsigned char d_type - char d_name[256] -ELSE: - cdef extern from "dirent.h": - cdef struct dirent: - long int d_ino - unsigned long int d_off - unsigned short int d_reclen - unsigned char d_type - char d_name[256] - - -cdef extern from "time.h": - ctypedef long int time_t - -cdef extern from "time.h": - cdef struct timespec: - time_t tv_sec - long int tv_nsec - -cdef extern from "<sys/uio.h>": - cdef struct iovec: - void *iov_base - size_t iov_len - -cdef extern from "sys/types.h": - ctypedef unsigned long mode_t - ctypedef unsigned long dev_t - -cdef extern from "<utime.h>": - cdef struct utimbuf: - time_t actime - time_t modtime - -cdef extern from "sys/time.h": - cdef struct timeval: - long tv_sec - long 
tv_usec - -cdef extern from "cephfs/ceph_ll_client.h": - cdef struct statx "ceph_statx": - uint32_t stx_mask - uint32_t stx_blksize - uint32_t stx_nlink - uint32_t stx_uid - uint32_t stx_gid - uint16_t stx_mode - uint64_t stx_ino - uint64_t stx_size - uint64_t stx_blocks - uint64_t stx_dev - uint64_t stx_rdev - timespec stx_atime - timespec stx_ctime - timespec stx_mtime - timespec stx_btime - uint64_t stx_version - -cdef extern from "cephfs/libcephfs.h" nogil: - cdef struct ceph_mount_info: - pass - - cdef struct ceph_dir_result: - pass - - ctypedef void* rados_t - - const char *ceph_version(int *major, int *minor, int *patch) - - int ceph_create(ceph_mount_info **cmount, const char * const id) - int ceph_create_from_rados(ceph_mount_info **cmount, rados_t cluster) - int ceph_init(ceph_mount_info *cmount) - void ceph_shutdown(ceph_mount_info *cmount) - - int ceph_getaddrs(ceph_mount_info* cmount, char** addrs) - int ceph_conf_read_file(ceph_mount_info *cmount, const char *path_list) - int ceph_conf_parse_argv(ceph_mount_info *cmount, int argc, const char **argv) - int ceph_conf_get(ceph_mount_info *cmount, const char *option, char *buf, size_t len) - int ceph_conf_set(ceph_mount_info *cmount, const char *option, const char *value) - - int ceph_mount(ceph_mount_info *cmount, const char *root) - int ceph_select_filesystem(ceph_mount_info *cmount, const char *fs_name) - int ceph_unmount(ceph_mount_info *cmount) - int ceph_abort_conn(ceph_mount_info *cmount) - uint64_t ceph_get_instance_id(ceph_mount_info *cmount) - int ceph_fstatx(ceph_mount_info *cmount, int fd, statx *stx, unsigned want, unsigned flags) - int ceph_statx(ceph_mount_info *cmount, const char *path, statx *stx, unsigned want, unsigned flags) - int ceph_statfs(ceph_mount_info *cmount, const char *path, statvfs *stbuf) - - int ceph_setattrx(ceph_mount_info *cmount, const char *relpath, statx *stx, int mask, int flags) - int ceph_fsetattrx(ceph_mount_info *cmount, int fd, statx *stx, int mask) - int 
ceph_mds_command(ceph_mount_info *cmount, const char *mds_spec, const char **cmd, size_t cmdlen, - const char *inbuf, size_t inbuflen, char **outbuf, size_t *outbuflen, - char **outs, size_t *outslen) - int ceph_rename(ceph_mount_info *cmount, const char *from_, const char *to) - int ceph_link(ceph_mount_info *cmount, const char *existing, const char *newname) - int ceph_unlink(ceph_mount_info *cmount, const char *path) - int ceph_symlink(ceph_mount_info *cmount, const char *existing, const char *newname) - int ceph_readlink(ceph_mount_info *cmount, const char *path, char *buf, int64_t size) - int ceph_setxattr(ceph_mount_info *cmount, const char *path, const char *name, - const void *value, size_t size, int flags) - int ceph_fsetxattr(ceph_mount_info *cmount, int fd, const char *name, - const void *value, size_t size, int flags) - int ceph_lsetxattr(ceph_mount_info *cmount, const char *path, const char *name, - const void *value, size_t size, int flags) - int ceph_getxattr(ceph_mount_info *cmount, const char *path, const char *name, - void *value, size_t size) - int ceph_fgetxattr(ceph_mount_info *cmount, int fd, const char *name, - void *value, size_t size) - int ceph_lgetxattr(ceph_mount_info *cmount, const char *path, const char *name, - void *value, size_t size) - int ceph_removexattr(ceph_mount_info *cmount, const char *path, const char *name) - int ceph_fremovexattr(ceph_mount_info *cmount, int fd, const char *name) - int ceph_lremovexattr(ceph_mount_info *cmount, const char *path, const char *name) - int ceph_listxattr(ceph_mount_info *cmount, const char *path, char *list, size_t size) - int ceph_flistxattr(ceph_mount_info *cmount, int fd, char *list, size_t size) - int ceph_llistxattr(ceph_mount_info *cmount, const char *path, char *list, size_t size) - int ceph_write(ceph_mount_info *cmount, int fd, const char *buf, int64_t size, int64_t offset) - int ceph_pwritev(ceph_mount_info *cmount, int fd, iovec *iov, int iovcnt, int64_t offset) - int 
ceph_read(ceph_mount_info *cmount, int fd, char *buf, int64_t size, int64_t offset) - int ceph_preadv(ceph_mount_info *cmount, int fd, iovec *iov, int iovcnt, int64_t offset) - int ceph_flock(ceph_mount_info *cmount, int fd, int operation, uint64_t owner) - int ceph_mknod(ceph_mount_info *cmount, const char *path, mode_t mode, dev_t rdev) - int ceph_close(ceph_mount_info *cmount, int fd) - int ceph_open(ceph_mount_info *cmount, const char *path, int flags, mode_t mode) - int ceph_mkdir(ceph_mount_info *cmount, const char *path, mode_t mode) - int ceph_mkdirs(ceph_mount_info *cmount, const char *path, mode_t mode) - int ceph_closedir(ceph_mount_info *cmount, ceph_dir_result *dirp) - int ceph_opendir(ceph_mount_info *cmount, const char *name, ceph_dir_result **dirpp) - void ceph_rewinddir(ceph_mount_info *cmount, ceph_dir_result *dirp) - int64_t ceph_telldir(ceph_mount_info *cmount, ceph_dir_result *dirp) - void ceph_seekdir(ceph_mount_info *cmount, ceph_dir_result *dirp, int64_t offset) - int ceph_chdir(ceph_mount_info *cmount, const char *path) - dirent * ceph_readdir(ceph_mount_info *cmount, ceph_dir_result *dirp) - int ceph_rmdir(ceph_mount_info *cmount, const char *path) - const char* ceph_getcwd(ceph_mount_info *cmount) - int ceph_sync_fs(ceph_mount_info *cmount) - int ceph_fsync(ceph_mount_info *cmount, int fd, int syncdataonly) - int ceph_lazyio(ceph_mount_info *cmount, int fd, int enable) - int ceph_lazyio_propagate(ceph_mount_info *cmount, int fd, int64_t offset, size_t count) - int ceph_lazyio_synchronize(ceph_mount_info *cmount, int fd, int64_t offset, size_t count) - int ceph_fallocate(ceph_mount_info *cmount, int fd, int mode, int64_t offset, int64_t length) - int ceph_conf_parse_argv(ceph_mount_info *cmount, int argc, const char **argv) - int ceph_chmod(ceph_mount_info *cmount, const char *path, mode_t mode) - int ceph_fchmod(ceph_mount_info *cmount, int fd, mode_t mode) - int ceph_chown(ceph_mount_info *cmount, const char *path, int uid, int gid) - 
int ceph_lchown(ceph_mount_info *cmount, const char *path, int uid, int gid) - int ceph_fchown(ceph_mount_info *cmount, int fd, int uid, int gid) - int64_t ceph_lseek(ceph_mount_info *cmount, int fd, int64_t offset, int whence) - void ceph_buffer_free(char *buf) - mode_t ceph_umask(ceph_mount_info *cmount, mode_t mode) - int ceph_utime(ceph_mount_info *cmount, const char *path, utimbuf *buf) - int ceph_futime(ceph_mount_info *cmount, int fd, utimbuf *buf) - int ceph_utimes(ceph_mount_info *cmount, const char *path, timeval times[2]) - int ceph_lutimes(ceph_mount_info *cmount, const char *path, timeval times[2]) - int ceph_futimes(ceph_mount_info *cmount, int fd, timeval times[2]) - int ceph_futimens(ceph_mount_info *cmount, int fd, timespec times[2]) - int ceph_get_file_replication(ceph_mount_info *cmount, int fh) - int ceph_get_path_replication(ceph_mount_info *cmount, const char *path) - int ceph_get_pool_id(ceph_mount_info *cmount, const char *pool_name) - int ceph_get_pool_replication(ceph_mount_info *cmount, int pool_id) - int ceph_debug_get_fd_caps(ceph_mount_info *cmount, int fd) - int ceph_debug_get_file_caps(ceph_mount_info *cmount, const char *path) - uint32_t ceph_get_cap_return_timeout(ceph_mount_info *cmount) - void ceph_set_uuid(ceph_mount_info *cmount, const char *uuid) - void ceph_set_session_timeout(ceph_mount_info *cmount, unsigned timeout) - int ceph_get_file_layout(ceph_mount_info *cmount, int fh, int *stripe_unit, int *stripe_count, int *object_size, int *pg_pool) - int ceph_get_file_pool_name(ceph_mount_info *cmount, int fh, char *buf, size_t buflen) - int ceph_get_default_data_pool_name(ceph_mount_info *cmount, char *buf, size_t buflen) - - class Error(Exception): def get_error_code(self): return 1 @@ -595,7 +411,7 @@ cdef class LibCephFS(object): else: self.create(conf, conffile, auth_id) - def create_with_rados(self, rados.Rados rados_inst): + def create_with_rados(self, Rados rados_inst): cdef int ret with nogil: ret = 
ceph_create_from_rados(&self.cluster, rados_inst.cluster) @@ -2167,7 +1983,7 @@ cdef class LibCephFS(object): ret = ceph_readlink(self.cluster, _path, buf, _size) if ret < 0: raise make_ex(ret, "error in readlink") - return buf + return buf[:ret] finally: free(buf) diff --git a/src/pybind/cephfs/mock_cephfs.pxi b/src/pybind/cephfs/mock_cephfs.pxi new file mode 100644 index 00000000000..c1c93ac100a --- /dev/null +++ b/src/pybind/cephfs/mock_cephfs.pxi @@ -0,0 +1,224 @@ +# cython: embedsignature=True + +from libc.stdint cimport * +from types cimport timespec + + +cdef: + cdef struct statx "ceph_statx": + uint32_t stx_mask + uint32_t stx_blksize + uint32_t stx_nlink + uint32_t stx_uid + uint32_t stx_gid + uint16_t stx_mode + uint64_t stx_ino + uint64_t stx_size + uint64_t stx_blocks + uint64_t stx_dev + uint64_t stx_rdev + timespec stx_atime + timespec stx_ctime + timespec stx_mtime + timespec stx_btime + uint64_t stx_version + +cdef nogil: + cdef struct ceph_mount_info: + int dummy + + cdef struct ceph_dir_result: + int dummy + + ctypedef void* rados_t + + const char *ceph_version(int *major, int *minor, int *patch): + pass + + int ceph_create(ceph_mount_info **cmount, const char * const id): + pass + int ceph_create_from_rados(ceph_mount_info **cmount, rados_t cluster): + pass + int ceph_init(ceph_mount_info *cmount): + pass + void ceph_shutdown(ceph_mount_info *cmount): + pass + + int ceph_getaddrs(ceph_mount_info* cmount, char** addrs): + pass + int ceph_conf_read_file(ceph_mount_info *cmount, const char *path_list): + pass + int ceph_conf_parse_argv(ceph_mount_info *cmount, int argc, const char **argv): + pass + int ceph_conf_get(ceph_mount_info *cmount, const char *option, char *buf, size_t len): + pass + int ceph_conf_set(ceph_mount_info *cmount, const char *option, const char *value): + pass + + int ceph_mount(ceph_mount_info *cmount, const char *root): + pass + int ceph_select_filesystem(ceph_mount_info *cmount, const char *fs_name): + pass + int 
ceph_unmount(ceph_mount_info *cmount): + pass + int ceph_abort_conn(ceph_mount_info *cmount): + pass + uint64_t ceph_get_instance_id(ceph_mount_info *cmount): + pass + int ceph_fstatx(ceph_mount_info *cmount, int fd, statx *stx, unsigned want, unsigned flags): + pass + int ceph_statx(ceph_mount_info *cmount, const char *path, statx *stx, unsigned want, unsigned flags): + pass + int ceph_statfs(ceph_mount_info *cmount, const char *path, statvfs *stbuf): + pass + + int ceph_setattrx(ceph_mount_info *cmount, const char *relpath, statx *stx, int mask, int flags): + pass + int ceph_fsetattrx(ceph_mount_info *cmount, int fd, statx *stx, int mask): + pass + int ceph_mds_command(ceph_mount_info *cmount, const char *mds_spec, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen): + pass + int ceph_rename(ceph_mount_info *cmount, const char *from_, const char *to): + pass + int ceph_link(ceph_mount_info *cmount, const char *existing, const char *newname): + pass + int ceph_unlink(ceph_mount_info *cmount, const char *path): + pass + int ceph_symlink(ceph_mount_info *cmount, const char *existing, const char *newname): + pass + int ceph_readlink(ceph_mount_info *cmount, const char *path, char *buf, int64_t size): + pass + int ceph_setxattr(ceph_mount_info *cmount, const char *path, const char *name, + const void *value, size_t size, int flags): + pass + int ceph_fsetxattr(ceph_mount_info *cmount, int fd, const char *name, + const void *value, size_t size, int flags): + pass + int ceph_lsetxattr(ceph_mount_info *cmount, const char *path, const char *name, + const void *value, size_t size, int flags): + pass + int ceph_getxattr(ceph_mount_info *cmount, const char *path, const char *name, + void *value, size_t size): + pass + int ceph_fgetxattr(ceph_mount_info *cmount, int fd, const char *name, + void *value, size_t size): + pass + int ceph_lgetxattr(ceph_mount_info *cmount, const char *path, const 
char *name, + void *value, size_t size): + pass + int ceph_removexattr(ceph_mount_info *cmount, const char *path, const char *name): + pass + int ceph_fremovexattr(ceph_mount_info *cmount, int fd, const char *name): + pass + int ceph_lremovexattr(ceph_mount_info *cmount, const char *path, const char *name): + pass + int ceph_listxattr(ceph_mount_info *cmount, const char *path, char *list, size_t size): + pass + int ceph_flistxattr(ceph_mount_info *cmount, int fd, char *list, size_t size): + pass + int ceph_llistxattr(ceph_mount_info *cmount, const char *path, char *list, size_t size): + pass + int ceph_write(ceph_mount_info *cmount, int fd, const char *buf, int64_t size, int64_t offset): + pass + int ceph_pwritev(ceph_mount_info *cmount, int fd, iovec *iov, int iovcnt, int64_t offset): + pass + int ceph_read(ceph_mount_info *cmount, int fd, char *buf, int64_t size, int64_t offset): + pass + int ceph_preadv(ceph_mount_info *cmount, int fd, iovec *iov, int iovcnt, int64_t offset): + pass + int ceph_flock(ceph_mount_info *cmount, int fd, int operation, uint64_t owner): + pass + int ceph_mknod(ceph_mount_info *cmount, const char *path, mode_t mode, dev_t rdev): + pass + int ceph_close(ceph_mount_info *cmount, int fd): + pass + int ceph_open(ceph_mount_info *cmount, const char *path, int flags, mode_t mode): + pass + int ceph_mkdir(ceph_mount_info *cmount, const char *path, mode_t mode): + pass + int ceph_mkdirs(ceph_mount_info *cmount, const char *path, mode_t mode): + pass + int ceph_closedir(ceph_mount_info *cmount, ceph_dir_result *dirp): + pass + int ceph_opendir(ceph_mount_info *cmount, const char *name, ceph_dir_result **dirpp): + pass + void ceph_rewinddir(ceph_mount_info *cmount, ceph_dir_result *dirp): + pass + int64_t ceph_telldir(ceph_mount_info *cmount, ceph_dir_result *dirp): + pass + void ceph_seekdir(ceph_mount_info *cmount, ceph_dir_result *dirp, int64_t offset): + pass + int ceph_chdir(ceph_mount_info *cmount, const char *path): + pass + dirent * 
ceph_readdir(ceph_mount_info *cmount, ceph_dir_result *dirp): + pass + int ceph_rmdir(ceph_mount_info *cmount, const char *path): + pass + const char* ceph_getcwd(ceph_mount_info *cmount): + pass + int ceph_sync_fs(ceph_mount_info *cmount): + pass + int ceph_fsync(ceph_mount_info *cmount, int fd, int syncdataonly): + pass + int ceph_lazyio(ceph_mount_info *cmount, int fd, int enable): + pass + int ceph_lazyio_propagate(ceph_mount_info *cmount, int fd, int64_t offset, size_t count): + pass + int ceph_lazyio_synchronize(ceph_mount_info *cmount, int fd, int64_t offset, size_t count): + pass + int ceph_fallocate(ceph_mount_info *cmount, int fd, int mode, int64_t offset, int64_t length): + pass + int ceph_chmod(ceph_mount_info *cmount, const char *path, mode_t mode): + pass + int ceph_fchmod(ceph_mount_info *cmount, int fd, mode_t mode): + pass + int ceph_chown(ceph_mount_info *cmount, const char *path, int uid, int gid): + pass + int ceph_lchown(ceph_mount_info *cmount, const char *path, int uid, int gid): + pass + int ceph_fchown(ceph_mount_info *cmount, int fd, int uid, int gid): + pass + int64_t ceph_lseek(ceph_mount_info *cmount, int fd, int64_t offset, int whence): + pass + void ceph_buffer_free(char *buf): + pass + mode_t ceph_umask(ceph_mount_info *cmount, mode_t mode): + pass + int ceph_utime(ceph_mount_info *cmount, const char *path, utimbuf *buf): + pass + int ceph_futime(ceph_mount_info *cmount, int fd, utimbuf *buf): + pass + int ceph_utimes(ceph_mount_info *cmount, const char *path, timeval times[2]): + pass + int ceph_lutimes(ceph_mount_info *cmount, const char *path, timeval times[2]): + pass + int ceph_futimes(ceph_mount_info *cmount, int fd, timeval times[2]): + pass + int ceph_futimens(ceph_mount_info *cmount, int fd, timespec times[2]): + pass + int ceph_get_file_replication(ceph_mount_info *cmount, int fh): + pass + int ceph_get_path_replication(ceph_mount_info *cmount, const char *path): + pass + int ceph_get_pool_id(ceph_mount_info *cmount, const 
char *pool_name): + pass + int ceph_get_pool_replication(ceph_mount_info *cmount, int pool_id): + pass + int ceph_debug_get_fd_caps(ceph_mount_info *cmount, int fd): + pass + int ceph_debug_get_file_caps(ceph_mount_info *cmount, const char *path): + pass + uint32_t ceph_get_cap_return_timeout(ceph_mount_info *cmount): + pass + void ceph_set_uuid(ceph_mount_info *cmount, const char *uuid): + pass + void ceph_set_session_timeout(ceph_mount_info *cmount, unsigned timeout): + pass + int ceph_get_file_layout(ceph_mount_info *cmount, int fh, int *stripe_unit, int *stripe_count, int *object_size, int *pg_pool): + pass + int ceph_get_file_pool_name(ceph_mount_info *cmount, int fh, char *buf, size_t buflen): + pass + int ceph_get_default_data_pool_name(ceph_mount_info *cmount, char *buf, size_t buflen): + pass diff --git a/src/pybind/cephfs/setup.py b/src/pybind/cephfs/setup.py index 4eec949731a..c3d40df05a8 100755 --- a/src/pybind/cephfs/setup.py +++ b/src/pybind/cephfs/setup.py @@ -138,10 +138,16 @@ def check_sanity(): shutil.rmtree(tmp_dir) -if 'BUILD_DOC' in os.environ.keys(): - pass +if 'BUILD_DOC' in os.environ or 'READTHEDOCS' in os.environ: + ext_args = {} + cython_constants = dict(BUILD_DOC=True) + cythonize_args = dict(compile_time_env=cython_constants) elif check_sanity(): - pass + ext_args = get_python_flags(['cephfs']) + cython_constants = dict(BUILD_DOC=False) + include_path = [os.path.join(os.path.dirname(__file__), "..", "rados")] + cythonize_args = dict(compile_time_env=cython_constants, + include_path=include_path) else: sys.exit(1) @@ -192,14 +198,12 @@ setup( Extension( "cephfs", [source], - **get_python_flags(['cephfs']) + **ext_args ) ], compiler_directives={'language_level': sys.version_info.major}, build_dir=os.environ.get("CYTHON_BUILD_DIR", None), - include_path=[ - os.path.join(os.path.dirname(__file__), "..", "rados") - ] + **cythonize_args ), classifiers=[ 'Intended Audience :: Developers', diff --git a/src/pybind/cephfs/types.pxd 
b/src/pybind/cephfs/types.pxd new file mode 100644 index 00000000000..d20ea87dc9c --- /dev/null +++ b/src/pybind/cephfs/types.pxd @@ -0,0 +1,55 @@ +cdef extern from "time.h": + ctypedef long int time_t + cdef struct timespec: + time_t tv_sec + long int tv_nsec + +cdef extern from "<utime.h>": + cdef struct utimbuf: + time_t actime + time_t modtime + +cdef extern from "sys/types.h": + ctypedef unsigned long mode_t + ctypedef unsigned long dev_t + +cdef extern from "sys/time.h": + cdef struct timeval: + long tv_sec + long tv_usec + +cdef extern from "sys/statvfs.h": + cdef struct statvfs: + unsigned long int f_bsize + unsigned long int f_frsize + unsigned long int f_blocks + unsigned long int f_bfree + unsigned long int f_bavail + unsigned long int f_files + unsigned long int f_ffree + unsigned long int f_favail + unsigned long int f_fsid + unsigned long int f_flag + unsigned long int f_namemax + unsigned long int f_padding[32] + +cdef extern from "<sys/uio.h>": + cdef struct iovec: + void *iov_base + size_t iov_len + +IF UNAME_SYSNAME == "FreeBSD" or UNAME_SYSNAME == "Darwin": + cdef extern from "dirent.h": + cdef struct dirent: + long int d_ino + unsigned short int d_reclen + unsigned char d_type + char d_name[256] +ELSE: + cdef extern from "dirent.h": + cdef struct dirent: + long int d_ino + unsigned long int d_off + unsigned short int d_reclen + unsigned char d_type + char d_name[256] diff --git a/src/pybind/mgr/balancer/module.py b/src/pybind/mgr/balancer/module.py index 5fffe01dcb0..acdf936b65e 100644 --- a/src/pybind/mgr/balancer/module.py +++ b/src/pybind/mgr/balancer/module.py @@ -1005,10 +1005,8 @@ class Module(MgrModule): random.shuffle(adjusted_pools) pool_dump = osdmap_dump.get('pools', []) for pool in adjusted_pools: - num_pg = 0 for p in pool_dump: if p['pool_name'] == pool: - num_pg = p['pg_num'] pool_id = p['pool'] break @@ -1023,7 +1021,7 @@ class Module(MgrModule): if s['state_name'] == 'active+clean': num_pg_active_clean += s['count'] break - 
available = left - (num_pg - num_pg_active_clean) + available = min(left, num_pg_active_clean) did = plan.osdmap.calc_pg_upmaps(inc, max_deviation, available, [pool]) total_did += did left -= did diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index 9d8816aa60f..f309504155f 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -21,6 +21,10 @@ SPEC_STORE_PREFIX = "spec." class Inventory: + """ + The inventory stores a HostSpec for all hosts persistently. + """ + def __init__(self, mgr: 'CephadmOrchestrator'): self.mgr = mgr # load inventory @@ -37,25 +41,25 @@ class Inventory: def __contains__(self, host: str) -> bool: return host in self._inventory - def assert_host(self, host): + def assert_host(self, host: str) -> None: if host not in self._inventory: raise OrchestratorError('host %s does not exist' % host) - def add_host(self, spec: HostSpec): + def add_host(self, spec: HostSpec) -> None: self._inventory[spec.hostname] = spec.to_json() self.save() - def rm_host(self, host: str): + def rm_host(self, host: str) -> None: self.assert_host(host) del self._inventory[host] self.save() - def set_addr(self, host, addr): + def set_addr(self, host: str, addr: str) -> None: self.assert_host(host) self._inventory[host]['addr'] = addr self.save() - def add_label(self, host, label): + def add_label(self, host: str, label: str) -> None: self.assert_host(host) if 'labels' not in self._inventory[host]: @@ -64,7 +68,7 @@ class Inventory: self._inventory[host]['labels'].append(label) self.save() - def rm_label(self, host, label): + def rm_label(self, host: str, label: str) -> None: self.assert_host(host) if 'labels' not in self._inventory[host]: @@ -73,7 +77,7 @@ class Inventory: self._inventory[host]['labels'].remove(label) self.save() - def get_addr(self, host) -> str: + def get_addr(self, host: str) -> str: self.assert_host(host) return self._inventory[host].get('addr', host) @@ -85,7 +89,7 @@ class 
Inventory: else: yield h - def spec_from_dict(self, info) -> HostSpec: + def spec_from_dict(self, info: dict) -> HostSpec: hostname = info['hostname'] return HostSpec( hostname, @@ -97,7 +101,7 @@ class Inventory: def all_specs(self) -> List[HostSpec]: return list(map(self.spec_from_dict, self._inventory.values())) - def save(self): + def save(self) -> None: self.mgr.set_store('inventory', json.dumps(self._inventory)) @@ -164,12 +168,44 @@ class SpecStore(): class HostCache(): + """ + HostCache stores different things: + + 1. `daemons`: Deployed daemons O(daemons) + + They're part of the configuration nowadays and need to be + persistent. The name "daemon cache" is unfortunately a bit misleading. + Like for example we really need to know where daemons are deployed on + hosts that are offline. + + 2. `devices`: ceph-volume inventory cache O(hosts) + + As soon as this is populated, it becomes more or less read-only. + + 3. `networks`: network interfaces for each host. O(hosts) + + This is needed in order to deploy MONs. As this is mostly read-only. + + 4. `last_etc_ceph_ceph_conf` O(hosts) + + Stores the last refresh time for the /etc/ceph/ceph.conf. Used + to avoid deploying new configs when failing over to a new mgr. + + 5. `scheduled_daemon_actions`: O(daemons) + + Used to run daemon actions after deploying a daemon. We need to + store it persistently, in order to stay consistent across + MGR failovers. 
+ """ + def __init__(self, mgr): # type: (CephadmOrchestrator) -> None self.mgr: CephadmOrchestrator = mgr self.daemons = {} # type: Dict[str, Dict[str, orchestrator.DaemonDescription]] self.last_daemon_update = {} # type: Dict[str, datetime.datetime] self.devices = {} # type: Dict[str, List[inventory.Device]] + self.facts = {} # type: Dict[str, Dict[str, Any]] + self.last_facts_update = {} # type: Dict[str, datetime.datetime] self.osdspec_previews = {} # type: Dict[str, List[Dict[str, Any]]] self.networks = {} # type: Dict[str, Dict[str, List[str]]] self.last_device_update = {} # type: Dict[str, datetime.datetime] @@ -244,13 +280,18 @@ class HostCache(): self.daemons[host] = dm self.last_daemon_update[host] = datetime.datetime.utcnow() + def update_host_facts(self, host, facts): + # type: (str, Dict[str, Dict[str, Any]]) -> None + self.facts[host] = facts + self.last_facts_update[host] = datetime.datetime.utcnow() + def update_host_devices_networks(self, host, dls, nets): # type: (str, List[inventory.Device], Dict[str,List[str]]) -> None self.devices[host] = dls self.networks[host] = nets self.last_device_update[host] = datetime.datetime.utcnow() - def update_daemon_config_deps(self, host, name, deps, stamp): + def update_daemon_config_deps(self, host: str, name: str, deps: List[str], stamp: datetime.datetime) -> None: self.daemon_config_deps[host][name] = { 'deps': deps, 'last_config': stamp, @@ -289,7 +330,7 @@ class HostCache(): del self.last_device_update[host] self.mgr.event.set() - def distribute_new_registry_login_info(self): + def distribute_new_registry_login_info(self) -> None: self.registry_login_queue = set(self.mgr.inventory.keys()) def save_host(self, host: str) -> None: @@ -303,17 +344,21 @@ class HostCache(): j['last_daemon_update'] = datetime_to_str(self.last_daemon_update[host]) if host in self.last_device_update: j['last_device_update'] = datetime_to_str(self.last_device_update[host]) - for name, dd in self.daemons[host].items(): - 
j['daemons'][name] = dd.to_json() - for d in self.devices[host]: - j['devices'].append(d.to_json()) - j['networks'] = self.networks[host] - for name, depi in self.daemon_config_deps[host].items(): - j['daemon_config_deps'][name] = { - 'deps': depi.get('deps', []), - 'last_config': datetime_to_str(depi['last_config']), - } - if self.osdspec_previews[host]: + if host in self.daemons: + for name, dd in self.daemons[host].items(): + j['daemons'][name] = dd.to_json() + if host in self.devices: + for d in self.devices[host]: + j['devices'].append(d.to_json()) + if host in self.networks: + j['networks'] = self.networks[host] + if host in self.daemon_config_deps: + for name, depi in self.daemon_config_deps[host].items(): + j['daemon_config_deps'][name] = { + 'deps': depi.get('deps', []), + 'last_config': datetime_to_str(depi['last_config']), + } + if host in self.osdspec_previews and self.osdspec_previews[host]: j['osdspec_previews'] = self.osdspec_previews[host] if host in self.last_host_check: @@ -321,7 +366,7 @@ class HostCache(): if host in self.last_etc_ceph_ceph_conf: j['last_etc_ceph_ceph_conf'] = datetime_to_str(self.last_etc_ceph_ceph_conf[host]) - if self.scheduled_daemon_actions.get(host, {}): + if host in self.scheduled_daemon_actions: j['scheduled_daemon_actions'] = self.scheduled_daemon_actions[host] self.mgr.set_store(HOST_CACHE_PREFIX + host, json.dumps(j)) @@ -332,6 +377,10 @@ class HostCache(): del self.daemons[host] if host in self.devices: del self.devices[host] + if host in self.facts: + del self.facts[host] + if host in self.last_facts_update: + del self.last_facts_update[host] if host in self.osdspec_previews: del self.osdspec_previews[host] if host in self.loading_osdspec_preview: @@ -371,7 +420,7 @@ class HostCache(): raise orchestrator.OrchestratorError(f'Unable to find {daemon_name} daemon(s)') def get_daemons_with_volatile_status(self) -> Iterator[Tuple[str, Dict[str, orchestrator.DaemonDescription]]]: - def alter(host, dd_orig: 
orchestrator.DaemonDescription) -> orchestrator.DaemonDescription: + def alter(host: str, dd_orig: orchestrator.DaemonDescription) -> orchestrator.DaemonDescription: dd = copy(dd_orig) if host in self.mgr.offline_hosts: dd.status = -1 @@ -408,7 +457,7 @@ class HostCache(): r.append(name) return r - def get_daemon_last_config_deps(self, host, name) -> Tuple[Optional[List[str]], Optional[datetime.datetime]]: + def get_daemon_last_config_deps(self, host: str, name: str) -> Tuple[Optional[List[str]], Optional[datetime.datetime]]: if host in self.daemon_config_deps: if name in self.daemon_config_deps[host]: return self.daemon_config_deps[host][name].get('deps', []), \ @@ -429,6 +478,17 @@ class HostCache(): return True return False + def host_needs_facts_refresh(self, host): + # type: (str) -> bool + if host in self.mgr.offline_hosts: + logger.debug(f'Host "{host}" marked as offline. Skipping gather facts refresh') + return False + cutoff = datetime.datetime.utcnow() - datetime.timedelta( + seconds=self.mgr.facts_cache_timeout) + if host not in self.last_facts_update or self.last_facts_update[host] < cutoff: + return True + return False + def host_had_daemon_refresh(self, host: str) -> bool: """ ... at least once. @@ -453,7 +513,7 @@ class HostCache(): return True return False - def host_needs_osdspec_preview_refresh(self, host): + def host_needs_osdspec_preview_refresh(self, host: str) -> bool: if host in self.mgr.offline_hosts: logger.debug(f'Host "{host}" marked as offline. 
Skipping osdspec preview refresh') return False @@ -470,7 +530,7 @@ class HostCache(): seconds=self.mgr.host_check_interval) return host not in self.last_host_check or self.last_host_check[host] < cutoff - def host_needs_new_etc_ceph_ceph_conf(self, host: str): + def host_needs_new_etc_ceph_ceph_conf(self, host: str) -> bool: if not self.mgr.manage_etc_ceph_ceph_conf: return False if self.mgr.paused: @@ -488,7 +548,7 @@ class HostCache(): # already up to date: return False - def update_last_etc_ceph_ceph_conf(self, host: str): + def update_last_etc_ceph_ceph_conf(self, host: str) -> None: if not self.mgr.last_monmap: return self.last_etc_ceph_ceph_conf[host] = datetime.datetime.utcnow() @@ -506,12 +566,12 @@ class HostCache(): assert host in self.daemons self.daemons[host][dd.name()] = dd - def rm_daemon(self, host, name): + def rm_daemon(self, host: str, name: str) -> None: if host in self.daemons: if name in self.daemons[host]: del self.daemons[host][name] - def daemon_cache_filled(self): + def daemon_cache_filled(self) -> bool: """ i.e. we have checked the daemons for each hosts at least once. excluding offline hosts. 
@@ -522,7 +582,7 @@ class HostCache(): return all((self.host_had_daemon_refresh(h) or h in self.mgr.offline_hosts) for h in self.get_hosts()) - def schedule_daemon_action(self, host: str, daemon_name: str, action: str): + def schedule_daemon_action(self, host: str, daemon_name: str, action: str) -> None: priorities = { 'start': 1, 'restart': 2, @@ -540,14 +600,14 @@ class HostCache(): self.scheduled_daemon_actions[host] = {} self.scheduled_daemon_actions[host][daemon_name] = action - def rm_scheduled_daemon_action(self, host: str, daemon_name: str): + def rm_scheduled_daemon_action(self, host: str, daemon_name: str) -> None: if host in self.scheduled_daemon_actions: if daemon_name in self.scheduled_daemon_actions[host]: del self.scheduled_daemon_actions[host][daemon_name] if not self.scheduled_daemon_actions[host]: del self.scheduled_daemon_actions[host] - def get_scheduled_daemon_action(self, host, daemon) -> Optional[str]: + def get_scheduled_daemon_action(self, host: str, daemon: str) -> Optional[str]: return self.scheduled_daemon_actions.get(host, {}).get(daemon) @@ -571,12 +631,12 @@ class EventStore(): # limit to five events for now. 
self.events[event.kind_subject()] = self.events[event.kind_subject()][-5:] - def for_service(self, spec: ServiceSpec, level, message) -> None: + def for_service(self, spec: ServiceSpec, level: str, message: str) -> None: e = OrchestratorEvent(datetime.datetime.utcnow(), 'service', spec.service_name(), level, message) self.add(e) - def from_orch_error(self, e: OrchestratorError): + def from_orch_error(self, e: OrchestratorError) -> None: if e.event_subject is not None: self.add(OrchestratorEvent( datetime.datetime.utcnow(), @@ -586,11 +646,11 @@ class EventStore(): str(e) )) - def for_daemon(self, daemon_name, level, message): + def for_daemon(self, daemon_name: str, level: str, message: str) -> None: e = OrchestratorEvent(datetime.datetime.utcnow(), 'daemon', daemon_name, level, message) self.add(e) - def for_daemon_from_exception(self, daemon_name, e: Exception): + def for_daemon_from_exception(self, daemon_name: str, e: Exception) -> None: self.for_daemon( daemon_name, "ERROR", @@ -615,8 +675,8 @@ class EventStore(): for k_s in unknowns: del self.events[k_s] - def get_for_service(self, name) -> List[OrchestratorEvent]: + def get_for_service(self, name: str) -> List[OrchestratorEvent]: return self.events.get('service:' + name, []) - def get_for_daemon(self, name) -> List[OrchestratorEvent]: + def get_for_daemon(self, name: str) -> List[OrchestratorEvent]: return self.events.get('daemon:' + name, []) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 3a923738a5b..223612e5423 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -25,11 +25,13 @@ from ceph.deployment import inventory from ceph.deployment.drive_group import DriveGroupSpec from ceph.deployment.service_spec import \ NFSServiceSpec, ServiceSpec, PlacementSpec, assert_valid_host, \ - CustomContainerSpec + CustomContainerSpec, HostPlacementSpec from cephadm.serve import CephadmServe from cephadm.services.cephadmservice import 
CephadmDaemonSpec from mgr_module import MgrModule, HandleCommandResult +from mgr_util import create_self_signed_cert, verify_tls, ServerConfigException +import secrets import orchestrator from orchestrator import OrchestratorError, OrchestratorValidationError, HostSpec, \ CLICommandMeta, OrchestratorEvent, set_exception_subject, DaemonDescription @@ -39,7 +41,7 @@ from . import remotes from . import utils from .migrations import Migrations from .services.cephadmservice import MonService, MgrService, MdsService, RgwService, \ - RbdMirrorService, CrashService, CephadmService + RbdMirrorService, CrashService, CephadmService, CephadmExporter, CephadmExporterConfig from .services.container import CustomContainerService from .services.iscsi import IscsiService from .services.nfs import NFSService @@ -59,7 +61,7 @@ try: # (https://github.com/alfredodeza/remoto/pull/56) lands from distutils.version import StrictVersion if StrictVersion(remoto.__version__) <= StrictVersion('1.2'): - def remoto_has_connection(self): + def remoto_has_connection(self: Any) -> bool: return self.gateway.hasreceiver() from remoto.backends import BaseConnection @@ -93,7 +95,7 @@ CEPH_TYPES = set(CEPH_UPGRADE_ORDER) class CephadmCompletion(orchestrator.Completion[T]): - def evaluate(self): + def evaluate(self) -> None: self.finalize(None) @@ -104,12 +106,24 @@ def trivial_completion(f: Callable[..., T]) -> Callable[..., CephadmCompletion[T """ @wraps(f) - def wrapper(*args, **kwargs): + def wrapper(*args: Any, **kwargs: Any) -> CephadmCompletion: return CephadmCompletion(on_complete=lambda _: f(*args, **kwargs)) return wrapper +def service_inactive(spec_name: str) -> Callable: + def inner(func: Callable) -> Callable: + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + obj = args[0] + if obj.get_store(f"spec.{spec_name}") is not None: + return 1, "", f"Unable to change configuration of an active service {spec_name}" + return func(*args, **kwargs) + return wrapper + return inner + + 
class ContainerInspectInfo(NamedTuple): image_id: str ceph_version: Optional[str] @@ -143,6 +157,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, 'desc': 'seconds to cache service (daemon) inventory', }, { + 'name': 'facts_cache_timeout', + 'type': 'secs', + 'default': 1 * 60, + 'desc': 'seconds to cache host facts data', + }, + { 'name': 'host_check_interval', 'type': 'secs', 'default': 10 * 60, @@ -274,7 +294,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, } ] - def __init__(self, *args, **kwargs): + def __init__(self, *args: Any, **kwargs: Any): super(CephadmOrchestrator, self).__init__(*args, **kwargs) self._cluster_fsid = self.get('mon_map')['fsid'] self.last_monmap: Optional[datetime.datetime] = None @@ -293,6 +313,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.ssh_config_file = None # type: Optional[str] self.device_cache_timeout = 0 self.daemon_cache_timeout = 0 + self.facts_cache_timeout = 0 self.host_check_interval = 0 self.mode = '' self.container_image_base = '' @@ -336,7 +357,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.upgrade = CephadmUpgrade(self) - self.health_checks = {} + self.health_checks: Dict[str, dict] = {} self.all_progress_references = list() # type: List[orchestrator.ProgressReference] @@ -381,6 +402,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.crash_service = CrashService(self) self.iscsi_service = IscsiService(self) self.container_service = CustomContainerService(self) + self.cephadm_exporter_service = CephadmExporter(self) self.cephadm_services = { 'mon': self.mon_service, 'mgr': self.mgr_service, @@ -396,13 +418,14 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, 'crash': self.crash_service, 'iscsi': self.iscsi_service, 'container': self.container_service, + 'cephadm-exporter': self.cephadm_exporter_service, } self.template = TemplateMgr(self) - self.requires_post_actions = set() + 
self.requires_post_actions: Set[str] = set() - def shutdown(self): + def shutdown(self) -> None: self.log.debug('shutdown') self._worker_pool.close() self._worker_pool.join() @@ -413,22 +436,25 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, assert service_type in ServiceSpec.KNOWN_SERVICE_TYPES return self.cephadm_services[service_type] - def _kick_serve_loop(self): + def _kick_serve_loop(self) -> None: self.log.debug('_kick_serve_loop') self.event.set() # function responsible for logging single host into custom registry - def _registry_login(self, host, url, username, password): + def _registry_login(self, host: str, url: Optional[str], username: Optional[str], password: Optional[str]) -> Optional[str]: self.log.debug(f"Attempting to log host {host} into custom registry @ {url}") # want to pass info over stdin rather than through normal list of args - args_str = ("{\"url\": \"" + url + "\", \"username\": \"" + username + "\", " - " \"password\": \"" + password + "\"}") + args_str = json.dumps({ + 'url': url, + 'username': username, + 'password': password, + }) out, err, code = self._run_cephadm( host, 'mon', 'registry-login', ['--registry-json', '-'], stdin=args_str, error_ok=True) if code: return f"Host {host} failed to login to {url} as {username} with given password" - return + return None def serve(self) -> None: """ @@ -440,7 +466,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, serve = CephadmServe(self) serve.serve() - def set_container_image(self, entity: str, image): + def set_container_image(self, entity: str, image: str) -> None: self.check_mon_command({ 'prefix': 'config set', 'name': 'container_image', @@ -448,7 +474,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, 'who': entity, }) - def config_notify(self): + def config_notify(self) -> None: """ This method is called whenever one of our config options is changed. 
@@ -468,7 +494,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.event.set() - def notify(self, notify_type, notify_id): + def notify(self, notify_type: str, notify_id: Optional[str]) -> None: if notify_type == "mon_map": # get monmap mtime so we can refresh configs when mons change monmap = self.get('mon_map') @@ -482,7 +508,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, if notify_type == "pg_summary": self._trigger_osd_removal() - def _trigger_osd_removal(self): + def _trigger_osd_removal(self) -> None: data = self.get("osd_stats") for osd in data.get('osd_stats', []): if osd.get('num_pgs') == 0: @@ -494,7 +520,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, # start the process self.rm_util.process_removal_queue() - def pause(self): + def pause(self) -> None: if not self.paused: self.log.info('Paused') self.set_store('pause', 'true') @@ -502,7 +528,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, # wake loop so we update the health status self._kick_serve_loop() - def resume(self): + def resume(self) -> None: if self.paused: self.log.info('Resumed') self.paused = False @@ -520,7 +546,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, suffix = daemon_type not in [ 'mon', 'crash', 'nfs', 'prometheus', 'node-exporter', 'grafana', 'alertmanager', - 'container' + 'container', 'cephadm-exporter', ] if forcename: if len([d for d in existing if d.daemon_id == forcename]): @@ -547,7 +573,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, continue return name - def _reconfig_ssh(self): + def _reconfig_ssh(self) -> None: temp_files = [] # type: list ssh_options = [] # type: List[str] @@ -598,7 +624,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self._reset_cons() - def validate_ssh_config_content(self, ssh_config): + def validate_ssh_config_content(self, ssh_config: Optional[str]) -> None: if ssh_config is None or 
len(ssh_config.strip()) == 0: raise OrchestratorValidationError('ssh_config cannot be empty') # StrictHostKeyChecking is [yes|no] ? @@ -609,38 +635,38 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, if 'ask' in s.lower(): raise OrchestratorValidationError(f'ssh_config cannot contain: \'{s}\'') - def validate_ssh_config_fname(self, ssh_config_fname): + def validate_ssh_config_fname(self, ssh_config_fname: str) -> None: if not os.path.isfile(ssh_config_fname): raise OrchestratorValidationError("ssh_config \"{}\" does not exist".format( ssh_config_fname)) - def _reset_con(self, host): + def _reset_con(self, host: str) -> None: conn, r = self._cons.get(host, (None, None)) if conn: self.log.debug('_reset_con close %s' % host) conn.exit() del self._cons[host] - def _reset_cons(self): + def _reset_cons(self) -> None: for host, conn_and_r in self._cons.items(): self.log.debug('_reset_cons close %s' % host) conn, r = conn_and_r conn.exit() self._cons = {} - def offline_hosts_remove(self, host): + def offline_hosts_remove(self, host: str) -> None: if host in self.offline_hosts: self.offline_hosts.remove(host) @staticmethod - def can_run(): + def can_run() -> Tuple[bool, str]: if remoto is not None: return True, "" else: return False, "loading remoto library:{}".format( remoto_import_error) - def available(self): + def available(self) -> Tuple[bool, str]: """ The cephadm orchestrator is always available. """ @@ -651,7 +677,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, return False, 'SSH keys not set. Use `ceph cephadm set-priv-key` and `ceph cephadm set-pub-key` or `ceph cephadm generate-key`' return True, '' - def process(self, completions): + def process(self, completions: List[CephadmCompletion]) -> None: """ Does nothing, as completions are processed in another thread. 
""" @@ -665,7 +691,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, @orchestrator._cli_write_command( prefix='cephadm set-ssh-config', desc='Set the ssh_config file (use -i <ssh_config>)') - def _set_ssh_config(self, inbuf=None): + def _set_ssh_config(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]: """ Set an ssh_config file provided from stdin """ @@ -680,7 +706,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, @orchestrator._cli_write_command( prefix='cephadm clear-ssh-config', desc='Clear the ssh_config file') - def _clear_ssh_config(self): + def _clear_ssh_config(self) -> Tuple[int, str, str]: """ Clear the ssh_config file provided from stdin """ @@ -694,7 +720,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, prefix='cephadm get-ssh-config', desc='Returns the ssh config as used by cephadm' ) - def _get_ssh_config(self): + def _get_ssh_config(self) -> HandleCommandResult: if self.ssh_config_file: self.validate_ssh_config_fname(self.ssh_config_file) with open(self.ssh_config_file) as f: @@ -707,7 +733,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, @orchestrator._cli_write_command( 'cephadm generate-key', desc='Generate a cluster SSH key (if not present)') - def _generate_key(self): + def _generate_key(self) -> Tuple[int, str, str]: if not self.ssh_pub or not self.ssh_key: self.log.info('Generating ssh key...') tmp_dir = TemporaryDirectory() @@ -735,7 +761,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, @orchestrator._cli_write_command( 'cephadm set-priv-key', desc='Set cluster SSH private key (use -i <private_key>)') - def _set_priv_key(self, inbuf=None): + def _set_priv_key(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]: if inbuf is None or len(inbuf) == 0: return -errno.EINVAL, "", "empty private ssh key provided" if inbuf == self.ssh_key: @@ -748,7 +774,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, 
@orchestrator._cli_write_command( 'cephadm set-pub-key', desc='Set cluster SSH public key (use -i <public_key>)') - def _set_pub_key(self, inbuf=None): + def _set_pub_key(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]: if inbuf is None or len(inbuf) == 0: return -errno.EINVAL, "", "empty public ssh key provided" if inbuf == self.ssh_pub: @@ -761,7 +787,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, @orchestrator._cli_write_command( 'cephadm clear-key', desc='Clear cluster SSH key') - def _clear_key(self): + def _clear_key(self) -> Tuple[int, str, str]: self.set_store('ssh_identity_key', None) self.set_store('ssh_identity_pub', None) self._reconfig_ssh() @@ -771,7 +797,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, @orchestrator._cli_read_command( 'cephadm get-pub-key', desc='Show SSH public key for connecting to cluster hosts') - def _get_pub_key(self): + def _get_pub_key(self) -> Tuple[int, str, str]: if self.ssh_pub: return 0, self.ssh_pub, '' else: @@ -780,14 +806,14 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, @orchestrator._cli_read_command( 'cephadm get-user', desc='Show user for SSHing to cluster hosts') - def _get_user(self): + def _get_user(self) -> Tuple[int, str, str]: return 0, self.ssh_user, '' @orchestrator._cli_read_command( 'cephadm set-user', 'name=user,type=CephString', 'Set user for SSHing to cluster hosts, passwordless sudo will be needed for non-root users') - def set_ssh_user(self, user): + def set_ssh_user(self, user: str) -> Tuple[int, str, str]: current_user = self.ssh_user if user == current_user: return 0, "value unchanged", "" @@ -815,12 +841,13 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, "name=username,type=CephString,req=false " "name=password,type=CephString,req=false", 'Set custom registry login info by providing url, username and password or json file with login info (-i <file>)') - def registry_login(self, url=None, username=None, 
password=None, inbuf=None): + def registry_login(self, url: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, inbuf: Optional[str] = None) -> Tuple[int, str, str]: # if password not given in command line, get it through file input if not (url and username and password) and (inbuf is None or len(inbuf) == 0): return -errno.EINVAL, "", ("Invalid arguments. Please provide arguments <url> <username> <password> " "or -i <login credentials json file>") elif not (url and username and password): + assert isinstance(inbuf, str) login_info = json.loads(inbuf) if "url" in login_info and "username" in login_info and "password" in login_info: url = login_info["url"] @@ -858,7 +885,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, 'name=host,type=CephString ' 'name=addr,type=CephString,req=false', 'Check whether we can access and manage a remote host') - def check_host(self, host, addr=None): + def check_host(self, host: str, addr: Optional[str] = None) -> Tuple[int, str, str]: try: out, err, code = self._run_cephadm(host, cephadmNoImage, 'check-host', ['--expect-hostname', host], @@ -876,14 +903,14 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, for item in self.health_checks['CEPHADM_HOST_CHECK_FAILED']['detail']: if item.startswith('host %s ' % host): self.event.set() - return 0, '%s (%s) ok' % (host, addr), err + return 0, '%s (%s) ok' % (host, addr), '\n'.join(err) @orchestrator._cli_read_command( 'cephadm prepare-host', 'name=host,type=CephString ' 'name=addr,type=CephString,req=false', 'Prepare a remote host for use with cephadm') - def _prepare_host(self, host, addr=None): + def _prepare_host(self, host: str, addr: Optional[str] = None) -> Tuple[int, str, str]: out, err, code = self._run_cephadm(host, cephadmNoImage, 'prepare-host', ['--expect-hostname', host], addr=addr, @@ -896,7 +923,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, for item in 
self.health_checks['CEPHADM_HOST_CHECK_FAILED']['detail']: if item.startswith('host %s ' % host): self.event.set() - return 0, '%s (%s) ok' % (host, addr), err + return 0, '%s (%s) ok' % (host, addr), '\n'.join(err) @orchestrator._cli_write_command( prefix='cephadm set-extra-ceph-conf', @@ -904,7 +931,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, "Mainly a workaround, till `config generate-minimal-conf` generates\n" "a complete ceph.conf.\n\n" "Warning: this is a dangerous operation.") - def _set_extra_ceph_conf(self, inbuf=None) -> HandleCommandResult: + def _set_extra_ceph_conf(self, inbuf: Optional[str] = None) -> HandleCommandResult: if inbuf: # sanity check. cp = ConfigParser() @@ -924,6 +951,94 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, def _get_extra_ceph_conf(self) -> HandleCommandResult: return HandleCommandResult(stdout=self.extra_ceph_conf().conf) + def _set_exporter_config(self, config: Dict[str, str]) -> None: + self.set_store('exporter_config', json.dumps(config)) + + def _get_exporter_config(self) -> Dict[str, str]: + cfg_str = self.get_store('exporter_config') + return json.loads(cfg_str) if cfg_str else {} + + def _set_exporter_option(self, option: str, value: Optional[str] = None) -> None: + kv_option = f'exporter_{option}' + self.set_store(kv_option, value) + + def _get_exporter_option(self, option: str) -> Optional[str]: + kv_option = f'exporter_{option}' + return self.get_store(kv_option) + + @orchestrator._cli_write_command( + prefix='cephadm generate-exporter-config', + desc='Generate default SSL crt/key and token for cephadm exporter daemons') + @service_inactive('cephadm-exporter') + def _generate_exporter_config(self) -> Tuple[int, str, str]: + self._set_exporter_defaults() + self.log.info('Default settings created for cephadm exporter(s)') + return 0, "", "" + + def _set_exporter_defaults(self) -> None: + crt, key = self._generate_exporter_ssl() + token = self._generate_exporter_token() + 
self._set_exporter_config({ + "crt": crt, + "key": key, + "token": token, + "port": CephadmExporterConfig.DEFAULT_PORT + }) + self._set_exporter_option('enabled', 'true') + + def _generate_exporter_ssl(self) -> Tuple[str, str]: + return create_self_signed_cert(dname={"O": "Ceph", "OU": "cephadm-exporter"}) + + def _generate_exporter_token(self) -> str: + return secrets.token_hex(32) + + @orchestrator._cli_write_command( + prefix='cephadm clear-exporter-config', + desc='Clear the SSL configuration used by cephadm exporter daemons') + @service_inactive('cephadm-exporter') + def _clear_exporter_config(self) -> Tuple[int, str, str]: + self._clear_exporter_config_settings() + self.log.info('Cleared cephadm exporter configuration') + return 0, "", "" + + def _clear_exporter_config_settings(self) -> None: + self.set_store('exporter_config', None) + self._set_exporter_option('enabled', None) + + @orchestrator._cli_write_command( + prefix='cephadm set-exporter-config', + desc='Set custom cephadm-exporter configuration from a json file (-i <file>). 
JSON must contain crt, key, token and port') + @service_inactive('cephadm-exporter') + def _store_exporter_config(self, inbuf: Optional[str] = None) -> Tuple[int, str, str]: + + if not inbuf: + return 1, "", "JSON configuration has not been provided (-i <filename>)" + + cfg = CephadmExporterConfig(self) + rc, reason = cfg.load_from_json(inbuf) + if rc: + return 1, "", reason + + rc, reason = cfg.validate_config() + if rc: + return 1, "", reason + + self._set_exporter_config({ + "crt": cfg.crt, + "key": cfg.key, + "token": cfg.token, + "port": cfg.port + }) + self.log.info("Loaded and verified the TLS configuration") + return 0, "", "" + + @orchestrator._cli_read_command( + 'cephadm get-exporter-config', + desc='Show the current cephadm-exporter configuraion (JSON)') + def _show_exporter_config(self) -> Tuple[int, str, str]: + cfg = self._get_exporter_config() + return 0, json.dumps(cfg, indent=2), "" + class ExtraCephConf(NamedTuple): conf: str last_modified: Optional[datetime.datetime] @@ -935,7 +1050,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, try: j = json.loads(data) except ValueError: - self.log.exception('unable to laod extra_ceph_conf') + msg = 'Unable to load extra_ceph_conf: Cannot decode JSON' + self.log.exception('%s: \'%s\'', msg, data) return CephadmOrchestrator.ExtraCephConf('', None) return CephadmOrchestrator.ExtraCephConf(j['conf'], str_to_datetime(j['last_modified'])) @@ -945,7 +1061,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, return False return conf.last_modified > dt - def _get_connection(self, host: str): + def _get_connection(self, host: str) -> Tuple['remoto.backends.BaseConnection', + 'remoto.backends.LegacyModuleExecute']: """ Setup a connection for running commands on remote host. 
""" @@ -972,7 +1089,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, return conn, r - def _executable_path(self, conn, executable): + def _executable_path(self, conn: 'remoto.backends.BaseConnection', executable: str) -> str: """ Remote validator that accepts a connection object to ensure that a certain executable is available returning its full path if so. @@ -1083,11 +1200,18 @@ To check that the host is reachable: :env_vars: in format -> [KEY=VALUE, ..] """ + self.log.debug(f"_run_cephadm : command = {command}") + self.log.debug(f"_run_cephadm : args = {args}") + + bypass_image = ('cephadm-exporter',) + with self._remote_connection(host, addr) as tpl: conn, connr = tpl assert image or entity - if not image and entity is not cephadmNoImage: - image = self._get_container_image(entity) + # Skip the image check for daemons deployed that are not ceph containers + if not str(entity).startswith(bypass_image): + if not image and entity is not cephadmNoImage: + image = self._get_container_image(entity) final_args = [] @@ -1212,7 +1336,7 @@ To check that the host is reachable: return "Removed host '{}'".format(host) @trivial_completion - def update_host_addr(self, host, addr) -> str: + def update_host_addr(self, host: str, addr: str) -> str: self.inventory.set_addr(host, addr) self._reset_con(host) self.event.set() # refresh stray health check @@ -1231,19 +1355,19 @@ To check that the host is reachable: return list(self.inventory.all_specs()) @trivial_completion - def add_host_label(self, host, label) -> str: + def add_host_label(self, host: str, label: str) -> str: self.inventory.add_label(host, label) self.log.info('Added label %s to host %s' % (label, host)) return 'Added label %s to host %s' % (label, host) @trivial_completion - def remove_host_label(self, host, label) -> str: + def remove_host_label(self, host: str, label: str) -> str: self.inventory.rm_label(host, label) self.log.info('Removed label %s to host %s' % (label, host)) return 'Removed 
label %s from host %s' % (label, host) @trivial_completion - def host_ok_to_stop(self, hostname: str): + def host_ok_to_stop(self, hostname: str) -> str: if hostname not in self.cache.get_hosts(): raise OrchestratorError(f'Cannot find host "{hostname}"') @@ -1274,7 +1398,7 @@ To check that the host is reachable: config += '\n\n' + extra.strip() + '\n' return config - def _invalidate_daemons_and_kick_serve(self, filter_host=None): + def _invalidate_daemons_and_kick_serve(self, filter_host: Optional[str] = None) -> None: if filter_host: self.cache.invalidate_host_daemons(filter_host) else: @@ -1399,7 +1523,7 @@ To check that the host is reachable: return result @trivial_completion - def service_action(self, action, service_name) -> List[str]: + def service_action(self, action: str, service_name: str) -> List[str]: args = [] for host, dm in self.cache.daemons.items(): for name, d in dm.items(): @@ -1410,14 +1534,14 @@ To check that the host is reachable: return self._daemon_actions(args) @forall_hosts - def _daemon_actions(self, daemon_type, daemon_id, host, action) -> str: + def _daemon_actions(self, daemon_type: str, daemon_id: str, host: str, action: str) -> str: with set_exception_subject('daemon', DaemonDescription( daemon_type=daemon_type, daemon_id=daemon_id ).name()): return self._daemon_action(daemon_type, daemon_id, host, action) - def _daemon_action(self, daemon_type, daemon_id, host, action, image=None) -> str: + def _daemon_action(self, daemon_type: str, daemon_id: str, host: str, action: str, image: Optional[str] = None) -> str: daemon_spec: CephadmDaemonSpec = CephadmDaemonSpec( host=host, daemon_id=daemon_id, @@ -1453,7 +1577,7 @@ To check that the host is reachable: self.events.for_daemon(name, 'INFO', msg) return msg - def _daemon_action_set_image(self, action: str, image: Optional[str], daemon_type: str, daemon_id: str): + def _daemon_action_set_image(self, action: str, image: Optional[str], daemon_type: str, daemon_id: str) -> None: if image is not 
None: if action != 'redeploy': raise OrchestratorError( @@ -1487,7 +1611,7 @@ To check that the host is reachable: def daemon_is_self(self, daemon_type: str, daemon_id: str) -> bool: return daemon_type == 'mgr' and daemon_id == self.get_mgr_id() - def _schedule_daemon_action(self, daemon_name: str, action: str): + def _schedule_daemon_action(self, daemon_name: str, action: str) -> str: dd = self.cache.get_daemon(daemon_name) if action == 'redeploy' and self.daemon_is_self(dd.daemon_type, dd.daemon_id) \ and not self.mgr_service.mgr_map_has_standby(): @@ -1512,19 +1636,22 @@ To check that the host is reachable: return self._remove_daemons(args) @trivial_completion - def remove_service(self, service_name) -> str: + def remove_service(self, service_name: str) -> str: self.log.info('Remove service %s' % service_name) self._trigger_preview_refresh(service_name=service_name) found = self.spec_store.rm(service_name) if found: self._kick_serve_loop() + service = self.cephadm_services.get(service_name, None) + if service: + service.purge() return 'Removed service %s' % service_name else: # must be idempotent: still a success. return f'Failed to remove service. <{service_name}> was not found.' @trivial_completion - def get_inventory(self, host_filter: Optional[orchestrator.InventoryFilter] = None, refresh=False) -> List[orchestrator.InventoryHost]: + def get_inventory(self, host_filter: Optional[orchestrator.InventoryFilter] = None, refresh: bool = False) -> List[orchestrator.InventoryHost]: """ Return the storage inventory of hosts matching the given filter. 
@@ -1553,7 +1680,7 @@ To check that the host is reachable: return result @trivial_completion - def zap_device(self, host, path) -> str: + def zap_device(self, host: str, path: str) -> str: self.log.info('Zap device %s:%s' % (host, path)) out, err, code = self._run_cephadm( host, 'osd', 'ceph-volume', @@ -1579,7 +1706,7 @@ To check that the host is reachable: See templates/blink_device_light_cmd.j2 """ @forall_hosts - def blink(host, dev, path): + def blink(host: str, dev: str, path: str) -> str: cmd_line = self.template.render('blink_device_light_cmd.j2', { 'on': on, @@ -1658,7 +1785,7 @@ To check that the host is reachable: def _preview_osdspecs(self, osdspecs: Optional[List[DriveGroupSpec]] = None - ): + ) -> dict: if not osdspecs: return {'n/a': [{'error': True, 'message': 'No OSDSpec or matching hosts found.'}]} @@ -1685,7 +1812,7 @@ To check that the host is reachable: previews_for_specs.update({host: osd_reports}) return previews_for_specs - def _calc_daemon_deps(self, daemon_type, daemon_id): + def _calc_daemon_deps(self, daemon_type: str, daemon_id: str) -> List[str]: need = { 'prometheus': ['mgr', 'alertmanager', 'node-exporter'], 'grafana': ['prometheus'], @@ -1699,7 +1826,7 @@ To check that the host is reachable: def _create_daemon(self, daemon_spec: CephadmDaemonSpec, - reconfig=False, + reconfig: bool = False, osd_uuid_map: Optional[Dict[str, Any]] = None, ) -> str: @@ -1730,6 +1857,15 @@ To check that the host is reachable: if spec.ports: ports.extend(spec.ports) + if daemon_spec.daemon_type == 'cephadm-exporter': + if not reconfig: + assert daemon_spec.host + deploy_ok = self._deploy_cephadm_binary(daemon_spec.host) + if not deploy_ok: + msg = f"Unable to deploy the cephadm binary to {daemon_spec.host}" + self.log.warning(msg) + return msg + cephadm_config, deps = self.cephadm_services[daemon_spec.daemon_type].generate_config( daemon_spec) @@ -1796,11 +1932,22 @@ To check that the host is reachable: daemon_spec.name(), OrchestratorEvent.ERROR, 
f'Failed to {what}: {err}') return msg + def _deploy_cephadm_binary(self, host: str) -> bool: + # Use tee (from coreutils) to create a copy of cephadm on the target machine + self.log.info(f"Deploying cephadm binary to {host}") + with self._remote_connection(host) as tpl: + conn, _connr = tpl + _out, _err, code = remoto.process.check( + conn, + ['tee', '-', '/var/lib/ceph/{}/cephadm'.format(self._cluster_fsid)], + stdin=self._cephadm.encode('utf-8')) + return code == 0 + @forall_hosts - def _remove_daemons(self, name, host) -> str: + def _remove_daemons(self, name: str, host: str) -> str: return self._remove_daemon(name, host) - def _remove_daemon(self, name, host) -> str: + def _remove_daemon(self, name: str, host: str) -> str: """ Remove a daemon """ @@ -1827,14 +1974,17 @@ To check that the host is reachable: return "Removed {} from host '{}'".format(name, host) - def _check_pool_exists(self, pool, service_name): + def _check_pool_exists(self, pool: str, service_name: str) -> None: logger.info(f'Checking pool "{pool}" exists for service {service_name}') if not self.rados.pool_exists(pool): raise OrchestratorError(f'Cannot find pool "{pool}" for ' f'service {service_name}') - def _add_daemon(self, daemon_type, spec, - create_func: Callable[..., CephadmDaemonSpec], config_func=None) -> List[str]: + def _add_daemon(self, + daemon_type: str, + spec: ServiceSpec, + create_func: Callable[..., CephadmDaemonSpec], + config_func: Optional[Callable] = None) -> List[str]: """ Add (and place) a daemon. Require explicit host placement. Do not schedule, and do not apply the related scheduling limitations. 
@@ -1848,9 +1998,14 @@ To check that the host is reachable: spec.placement.hosts, count, create_func, config_func) - def _create_daemons(self, daemon_type, spec, daemons, - hosts, count, - create_func: Callable[..., CephadmDaemonSpec], config_func=None) -> List[str]: + def _create_daemons(self, + daemon_type: str, + spec: ServiceSpec, + daemons: List[DaemonDescription], + hosts: List[HostPlacementSpec], + count: int, + create_func: Callable[..., CephadmDaemonSpec], + config_func: Optional[Callable] = None) -> List[str]: if count > len(hosts): raise OrchestratorError('too few hosts: want %d, have %s' % ( count, hosts)) @@ -1885,14 +2040,14 @@ To check that the host is reachable: daemons.append(sd) @forall_hosts - def create_func_map(*args): + def create_func_map(*args: Any) -> str: daemon_spec = create_func(*args) return self._create_daemon(daemon_spec) return create_func_map(args) @trivial_completion - def apply_mon(self, spec) -> str: + def apply_mon(self, spec: ServiceSpec) -> str: return self._apply(spec) @trivial_completion @@ -1916,7 +2071,7 @@ To check that the host is reachable: return self._apply_service_spec(cast(ServiceSpec, spec)) - def _plan(self, spec: ServiceSpec): + def _plan(self, spec: ServiceSpec) -> dict: if spec.service_type == 'osd': return {'service_name': spec.service_name(), 'service_type': spec.service_type, @@ -1969,6 +2124,7 @@ To check that the host is reachable: 'node-exporter': PlacementSpec(host_pattern='*'), 'crash': PlacementSpec(host_pattern='*'), 'container': PlacementSpec(count=1), + 'cephadm-exporter': PlacementSpec(host_pattern='*'), } spec.placement = defaults[spec.service_type] elif spec.service_type in ['mon', 'mgr'] and \ @@ -1997,7 +2153,7 @@ To check that the host is reachable: return results @trivial_completion - def apply_mgr(self, spec) -> str: + def apply_mgr(self, spec: ServiceSpec) -> str: return self._apply(spec) @trivial_completion @@ -2009,11 +2165,11 @@ To check that the host is reachable: return 
self._apply(spec) @trivial_completion - def add_rgw(self, spec) -> List[str]: + def add_rgw(self, spec: ServiceSpec) -> List[str]: return self._add_daemon('rgw', spec, self.rgw_service.prepare_create, self.rgw_service.config) @trivial_completion - def apply_rgw(self, spec) -> str: + def apply_rgw(self, spec: ServiceSpec) -> str: return self._apply(spec) @trivial_completion @@ -2022,23 +2178,23 @@ To check that the host is reachable: return self._add_daemon('iscsi', spec, self.iscsi_service.prepare_create, self.iscsi_service.config) @trivial_completion - def apply_iscsi(self, spec) -> str: + def apply_iscsi(self, spec: ServiceSpec) -> str: return self._apply(spec) @trivial_completion - def add_rbd_mirror(self, spec) -> List[str]: + def add_rbd_mirror(self, spec: ServiceSpec) -> List[str]: return self._add_daemon('rbd-mirror', spec, self.rbd_mirror_service.prepare_create) @trivial_completion - def apply_rbd_mirror(self, spec) -> str: + def apply_rbd_mirror(self, spec: ServiceSpec) -> str: return self._apply(spec) @trivial_completion - def add_nfs(self, spec) -> List[str]: + def add_nfs(self, spec: ServiceSpec) -> List[str]: return self._add_daemon('nfs', spec, self.nfs_service.prepare_create, self.nfs_service.config) @trivial_completion - def apply_nfs(self, spec) -> str: + def apply_nfs(self, spec: ServiceSpec) -> str: return self._apply(spec) def _get_dashboard_url(self): @@ -2046,11 +2202,11 @@ To check that the host is reachable: return self.get('mgr_map').get('services', {}).get('dashboard', '') @trivial_completion - def add_prometheus(self, spec) -> List[str]: + def add_prometheus(self, spec: ServiceSpec) -> List[str]: return self._add_daemon('prometheus', spec, self.prometheus_service.prepare_create) @trivial_completion - def apply_prometheus(self, spec) -> str: + def apply_prometheus(self, spec: ServiceSpec) -> str: return self._apply(spec) @trivial_completion @@ -2060,7 +2216,7 @@ To check that the host is reachable: 
self.node_exporter_service.prepare_create) @trivial_completion - def apply_node_exporter(self, spec) -> str: + def apply_node_exporter(self, spec: ServiceSpec) -> str: return self._apply(spec) @trivial_completion @@ -2070,7 +2226,7 @@ To check that the host is reachable: self.crash_service.prepare_create) @trivial_completion - def apply_crash(self, spec) -> str: + def apply_crash(self, spec: ServiceSpec) -> str: return self._apply(spec) @trivial_completion @@ -2100,7 +2256,18 @@ To check that the host is reachable: def apply_container(self, spec: ServiceSpec) -> str: return self._apply(spec) - def _get_container_image_info(self, image_name) -> ContainerInspectInfo: + @trivial_completion + def add_cephadm_exporter(self, spec): + # type: (ServiceSpec) -> List[str] + return self._add_daemon('cephadm-exporter', + spec, + self.cephadm_exporter_service.prepare_create) + + @trivial_completion + def apply_cephadm_exporter(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + def _get_container_image_info(self, image_name: str) -> ContainerInspectInfo: # pick a random host... 
host = None for host_name in self.inventory.keys(): @@ -2129,12 +2296,12 @@ To check that the host is reachable: self.log.debug(f'image {image_name} -> {r}') return r except (ValueError, KeyError) as _: - msg = 'Failed to pull %s on %s: %s' % (image_name, host, '\n'.join(out)) - self.log.exception(msg) + msg = 'Failed to pull %s on %s: Cannot decode JSON' % (image_name, host) + self.log.exception('%s: \'%s\'' % (msg, '\n'.join(out))) raise OrchestratorError(msg) @trivial_completion - def upgrade_check(self, image, version) -> str: + def upgrade_check(self, image: str, version: str) -> str: if version: target_name = self.container_image_base + ':v' + version elif image: @@ -2144,7 +2311,7 @@ To check that the host is reachable: image_info = self._get_container_image_info(target_name) self.log.debug(f'image info {image} -> {image_info}') - r = { + r: dict = { 'target_name': target_name, 'target_id': image_info.image_id, 'target_version': image_info.ceph_version, @@ -2171,7 +2338,7 @@ To check that the host is reachable: return self.upgrade.upgrade_status() @trivial_completion - def upgrade_start(self, image, version) -> str: + def upgrade_start(self, image: str, version: str) -> str: return self.upgrade.upgrade_start(image, version) @trivial_completion @@ -2221,7 +2388,7 @@ To check that the host is reachable: return "Scheduled OSD(s) for removal" @trivial_completion - def stop_remove_osds(self, osd_ids: List[str]): + def stop_remove_osds(self, osd_ids: List[str]) -> str: """ Stops a `removal` process for a List of OSDs. 
This will revert their weight and remove it from the osds_to_remove queue @@ -2238,7 +2405,7 @@ To check that the host is reachable: return "Stopped OSD(s) removal" @trivial_completion - def remove_osds_status(self): + def remove_osds_status(self) -> List[OSD]: """ The CLI call to retrieve an osd removal report """ diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py index 26bee82d9d5..e2e3c926ab9 100644 --- a/src/pybind/mgr/cephadm/schedule.py +++ b/src/pybind/mgr/cephadm/schedule.py @@ -36,7 +36,7 @@ class SimpleScheduler(BaseScheduler): 2) Select from list up to :count """ - def __init__(self, spec): + def __init__(self, spec: ServiceSpec): super(SimpleScheduler, self).__init__(spec) def place(self, host_pool, count=None): @@ -74,7 +74,7 @@ class HostAssignment(object): def get_hostnames(self) -> List[str]: return [h.hostname for h in self.hosts] - def validate(self): + def validate(self) -> None: self.spec.validate() if self.spec.placement.count == 0: @@ -186,7 +186,7 @@ class HostAssignment(object): # remove duplicates before returning return list(dict.fromkeys(active_hosts)) - def prefer_hosts_with_active_daemons(self, hosts: List[HostPlacementSpec], count) -> List[HostPlacementSpec]: + def prefer_hosts_with_active_daemons(self, hosts: List[HostPlacementSpec], count: int) -> List[HostPlacementSpec]: # try to prefer host with active daemon if possible active_hosts = self.get_hosts_with_active_daemon(hosts) if len(active_hosts) != 0 and count > 0: diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 1e91a33abb4..e1a90ee4314 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -84,13 +84,13 @@ class CephadmServe: self._serve_sleep() self.log.debug("serve exit") - def _serve_sleep(self): + def _serve_sleep(self) -> None: sleep_interval = 600 self.log.debug('Sleeping for %d seconds', sleep_interval) ret = self.mgr.event.wait(sleep_interval) self.mgr.event.clear() - def 
_update_paused_health(self): + def _update_paused_health(self) -> None: if self.mgr.paused: self.mgr.health_checks['CEPHADM_PAUSED'] = { 'severity': 'warning', @@ -109,7 +109,8 @@ class CephadmServe: failures = [] @forall_hosts - def refresh(host): + def refresh(host: str) -> None: + if self.mgr.cache.host_needs_check(host): r = self._check_host(host) if r is not None: @@ -133,6 +134,12 @@ class CephadmServe: if r: failures.append(r) + if self.mgr.cache.host_needs_facts_refresh(host): + self.log.info(('refreshing %s facts' % host)) + r = self._refresh_facts(host) + if r: + failures.append(r) + if self.mgr.cache.host_needs_osdspec_preview_refresh(host): self.log.debug(f"refreshing OSDSpec previews for {host}") r = self._refresh_host_osdspec_previews(host) @@ -173,9 +180,9 @@ class CephadmServe: if health_changed: self.mgr.set_health_checks(self.mgr.health_checks) - def _check_host(self, host): + def _check_host(self, host: str) -> Optional[str]: if host not in self.mgr.inventory: - return + return None self.log.debug(' checking %s' % host) try: out, err, code = self.mgr._run_cephadm( @@ -192,17 +199,22 @@ class CephadmServe: except Exception as e: self.log.debug(' host %s failed check' % host) return 'host %s failed check: %s' % (host, e) + return None - def _refresh_host_daemons(self, host) -> Optional[str]: + def _refresh_host_daemons(self, host: str) -> Optional[str]: try: out, err, code = self.mgr._run_cephadm( host, 'mon', 'ls', [], no_fsid=True) if code: return 'host %s cephadm ls returned %d: %s' % ( host, code, err) + ls = json.loads(''.join(out)) + except ValueError: + msg = 'host %s scrape failed: Cannot decode JSON' % host + self.log.exception('%s: \'%s\'' % (msg, ''.join(out))) + return msg except Exception as e: return 'host %s scrape failed: %s' % (host, e) - ls = json.loads(''.join(out)) dm = {} for d in ls: if not d['style'].startswith('cephadm'): @@ -246,7 +258,22 @@ class CephadmServe: self.mgr.cache.save_host(host) return None - def 
_refresh_host_devices(self, host) -> Optional[str]: + def _refresh_facts(self, host: str) -> Optional[str]: + try: + out, err, code = self.mgr._run_cephadm( + host, cephadmNoImage, 'gather-facts', [], + error_ok=True, no_fsid=True) + + if code: + return 'host %s gather-facts returned %d: %s' % ( + host, code, err) + except Exception as e: + return 'host %s gather facts failed: %s' % (host, e) + self.log.debug('Refreshed host %s facts' % (host)) + self.mgr.cache.update_host_facts(host, json.loads(''.join(out))) + return None + + def _refresh_host_devices(self, host: str) -> Optional[str]: try: out, err, code = self.mgr._run_cephadm( host, 'osd', @@ -255,9 +282,13 @@ class CephadmServe: if code: return 'host %s ceph-volume inventory returned %d: %s' % ( host, code, err) + devices = json.loads(''.join(out)) + except ValueError: + msg = 'host %s scrape failed: Cannot decode JSON' % host + self.log.exception('%s: \'%s\'' % (msg, ''.join(out))) + return msg except Exception as e: return 'host %s ceph-volume inventory failed: %s' % (host, e) - devices = json.loads(''.join(out)) try: out, err, code = self.mgr._run_cephadm( host, 'mon', @@ -267,9 +298,13 @@ class CephadmServe: if code: return 'host %s list-networks returned %d: %s' % ( host, code, err) + networks = json.loads(''.join(out)) + except ValueError: + msg = 'host %s scrape failed: Cannot decode JSON' % host + self.log.exception('%s: \'%s\'' % (msg, ''.join(out))) + return msg except Exception as e: return 'host %s list-networks failed: %s' % (host, e) - networks = json.loads(''.join(out)) self.log.debug('Refreshed host %s devices (%d) networks (%s)' % ( host, len(devices), len(networks))) devices = inventory.Devices.from_json(devices) @@ -278,13 +313,13 @@ class CephadmServe: self.mgr.cache.save_host(host) return None - def _refresh_host_osdspec_previews(self, host) -> bool: + def _refresh_host_osdspec_previews(self, host: str) -> Optional[str]: self.update_osdspec_previews(host) self.mgr.cache.save_host(host) 
self.log.debug(f'Refreshed OSDSpec previews for host <{host}>') - return True + return None - def update_osdspec_previews(self, search_host: str = ''): + def update_osdspec_previews(self, search_host: str = '') -> None: # Set global 'pending' flag for host self.mgr.cache.loading_osdspec_preview.add(search_host) previews = [] @@ -370,7 +405,7 @@ class CephadmServe: if self.mgr.warn_on_stray_daemons and daemon_detail: self.mgr.health_checks['CEPHADM_STRAY_DAEMON'] = { 'severity': 'warning', - 'summary': '%d stray daemons(s) not managed by cephadm' % ( + 'summary': '%d stray daemon(s) not managed by cephadm' % ( len(daemon_detail)), 'count': len(daemon_detail), 'detail': daemon_detail, @@ -393,7 +428,7 @@ class CephadmServe: return r - def _config_fn(self, service_type) -> Optional[Callable[[ServiceSpec], None]]: + def _config_fn(self, service_type: str) -> Optional[Callable[[ServiceSpec], None]]: fn = { 'mds': self.mgr.mds_service.config, 'rgw': self.mgr.rgw_service.config, @@ -615,7 +650,7 @@ class CephadmServe: self.mgr.requires_post_actions.remove(daemon_type) self.mgr._get_cephadm_service(daemon_type).daemon_check_post(daemon_descs) - def convert_tags_to_repo_digest(self): + def convert_tags_to_repo_digest(self) -> None: if not self.mgr.use_repo_digest: return settings = self.mgr.upgrade.get_distinct_container_image_settings() diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index c779ff34f18..48c078f58fe 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -1,7 +1,9 @@ import json import re import logging +import secrets import subprocess +import collections from abc import ABCMeta, abstractmethod from typing import TYPE_CHECKING, List, Callable, Any, TypeVar, Generic, \ Optional, Dict, Any, Tuple, NewType @@ -12,6 +14,7 @@ from ceph.deployment.service_spec import ServiceSpec, RGWSpec from ceph.deployment.utils import is_ipv6, 
unwrap_ipv6 from orchestrator import OrchestratorError, DaemonDescription from cephadm import utils +from mgr_util import create_self_signed_cert, ServerConfigException, verify_tls if TYPE_CHECKING: from cephadm.module import CephadmOrchestrator @@ -239,6 +242,10 @@ class CephadmService(metaclass=ABCMeta): assert self.TYPE == daemon.daemon_type logger.debug(f'Post remove daemon {self.TYPE}.{daemon.daemon_id}') + def purge(self) -> None: + """Called to carry out any purge tasks following service removal""" + logger.debug(f'Purge called for {self.TYPE} - no action taken') + class CephService(CephadmService): def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], List[str]]: @@ -614,7 +621,7 @@ class RgwService(CephService): def create_realm_zonegroup_zone(self, spec: RGWSpec, rgw_id: str) -> None: if utils.get_cluster_health(self.mgr) != 'HEALTH_OK': - raise OrchestratorError('Health not ok, will try agin when health ok') + raise OrchestratorError('Health not ok, will try again when health ok') # get keyring needed to run rados commands and strip out just the keyring keyring = self.get_keyring(rgw_id).split('key = ', 1)[1].rstrip() @@ -765,3 +772,136 @@ class CrashService(CephService): daemon_spec.keyring = keyring return daemon_spec + + +class CephadmExporterConfig: + required_keys = ['crt', 'key', 'token', 'port'] + DEFAULT_PORT = '9443' + + def __init__(self, mgr, crt="", key="", token="", port=""): + # type: (CephadmOrchestrator, str, str, str, str) -> None + self.mgr = mgr + self.crt = crt + self.key = key + self.token = token + self.port = port + + @property + def ready(self) -> bool: + return all([self.crt, self.key, self.token, self.port]) + + def load_from_store(self) -> None: + cfg = self.mgr._get_exporter_config() + + assert isinstance(cfg, dict) + self.crt = cfg.get('crt', "") + self.key = cfg.get('key', "") + self.token = cfg.get('token', "") + self.port = cfg.get('port', "") + + def load_from_json(self, json_str: str) -> 
Tuple[int, str]: + try: + cfg = json.loads(json_str) + except ValueError: + return 1, "Invalid JSON provided - unable to load" + + if not all([k in cfg for k in CephadmExporterConfig.required_keys]): + return 1, "JSON file must contain crt, key, token and port" + + self.crt = cfg.get('crt') + self.key = cfg.get('key') + self.token = cfg.get('token') + self.port = cfg.get('port') + + return 0, "" + + def validate_config(self) -> Tuple[int, str]: + if not self.ready: + return 1, "Incomplete configuration. cephadm-exporter needs crt, key, token and port to be set" + + for check in [self._validate_tls, self._validate_token, self._validate_port]: + rc, reason = check() + if rc: + return 1, reason + + return 0, "" + + def _validate_tls(self) -> Tuple[int, str]: + + try: + verify_tls(self.crt, self.key) + except ServerConfigException as e: + return 1, str(e) + + return 0, "" + + def _validate_token(self) -> Tuple[int, str]: + if not isinstance(self.token, str): + return 1, "token must be a string" + if len(self.token) < 8: + return 1, "Token must be a string of at least 8 chars in length" + + return 0, "" + + def _validate_port(self) -> Tuple[int, str]: + try: + p = int(str(self.port)) + if p <= 1024: + raise ValueError + except ValueError: + return 1, "Port must be a integer (>1024)" + + return 0, "" + + +class CephadmExporter(CephadmService): + TYPE = 'cephadm-exporter' + + def prepare_create(self, daemon_spec: CephadmDaemonSpec) -> CephadmDaemonSpec: + assert self.TYPE == daemon_spec.daemon_type + + cfg = CephadmExporterConfig(self.mgr) + cfg.load_from_store() + + if cfg.ready: + rc, reason = cfg.validate_config() + if rc: + raise OrchestratorError(reason) + else: + logger.info( + "Incomplete/Missing configuration, applying defaults") + self.mgr._set_exporter_defaults() + cfg.load_from_store() + + if not daemon_spec.ports: + daemon_spec.ports = [int(cfg.port)] + + return daemon_spec + + def generate_config(self, daemon_spec: CephadmDaemonSpec) -> Tuple[Dict[str, Any], 
List[str]]: + assert self.TYPE == daemon_spec.daemon_type + assert daemon_spec.spec + deps: List[str] = [] + + cfg = CephadmExporterConfig(self.mgr) + cfg.load_from_store() + + if cfg.ready: + rc, reason = cfg.validate_config() + if rc: + raise OrchestratorError(reason) + else: + logger.info("Using default configuration for cephadm-exporter") + self.mgr._set_exporter_defaults() + cfg.load_from_store() + + config = { + "crt": cfg.crt, + "key": cfg.key, + "token": cfg.token + } + return config, deps + + def purge(self) -> None: + logger.info("Purging cephadm-exporter settings from mon K/V store") + self.mgr._clear_exporter_config_settings() diff --git a/src/pybind/mgr/cephadm/services/iscsi.py b/src/pybind/mgr/cephadm/services/iscsi.py index a6e8f03cc04..6c3514ce348 100644 --- a/src/pybind/mgr/cephadm/services/iscsi.py +++ b/src/pybind/mgr/cephadm/services/iscsi.py @@ -17,6 +17,7 @@ class IscsiService(CephService): def config(self, spec: IscsiServiceSpec) -> None: assert self.TYPE == spec.service_type + assert spec.pool self.mgr._check_pool_exists(spec.pool, spec.service_name()) logger.info('Saving service %s spec with placement %s' % ( diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py index 3eaf50cac68..0323b4110d5 100644 --- a/src/pybind/mgr/cephadm/services/nfs.py +++ b/src/pybind/mgr/cephadm/services/nfs.py @@ -20,6 +20,7 @@ class NFSService(CephService): def config(self, spec: NFSServiceSpec) -> None: assert self.TYPE == spec.service_type + assert spec.pool self.mgr._check_pool_exists(spec.pool, spec.service_name()) logger.info('Saving service %s spec with placement %s' % ( diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py index 8f8df8a6f0b..8dd49c1e414 100644 --- a/src/pybind/mgr/cephadm/services/osd.py +++ b/src/pybind/mgr/cephadm/services/osd.py @@ -27,17 +27,19 @@ class OSDService(CephService): def create_from_spec(self, drive_group: DriveGroupSpec) -> str: 
logger.debug(f"Processing DriveGroup {drive_group}") osd_id_claims = self.find_destroyed_osds() - logger.info(f"Found osd claims for drivegroup {drive_group.service_id} -> {osd_id_claims}") + if osd_id_claims: + logger.info( + f"Found osd claims for drivegroup {drive_group.service_id} -> {osd_id_claims}") @forall_hosts def create_from_spec_one(host: str, drive_selection: DriveSelection) -> Optional[str]: - logger.info('Applying %s on host %s...' % (drive_group.service_id, host)) cmd = self.driveselection_to_ceph_volume(drive_selection, osd_id_claims.get(host, [])) if not cmd: logger.debug("No data_devices, skipping DriveGroup: {}".format( drive_group.service_id)) return None + logger.info('Applying drive group %s on host %s...' % (drive_group.service_id, host)) env_vars: List[str] = [f"CEPH_VOLUME_OSDSPEC_AFFINITY={drive_group.service_id}"] ret_msg = self.create_single_host( host, cmd, replace_osd_ids=osd_id_claims.get(host, []), env_vars=env_vars @@ -71,7 +73,11 @@ class OSDService(CephService): '--format', 'json', ]) before_osd_uuid_map = self.mgr.get_osd_uuid_map(only_up=True) - osds_elems = json.loads('\n'.join(out)) + try: + osds_elems = json.loads('\n'.join(out)) + except ValueError: + logger.exception('Cannot decode JSON: \'%s\'' % '\n'.join(out)) + osds_elems = {} fsid = self.mgr._cluster_fsid osd_uuid_map = self.mgr.get_osd_uuid_map() created = [] @@ -201,7 +207,12 @@ class OSDService(CephService): # get preview data from ceph-volume out, err, code = self._run_ceph_volume_command(host, cmd) if out: - concat_out: Dict[str, Any] = json.loads(" ".join(out)) + try: + concat_out: Dict[str, Any] = json.loads(' '.join(out)) + except ValueError: + logger.exception('Cannot decode JSON: \'%s\'' % ' '.join(out)) + concat_out = {} + ret_all.append({'data': concat_out, 'osdspec': osdspec.service_id, 'host': host}) @@ -274,8 +285,8 @@ class OSDService(CephService): raise OrchestratorError(str(e)) try: tree = json.loads(out) - except json.decoder.JSONDecodeError: - 
logger.exception(f"Could not decode json -> {out}") + except ValueError: + logger.exception(f'Cannot decode JSON: \'{out}\'') return osd_host_map nodes = tree.get('nodes', {}) @@ -284,8 +295,8 @@ class OSDService(CephService): osd_host_map.update( {node.get('name'): [str(_id) for _id in node.get('children', list())]} ) - self.mgr.log.info( - f"Found osd claims -> {osd_host_map}") + if osd_host_map: + self.mgr.log.info(f"Found osd claims -> {osd_host_map}") return osd_host_map @@ -345,6 +356,8 @@ class RemoveUtil(object): if not osd.exists: continue + assert osd.fullname is not None + assert osd.hostname is not None self.mgr._remove_daemon(osd.fullname, osd.hostname) logger.info(f"Successfully removed OSD <{osd.osd_id}> on {osd.hostname}") logger.debug(f"Removing {osd.osd_id} from the queue.") @@ -371,7 +384,12 @@ class RemoveUtil(object): 'prefix': base_cmd, 'format': 'json' }) - return json.loads(out) + try: + ret = json.loads(out) + except ValueError: + logger.exception(f'Cannot decode JSON: \'{out}\'') + return {} + return ret def get_pg_count(self, osd_id: int, osd_df: Optional[dict] = None) -> int: if not osd_df: diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py index 5548c53e2ef..90db5309724 100644 --- a/src/pybind/mgr/cephadm/tests/fixtures.py +++ b/src/pybind/mgr/cephadm/tests/fixtures.py @@ -147,3 +147,9 @@ def with_service(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, meth, h yield [dd.name() for dd in own_dds] assert_rm_service(cephadm_module, spec.service_name()) + + +def _deploy_cephadm_binary(host): + def foo(*args, **kwargs): + return True + return foo diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 349665ff08d..7cc61269860 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -24,7 +24,7 @@ from orchestrator import ServiceDescription, DaemonDescription, InventoryHost, \ 
HostSpec, OrchestratorError from tests import mock from .fixtures import cephadm_module, wait, _run_cephadm, match_glob, with_host, \ - with_cephadm_module, with_service, assert_rm_service + with_cephadm_module, with_service, assert_rm_service, _deploy_cephadm_binary from cephadm.module import CephadmOrchestrator, CEPH_DATEFMT """ @@ -39,9 +39,16 @@ def assert_rm_daemon(cephadm: CephadmOrchestrator, prefix, host): dds: List[DaemonDescription] = wait(cephadm, cephadm.list_daemons(host=host)) d_names = [dd.name() for dd in dds if dd.name().startswith(prefix)] assert d_names + # there should only be one daemon (if not match_glob will throw mismatch) + assert len(d_names) == 1 + c = cephadm.remove_daemons(d_names) [out] = wait(cephadm, c) - match_glob(out, f"Removed {d_names}* from host '{host}'") + # picking the 1st element is needed, rather than passing the list when the daemon + # name contains '-' char. If not, the '-' is treated as a range i.e. cephadm-exporter + # is treated like a m-e range which is invalid. rbd-mirror (d-m) and node-exporter (e-e) + # are valid, so pass without incident! Also, match_gob acts on strings anyway! 
+ match_glob(out, f"Removed {d_names[0]}* from host '{host}'") @contextmanager @@ -565,8 +572,10 @@ class TestCephadm(object): (ServiceSpec('rbd-mirror'), CephadmOrchestrator.add_rbd_mirror), (ServiceSpec('mds', service_id='fsname'), CephadmOrchestrator.add_mds), (RGWSpec(rgw_realm='realm', rgw_zone='zone'), CephadmOrchestrator.add_rgw), + (ServiceSpec('cephadm-exporter'), CephadmOrchestrator.add_cephadm_exporter), ] ) + @mock.patch("cephadm.module.CephadmOrchestrator._deploy_cephadm_binary", _deploy_cephadm_binary('test')) @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('{}')) @mock.patch("cephadm.services.cephadmservice.RgwService.create_realm_zonegroup_zone", lambda _, __, ___: None) def test_daemon_add(self, spec: ServiceSpec, meth, cephadm_module): @@ -730,8 +739,10 @@ class TestCephadm(object): envs=['SECRET=password'], ports=[8080, 8443] ), CephadmOrchestrator.apply_container), + (ServiceSpec('cephadm-exporter'), CephadmOrchestrator.apply_cephadm_exporter), ] ) + @mock.patch("cephadm.module.CephadmOrchestrator._deploy_cephadm_binary", _deploy_cephadm_binary('test')) @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('{}')) @mock.patch("cephadm.services.cephadmservice.RgwService.create_realm_zonegroup_zone", lambda _, __, ___: None) def test_apply_save(self, spec: ServiceSpec, meth, cephadm_module: CephadmOrchestrator): @@ -816,19 +827,19 @@ class TestCephadm(object): raise Exception("boom: connection is dead") else: conn.fuse = True - return '{}', None, 0 + return '{}', [], 0 with mock.patch("remoto.Connection", side_effect=[Connection(), Connection(), Connection()]): with mock.patch("remoto.process.check", _check): with with_host(cephadm_module, 'test', refresh_hosts=False): code, out, err = cephadm_module.check_host('test') # First should succeed. 
- assert err is None + assert err is '' # On second it should attempt to reuse the connection, where the # connection is "down" so will recreate the connection. The old # code will blow up here triggering the BOOM! code, out, err = cephadm_module.check_host('test') - assert err is None + assert err is '' @mock.patch("cephadm.module.CephadmOrchestrator._get_connection") @mock.patch("remoto.process.check") diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 2a66665bb63..da53e510145 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -3,7 +3,7 @@ import pytest from unittest.mock import MagicMock from cephadm.services.cephadmservice import MonService, MgrService, MdsService, RgwService, \ - RbdMirrorService, CrashService, CephadmService, AuthEntity + RbdMirrorService, CrashService, CephadmService, AuthEntity, CephadmExporter from cephadm.services.iscsi import IscsiService from cephadm.services.nfs import NFSService from cephadm.services.osd import RemoveUtil, OSDQueue, OSDService, OSD, NotFoundError @@ -57,6 +57,7 @@ class TestCephadmService: node_exporter_service = NodeExporterService(mgr) crash_service = CrashService(mgr) iscsi_service = IscsiService(mgr) + cephadm_exporter_service = CephadmExporter(mgr) cephadm_services = { 'mon': mon_service, 'mgr': mgr_service, @@ -71,6 +72,7 @@ class TestCephadmService: 'node-exporter': node_exporter_service, 'crash': crash_service, 'iscsi': iscsi_service, + 'cephadm-exporter': cephadm_exporter_service, } return cephadm_services @@ -108,8 +110,9 @@ class TestCephadmService: assert "%s.id1" % daemon_type == \ cephadm_services[daemon_type].get_auth_entity("id1") + # services based on CephadmService shouldn't have get_auth_entity with pytest.raises(AttributeError): - for daemon_type in ['grafana', 'alertmanager', 'prometheus', 'node-exporter']: + for daemon_type in ['grafana', 'alertmanager', 'prometheus', 
'node-exporter', 'cephadm-exporter']: cephadm_services[daemon_type].get_auth_entity("id1", "host") cephadm_services[daemon_type].get_auth_entity("id1", "") cephadm_services[daemon_type].get_auth_entity("id1") diff --git a/src/pybind/mgr/cephadm/tests/test_spec.py b/src/pybind/mgr/cephadm/tests/test_spec.py index ab2d059126c..a0d22710a3b 100644 --- a/src/pybind/mgr/cephadm/tests/test_spec.py +++ b/src/pybind/mgr/cephadm/tests/test_spec.py @@ -558,6 +558,19 @@ def test_dd_octopus(dd_json): ), True ), + + ( + # daemon_id only contains hostname + ServiceSpec( + service_type='cephadm-exporter', + ), + DaemonDescription( + daemon_type='cephadm-exporter', + daemon_id="testhost", + hostname="testhost", + ), + True + ), ]) def test_daemon_description_service_name(spec: ServiceSpec, dd: DaemonDescription, diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index 6a130681d0c..45c352b503a 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -49,7 +49,7 @@ class UpgradeState: } @classmethod - def from_json(cls, data) -> Optional['UpgradeState']: + def from_json(cls, data: dict) -> Optional['UpgradeState']: if data: return cls(**data) else: @@ -57,6 +57,12 @@ class UpgradeState: class CephadmUpgrade: + UPGRADE_ERRORS = [ + 'UPGRADE_NO_STANDBY_MGR', + 'UPGRADE_FAILED_PULL', + 'UPGRADE_REDEPLOY_DAEMON', + ] + def __init__(self, mgr: "CephadmOrchestrator"): self.mgr = mgr @@ -87,7 +93,7 @@ class CephadmUpgrade: r.message = 'Upgrade paused' return r - def upgrade_start(self, image, version) -> str: + def upgrade_start(self, image: str, version: str) -> str: if self.mgr.mode != 'root': raise OrchestratorError('upgrade is not supported in %s mode' % ( self.mgr.mode)) @@ -186,13 +192,13 @@ class CephadmUpgrade: return False def _clear_upgrade_health_checks(self) -> None: - for k in ['UPGRADE_NO_STANDBY_MGR', - 'UPGRADE_FAILED_PULL']: + for k in self.UPGRADE_ERRORS: if k in self.mgr.health_checks: del 
self.mgr.health_checks[k] self.mgr.set_health_checks(self.mgr.health_checks) - def _fail_upgrade(self, alert_id, alert) -> None: + def _fail_upgrade(self, alert_id: str, alert: dict) -> None: + assert alert_id in self.UPGRADE_ERRORS logger.error('Upgrade: Paused due to %s: %s' % (alert_id, alert['summary'])) if not self.upgrade_state: @@ -204,7 +210,7 @@ class CephadmUpgrade: self.mgr.health_checks[alert_id] = alert self.mgr.set_health_checks(self.mgr.health_checks) - def _update_upgrade_progress(self, progress) -> None: + def _update_upgrade_progress(self, progress: float) -> None: if not self.upgrade_state: assert False, 'No upgrade in progress' @@ -329,13 +335,23 @@ class CephadmUpgrade: return logger.info('Upgrade: Redeploying %s.%s' % (d.daemon_type, d.daemon_id)) - self.mgr._daemon_action( - d.daemon_type, - d.daemon_id, - d.hostname, - 'redeploy', - image=target_image - ) + try: + self.mgr._daemon_action( + d.daemon_type, + d.daemon_id, + d.hostname, + 'redeploy', + image=target_image + ) + except Exception as e: + self._fail_upgrade('UPGRADE_REDEPLOY_DAEMON', { + 'severity': 'warning', + 'summary': f'Upgrading daemon {d.name()} on host {d.hostname} failed.', + 'count': 1, + 'detail': [ + f'Upgrade daemon: {d.name()}: {e}' + ], + }) return if need_upgrade_self: diff --git a/src/pybind/mgr/cephadm/utils.py b/src/pybind/mgr/cephadm/utils.py index 4c3d595010f..03a28fbd9ef 100644 --- a/src/pybind/mgr/cephadm/utils.py +++ b/src/pybind/mgr/cephadm/utils.py @@ -78,7 +78,9 @@ def get_cluster_health(mgr: 'CephadmOrchestrator') -> str: }) try: j = json.loads(out) - except Exception as e: + except ValueError: + msg = 'Failed to parse health status: Cannot decode JSON' + logger.exception('%s: \'%s\'' % (msg, out)) raise OrchestratorError('failed to parse health status') return j['status'] diff --git a/src/pybind/mgr/dashboard/cherrypy_backports.py b/src/pybind/mgr/dashboard/cherrypy_backports.py index 4d2a2cb05c3..4fc59ba0605 100644 --- 
a/src/pybind/mgr/dashboard/cherrypy_backports.py +++ b/src/pybind/mgr/dashboard/cherrypy_backports.py @@ -94,11 +94,16 @@ def accept_exceptions_from_builtin_ssl(v): # Check if it's one of the known errors # Errors that are caught by PyOpenSSL, but thrown by # built-in ssl - _block_errors = ('unknown protocol', 'unknown ca', - 'unknown_ca', 'inappropriate fallback', + _block_errors = ('unknown protocol', 'unknown ca', 'unknown_ca', + 'unknown error', + 'https proxy request', 'inappropriate fallback', 'wrong version number', 'no shared cipher', 'certificate unknown', - 'ccs received early') + 'ccs received early', + 'certificate verify failed', # client cert w/o trusted CA + 'version too low', # caused by SSL3 connections + 'unsupported protocol', # caused by TLS1 connections + 'sslv3 alert bad certificate') for error_text in _block_errors: if error_text in e.args[1].lower(): # Accepted error, let's pass diff --git a/src/pybind/mgr/dashboard/constraints.txt b/src/pybind/mgr/dashboard/constraints.txt index 3e8532d4514..96cfdb99e4f 100644 --- a/src/pybind/mgr/dashboard/constraints.txt +++ b/src/pybind/mgr/dashboard/constraints.txt @@ -2,7 +2,6 @@ CherryPy==13.1.0 enum34==1.1.6 more-itertools==4.1.0 PyJWT==1.6.4 -pyopenssl==17.5.0 bcrypt==3.1.4 python3-saml==1.4.1 requests==2.20.0 diff --git a/src/pybind/mgr/dashboard/controllers/__init__.py b/src/pybind/mgr/dashboard/controllers/__init__.py index 1fb40f5317e..2f3c186b952 100644 --- a/src/pybind/mgr/dashboard/controllers/__init__.py +++ b/src/pybind/mgr/dashboard/controllers/__init__.py @@ -16,10 +16,12 @@ from urllib.parse import unquote # pylint: disable=wrong-import-position import cherrypy +# pylint: disable=import-error +from ceph_argparse import ArgumentFormat # type: ignore from .. 
import DEFAULT_VERSION from ..api.doc import SchemaInput, SchemaType -from ..exceptions import PermissionNotValid, ScopeNotValid +from ..exceptions import DashboardException, PermissionNotValid, ScopeNotValid from ..plugins import PLUGIN_MANAGER from ..security import Permission, Scope from ..services.auth import AuthManager, JwtManager @@ -700,7 +702,11 @@ class BaseController(object): if isinstance(ret, bytes): ret = ret.decode('utf-8') if xml: - cherrypy.response.headers['Content-Type'] = 'application/xml' + if version: + cherrypy.response.headers['Content-Type'] = \ + 'application/vnd.ceph.api.v{}+xml'.format(version) + else: + cherrypy.response.headers['Content-Type'] = 'application/xml' return ret.encode('utf8') if json_response: if version: @@ -1008,3 +1014,20 @@ def allow_empty_body(func): # noqa: N802 except (AttributeError, KeyError): func._cp_config = {'tools.json_in.force': False} return func + + +def validate_ceph_type(validations, component=''): + def decorator(func): + @wraps(func) + def validate_args(*args, **kwargs): + input_values = kwargs + for key, ceph_type in validations: + try: + ceph_type.valid(input_values[key]) + except ArgumentFormat as e: + raise DashboardException(msg=e, + code='ceph_type_not_valid', + component=component) + return func(*args, **kwargs) + return validate_args + return decorator diff --git a/src/pybind/mgr/dashboard/controllers/host.py b/src/pybind/mgr/dashboard/controllers/host.py index 8394fc578e3..cdce5c895a4 100644 --- a/src/pybind/mgr/dashboard/controllers/host.py +++ b/src/pybind/mgr/dashboard/controllers/host.py @@ -2,7 +2,9 @@ from __future__ import absolute_import import copy -from typing import Dict, List +import os +import time +from typing import Dict, List, Optional import cherrypy from mgr_util import merge_dicts @@ -14,9 +16,10 @@ from ..security import Scope from ..services.ceph_service import CephService from ..services.exception import handle_orchestrator_error from ..services.orchestrator import 
OrchClient, OrchFeature +from ..tools import TaskManager, str_to_bool from . import ApiController, BaseController, ControllerDoc, Endpoint, \ EndpointDoc, ReadPermission, RESTController, Task, UiApiController, \ - allow_empty_body + UpdatePermission, allow_empty_body from .orchestrator import raise_if_no_orchestrator LIST_HOST_SCHEMA = { @@ -36,6 +39,74 @@ LIST_HOST_SCHEMA = { "status": (str, "") } +INVENTORY_SCHEMA = { + "name": (str, "Hostname"), + "addr": (str, "Host address"), + "devices": ([{ + "rejected_reasons": ([str], ""), + "available": (bool, "If the device can be provisioned to an OSD"), + "path": (str, "Device path"), + "sys_api": ({ + "removable": (str, ""), + "ro": (str, ""), + "vendor": (str, ""), + "model": (str, ""), + "rev": (str, ""), + "sas_address": (str, ""), + "sas_device_handle": (str, ""), + "support_discard": (str, ""), + "rotational": (str, ""), + "nr_requests": (str, ""), + "scheduler_mode": (str, ""), + "partitions": ({ + "partition_name": ({ + "start": (str, ""), + "sectors": (str, ""), + "sectorsize": (int, ""), + "size": (int, ""), + "human_readable_size": (str, ""), + "holders": ([str], "") + }, "") + }, ""), + "sectors": (int, ""), + "sectorsize": (str, ""), + "size": (int, ""), + "human_readable_size": (str, ""), + "path": (str, ""), + "locked": (int, "") + }, ""), + "lvs": ([{ + "name": (str, ""), + "osd_id": (str, ""), + "cluster_name": (str, ""), + "type": (str, ""), + "osd_fsid": (str, ""), + "cluster_fsid": (str, ""), + "osdspec_affinity": (str, ""), + "block_uuid": (str, ""), + }], ""), + "human_readable_type": (str, "Device type. 
ssd or hdd"), + "device_id": (str, "Device's udev ID"), + "lsm_data": ({ + "serialNum": (str, ""), + "transport": (str, ""), + "mediaType": (str, ""), + "rpm": (str, ""), + "linkSpeed": (str, ""), + "health": (str, ""), + "ledSupport": ({ + "IDENTsupport": (str, ""), + "IDENTstatus": (str, ""), + "FAILsupport": (str, ""), + "FAILstatus": (str, ""), + }, ""), + "errors": ([str], "") + }, ""), + "osd_ids": ([int], "Device OSD IDs") + }], "Host devices"), + "labels": ([str], "Host labels") +} + def host_task(name, metadata, wait_for=10.0): return Task("host/{}".format(name), metadata, wait_for) @@ -121,6 +192,69 @@ def get_host(hostname: str) -> Dict: raise cherrypy.HTTPError(404) +def get_device_osd_map(): + """Get mappings from inventory devices to OSD IDs. + + :return: Returns a dictionary containing mappings. Note one device might + shared between multiple OSDs. + e.g. { + 'node1': { + 'nvme0n1': [0, 1], + 'vdc': [0], + 'vdb': [1] + }, + 'node2': { + 'vdc': [2] + } + } + :rtype: dict + """ + result: dict = {} + for osd_id, osd_metadata in mgr.get('osd_metadata').items(): + hostname = osd_metadata.get('hostname') + devices = osd_metadata.get('devices') + if not hostname or not devices: + continue + if hostname not in result: + result[hostname] = {} + # for OSD contains multiple devices, devices is in `sda,sdb` + for device in devices.split(','): + if device not in result[hostname]: + result[hostname][device] = [int(osd_id)] + else: + result[hostname][device].append(int(osd_id)) + return result + + +def get_inventories(hosts: Optional[List[str]] = None, + refresh: Optional[bool] = None) -> List[dict]: + """Get inventories from the Orchestrator and link devices with OSD IDs. + + :param hosts: Hostnames to query. + :param refresh: Ask the Orchestrator to refresh the inventories. Note the this is an + asynchronous operation, the updated version of inventories need to + be re-qeuried later. + :return: Returns list of inventory. 
+ :rtype: list + """ + do_refresh = False + if refresh is not None: + do_refresh = str_to_bool(refresh) + orch = OrchClient.instance() + inventory_hosts = [host.to_json() + for host in orch.inventory.list(hosts=hosts, refresh=do_refresh)] + device_osd_map = get_device_osd_map() + for inventory_host in inventory_hosts: + host_osds = device_osd_map.get(inventory_host['name']) + for device in inventory_host['devices']: + if host_osds: # pragma: no cover + dev_name = os.path.basename(device['path']) + device['osd_ids'] = sorted(host_osds.get(dev_name, [])) + else: + device['osd_ids'] = [] + return inventory_hosts + + @ApiController('/host', Scope.HOSTS) @ControllerDoc("Get Host Details", "Host") class Host(RESTController): @@ -186,6 +320,45 @@ class Host(RESTController): return CephService.get_smart_data_by_host(hostname) @RESTController.Resource('GET') + @raise_if_no_orchestrator([OrchFeature.DEVICE_LIST]) + @handle_orchestrator_error('host') + @EndpointDoc('Get inventory of a host', + parameters={ + 'hostname': (str, 'Hostname'), + 'refresh': (str, 'Trigger asynchronous refresh'), + }, + responses={200: INVENTORY_SCHEMA}) + def inventory(self, hostname, refresh=None): + inventory = get_inventories([hostname], refresh) + if inventory: + return inventory[0] + return {} + + @RESTController.Resource('POST') + @UpdatePermission + @raise_if_no_orchestrator([OrchFeature.DEVICE_BLINK_LIGHT]) + @handle_orchestrator_error('host') + @host_task('identify_device', ['{hostname}', '{device}'], wait_for=2.0) + def identify_device(self, hostname, device, duration): + # type: (str, str, int) -> None + """ + Identify a device by switching on the device light for N seconds. + :param hostname: The hostname of the device to process. + :param device: The device identifier to process, e.g. ``/dev/dm-0`` or + ``ABC1234DEF567-1R1234_ABC8DE0Q``. + :param duration: The duration in seconds how long the LED should flash. 
+ """ + orch = OrchClient.instance() + TaskManager.current_task().set_progress(0) + orch.blink_device_light(hostname, device, 'ident', True) + for i in range(int(duration)): + percentage = int(round(i / float(duration) * 100)) + TaskManager.current_task().set_progress(percentage) + time.sleep(1) + orch.blink_device_light(hostname, device, 'ident', False) + TaskManager.current_task().set_progress(100) + + @RESTController.Resource('GET') @raise_if_no_orchestrator([OrchFeature.DAEMON_LIST]) def daemons(self, hostname: str) -> List[dict]: orch = OrchClient.instance() @@ -241,3 +414,10 @@ class HostUi(BaseController): labels.extend(host.labels) labels.sort() return list(set(labels)) # Filter duplicate labels. + + @Endpoint('GET') + @ReadPermission + @raise_if_no_orchestrator([OrchFeature.DEVICE_LIST]) + @handle_orchestrator_error('host') + def inventory(self, refresh=None): + return get_inventories(None, refresh) diff --git a/src/pybind/mgr/dashboard/controllers/orchestrator.py b/src/pybind/mgr/dashboard/controllers/orchestrator.py index 13b4a171a89..085870a0f4a 100644 --- a/src/pybind/mgr/dashboard/controllers/orchestrator.py +++ b/src/pybind/mgr/dashboard/controllers/orchestrator.py @@ -1,18 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -import os.path -import time from functools import wraps -from .. import mgr from ..exceptions import DashboardException -from ..security import Scope -from ..services.exception import handle_orchestrator_error -from ..services.orchestrator import OrchClient, OrchFeature -from ..tools import TaskManager, str_to_bool -from . import ApiController, ControllerDoc, Endpoint, EndpointDoc, \ - ReadPermission, RESTController, Task, UpdatePermission +from ..services.orchestrator import OrchClient +from . 
import ApiController, ControllerDoc, Endpoint, EndpointDoc, ReadPermission, RESTController STATUS_SCHEMA = { "available": (bool, "Orchestrator status"), @@ -20,44 +13,6 @@ STATUS_SCHEMA = { } -def get_device_osd_map(): - """Get mappings from inventory devices to OSD IDs. - - :return: Returns a dictionary containing mappings. Note one device might - shared between multiple OSDs. - e.g. { - 'node1': { - 'nvme0n1': [0, 1], - 'vdc': [0], - 'vdb': [1] - }, - 'node2': { - 'vdc': [2] - } - } - :rtype: dict - """ - result: dict = {} - for osd_id, osd_metadata in mgr.get('osd_metadata').items(): - hostname = osd_metadata.get('hostname') - devices = osd_metadata.get('devices') - if not hostname or not devices: - continue - if hostname not in result: - result[hostname] = {} - # for OSD contains multiple devices, devices is in `sda,sdb` - for device in devices.split(','): - if device not in result[hostname]: - result[hostname][device] = [int(osd_id)] - else: - result[hostname][device].append(int(osd_id)) - return result - - -def orchestrator_task(name, metadata, wait_for=2.0): - return Task("orchestrator/{}".format(name), metadata, wait_for) - - def raise_if_no_orchestrator(features=None): def inner(method): @wraps(method) @@ -91,51 +46,3 @@ class Orchestrator(RESTController): responses={200: STATUS_SCHEMA}) def status(self): return OrchClient.instance().status() - - @Endpoint(method='POST') - @UpdatePermission - @raise_if_no_orchestrator([OrchFeature.DEVICE_BLINK_LIGHT]) - @handle_orchestrator_error('osd') - @orchestrator_task('identify_device', ['{hostname}', '{device}']) - def identify_device(self, hostname, device, duration): # pragma: no cover - # type: (str, str, int) -> None - """ - Identify a device by switching on the device light for N seconds. - :param hostname: The hostname of the device to process. - :param device: The device identifier to process, e.g. ``/dev/dm-0`` or - ``ABC1234DEF567-1R1234_ABC8DE0Q``. 
- :param duration: The duration in seconds how long the LED should flash. - """ - orch = OrchClient.instance() - TaskManager.current_task().set_progress(0) - orch.blink_device_light(hostname, device, 'ident', True) - for i in range(int(duration)): - percentage = int(round(i / float(duration) * 100)) - TaskManager.current_task().set_progress(percentage) - time.sleep(1) - orch.blink_device_light(hostname, device, 'ident', False) - TaskManager.current_task().set_progress(100) - - -@ApiController('/orchestrator/inventory', Scope.HOSTS) -@ControllerDoc("Get Orchestrator Inventory Details", "OrchestratorInventory") -class OrchestratorInventory(RESTController): - - @raise_if_no_orchestrator([OrchFeature.DEVICE_LIST]) - def list(self, hostname=None, refresh=None): - orch = OrchClient.instance() - hosts = [hostname] if hostname else None - do_refresh = False - if refresh is not None: - do_refresh = str_to_bool(refresh) - inventory_hosts = [host.to_json() for host in orch.inventory.list(hosts, do_refresh)] - device_osd_map = get_device_osd_map() - for inventory_host in inventory_hosts: - host_osds = device_osd_map.get(inventory_host['name']) - for device in inventory_host['devices']: - if host_osds: # pragma: no cover - dev_name = os.path.basename(device['path']) - device['osd_ids'] = sorted(host_osds.get(dev_name, [])) - else: - device['osd_ids'] = [] - return inventory_hosts diff --git a/src/pybind/mgr/dashboard/controllers/saml2.py b/src/pybind/mgr/dashboard/controllers/saml2.py index f53c7f0e1f6..76a7e193a9a 100644 --- a/src/pybind/mgr/dashboard/controllers/saml2.py +++ b/src/pybind/mgr/dashboard/controllers/saml2.py @@ -16,7 +16,7 @@ from .. import mgr from ..exceptions import UserDoesNotExist from ..services.auth import JwtManager from ..tools import prepare_url_prefix -from . import BaseController, Controller, Endpoint +from . 
import BaseController, Controller, Endpoint, allow_empty_body @Controller('/auth/saml2', secure=False) @@ -42,7 +42,8 @@ class Saml2(BaseController): except OneLogin_Saml2_Error: raise cherrypy.HTTPError(400, 'Single Sign-On is not configured.') - @Endpoint('POST', path="") + @Endpoint('POST', path="", version=None) + @allow_empty_body def auth_response(self, **kwargs): Saml2._check_python_saml() req = Saml2._build_req(self._request, kwargs) @@ -78,27 +79,27 @@ class Saml2(BaseController): 'reason': auth.get_last_error_reason() } - @Endpoint(xml=True) + @Endpoint(xml=True, version=None) def metadata(self): Saml2._check_python_saml() saml_settings = OneLogin_Saml2_Settings(mgr.SSO_DB.saml2.onelogin_settings) return saml_settings.get_sp_metadata() - @Endpoint(json_response=False) + @Endpoint(json_response=False, version=None) def login(self): Saml2._check_python_saml() req = Saml2._build_req(self._request, {}) auth = OneLogin_Saml2_Auth(req, mgr.SSO_DB.saml2.onelogin_settings) raise cherrypy.HTTPRedirect(auth.login()) - @Endpoint(json_response=False) + @Endpoint(json_response=False, version=None) def slo(self): Saml2._check_python_saml() req = Saml2._build_req(self._request, {}) auth = OneLogin_Saml2_Auth(req, mgr.SSO_DB.saml2.onelogin_settings) raise cherrypy.HTTPRedirect(auth.logout()) - @Endpoint(json_response=False) + @Endpoint(json_response=False, version=None) def logout(self, **kwargs): # pylint: disable=unused-argument Saml2._check_python_saml() diff --git a/src/pybind/mgr/dashboard/controllers/user.py b/src/pybind/mgr/dashboard/controllers/user.py index 5ee188032d8..8d340fedccb 100644 --- a/src/pybind/mgr/dashboard/controllers/user.py +++ b/src/pybind/mgr/dashboard/controllers/user.py @@ -5,6 +5,7 @@ import time from datetime import datetime import cherrypy +from ceph_argparse import CephString from .. 
import mgr from ..exceptions import DashboardException, PasswordPolicyException, \ @@ -13,7 +14,7 @@ from ..security import Scope from ..services.access_control import SYSTEM_ROLES, PasswordPolicy from ..services.auth import JwtManager from . import ApiController, BaseController, ControllerDoc, Endpoint, \ - EndpointDoc, RESTController, allow_empty_body + EndpointDoc, RESTController, allow_empty_body, validate_ceph_type USER_SCHEMA = ([{ "username": (str, 'Username of the user'), @@ -81,6 +82,7 @@ class User(RESTController): raise cherrypy.HTTPError(404) return User._user_to_dict(user) + @validate_ceph_type([('username', CephString())], 'user') def create(self, username=None, password=None, name=None, email=None, roles=None, enabled=True, pwdExpirationDate=None, pwdUpdateRequired=True): if not username: diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/block/images.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/block/images.po.ts index f744920c3cd..bf6cbc05263 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/block/images.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/block/images.po.ts @@ -22,7 +22,7 @@ export class ImagesPageHelper extends PageHelper { cy.get('#size').type(size); // Click the create button and wait for image to be made - cy.contains('button', 'Create RBD').click(); + cy.get('[data-cy=submitBtn]').click(); this.getFirstTableCell(name).should('exist'); } @@ -35,7 +35,7 @@ export class ImagesPageHelper extends PageHelper { cy.get('#name').clear().type(newName); cy.get('#size').clear().type(newSize); // click the size box and send new size - cy.contains('button', 'Edit RBD').click(); + cy.get('[data-cy=submitBtn]').click(); this.getExpandCollapseElement(newName).click(); cy.get('.table.table-striped.table-bordered').contains('td', newSize); @@ -53,7 +53,7 @@ export class ImagesPageHelper extends PageHelper { cy.get('.table-actions button.dropdown-toggle').first().click(); 
cy.get('button.move-to-trash').click(); - cy.contains('button', 'Move Image').should('be.visible').click(); + cy.get('[data-cy=submitBtn]').should('be.visible').click(); // Clicks trash tab cy.contains('.nav-link', 'Trash').click(); @@ -79,7 +79,7 @@ export class ImagesPageHelper extends PageHelper { cy.get('cd-modal #name').clear().type(newName); } - cy.contains('button', 'Restore Image').click(); + cy.get('[data-cy=submitBtn]').click(); // clicks images tab cy.contains('.nav-link', 'Images').click(); @@ -102,7 +102,7 @@ export class ImagesPageHelper extends PageHelper { this.selectOption('poolName', pool); cy.get('#poolName').should('have.class', 'ng-valid'); // check if pool is selected } - cy.get('#purgeFormButton').click(); + cy.get('[data-cy=submitBtn]').click(); // Wait for image to delete and check it is not present this.getFirstTableCell(name).should('not.exist'); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/configuration.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/configuration.po.ts index eb160b052e1..f309120c60d 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/configuration.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/cluster/configuration.po.ts @@ -20,7 +20,7 @@ export class ConfigurationPageHelper extends PageHelper { cy.get(`#${i}`).clear(); } // Clicks save button and checks that values are not present for the selected config - cy.contains('button', 'Save').click(); + cy.get('[data-cy=submitBtn]').click(); // Enter config setting name into filter box this.seachTable(name); @@ -57,7 +57,7 @@ export class ConfigurationPageHelper extends PageHelper { // Clicks save button then waits until the desired config is visible, clicks it, // then checks that each desired value appears with the desired number - cy.contains('button', 'Save').click(); + cy.get('[data-cy=submitBtn]').click(); // Enter config setting name into filter box this.seachTable(name); diff 
--git a/src/pybind/mgr/dashboard/frontend/cypress/integration/ui/login.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/ui/login.po.ts index a44d935bfb8..d4d2c692116 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/ui/login.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/ui/login.po.ts @@ -9,7 +9,7 @@ export class LoginPageHelper extends PageHelper { doLogin() { cy.get('[name=username]').type('admin'); cy.get('#password').type('admin'); - cy.contains('input', 'Login').click(); + cy.get('[type=submit]').click(); cy.get('cd-dashboard').should('exist'); } diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/ui/role-mgmt.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/ui/role-mgmt.po.ts index 2cbdef39b98..1cc3630a463 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/integration/ui/role-mgmt.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/ui/role-mgmt.po.ts @@ -16,7 +16,7 @@ export class RoleMgmtPageHelper extends PageHelper { cy.get('#description').type(description); // Click the create button and wait for role to be made - cy.contains('button', 'Create Role').click(); + cy.get('[data-cy=submitBtn]').click(); cy.get('.breadcrumb-item.active').should('not.have.text', 'Create'); this.getFirstTableCell(name).should('exist'); @@ -31,7 +31,7 @@ export class RoleMgmtPageHelper extends PageHelper { cy.get('#description').clear().type(description); // Click the edit button and check new values are present in table - cy.contains('button', 'Edit Role').click(); + cy.get('[data-cy=submitBtn]').click(); cy.get('.breadcrumb-item.active').should('not.have.text', 'Edit'); this.getFirstTableCell(name).should('exist'); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/integration/ui/user-mgmt.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/integration/ui/user-mgmt.po.ts index 5afcdf37ca5..fb2b7912944 100644 --- 
a/src/pybind/mgr/dashboard/frontend/cypress/integration/ui/user-mgmt.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/integration/ui/user-mgmt.po.ts @@ -17,7 +17,7 @@ export class UserMgmtPageHelper extends PageHelper { cy.get('#email').type(email); // Click the create button and wait for user to be made - cy.contains('button', 'Create User').click(); + cy.get('[data-cy=submitBtn]').click(); this.getFirstTableCell(username).should('exist'); } @@ -31,7 +31,7 @@ export class UserMgmtPageHelper extends PageHelper { cy.get('#email').clear().type(email); // Click the edit button and check new values are present in table - const editButton = cy.contains('button', 'Edit User'); + const editButton = cy.get('[data-cy=submitBtn]'); editButton.click(); this.getFirstTableCell(email).should('exist'); this.getFirstTableCell(name).should('exist'); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/block.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/block.module.ts index e6c5e68ca0d..472fe37f19f 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/block.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/block.module.ts @@ -5,6 +5,7 @@ import { RouterModule, Routes } from '@angular/router'; import { TreeModule } from '@circlon/angular-tree-component'; import { NgbNavModule, NgbPopoverModule, NgbTooltipModule } from '@ng-bootstrap/ng-bootstrap'; +import { NgxPipeFunctionModule } from 'ngx-pipe-function'; import { ActionLabels, URLVerbs } from '~/app/shared/constants/app.constants'; import { FeatureTogglesGuardService } from '~/app/shared/services/feature-toggles-guard.service'; @@ -45,6 +46,7 @@ import { RbdTrashRestoreModalComponent } from './rbd-trash-restore-modal/rbd-tra NgbNavModule, NgbPopoverModule, NgbTooltipModule, + NgxPipeFunctionModule, SharedModule, RouterModule, TreeModule diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-discovery-modal/iscsi-target-discovery-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-discovery-modal/iscsi-target-discovery-modal.component.html index e27c6a5a265..c7724e9ec9a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-discovery-modal/iscsi-target-discovery-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-discovery-modal/iscsi-target-discovery-modal.component.html @@ -126,14 +126,10 @@ </div> <div class="modal-footer"> - <cd-submit-button (submitAction)="submitAction()" - [form]="discoveryForm" - *ngIf="hasPermission" - i18n>Submit</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="submitAction()" + [form]="discoveryForm" + [showSubmit]="hasPermission" + [submitText]="actionLabels.SUBMIT"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-discovery-modal/iscsi-target-discovery-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-discovery-modal/iscsi-target-discovery-modal.component.ts index fd00db7c142..68958cfaa2b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-discovery-modal/iscsi-target-discovery-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-discovery-modal/iscsi-target-discovery-modal.component.ts @@ -4,6 +4,7 @@ import { FormControl, Validators } from '@angular/forms'; import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import { IscsiService } from '~/app/shared/api/iscsi.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { NotificationType } from '~/app/shared/enum/notification-type.enum'; import { CdFormGroup } from 
'~/app/shared/forms/cd-form-group'; import { CdValidators } from '~/app/shared/forms/cd-validators'; @@ -27,6 +28,7 @@ export class IscsiTargetDiscoveryModalComponent implements OnInit { constructor( private authStorageService: AuthStorageService, public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private iscsiService: IscsiService, private notificationService: NotificationService ) { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-form/iscsi-target-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-form/iscsi-target-form.component.html index 825e821144a..6bed1db95f6 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-form/iscsi-target-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-form/iscsi-target-form.component.html @@ -572,7 +572,7 @@ <ng-container i18n>Group</ng-container>: {{ group.getValue('group_id') }} <button type="button" class="close" - (click)="groups.removeAt(gi)"> + (click)="removeGroup(gi)"> <i [ngClass]="[icons.destroy]"></i> </button> </div> @@ -618,7 +618,7 @@ <cd-select [data]="group.getValue('members')" [options]="groupMembersSelections[gi]" [messages]="messages.groupInitiator" - (selection)="onGroupMemberSelection($event)" + (selection)="onGroupMemberSelection($event, gi)" elemClass="btn btn-light float-right"> <i [ngClass]="[icons.add]"></i> <ng-container i18n>Add initiator</ng-container> @@ -690,12 +690,10 @@ </div> <div class="card-footer"> - <div class="text-right"> - <cd-submit-button (submitAction)="submit()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="formDir">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button></cd-back-button> - </div> + <cd-form-button-panel (submitActionEvent)="submit()" + [form]="targetForm" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + 
wrappingClass="text-right"></cd-form-button-panel> </div> </div> </form> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-form/iscsi-target-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-form/iscsi-target-form.component.spec.ts index d10ab0ef5da..10475872a2e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-form/iscsi-target-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-form/iscsi-target-form.component.spec.ts @@ -7,6 +7,7 @@ import { RouterTestingModule } from '@angular/router/testing'; import { ToastrModule } from 'ngx-toastr'; import { LoadingPanelComponent } from '~/app/shared/components/loading-panel/loading-panel.component'; +import { SelectOption } from '~/app/shared/components/select/select-option.model'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { SharedModule } from '~/app/shared/shared.module'; import { ActivatedRouteStub } from '~/testing/activated-route-stub'; @@ -329,6 +330,14 @@ describe('IscsiTargetFormComponent', () => { component.initiators.controls[0].patchValue({ luns: ['rbd/disk_2'] }); + component.imagesInitiatorSelections[0] = [ + { + description: '', + enabled: true, + name: 'rbd/disk_2', + selected: true + } + ]; expect(component.initiators.controls[0].value).toEqual({ auth: { mutual_password: '', mutual_user: '', password: '', user: '' }, cdIsInGroup: false, @@ -336,17 +345,19 @@ describe('IscsiTargetFormComponent', () => { luns: ['rbd/disk_2'] }); - component.addGroup(); component.groups.controls[0].patchValue({ group_id: 'foo', members: ['iqn.initiator'] }); - component.onGroupMemberSelection({ - option: { - name: 'iqn.initiator', - selected: true - } - }); + component.onGroupMemberSelection( + { + option: { + name: 'iqn.initiator', + selected: true + } + }, + 0 + ); expect(component.initiators.controls[0].value).toEqual({ auth: { mutual_password: '', 
mutual_user: '', password: '', user: '' }, @@ -354,6 +365,14 @@ describe('IscsiTargetFormComponent', () => { client_iqn: 'iqn.initiator', luns: [] }); + expect(component.imagesInitiatorSelections[0]).toEqual([ + { + description: '', + enabled: true, + name: 'rbd/disk_2', + selected: false + } + ]); }); it('should disabled the initiator when selected', () => { @@ -363,7 +382,7 @@ describe('IscsiTargetFormComponent', () => { ]); component.groupMembersSelections[0][0].selected = true; - component.onGroupMemberSelection({ option: { name: 'iqn.initiator', selected: true } }); + component.onGroupMemberSelection({ option: { name: 'iqn.initiator', selected: true } }, 0); expect(component.groupMembersSelections).toEqual([ [{ description: '', enabled: false, name: 'iqn.initiator', selected: true }], @@ -371,6 +390,49 @@ describe('IscsiTargetFormComponent', () => { ]); }); + describe('should remove from group', () => { + beforeEach(() => { + component.onGroupMemberSelection( + { option: new SelectOption(true, 'iqn.initiator', '') }, + 0 + ); + component.groupDiskSelections[0][0].selected = true; + component.groups.controls[0].patchValue({ + disks: ['rbd/disk_2'], + members: ['iqn.initiator'] + }); + + expect(component.initiators.value[0].luns).toEqual([]); + expect(component.imagesInitiatorSelections[0]).toEqual([ + { description: '', enabled: true, name: 'rbd/disk_2', selected: false } + ]); + expect(component.initiators.value[0].cdIsInGroup).toBe(true); + }); + + it('should update initiator images when deselecting', () => { + component.onGroupMemberSelection( + { option: new SelectOption(false, 'iqn.initiator', '') }, + 0 + ); + + expect(component.initiators.value[0].luns).toEqual(['rbd/disk_2']); + expect(component.imagesInitiatorSelections[0]).toEqual([ + { description: '', enabled: true, name: 'rbd/disk_2', selected: true } + ]); + expect(component.initiators.value[0].cdIsInGroup).toBe(false); + }); + + it('should update initiator when removing', () => { + 
component.removeGroupInitiator(component.groups.controls[0] as CdFormGroup, 0, 0); + + expect(component.initiators.value[0].luns).toEqual(['rbd/disk_2']); + expect(component.imagesInitiatorSelections[0]).toEqual([ + { description: '', enabled: true, name: 'rbd/disk_2', selected: true } + ]); + expect(component.initiators.value[0].cdIsInGroup).toBe(false); + }); + }); + it('should validate authentication', () => { const control = component.initiators.controls[0]; const formHelper = new FormHelper(control as CdFormGroup); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-form/iscsi-target-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-form/iscsi-target-form.component.ts index 91b85d5554c..d84647e91dd 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-form/iscsi-target-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-form/iscsi-target-form.component.ts @@ -264,12 +264,12 @@ export class IscsiTargetFormComponent extends CdForm implements OnInit { // updatedInitiatorSelector() }); - _.forEach(res.groups, (group) => { + (res.groups as any[]).forEach((group: any, group_index: number) => { const fg = this.addGroup(); group.disks = _.map(group.disks, (disk) => `${disk.pool}/${disk.image}`); fg.patchValue(group); _.forEach(group.members, (member) => { - this.onGroupMemberSelection({ option: new SelectOption(true, member, '') }); + this.onGroupMemberSelection({ option: new SelectOption(true, member, '') }, group_index); }); }); } @@ -577,26 +577,44 @@ export class IscsiTargetFormComponent extends CdForm implements OnInit { } removeGroup(index: number) { + // Remove group and disk selections this.groups.removeAt(index); + + // Free initiator from group + const selectedMembers = this.groupMembersSelections[index].filter((value) => value.selected); + selectedMembers.forEach((selection) => { + selection.selected = false; + 
this.onGroupMemberSelection({ option: selection }, index); + }); + + this.groupMembersSelections.splice(index, 1); this.groupDiskSelections.splice(index, 1); } - onGroupMemberSelection($event: any) { + onGroupMemberSelection($event: any, group_index: number) { const option = $event.option; - let initiator_index: number; + let luns: string[] = []; + if (!option.selected) { + const selectedDisks = this.groupDiskSelections[group_index].filter((value) => value.selected); + luns = selectedDisks.map((value) => value.name); + } + this.initiators.controls.forEach((element, index) => { if (element.value.client_iqn === option.name) { - element.patchValue({ luns: [] }); + element.patchValue({ luns: luns }); element.get('cdIsInGroup').setValue(option.selected); - initiator_index = index; - } - }); - // Members can only be at one group at a time, so when a member is selected - // in one group we need to disable its selection in other groups - _.forEach(this.groupMembersSelections, (group) => { - group[initiator_index].enabled = !option.selected; + // Members can only be at one group at a time, so when a member is selected + // in one group we need to disable its selection in other groups + _.forEach(this.groupMembersSelections, (group) => { + group[index].enabled = !option.selected; + }); + + this.imagesInitiatorSelections[index].forEach((image) => { + image.selected = luns.includes(image.name); + }); + } }); } @@ -604,14 +622,7 @@ export class IscsiTargetFormComponent extends CdForm implements OnInit { const name = group.getValue('members')[member_index]; group.getValue('members').splice(member_index, 1); - this.groupMembersSelections[group_index].forEach((value) => { - if (value.name === name) { - value.selected = false; - } - }); - this.groupMembersSelections[group_index] = [...this.groupMembersSelections[group_index]]; - - this.onGroupMemberSelection({ option: new SelectOption(false, name, '') }); + this.onGroupMemberSelection({ option: new SelectOption(false, name, '') }, 
group_index); } removeGroupDisk(group: CdFormGroup, disk_index: number, group_index: number) { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-image-settings-modal/iscsi-target-image-settings-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-image-settings-modal/iscsi-target-image-settings-modal.component.html index 21d8d00e213..3c34a2b85f2 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-image-settings-modal/iscsi-target-image-settings-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-image-settings-modal/iscsi-target-image-settings-modal.component.html @@ -83,13 +83,9 @@ </div> <div class="modal-footer"> - <cd-submit-button i18n - [form]="settingsForm" - (submitAction)="save()">Confirm</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="save()" + [form]="settingsForm" + [submitText]="actionLabels.UPDATE"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-image-settings-modal/iscsi-target-image-settings-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-image-settings-modal/iscsi-target-image-settings-modal.component.ts index 4521a0db197..e9c9c7d90da 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-image-settings-modal/iscsi-target-image-settings-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-image-settings-modal/iscsi-target-image-settings-modal.component.ts @@ -5,6 +5,7 @@ import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import _ from 'lodash'; import { IscsiService } from '~/app/shared/api/iscsi.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { CdFormGroup } from 
'~/app/shared/forms/cd-form-group'; @Component({ @@ -23,7 +24,11 @@ export class IscsiTargetImageSettingsModalComponent implements OnInit { settingsForm: CdFormGroup; - constructor(public activeModal: NgbActiveModal, public iscsiService: IscsiService) {} + constructor( + public activeModal: NgbActiveModal, + public iscsiService: IscsiService, + public actionLabels: ActionLabelsI18n + ) {} ngOnInit() { const fg: Record<string, FormControl> = { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-iqn-settings-modal/iscsi-target-iqn-settings-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-iqn-settings-modal/iscsi-target-iqn-settings-modal.component.html index 2df24f42c63..a5d1269f6df 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-iqn-settings-modal/iscsi-target-iqn-settings-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-iqn-settings-modal/iscsi-target-iqn-settings-modal.component.html @@ -23,13 +23,9 @@ </div> <div class="modal-footer"> - <cd-submit-button i18n - [form]="settingsForm" - (submitAction)="save()">Confirm</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="save()" + [form]="settingsForm" + [submitText]="actionLabels.UPDATE"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-iqn-settings-modal/iscsi-target-iqn-settings-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-iqn-settings-modal/iscsi-target-iqn-settings-modal.component.ts index 4a6ac9df9f4..36fdb9026c3 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-iqn-settings-modal/iscsi-target-iqn-settings-modal.component.ts +++ 
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/iscsi-target-iqn-settings-modal/iscsi-target-iqn-settings-modal.component.ts @@ -5,6 +5,7 @@ import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import _ from 'lodash'; import { IscsiService } from '~/app/shared/api/iscsi.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; @Component({ @@ -19,7 +20,11 @@ export class IscsiTargetIqnSettingsModalComponent implements OnInit { settingsForm: CdFormGroup; - constructor(public activeModal: NgbActiveModal, public iscsiService: IscsiService) {} + constructor( + public activeModal: NgbActiveModal, + public iscsiService: IscsiService, + public actionLabels: ActionLabelsI18n + ) {} ngOnInit() { const fg: Record<string, FormControl> = {}; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/bootstrap-create-modal/bootstrap-create-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/bootstrap-create-modal/bootstrap-create-modal.component.html index 3e2a1867ae3..6ab22cc70f7 100755 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/bootstrap-create-modal/bootstrap-create-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/bootstrap-create-modal/bootstrap-create-modal.component.html @@ -82,7 +82,7 @@ </div> <div class="modal-footer"> - <cd-back-button [back]="activeModal.close" + <cd-back-button (backAction)="activeModal.close()" name="Close" i18n-name> </cd-back-button> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/bootstrap-import-modal/bootstrap-import-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/bootstrap-import-modal/bootstrap-import-modal.component.html index 9f901d98ec9..3770ef099a6 100644 --- 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/bootstrap-import-modal/bootstrap-import-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/bootstrap-import-modal/bootstrap-import-modal.component.html @@ -87,13 +87,9 @@ </div> <div class="modal-footer"> - <cd-submit-button i18n - [form]="importBootstrapForm" - (submitAction)="import()">Import</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Close" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="import()" + [form]="importBootstrapForm" + [submitText]="actionLabels.SUBMIT"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/bootstrap-import-modal/bootstrap-import-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/bootstrap-import-modal/bootstrap-import-modal.component.ts index df3cc2bee03..d79096f6be6 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/bootstrap-import-modal/bootstrap-import-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/bootstrap-import-modal/bootstrap-import-modal.component.ts @@ -8,6 +8,7 @@ import { last } from 'rxjs/operators'; import { Pool } from '~/app/ceph/pool/pool'; import { RbdMirroringService } from '~/app/shared/api/rbd-mirroring.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { FinishedTask } from '~/app/shared/models/finished-task'; import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; @@ -33,6 +34,7 @@ export class BootstrapImportModalComponent implements OnInit, OnDestroy { constructor( public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private rbdMirroringService: RbdMirroringService, private taskWrapper: TaskWrapperService ) { diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/edit-site-name-modal/edit-site-name-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/edit-site-name-modal/edit-site-name-modal.component.html index 09f97bfa54f..f0c91976802 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/edit-site-name-modal/edit-site-name-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/edit-site-name-modal/edit-site-name-modal.component.html @@ -30,13 +30,9 @@ </div> <div class="modal-footer"> - <cd-submit-button i18n - [form]="editSiteNameForm" - (submitAction)="update()">Update</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="update()" + [form]="editSiteNameForm" + [submitText]="actionLabels.UPDATE"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/edit-site-name-modal/edit-site-name-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/edit-site-name-modal/edit-site-name-modal.component.ts index 71c41a1267a..aa43bd0e8db 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/edit-site-name-modal/edit-site-name-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/edit-site-name-modal/edit-site-name-modal.component.ts @@ -4,6 +4,7 @@ import { FormControl } from '@angular/forms'; import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import { RbdMirroringService } from '~/app/shared/api/rbd-mirroring.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { FinishedTask } from '~/app/shared/models/finished-task'; import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; @@ -20,6 
+21,7 @@ export class EditSiteNameModalComponent implements OnInit { constructor( public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private rbdMirroringService: RbdMirroringService, private taskWrapper: TaskWrapperService ) { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-mode-modal/pool-edit-mode-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-mode-modal/pool-edit-mode-modal.component.html index 7facf5ba9b4..2e88059ba0c 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-mode-modal/pool-edit-mode-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-mode-modal/pool-edit-mode-modal.component.html @@ -34,13 +34,9 @@ </div> <div class="modal-footer"> - <cd-submit-button i18n - [form]="editModeForm" - (submitAction)="update()">Update</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="update()" + [form]="editModeForm" + [submitText]="actionLabels.UPDATE"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-mode-modal/pool-edit-mode-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-mode-modal/pool-edit-mode-modal.component.ts index f435e84b75f..137e787174d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-mode-modal/pool-edit-mode-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-mode-modal/pool-edit-mode-modal.component.ts @@ -5,6 +5,7 @@ import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import { Subscription } from 'rxjs'; import { RbdMirroringService } from '~/app/shared/api/rbd-mirroring.service'; +import { ActionLabelsI18n } from 
'~/app/shared/constants/app.constants'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { FinishedTask } from '~/app/shared/models/finished-task'; import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; @@ -37,6 +38,7 @@ export class PoolEditModeModalComponent implements OnInit, OnDestroy { constructor( public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private rbdMirroringService: RbdMirroringService, private taskWrapper: TaskWrapperService ) { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-peer-modal/pool-edit-peer-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-peer-modal/pool-edit-peer-modal.component.html index b563faed1e8..97774ebe3ff 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-peer-modal/pool-edit-peer-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-peer-modal/pool-edit-peer-modal.component.html @@ -91,13 +91,9 @@ </div> <div class="modal-footer"> - <cd-submit-button i18n - [form]="editPeerForm" - (submitAction)="update()">Submit</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="update()" + [form]="editPeerForm" + [submitText]="actionLabels.SUBMIT"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-peer-modal/pool-edit-peer-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-peer-modal/pool-edit-peer-modal.component.ts index 83f1fe89769..6569c3b24b0 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-peer-modal/pool-edit-peer-modal.component.ts +++ 
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/mirroring/pool-edit-peer-modal/pool-edit-peer-modal.component.ts @@ -4,6 +4,7 @@ import { AbstractControl, FormControl, Validators } from '@angular/forms'; import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import { RbdMirroringService } from '~/app/shared/api/rbd-mirroring.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { FinishedTask } from '~/app/shared/models/finished-task'; import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; @@ -29,6 +30,7 @@ export class PoolEditPeerModalComponent implements OnInit { constructor( public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private rbdMirroringService: RbdMirroringService, private taskWrapper: TaskWrapperService ) { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-configuration-list/rbd-configuration-list.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-configuration-list/rbd-configuration-list.component.html index 64364f67da6..6c3e8c0278c 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-configuration-list/rbd-configuration-list.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-configuration-list/rbd-configuration-list.component.html @@ -5,7 +5,6 @@ </cd-table> <ng-template #configurationSourceTpl - let-row="row" let-value="value"> <div [ngSwitch]="value"> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-configuration-list/rbd-configuration-list.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-configuration-list/rbd-configuration-list.component.spec.ts index ae1a5265735..f54ad02720c 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-configuration-list/rbd-configuration-list.component.spec.ts +++ 
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-configuration-list/rbd-configuration-list.component.spec.ts @@ -8,11 +8,10 @@ import { NgxDatatableModule } from '@swimlane/ngx-datatable'; import { ChartsModule } from 'ng2-charts'; import { ComponentsModule } from '~/app/shared/components/components.module'; -import { TableComponent } from '~/app/shared/datatable/table/table.component'; import { RbdConfigurationEntry } from '~/app/shared/models/configuration'; -import { PipesModule } from '~/app/shared/pipes/pipes.module'; import { FormatterService } from '~/app/shared/services/formatter.service'; import { RbdConfigurationService } from '~/app/shared/services/rbd-configuration.service'; +import { SharedModule } from '~/app/shared/shared.module'; import { configureTestBed } from '~/testing/unit-test-helper'; import { RbdConfigurationListComponent } from './rbd-configuration-list.component'; @@ -29,10 +28,10 @@ describe('RbdConfigurationListComponent', () => { ComponentsModule, NgbDropdownModule, ChartsModule, - PipesModule, + SharedModule, NgbTooltipModule ], - declarations: [RbdConfigurationListComponent, TableComponent], + declarations: [RbdConfigurationListComponent], providers: [FormatterService, RbdConfigurationService] }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.html index 91a4439b904..6a12cb64f16 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.html @@ -328,12 +328,10 @@ </div> <div class="card-footer"> - <div class="text-right"> - <cd-submit-button (submitAction)="submit()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="formDir">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button></cd-back-button> - </div> + 
<cd-form-button-panel (submitActionEvent)="submit()" + [form]="formDir" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + wrappingClass="text-right"></cd-form-button-panel> </div> </div> </form> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-list/rbd-list.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-list/rbd-list.component.spec.ts index 1186aadbd0f..7e90907b01e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-list/rbd-list.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-list/rbd-list.component.spec.ts @@ -297,4 +297,38 @@ describe('RbdListComponent', () => { } }); }); + + const getActionDisable = (name: string) => + component.tableActions.find((o) => o.name === name).disable; + + const testActions = (selection: any, expected: string | boolean) => { + expect(getActionDisable('Edit')(selection)).toBe(expected); + expect(getActionDisable('Delete')(selection)).toBe(expected); + expect(getActionDisable('Copy')(selection)).toBe(expected); + expect(getActionDisable('Flatten')(selection)).toBeTruthy(); + expect(getActionDisable('Move to Trash')(selection)).toBe(expected); + }; + + it('should test TableActions with valid/invalid image name', () => { + component.selection.selected = [ + { + name: 'foobar', + pool_name: 'rbd', + snapshots: [] + } + ]; + testActions(component.selection, false); + + component.selection.selected = [ + { + name: 'foo/bar', + pool_name: 'rbd', + snapshots: [] + } + ]; + testActions( + component.selection, + `This RBD image has an invalid name and can't be managed by ceph.` + ); + }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-list/rbd-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-list/rbd-list.component.ts index 049b417a6db..4df87596133 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-list/rbd-list.component.ts +++ 
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-list/rbd-list.component.ts @@ -131,7 +131,8 @@ export class RbdListComponent extends ListWithDetails implements OnInit { permission: 'update', icon: Icons.edit, routerLink: () => this.urlBuilder.getEdit(getImageUri()), - name: this.actionLabels.EDIT + name: this.actionLabels.EDIT, + disable: this.getInvalidNameDisable }; const deleteAction: CdTableAction = { permission: 'delete', @@ -144,7 +145,7 @@ export class RbdListComponent extends ListWithDetails implements OnInit { permission: 'create', canBePrimary: (selection: CdTableSelection) => selection.hasSingleSelection, disable: (selection: CdTableSelection) => - !selection.hasSingleSelection || selection.first().cdExecuting, + this.getInvalidNameDisable(selection) || !!selection.first().cdExecuting, icon: Icons.copy, routerLink: () => `/block/rbd/copy/${getImageUri()}`, name: this.actionLabels.COPY @@ -152,7 +153,9 @@ export class RbdListComponent extends ListWithDetails implements OnInit { const flattenAction: CdTableAction = { permission: 'update', disable: (selection: CdTableSelection) => - !selection.hasSingleSelection || selection.first().cdExecuting || !selection.first().parent, + this.getInvalidNameDisable(selection) || + selection.first().cdExecuting || + !selection.first().parent, icon: Icons.flatten, click: () => this.flattenRbdModal(), name: this.actionLabels.FLATTEN @@ -163,8 +166,7 @@ export class RbdListComponent extends ListWithDetails implements OnInit { click: () => this.trashRbdModal(), name: this.actionLabels.TRASH, disable: (selection: CdTableSelection) => - !selection.first() || - !selection.hasSingleSelection || + this.getInvalidNameDisable(selection) || selection.first().image_format === RBDImageFormat.V1 }; this.tableActions = [ @@ -442,10 +444,16 @@ export class RbdListComponent extends ListWithDetails implements OnInit { return $localize`This RBD has cloned snapshots. 
Please delete related RBDs before deleting this RBD.`; } - return ( - !selection.first() || - !selection.hasSingleSelection || - this.hasClonedSnapshots(selection.first()) - ); + return this.getInvalidNameDisable(selection) || this.hasClonedSnapshots(selection.first()); + } + + getInvalidNameDisable(selection: CdTableSelection): string | boolean { + const first = selection.first(); + + if (first?.name?.match(/[@/]/)) { + return $localize`This RBD image has an invalid name and can't be managed by ceph.`; + } + + return !selection.first() || !selection.hasSingleSelection; } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-namespace-form/rbd-namespace-form-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-namespace-form/rbd-namespace-form-modal.component.html index c7a339f344b..debbf8643a3 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-namespace-form/rbd-namespace-form-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-namespace-form/rbd-namespace-form-modal.component.html @@ -70,13 +70,9 @@ </div> <div class="modal-footer"> - <cd-submit-button [form]="namespaceForm" - (submitAction)="submit()" - i18n>Create Namespace</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Close" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="submit()" + [form]="namespaceForm" + [submitText]="actionLabels.CREATE"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-namespace-form/rbd-namespace-form-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-namespace-form/rbd-namespace-form-modal.component.ts index 5bca4266327..bad32c3c554 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-namespace-form/rbd-namespace-form-modal.component.ts +++ 
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-namespace-form/rbd-namespace-form-modal.component.ts @@ -13,6 +13,7 @@ import { Subject } from 'rxjs'; import { Pool } from '~/app/ceph/pool/pool'; import { PoolService } from '~/app/shared/api/pool.service'; import { RbdService } from '~/app/shared/api/rbd.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { NotificationType } from '~/app/shared/enum/notification-type.enum'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { FinishedTask } from '~/app/shared/models/finished-task'; @@ -39,6 +40,7 @@ export class RbdNamespaceFormModalComponent implements OnInit { constructor( public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private authStorageService: AuthStorageService, private notificationService: NotificationService, private poolService: PoolService, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-snapshot-form/rbd-snapshot-form-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-snapshot-form/rbd-snapshot-form-modal.component.html index ce948ed8d45..3b15b0aae69 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-snapshot-form/rbd-snapshot-form-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-snapshot-form/rbd-snapshot-form-modal.component.html @@ -29,14 +29,9 @@ </div> <div class="modal-footer"> - <cd-submit-button [form]="snapshotForm" - i18n="form action button|Example: Create rbdSnapshot@@formActionButton" - (submitAction)="submit()">{{ action | titlecase }} - {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Close" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="submit()" + [form]="snapshotForm" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)"></cd-form-button-panel> </div> </form> </ng-container> diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-list/rbd-trash-list.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-list/rbd-trash-list.component.html index 340d0d0d499..044a1e9ac0a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-list/rbd-trash-list.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-list/rbd-trash-list.component.html @@ -41,9 +41,10 @@ </ng-template> <ng-template #deleteTpl - let-expiresAt> + let-expiresAt="expiresAt" + let-isExpired="isExpired"> <p class="text-danger" - *ngIf="!isExpired(expiresAt)"> + *ngIf="!isExpired"> <strong> <ng-container i18n>This image is protected until {{ expiresAt | cdDate }}.</ng-container> </strong> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-list/rbd-trash-list.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-list/rbd-trash-list.component.spec.ts index 939f04e66f8..17d8eed0fb6 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-list/rbd-trash-list.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-list/rbd-trash-list.component.spec.ts @@ -6,6 +6,7 @@ import { RouterTestingModule } from '@angular/router/testing'; import { NgbNavModule } from '@ng-bootstrap/ng-bootstrap'; import moment from 'moment'; +import { NgxPipeFunctionModule } from 'ngx-pipe-function'; import { ToastrModule } from 'ngx-toastr'; import { of } from 'rxjs'; @@ -34,6 +35,7 @@ describe('RbdTrashListComponent', () => { RouterTestingModule, SharedModule, NgbNavModule, + NgxPipeFunctionModule, ToastrModule.forRoot() ], providers: [TaskListService] diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-list/rbd-trash-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-list/rbd-trash-list.component.ts index a0fafc19238..43fe42b99fa 100644 --- 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-list/rbd-trash-list.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-list/rbd-trash-list.component.ts @@ -201,13 +201,14 @@ export class RbdTrashListComponent implements OnInit { const namespace = this.selection.first().namespace; const imageId = this.selection.first().id; const expiresAt = this.selection.first().deferment_end_time; + const isExpired = moment().isAfter(expiresAt); const imageIdSpec = new ImageSpec(poolName, namespace, imageId); this.modalRef = this.modalService.show(CriticalConfirmationModalComponent, { itemDescription: 'RBD', itemNames: [imageIdSpec], bodyTemplate: this.deleteTpl, - bodyContext: { $implicit: expiresAt }, + bodyContext: { expiresAt, isExpired }, submitActionObservable: () => this.taskWrapper.wrapTaskAroundCall({ task: new FinishedTask('rbd/trash/remove', { @@ -218,10 +219,6 @@ export class RbdTrashListComponent implements OnInit { }); } - isExpired(expiresAt: string): boolean { - return moment().isAfter(expiresAt); - } - purgeModal() { this.modalService.show(RbdTrashPurgeModalComponent); } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-move-modal/rbd-trash-move-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-move-modal/rbd-trash-move-modal.component.html index 588bc78c47e..b87dbfe6b6d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-move-modal/rbd-trash-move-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-move-modal/rbd-trash-move-modal.component.html @@ -44,13 +44,9 @@ </div> <div class="modal-footer"> - <cd-submit-button i18n - [form]="moveForm" - (submitAction)="moveImage()">Move Image</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="moveImage()" + [form]="moveForm" + 
[submitText]="actionLabels.MOVE"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-move-modal/rbd-trash-move-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-move-modal/rbd-trash-move-modal.component.ts index 28b82658162..ccf381f9c88 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-move-modal/rbd-trash-move-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-move-modal/rbd-trash-move-modal.component.ts @@ -4,6 +4,7 @@ import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import moment from 'moment'; import { RbdService } from '~/app/shared/api/rbd.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { CdFormBuilder } from '~/app/shared/forms/cd-form-builder'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { CdValidators } from '~/app/shared/forms/cd-validators'; @@ -34,6 +35,7 @@ export class RbdTrashMoveModalComponent implements OnInit { constructor( private rbdService: RbdService, public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private fb: CdFormBuilder, private taskWrapper: TaskWrapperService ) { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-purge-modal/rbd-trash-purge-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-purge-modal/rbd-trash-purge-modal.component.html index 692014a3862..4958173083b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-purge-modal/rbd-trash-purge-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-purge-modal/rbd-trash-purge-modal.component.html @@ -39,14 +39,9 @@ </div> <div class="modal-footer"> - <cd-submit-button id="purgeFormButton" - [form]="purgeForm" - (submitAction)="purge()" - i18n>Purge Trash</cd-submit-button> - 
<cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="purge()" + [form]="purgeForm" + [submitText]="actionLabels.PURGE"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-purge-modal/rbd-trash-purge-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-purge-modal/rbd-trash-purge-modal.component.ts index 3969926ef46..e4df25d15ec 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-purge-modal/rbd-trash-purge-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-purge-modal/rbd-trash-purge-modal.component.ts @@ -5,6 +5,7 @@ import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import { Pool } from '~/app/ceph/pool/pool'; import { PoolService } from '~/app/shared/api/pool.service'; import { RbdService } from '~/app/shared/api/rbd.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { CdFormBuilder } from '~/app/shared/forms/cd-form-builder'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { FinishedTask } from '~/app/shared/models/finished-task'; @@ -26,6 +27,7 @@ export class RbdTrashPurgeModalComponent implements OnInit { private authStorageService: AuthStorageService, private rbdService: RbdService, public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private fb: CdFormBuilder, private poolService: PoolService, private taskWrapper: TaskWrapperService diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-restore-modal/rbd-trash-restore-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-restore-modal/rbd-trash-restore-modal.component.html index 6aa4105d0d0..ab64c78285d 100644 --- 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-restore-modal/rbd-trash-restore-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-restore-modal/rbd-trash-restore-modal.component.html @@ -34,13 +34,9 @@ </div> <div class="modal-footer"> - <cd-submit-button [form]="restoreForm" - (submitAction)="restore()" - i18n>Restore Image</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="restore()" + [form]="restoreForm" + [submitText]="actionLabels.RESTORE"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-restore-modal/rbd-trash-restore-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-restore-modal/rbd-trash-restore-modal.component.ts index a2eae8c1b28..860d66cc017 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-restore-modal/rbd-trash-restore-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-trash-restore-modal/rbd-trash-restore-modal.component.ts @@ -3,6 +3,7 @@ import { Component, OnInit } from '@angular/core'; import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import { RbdService } from '~/app/shared/api/rbd.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { CdFormBuilder } from '~/app/shared/forms/cd-form-builder'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { ExecutingTask } from '~/app/shared/models/executing-task'; @@ -28,6 +29,7 @@ export class RbdTrashRestoreModalComponent implements OnInit { constructor( private rbdService: RbdService, public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private fb: CdFormBuilder, private taskWrapper: TaskWrapperService ) {} diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-directories/cephfs-directories.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-directories/cephfs-directories.component.spec.ts index f23ae749f5b..c8f6c22a14b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-directories/cephfs-directories.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-directories/cephfs-directories.component.spec.ts @@ -936,7 +936,10 @@ describe('CephfsDirectoriesComponent', () => { }); it('should test all quota table actions permission combinations', () => { - const permissionHelper: PermissionHelper = new PermissionHelper(component.permission); + const permissionHelper: PermissionHelper = new PermissionHelper(component.permission, { + single: { dirValue: 0 }, + multiple: [{ dirValue: 0 }, {}] + }); const tableActions = permissionHelper.setPermissionsAndGetActions( component.quota.tableActions ); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts index 62464cadefe..cc58c38b8dc 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/cluster.module.ts @@ -13,6 +13,7 @@ import { NgbTooltipModule, NgbTypeaheadModule } from '@ng-bootstrap/ng-bootstrap'; +import { NgxPipeFunctionModule } from 'ngx-pipe-function'; import { SharedModule } from '~/app/shared/shared.module'; import { PerformanceCounterModule } from '../performance-counter/performance-counter.module'; @@ -47,6 +48,7 @@ import { RulesListComponent } from './prometheus/rules-list/rules-list.component import { SilenceFormComponent } from './prometheus/silence-form/silence-form.component'; import { SilenceListComponent } from './prometheus/silence-list/silence-list.component'; import { SilenceMatcherModalComponent } from 
'./prometheus/silence-matcher-modal/silence-matcher-modal.component'; +import { PlacementPipe } from './services/placement.pipe'; import { ServiceDaemonListComponent } from './services/service-daemon-list/service-daemon-list.component'; import { ServiceDetailsComponent } from './services/service-details/service-details.component'; import { ServiceFormComponent } from './services/service-form/service-form.component'; @@ -70,7 +72,8 @@ import { TelemetryComponent } from './telemetry/telemetry.component'; CephSharedModule, NgbDatepickerModule, NgbPopoverModule, - NgbDropdownModule + NgbDropdownModule, + NgxPipeFunctionModule ], declarations: [ HostsComponent, @@ -108,7 +111,8 @@ import { TelemetryComponent } from './telemetry/telemetry.component'; TelemetryComponent, PrometheusTabsComponent, ServiceFormComponent, - OsdFlagsIndivModalComponent + OsdFlagsIndivModalComponent, + PlacementPipe ] }) export class ClusterModule {} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/configuration/configuration-form/configuration-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/configuration/configuration-form/configuration-form.component.html index 3df68d267b8..27225e5b0e0 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/configuration/configuration-form/configuration-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/configuration/configuration-form/configuration-form.component.html @@ -150,13 +150,10 @@ </div> <!-- Footer --> <div class="card-footer"> - <div class="text-right"> - <cd-submit-button [form]="formDir" - (submitAction)="submit()"> - <span i18n>Save</span> - </cd-submit-button> - <cd-back-button></cd-back-button> - </div> + <cd-form-button-panel (submitActionEvent)="submit()" + [form]="configForm" + [submitText]="actionLabels.UPDATE" + wrappingClass="text-right"></cd-form-button-panel> </div> </div> </form> diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/configuration/configuration-form/configuration-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/configuration/configuration-form/configuration-form.component.ts index aef0eceb557..18099109d5d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/configuration/configuration-form/configuration-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/configuration/configuration-form/configuration-form.component.ts @@ -7,6 +7,7 @@ import _ from 'lodash'; import { ConfigurationService } from '~/app/shared/api/configuration.service'; import { ConfigFormModel } from '~/app/shared/components/config-option/config-option.model'; import { ConfigOptionTypes } from '~/app/shared/components/config-option/config-option.types'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { NotificationType } from '~/app/shared/enum/notification-type.enum'; import { CdForm } from '~/app/shared/forms/cd-form'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; @@ -30,6 +31,7 @@ export class ConfigurationFormComponent extends CdForm implements OnInit { availSections = ['global', 'mon', 'mgr', 'osd', 'mds', 'client']; constructor( + public actionLabels: ActionLabelsI18n, private route: ActivatedRoute, private router: Router, private configService: ConfigurationService, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.html index 0fcf0cd1a5f..0bb3fb8dbfd 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/host-form/host-form.component.html @@ -34,12 +34,10 @@ </div> <div class="card-footer"> - <div class="text-right"> - <cd-submit-button [form]="formDir" - 
i18n="form action button|Example: Create Pool@@formActionButton" - (submitAction)="submit()">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button></cd-back-button> - </div> + <cd-form-button-panel (submitActionEvent)="submit()" + [form]="hostForm" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + wrappingClass="text-right"></cd-form-button-panel> </div> </div> </form> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.ts index 669a9bc89e2..fa778d5b4f2 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component.ts @@ -11,6 +11,7 @@ import { import _ from 'lodash'; import { Subscription } from 'rxjs'; +import { HostService } from '~/app/shared/api/host.service'; import { OrchestratorService } from '~/app/shared/api/orchestrator.service'; import { FormModalComponent } from '~/app/shared/components/form-modal/form-modal.component'; import { TableComponent } from '~/app/shared/datatable/table/table.component'; @@ -80,7 +81,8 @@ export class InventoryDevicesComponent implements OnInit, OnDestroy { private dimlessBinary: DimlessBinaryPipe, private modalService: ModalService, private notificationService: NotificationService, - private orchService: OrchestratorService + private orchService: OrchestratorService, + private hostService: HostService ) {} ngOnInit() { @@ -223,7 +225,7 @@ export class InventoryDevicesComponent implements OnInit, OnDestroy { ], submitButtonText: $localize`Execute`, onSubmit: (values: any) => { - this.orchService.identifyDevice(hostname, device, values.duration).subscribe(() => { + this.hostService.identifyDevice(hostname, 
device, values.duration).subscribe(() => { this.notificationService.show( NotificationType.success, $localize`Identifying '${device}' started on host '${hostname}'` diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.html index e05f6dc59e1..122aab2ed6d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.html @@ -1,4 +1,4 @@ -<cd-orchestrator-doc-panel *ngIf="!orchStatus?.available"></cd-orchestrator-doc-panel> +<cd-orchestrator-doc-panel *ngIf="showDocPanel"></cd-orchestrator-doc-panel> <ng-container *ngIf="orchStatus?.available"> <legend i18n>Devices</legend> <div class="row"> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.spec.ts index 48edbaec598..da24403dee1 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.spec.ts @@ -7,6 +7,7 @@ import { RouterTestingModule } from '@angular/router/testing'; import { ToastrModule } from 'ngx-toastr'; import { of } from 'rxjs'; +import { HostService } from '~/app/shared/api/host.service'; import { OrchestratorService } from '~/app/shared/api/orchestrator.service'; import { SharedModule } from '~/app/shared/shared.module'; import { configureTestBed } from '~/testing/unit-test-helper'; @@ -17,6 +18,7 @@ describe('InventoryComponent', () => { let component: InventoryComponent; let fixture: ComponentFixture<InventoryComponent>; let orchService: OrchestratorService; + let hostService: HostService; configureTestBed({ imports: [ @@ -34,28 +36,33 @@ 
describe('InventoryComponent', () => { fixture = TestBed.createComponent(InventoryComponent); component = fixture.componentInstance; orchService = TestBed.inject(OrchestratorService); + hostService = TestBed.inject(HostService); spyOn(orchService, 'status').and.returnValue(of({ available: true })); - spyOn(orchService, 'inventoryDeviceList').and.callThrough(); + spyOn(hostService, 'inventoryDeviceList').and.callThrough(); }); it('should create', () => { expect(component).toBeTruthy(); }); + it('should not display doc panel if orchestrator is available', () => { + expect(component.showDocPanel).toBeFalsy(); + }); + describe('after ngOnInit', () => { it('should load devices', () => { fixture.detectChanges(); - expect(orchService.inventoryDeviceList).toHaveBeenNthCalledWith(1, undefined, false); + expect(hostService.inventoryDeviceList).toHaveBeenNthCalledWith(1, undefined, false); component.refresh(); // click refresh button - expect(orchService.inventoryDeviceList).toHaveBeenNthCalledWith(2, undefined, true); + expect(hostService.inventoryDeviceList).toHaveBeenNthCalledWith(2, undefined, true); const newHost = 'host0'; component.hostname = newHost; fixture.detectChanges(); component.ngOnChanges(); - expect(orchService.inventoryDeviceList).toHaveBeenNthCalledWith(3, newHost, false); + expect(hostService.inventoryDeviceList).toHaveBeenNthCalledWith(3, newHost, false); component.refresh(); // click refresh button - expect(orchService.inventoryDeviceList).toHaveBeenNthCalledWith(4, newHost, true); + expect(hostService.inventoryDeviceList).toHaveBeenNthCalledWith(4, newHost, true); }); }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.ts index edf7f61e107..a60f5d698fc 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.ts +++ 
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/inventory/inventory.component.ts @@ -2,6 +2,7 @@ import { Component, Input, NgZone, OnChanges, OnDestroy, OnInit } from '@angular import { Subscription, timer as observableTimer } from 'rxjs'; +import { HostService } from '~/app/shared/api/host.service'; import { OrchestratorService } from '~/app/shared/api/orchestrator.service'; import { Icons } from '~/app/shared/enum/icons.enum'; import { OrchestratorStatus } from '~/app/shared/models/orchestrator.interface'; @@ -23,14 +24,20 @@ export class InventoryComponent implements OnChanges, OnInit, OnDestroy { icons = Icons; orchStatus: OrchestratorStatus; + showDocPanel = false; devices: Array<InventoryDevice> = []; - constructor(private orchService: OrchestratorService, private ngZone: NgZone) {} + constructor( + private orchService: OrchestratorService, + private hostService: HostService, + private ngZone: NgZone + ) {} ngOnInit() { this.orchService.status().subscribe((status) => { this.orchStatus = status; + this.showDocPanel = !status.available; if (status.available) { // Create a timer to get cached inventory from the orchestrator. // Do not ask the orchestrator frequently to refresh its cache data because it's expensive. 
@@ -64,7 +71,7 @@ export class InventoryComponent implements OnChanges, OnInit, OnDestroy { if (this.hostname === '') { return; } - this.orchService.inventoryDeviceList(this.hostname, refresh).subscribe( + this.hostService.inventoryDeviceList(this.hostname, refresh).subscribe( (devices: InventoryDevice[]) => { this.devices = devices; }, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/mgr-modules/mgr-module-form/mgr-module-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/mgr-modules/mgr-module-form/mgr-module-form.component.html index de235b58b6d..89cb7b4d7ee 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/mgr-modules/mgr-module-form/mgr-module-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/mgr-modules/mgr-module-form/mgr-module-form.component.html @@ -100,16 +100,10 @@ </div> </div> <div class="card-footer"> - <div class="text-right"> - <cd-submit-button (submitAction)="onSubmit()" - [form]="mgrModuleForm"> - <ng-container i18n>Update</ng-container> - </cd-submit-button> - <button type="button" - class="btn btn-light" - routerLink="/mgr-modules" - i18n>Back</button> - </div> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="mgrModuleForm" + [submitText]="actionLabels.UPDATE" + wrappingClass="text-right"></cd-form-button-panel> </div> </div> </form> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/mgr-modules/mgr-module-form/mgr-module-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/mgr-modules/mgr-module-form/mgr-module-form.component.ts index b07cc46bb38..c40af28031e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/mgr-modules/mgr-module-form/mgr-module-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/mgr-modules/mgr-module-form/mgr-module-form.component.ts @@ -6,6 +6,7 @@ import _ from 'lodash'; import { forkJoin as observableForkJoin } from 'rxjs'; 
import { MgrModuleService } from '~/app/shared/api/mgr-module.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { NotificationType } from '~/app/shared/enum/notification-type.enum'; import { CdForm } from '~/app/shared/forms/cd-form'; import { CdFormBuilder } from '~/app/shared/forms/cd-form-builder'; @@ -24,6 +25,7 @@ export class MgrModuleFormComponent extends CdForm implements OnInit { moduleOptions: any[] = []; constructor( + public actionLabels: ActionLabelsI18n, private route: ActivatedRoute, private router: Router, private formBuilder: CdFormBuilder, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-creation-preview-modal/osd-creation-preview-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-creation-preview-modal/osd-creation-preview-modal.component.html index bbbe426a45e..9b442dbc78d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-creation-preview-modal/osd-creation-preview-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-creation-preview-modal/osd-creation-preview-modal.component.html @@ -11,9 +11,9 @@ <pre>{{ driveGroups | json}}</pre> </div> <div class="modal-footer"> - <cd-submit-button (submitAction)="onSubmit()" - [form]="formGroup">{{ action | titlecase }}</cd-submit-button> - <cd-back-button [back]="activeModal.close"></cd-back-button> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="formGroup" + [submitText]="action | titlecase"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.html index ce323c7d61a..30effc21b53 100644 --- 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.html @@ -31,10 +31,10 @@ </div> </div> <div class="modal-footer"> - <cd-submit-button (submitAction)="onSubmit()" - [form]="formGroup" - [disabled]="!canSubmit || filteredDevices.length === 0">{{ action | titlecase }}</cd-submit-button> - <cd-back-button [back]="activeModal.close"></cd-back-button> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="formGroup" + [disabled]="!canSubmit || filteredDevices.length === 0" + [submitText]="action | titlecase"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.spec.ts index cbcabfba6f9..60ef65d0517 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-devices-selection-modal/osd-devices-selection-modal.component.spec.ts @@ -23,7 +23,7 @@ describe('OsdDevicesSelectionModalComponent', () => { const expectSubmitButton = (enabled: boolean) => { const nativeElement = fixture.debugElement.nativeElement; - const button = nativeElement.querySelector('.modal-footer button'); + const button = nativeElement.querySelector('.modal-footer .tc_submitButton'); expect(button.disabled).toBe(!enabled); }; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.html 
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.html index c392b8346c5..f8a10ff24de 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.html @@ -38,14 +38,10 @@ class="btn btn-light" (click)="resetSelection()" i18n>Restore previous selection</button> - <cd-submit-button *ngIf="permissions.osd.update" - (submitAction)="submitAction()" - [form]="osdFlagsForm" - i18n>Submit</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="submitAction()" + [form]="osdFlagsForm" + [showSubmit]="permissions.osd.update" + [submitText]="actionLabels.UPDATE"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.ts index 652c9afa58b..e9e0b876f3b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-indiv-modal/osd-flags-indiv-modal.component.ts @@ -5,6 +5,7 @@ import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import _ from 'lodash'; import { OsdService } from '~/app/shared/api/osd.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { NotificationType } from '~/app/shared/enum/notification-type.enum'; import { Flag } from '~/app/shared/models/flag'; import { Permissions } from '~/app/shared/models/permissions'; @@ -59,6 +60,7 @@ export class OsdFlagsIndivModalComponent implements OnInit { 
constructor( public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private authStorageService: AuthStorageService, private osdService: OsdService, private notificationService: NotificationService diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-modal/osd-flags-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-modal/osd-flags-modal.component.html index 0b39576b7d3..2ae6460fbb7 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-modal/osd-flags-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-modal/osd-flags-modal.component.html @@ -31,14 +31,10 @@ </div> <div class="modal-footer"> - <cd-submit-button *ngIf="permissions.osd.update" - (submitAction)="submitAction()" - [form]="osdFlagsForm" - i18n>Submit</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="submitAction()" + [form]="osdFlagsForm" + [showSubmit]="permissions.osd.update" + [submitText]="actionLabels.UPDATE"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-modal/osd-flags-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-modal/osd-flags-modal.component.ts index 05e61324e14..640719382b4 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-modal/osd-flags-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-flags-modal/osd-flags-modal.component.ts @@ -5,6 +5,7 @@ import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import _ from 'lodash'; import { OsdService } from '~/app/shared/api/osd.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { NotificationType } from 
'~/app/shared/enum/notification-type.enum'; import { Permissions } from '~/app/shared/models/permissions'; import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; @@ -115,6 +116,7 @@ export class OsdFlagsModalComponent implements OnInit { constructor( public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private authStorageService: AuthStorageService, private osdService: OsdService, private notificationService: NotificationService diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.html index 998d5d9856b..8907b161605 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.html @@ -122,14 +122,11 @@ </fieldset> </div> <div class="card-footer"> - <div class="text-right"> - <cd-submit-button #previewButton - (submitAction)="submit()" - i18n - [form]="formDir" - [disabled]="dataDeviceSelectionGroups.devices.length === 0">Preview</cd-submit-button> - <cd-back-button></cd-back-button> - </div> + <cd-form-button-panel (submitActionEvent)="submit()" + [form]="form" + [disabled]="dataDeviceSelectionGroups.devices.length === 0" + [submitText]="actionLabels.PREVIEW" + wrappingClass="text-right"></cd-form-button-panel> </div> </div> </form> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.spec.ts index b58b9c0a6f7..2044b084c7a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.spec.ts @@ -9,6 +9,7 @@ import { BehaviorSubject, of } from 'rxjs'; import { 
InventoryDevice } from '~/app/ceph/cluster/inventory/inventory-devices/inventory-device.model'; import { InventoryDevicesComponent } from '~/app/ceph/cluster/inventory/inventory-devices/inventory-devices.component'; +import { HostService } from '~/app/shared/api/host.service'; import { OrchestratorService } from '~/app/shared/api/orchestrator.service'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { SummaryService } from '~/app/shared/services/summary.service'; @@ -26,6 +27,7 @@ describe('OsdFormComponent', () => { let fixture: ComponentFixture<OsdFormComponent>; let fixtureHelper: FixtureHelper; let orchService: OrchestratorService; + let hostService: HostService; let summaryService: SummaryService; const devices: InventoryDevice[] = [ { @@ -49,7 +51,7 @@ describe('OsdFormComponent', () => { ]; const expectPreviewButton = (enabled: boolean) => { - const debugElement = fixtureHelper.getElementByCss('.card-footer button'); + const debugElement = fixtureHelper.getElementByCss('.tc_submitButton'); expect(debugElement.nativeElement.disabled).toBe(!enabled); }; @@ -109,6 +111,7 @@ describe('OsdFormComponent', () => { form = component.form; formHelper = new FormHelper(form); orchService = TestBed.inject(OrchestratorService); + hostService = TestBed.inject(HostService); summaryService = TestBed.inject(SummaryService); summaryService['summaryDataSource'] = new BehaviorSubject(null); summaryService['summaryData$'] = summaryService['summaryDataSource'].asObservable(); @@ -122,7 +125,7 @@ describe('OsdFormComponent', () => { describe('without orchestrator', () => { beforeEach(() => { spyOn(orchService, 'status').and.returnValue(of({ available: false })); - spyOn(orchService, 'inventoryDeviceList').and.callThrough(); + spyOn(hostService, 'inventoryDeviceList').and.callThrough(); fixture.detectChanges(); }); @@ -132,14 +135,14 @@ describe('OsdFormComponent', () => { }); it('should not call inventoryDeviceList', () => { - 
expect(orchService.inventoryDeviceList).not.toHaveBeenCalled(); + expect(hostService.inventoryDeviceList).not.toHaveBeenCalled(); }); }); describe('with orchestrator', () => { beforeEach(() => { spyOn(orchService, 'status').and.returnValue(of({ available: true })); - spyOn(orchService, 'inventoryDeviceList').and.returnValue(of([])); + spyOn(hostService, 'inventoryDeviceList').and.returnValue(of([])); fixture.detectChanges(); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.ts index 71bec7adfc1..4ddf454c640 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.ts @@ -5,6 +5,7 @@ import { Router } from '@angular/router'; import _ from 'lodash'; import { InventoryDevice } from '~/app/ceph/cluster/inventory/inventory-devices/inventory-device.model'; +import { HostService } from '~/app/shared/api/host.service'; import { OrchestratorService } from '~/app/shared/api/orchestrator.service'; import { SubmitButtonComponent } from '~/app/shared/components/submit-button/submit-button.component'; import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; @@ -65,6 +66,7 @@ export class OsdFormComponent extends CdForm implements OnInit { public actionLabels: ActionLabelsI18n, private authStorageService: AuthStorageService, private orchService: OrchestratorService, + private hostService: HostService, private router: Router, private modalService: ModalService ) { @@ -120,7 +122,7 @@ export class OsdFormComponent extends CdForm implements OnInit { } getDataDevices() { - this.orchService.inventoryDeviceList().subscribe( + this.hostService.inventoryDeviceList().subscribe( (devices: InventoryDevice[]) => { this.allDevices = _.filter(devices, 'available'); this.availDevices = 
[...this.allDevices]; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.html index fe236902e97..9f4f3e2152c 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.html @@ -54,8 +54,9 @@ <div [ngbNavOutlet]="nav"></div> <ng-template #markOsdConfirmationTpl - let-markActionDescription="markActionDescription"> - <ng-container i18n><strong>OSD(s) {{ getSelectedOsdIds() | join }}</strong> will be marked + let-markActionDescription="markActionDescription" + let-osdIds="osdIds"> + <ng-container i18n><strong>OSD(s) {{ osdIds | join }}</strong> will be marked <strong>{{ markActionDescription }}</strong> if you proceed.</ng-container> </ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.spec.ts index 7d4c85e44f5..d6f86547148 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.spec.ts @@ -425,7 +425,7 @@ describe('OsdListComponent', () => { const tableActionElement = fixture.debugElement.query(By.directive(TableActionsComponent)); const toClassName = TestBed.inject(TableActionsComponent).toClassName; const getActionClasses = (action: CdTableAction) => - tableActionElement.query(By.css(`[ngbDropdownItem].${toClassName(action.name)}`)).classes; + tableActionElement.query(By.css(`[ngbDropdownItem].${toClassName(action)}`)).classes; component.tableActions.forEach((action) => { if (action.name === 'Create') { diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.ts index b03daa392df..45dc840655d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.ts @@ -373,7 +373,10 @@ export class OsdListComponent extends ListWithDetails implements OnInit { */ getSelectedOsdIds(): number[] { const osdIds = this.osds.map((osd) => osd.id); - return this.selection.selected.map((row) => row.id).filter((id) => osdIds.includes(id)); + return this.selection.selected + .map((row) => row.id) + .filter((id) => osdIds.includes(id)) + .sort((a, b) => a - b); } getSelectedOsds(): any[] { @@ -483,12 +486,14 @@ export class OsdListComponent extends ListWithDetails implements OnInit { } showConfirmationModal(markAction: string, onSubmit: (id: number) => Observable<any>) { + const osdIds = this.getSelectedOsdIds(); this.bsModalRef = this.modalService.show(ConfirmationModalComponent, { titleText: $localize`Mark OSD ${markAction}`, buttonText: $localize`Mark ${markAction}`, bodyTpl: this.markOsdConfirmationTpl, bodyContext: { - markActionDescription: markAction + markActionDescription: markAction, + osdIds }, onSubmit: () => { observableForkJoin( diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-pg-scrub-modal/osd-pg-scrub-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-pg-scrub-modal/osd-pg-scrub-modal.component.html index 14b51cc6707..841f41b575a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-pg-scrub-modal/osd-pg-scrub-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-pg-scrub-modal/osd-pg-scrub-modal.component.html @@ -34,12 +34,11 @@ </div> </div> <div class="modal-footer"> - <cd-submit-button
*ngIf="permissions.configOpt.update" - (submitAction)="submitAction()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="osdPgScrubForm">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button [back]="activeModal.close"> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="submitAction()" + [form]="osdPgScrubForm" + [showSubmit]="permissions.configOpt.update" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)"> + </cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-recv-speed-modal/osd-recv-speed-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-recv-speed-modal/osd-recv-speed-modal.component.html index 892df49b9ab..6544872726e 100755 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-recv-speed-modal/osd-recv-speed-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-recv-speed-modal/osd-recv-speed-modal.component.html @@ -82,14 +82,10 @@ </div> </div> <div class="modal-footer"> - <cd-submit-button *ngIf="permissions.configOpt.update" - (submitAction)="submitAction()" - [form]="osdRecvSpeedForm" - i18n>Submit</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="submitAction()" + [form]="osdRecvSpeedForm" + [submitText]="actionLabels.UPDATE" + [showSubmit]="permissions.configOpt.update"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-recv-speed-modal/osd-recv-speed-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-recv-speed-modal/osd-recv-speed-modal.component.ts index 6c41d0180ab..6546e086569 100755 --- 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-recv-speed-modal/osd-recv-speed-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-recv-speed-modal/osd-recv-speed-modal.component.ts @@ -7,6 +7,7 @@ import _ from 'lodash'; import { ConfigurationService } from '~/app/shared/api/configuration.service'; import { OsdService } from '~/app/shared/api/osd.service'; import { ConfigOptionTypes } from '~/app/shared/components/config-option/config-option.types'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { NotificationType } from '~/app/shared/enum/notification-type.enum'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { Permissions } from '~/app/shared/models/permissions'; @@ -27,6 +28,7 @@ export class OsdRecvSpeedModalComponent implements OnInit { constructor( public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private authStorageService: AuthStorageService, private configService: ConfigurationService, private notificationService: NotificationService, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-reweight-modal/osd-reweight-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-reweight-modal/osd-reweight-modal.component.html index 34b47b30836..e5aa22311f1 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-reweight-modal/osd-reweight-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-reweight-modal/osd-reweight-modal.component.html @@ -29,14 +29,9 @@ </div> <div class="modal-footer"> - <cd-submit-button (submitAction)="reweight()" - [form]="reweightForm" - [disabled]="reweightForm.invalid" - i18n>Reweight</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="reweight()" + [form]="reweightForm" + 
[submitText]="actionLabels.REWEIGHT"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-reweight-modal/osd-reweight-modal.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-reweight-modal/osd-reweight-modal.component.spec.ts index d240f68fd34..41e05021efc 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-reweight-modal/osd-reweight-modal.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-reweight-modal/osd-reweight-modal.component.spec.ts @@ -1,4 +1,5 @@ import { HttpClientTestingModule } from '@angular/common/http/testing'; +import { NO_ERRORS_SCHEMA } from '@angular/core'; import { ComponentFixture, TestBed } from '@angular/core/testing'; import { ReactiveFormsModule } from '@angular/forms'; import { RouterTestingModule } from '@angular/router/testing'; @@ -26,6 +27,7 @@ describe('OsdReweightModalComponent', () => { SubmitButtonComponent, BackButtonComponent ], + schemas: [NO_ERRORS_SCHEMA], providers: [OsdService, NgbActiveModal, CdFormBuilder] }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-reweight-modal/osd-reweight-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-reweight-modal/osd-reweight-modal.component.ts index 392f3200394..d101079776e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-reweight-modal/osd-reweight-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-reweight-modal/osd-reweight-modal.component.ts @@ -4,6 +4,7 @@ import { Validators } from '@angular/forms'; import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import { OsdService } from '~/app/shared/api/osd.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { CdFormBuilder } from '~/app/shared/forms/cd-form-builder'; import { CdFormGroup } from 
'~/app/shared/forms/cd-form-group'; @@ -18,6 +19,7 @@ export class OsdReweightModalComponent implements OnInit { reweightForm: CdFormGroup; constructor( + public actionLabels: ActionLabelsI18n, public activeModal: NgbActiveModal, private osdService: OsdService, private fb: CdFormBuilder diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-scrub-modal/osd-scrub-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-scrub-modal/osd-scrub-modal.component.html index 6f135adf17c..568c700fa69 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-scrub-modal/osd-scrub-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-scrub-modal/osd-scrub-modal.component.html @@ -13,13 +13,9 @@ </div> <div class="modal-footer"> - <cd-submit-button (submitAction)="scrub()" - [form]="scrubForm" - i18n>Submit</cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="scrub()" + [form]="scrubForm" + [submitText]="actionLabels.UPDATE"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-scrub-modal/osd-scrub-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-scrub-modal/osd-scrub-modal.component.ts index 9a1b58160c5..b2f636708b9 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-scrub-modal/osd-scrub-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-scrub-modal/osd-scrub-modal.component.ts @@ -5,6 +5,7 @@ import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; import { forkJoin } from 'rxjs'; import { OsdService } from '~/app/shared/api/osd.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { NotificationType } from '~/app/shared/enum/notification-type.enum'; import { 
JoinPipe } from '~/app/shared/pipes/join.pipe'; import { NotificationService } from '~/app/shared/services/notification.service'; @@ -21,6 +22,7 @@ export class OsdScrubModalComponent implements OnInit { constructor( public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, private osdService: OsdService, private notificationService: NotificationService, private joinPipe: JoinPipe diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/prometheus/silence-form/silence-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/prometheus/silence-form/silence-form.component.html index 40dded4dbfc..ce8928f7f02 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/prometheus/silence-form/silence-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/prometheus/silence-form/silence-form.component.html @@ -205,12 +205,9 @@ <div class="card-footer"> <div class="text-right"> - <cd-submit-button (submitAction)="submit()" - [form]="formDir" - i18n="@@formTitle"> - {{ action | titlecase }} {{ resource | upperFirst }} - </cd-submit-button> - <cd-back-button></cd-back-button> + <cd-form-button-panel (submitActionEvent)="submit()" + [form]="form" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)"></cd-form-button-panel> </div> </div> </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/prometheus/silence-matcher-modal/silence-matcher-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/prometheus/silence-matcher-modal/silence-matcher-modal.component.html index c849c79d316..db89adc5369 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/prometheus/silence-matcher-modal/silence-matcher-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/prometheus/silence-matcher-modal/silence-matcher-modal.component.html @@ -76,14 +76,9 @@ </div> <div class="modal-footer"> - <cd-submit-button 
(submitAction)="onSubmit()" - [form]="form"> - <span i18n>{editMode, select, true {Edit} other {Add}}</span> - </cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Close" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="form" + [submitText]="getMode()"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/prometheus/silence-matcher-modal/silence-matcher-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/prometheus/silence-matcher-modal/silence-matcher-modal.component.ts index 5c24a9bdf1c..bdd616ce9c6 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/prometheus/silence-matcher-modal/silence-matcher-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/prometheus/silence-matcher-modal/silence-matcher-modal.component.ts @@ -6,6 +6,7 @@ import _ from 'lodash'; import { merge, Observable, Subject } from 'rxjs'; import { debounceTime, distinctUntilChanged, filter, map } from 'rxjs/operators'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { CdFormBuilder } from '~/app/shared/forms/cd-form-builder'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { @@ -54,7 +55,8 @@ export class SilenceMatcherModalComponent { constructor( private formBuilder: CdFormBuilder, private silenceMatcher: PrometheusSilenceMatcherService, - public activeModal: NgbActiveModal + public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n ) { this.createForm(); this.subscribeToChanges(); @@ -90,6 +92,10 @@ export class SilenceMatcherModalComponent { ); } + getMode() { + return this.editMode ? 
this.actionLabels.EDIT : this.actionLabels.ADD; + } + preFillControls(matcher: AlertmanagerSilenceMatcher) { this.form.setValue(matcher); } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/placement.pipe.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/placement.pipe.spec.ts new file mode 100644 index 00000000000..6aef3c364cb --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/placement.pipe.spec.ts @@ -0,0 +1,78 @@ +import { PlacementPipe } from './placement.pipe'; + +describe('PlacementPipe', () => { + const pipe = new PlacementPipe(); + + it('create an instance', () => { + expect(pipe).toBeTruthy(); + }); + + it('transforms to no spec', () => { + expect(pipe.transform(undefined)).toBe('no spec'); + }); + + it('transforms to unmanaged', () => { + expect(pipe.transform({ unmanaged: true })).toBe('unmanaged'); + }); + + it('transforms placement (1)', () => { + expect( + pipe.transform({ + placement: { + hosts: ['mon0'] + } + }) + ).toBe('mon0'); + }); + + it('transforms placement (2)', () => { + expect( + pipe.transform({ + placement: { + hosts: ['mon0', 'mgr0'] + } + }) + ).toBe('mon0;mgr0'); + }); + + it('transforms placement (3)', () => { + expect( + pipe.transform({ + placement: { + count: 1 + } + }) + ).toBe('count:1'); + }); + + it('transforms placement (4)', () => { + expect( + pipe.transform({ + placement: { + label: 'foo' + } + }) + ).toBe('label:foo'); + }); + + it('transforms placement (5)', () => { + expect( + pipe.transform({ + placement: { + host_pattern: '*' + } + }) + ).toBe('*'); + }); + + it('transforms placement (6)', () => { + expect( + pipe.transform({ + placement: { + count: 2, + hosts: ['mon0', 'mgr0'] + } + }) + ).toBe('mon0;mgr0;count:2'); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/placement.pipe.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/placement.pipe.ts new file mode 100644 index 
00000000000..bd461bceb2f --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/placement.pipe.ts @@ -0,0 +1,41 @@ +import { Pipe, PipeTransform } from '@angular/core'; + +import _ from 'lodash'; + +@Pipe({ + name: 'placement' +}) +export class PlacementPipe implements PipeTransform { + /** + * Convert the placement configuration into human readable form. + * The output is equal to the column 'PLACEMENT' in 'ceph orch ls'. + * @param serviceSpec The service specification to process. + * @return The placement configuration as human readable string. + */ + transform(serviceSpec: object | undefined): string { + if (_.isUndefined(serviceSpec)) { + return $localize`no spec`; + } + if (_.get(serviceSpec, 'unmanaged', false)) { + return $localize`unmanaged`; + } + const kv: Array<any> = []; + const hosts: Array<string> = _.get(serviceSpec, 'placement.hosts'); + const count: number = _.get(serviceSpec, 'placement.count'); + const label: string = _.get(serviceSpec, 'placement.label'); + const hostPattern: string = _.get(serviceSpec, 'placement.host_pattern'); + if (_.isArray(hosts)) { + kv.push(...hosts); + } + if (_.isNumber(count)) { + kv.push($localize`count:${count}`); + } + if (_.isString(label)) { + kv.push($localize`label:${label}`); + } + if (_.isString(hostPattern)) { + kv.push(...hostPattern); + } + return kv.join(';'); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.html index 078ac04d42e..8e70b94a4ca 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.html @@ -1,17 +1,17 @@ -<cd-orchestrator-doc-panel 
*ngIf="!hasOrchestrator"></cd-orchestrator-doc-panel> +<cd-orchestrator-doc-panel *ngIf="showDocPanel"></cd-orchestrator-doc-panel> <cd-table *ngIf="hasOrchestrator" #daemonsTable [data]="daemons" [columns]="columns" columnMode="flex" - [autoReload]="60000" + [autoReload]="5000" (fetchData)="getDaemons($event)"> </cd-table> <ng-template #statusTpl let-row="row"> <span class="badge" - [ngClass]="getStatusClass(row.status)"> + [ngClass]="row | pipeFunction:getStatusClass"> {{ row.status_desc }} </span> </ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.spec.ts index 89b06b97910..42c06228d86 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.spec.ts @@ -2,6 +2,7 @@ import { HttpClientTestingModule } from '@angular/common/http/testing'; import { ComponentFixture, TestBed } from '@angular/core/testing'; import _ from 'lodash'; +import { NgxPipeFunctionModule } from 'ngx-pipe-function'; import { of } from 'rxjs'; import { CephModule } from '~/app/ceph/ceph.module'; @@ -77,7 +78,7 @@ describe('ServiceDaemonListComponent', () => { }; configureTestBed({ - imports: [HttpClientTestingModule, CephModule, CoreModule, SharedModule] + imports: [HttpClientTestingModule, CephModule, CoreModule, NgxPipeFunctionModule, SharedModule] }); beforeEach(() => { @@ -109,4 +110,8 @@ describe('ServiceDaemonListComponent', () => { component.getDaemons(new CdTableFetchDataContext(() => undefined)); expect(component.daemons.length).toBe(3); }); + + it('should not display doc panel if orchestrator is available', () => { + expect(component.showDocPanel).toBeFalsy(); + }); }); diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.ts index 5a7b223bc87..c6d0a0a0561 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.ts @@ -46,6 +46,7 @@ export class ServiceDaemonListComponent implements OnInit, OnChanges, AfterViewI columns: CdTableColumn[] = []; hasOrchestrator = false; + showDocPanel = false; private daemonsTable: TableComponent; private daemonsTableTplsSub: Subscription; @@ -126,6 +127,7 @@ export class ServiceDaemonListComponent implements OnInit, OnChanges, AfterViewI this.orchService.status().subscribe((data: { available: boolean }) => { this.hasOrchestrator = data.available; + this.showDocPanel = !data.available; }); } @@ -149,14 +151,14 @@ export class ServiceDaemonListComponent implements OnInit, OnChanges, AfterViewI } } - getStatusClass(status: number) { + getStatusClass(row: Daemon): string { return _.get( { '-1': 'badge-danger', '0': 'badge-warning', '1': 'badge-success' }, - status, + row.status, 'badge-dark' ); } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-details/service-details.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-details/service-details.component.spec.ts index 23bb679414b..6be3b268952 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-details/service-details.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-details/service-details.component.spec.ts @@ -3,6 +3,7 @@ import { ComponentFixture, TestBed } from '@angular/core/testing'; import { RouterTestingModule } from 
'@angular/router/testing'; import { NgbNavModule } from '@ng-bootstrap/ng-bootstrap'; +import { NgxPipeFunctionModule } from 'ngx-pipe-function'; import { CdTableSelection } from '~/app/shared/models/cd-table-selection'; import { SummaryService } from '~/app/shared/services/summary.service'; @@ -16,7 +17,13 @@ describe('ServiceDetailsComponent', () => { let fixture: ComponentFixture<ServiceDetailsComponent>; configureTestBed({ - imports: [HttpClientTestingModule, RouterTestingModule, SharedModule, NgbNavModule], + imports: [ + HttpClientTestingModule, + RouterTestingModule, + SharedModule, + NgbNavModule, + NgxPipeFunctionModule + ], declarations: [ServiceDetailsComponent, ServiceDaemonListComponent], providers: [{ provide: SummaryService, useValue: { subscribeOnce: jest.fn() } }] }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.html index 05918798d1b..6f9d47ce421 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.html @@ -403,11 +403,9 @@ <div class="card-footer"> <div class="text-right"> - <cd-submit-button (submitAction)="onSubmit()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="serviceForm">{{ action | titlecase }} {{ resource | upperFirst }} - </cd-submit-button> - <cd-back-button></cd-back-button> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="serviceForm" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)"></cd-form-button-panel> </div> </div> </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.spec.ts 
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.spec.ts index 215868065c7..6ed8b414532 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.spec.ts @@ -270,7 +270,7 @@ describe('ServiceFormComponent', () => { it('should submit iscsi with trusted ips', () => { formHelper.setValue('ssl', true); - formHelper.setValue('trusted_ip_list', ' 172.16.0.5, 192.1.1.10 '); + formHelper.setValue('trusted_ip_list', ' 172.16.0.5, 192.1.1.10 '); component.onSubmit(); expect(cephServiceService.create).toHaveBeenCalledWith({ service_type: 'iscsi', @@ -282,7 +282,7 @@ describe('ServiceFormComponent', () => { api_secure: true, ssl_cert: '', ssl_key: '', - trusted_ip_list: ['172.16.0.5', '192.1.1.10'] + trusted_ip_list: '172.16.0.5, 192.1.1.10' }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts index 9a2b7747b11..533f2ae833a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts @@ -300,9 +300,7 @@ export class ServiceFormComponent extends CdForm implements OnInit { case 'iscsi': serviceSpec['pool'] = values['pool']; if (_.isString(values['trusted_ip_list']) && !_.isEmpty(values['trusted_ip_list'])) { - let parts = _.split(values['trusted_ip_list'], ','); - parts = _.map(parts, _.trim); - serviceSpec['trusted_ip_list'] = parts; + serviceSpec['trusted_ip_list'] = values['trusted_ip_list'].trim(); } if (_.isNumber(values['api_port']) && values['api_port'] > 0) { serviceSpec['api_port'] = values['api_port']; diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.html index 1db551591f5..f6f66ee5148 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.html @@ -1,4 +1,4 @@ -<cd-orchestrator-doc-panel *ngIf="!orchStatus?.available"></cd-orchestrator-doc-panel> +<cd-orchestrator-doc-panel *ngIf="showDocPanel"></cd-orchestrator-doc-panel> <ng-container *ngIf="orchStatus?.available"> <cd-table [data]="services" [columns]="columns" @@ -6,7 +6,7 @@ forceIdentifier="true" columnMode="flex" selectionType="single" - [autoReload]="60000" + [autoReload]="5000" (fetchData)="getServices($event)" [hasDetails]="true" (setExpandedRow)="setExpandedRow($event)" diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.spec.ts index c3631d50e53..f36f6c39568 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.spec.ts @@ -82,7 +82,10 @@ describe('ServicesComponent', () => { it('should have columns that are sortable', () => { expect( component.columns + // Filter the 'Expand/Collapse Row' column. .filter((column) => !(column.cellClass === 'cd-datatable-expand-collapse')) + // Filter the 'Placement' column. 
+ .filter((column) => !(column.prop === '')) .every((column) => Boolean(column.prop)) ).toBeTruthy(); }); @@ -91,4 +94,8 @@ describe('ServicesComponent', () => { component.getServices(new CdTableFetchDataContext(() => undefined)); expect(component.services.length).toBe(2); }); + + it('should not display doc panel if orchestrator is available', () => { + expect(component.showDocPanel).toBeFalsy(); + }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.ts index 8306f7ddf0f..32d27bc1f4f 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.ts @@ -24,6 +24,7 @@ import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; import { ModalService } from '~/app/shared/services/modal.service'; import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; import { URLBuilderService } from '~/app/shared/services/url-builder.service'; +import { PlacementPipe } from './placement.pipe'; const BASE_URL = 'services'; @@ -44,6 +45,7 @@ export class ServicesComponent extends ListWithDetails implements OnChanges, OnI permissions: Permissions; tableActions: CdTableAction[]; + showDocPanel = false; orchStatus: OrchestratorStatus; actionOrchFeatures = { @@ -109,6 +111,12 @@ export class ServicesComponent extends ListWithDetails implements OnChanges, OnI } }, { + name: $localize`Placement`, + prop: '', + pipe: new PlacementPipe(), + flexGrow: 1 + }, + { name: $localize`Running`, prop: 'status.running', flexGrow: 1 @@ -132,6 +140,7 @@ export class ServicesComponent extends ListWithDetails implements OnChanges, OnI this.orchService.status().subscribe((status: OrchestratorStatus) => { this.orchStatus = status; + this.showDocPanel = !status.available; }); } diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/telemetry/telemetry.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/telemetry/telemetry.component.html index a128c6f2510..877374967a4 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/telemetry/telemetry.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/telemetry/telemetry.component.html @@ -207,7 +207,7 @@ <button type="button" class="btn btn-light" (click)="next()"> - <ng-container i18n>Next</ng-container> + <ng-container i18n>{{ actionLabels.NEXT }}</ng-container> </button> </div> </div> @@ -293,14 +293,11 @@ </div> <div class="card-footer"> <div class="button-group text-right"> - <cd-submit-button (submitAction)="onSubmit()" - [form]="previewForm"> - <ng-container i18n>Save</ng-container> - </cd-submit-button> - <button type="button" - class="btn btn-light" - (click)="back()" - i18n>Back</button> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + (backActionEvent)="back()" + [form]="previewForm" + [submitText]="actionLabels.UPDATE" + [cancelText]="actionLabels.BACK"></cd-form-button-panel> </div> </div> </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/telemetry/telemetry.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/telemetry/telemetry.component.ts index 4b65089c655..8edb7b6a927 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/telemetry/telemetry.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/telemetry/telemetry.component.ts @@ -7,6 +7,7 @@ import { forkJoin as observableForkJoin } from 'rxjs'; import { MgrModuleService } from '~/app/shared/api/mgr-module.service'; import { TelemetryService } from '~/app/shared/api/telemetry.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; import { NotificationType } from '~/app/shared/enum/notification-type.enum'; import { CdForm } from '~/app/shared/forms/cd-form'; 
import { CdFormBuilder } from '~/app/shared/forms/cd-form-builder'; @@ -43,6 +44,7 @@ export class TelemetryComponent extends CdForm implements OnInit { step = 1; constructor( + public actionLabels: ActionLabelsI18n, private formBuilder: CdFormBuilder, private mgrModuleService: MgrModuleService, private notificationService: NotificationService, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-cluster-type.enum.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-cluster-type.enum.ts new file mode 100644 index 00000000000..7a775e5ab2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-cluster-type.enum.ts @@ -0,0 +1,4 @@ +export enum NFSClusterType { + user = 'user', + orchestrator = 'orchestrator' +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.html index 3f596d40080..11c4ccb22af 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.html @@ -31,7 +31,7 @@ value="" i18n>-- Select the cluster --</option> <option *ngFor="let cluster of allClusters" - [value]="cluster">{{ cluster }}</option> + [value]="cluster.cluster_id">{{ cluster.cluster_id }}</option> </select> <span class="invalid-feedback" *ngIf="nfsForm.showError('cluster_id', formDir, 'required')" @@ -40,7 +40,8 @@ </div> <!-- daemons --> - <div class="form-group row"> + <div class="form-group row" + *ngIf="clusterType"> <label class="cd-col-form-label" for="daemons"> <ng-container i18n>Daemons</ng-container> @@ -52,7 +53,8 @@ type="text" [value]="daemon" disabled /> - <span class="input-group-append"> + <span *ngIf="clusterType === 'user'" + class="input-group-append"> <button class="btn btn-light" type="button" (click)="removeDaemon(i, daemon)"> @@ -63,7 +65,8 @@ </div> </ng-container> - <div 
class="row"> + <div *ngIf="clusterType === 'user'" + class="row"> <div class="col-md-12"> <cd-select [data]="nfsForm.get('daemons').value" [options]="daemonsSelections" @@ -75,6 +78,22 @@ </cd-select> </div> </div> + + <div *ngIf="clusterType === 'orchestrator'" + class="row"> + <div class="col-md-12"> + <button type="button" + class="btn btn-light float-right" + (click)="onToggleAllDaemonsSelection()"> + <i [ngClass]="[icons.add]"></i> + <ng-container *ngIf="nfsForm.getValue('daemons').length === 0; else hasDaemons" + i18n>Add all daemons</ng-container> + <ng-template #hasDaemons> + <ng-container i18n>Remove all daemons</ng-container> + </ng-template> + </button> + </div> + </div> </div> </div> @@ -485,13 +504,10 @@ </div> <div class="card-footer"> - <div class="text-right"> - <cd-submit-button - (submitAction)="submitAction()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="formDir">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button></cd-back-button> - </div> + <cd-form-button-panel (submitActionEvent)="submitAction()" + [form]="nfsForm" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + wrappingClass="text-right"></cd-form-button-panel> </div> </div> </form> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.spec.ts index 1ad45854d00..9911d18d8ed 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.spec.ts @@ -11,6 +11,7 @@ import { LoadingPanelComponent } from '~/app/shared/components/loading-panel/loa import { SharedModule } from '~/app/shared/shared.module'; import { ActivatedRouteStub } from '~/testing/activated-route-stub'; import { configureTestBed } from '~/testing/unit-test-helper'; +import { NFSClusterType } 
from '../nfs-cluster-type.enum'; import { NfsFormClientComponent } from '../nfs-form-client/nfs-form-client.component'; import { NfsFormComponent } from './nfs-form.component'; @@ -49,9 +50,9 @@ describe('NfsFormComponent', () => { fixture.detectChanges(); httpTesting.expectOne('api/nfs-ganesha/daemon').flush([ - { daemon_id: 'node1', cluster_id: 'cluster1' }, - { daemon_id: 'node2', cluster_id: 'cluster1' }, - { daemon_id: 'node5', cluster_id: 'cluster2' } + { daemon_id: 'node1', cluster_id: 'cluster1', cluster_type: NFSClusterType.user }, + { daemon_id: 'node2', cluster_id: 'cluster1', cluster_type: NFSClusterType.user }, + { daemon_id: 'node5', cluster_id: 'cluster2', cluster_type: NFSClusterType.orchestrator } ]); httpTesting.expectOne('ui-api/nfs-ganesha/fsals').flush(['CEPH', 'RGW']); httpTesting.expectOne('ui-api/nfs-ganesha/cephx/clients').flush(['admin', 'fs', 'rgw']); @@ -112,6 +113,7 @@ describe('NfsFormComponent', () => { it('should prepare data when selecting an cluster', () => { expect(component.allDaemons).toEqual({ cluster1: ['node1', 'node2'], cluster2: ['node5'] }); expect(component.daemonsSelections).toEqual([]); + expect(component.clusterType).toBeNull(); component.nfsForm.patchValue({ cluster_id: 'cluster1' }); component.onClusterChange(); @@ -120,6 +122,12 @@ describe('NfsFormComponent', () => { { description: '', name: 'node1', selected: false, enabled: true }, { description: '', name: 'node2', selected: false, enabled: true } ]); + expect(component.clusterType).toBe(NFSClusterType.user); + + component.nfsForm.patchValue({ cluster_id: 'cluster2' }); + component.onClusterChange(); + expect(component.clusterType).toBe(NFSClusterType.orchestrator); + expect(component.daemonsSelections).toEqual([]); }); it('should clean data when changing cluster', () => { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.ts index 
d24c0709655..5234be1404e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.ts @@ -20,6 +20,7 @@ import { FinishedTask } from '~/app/shared/models/finished-task'; import { Permission } from '~/app/shared/models/permissions'; import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; +import { NFSClusterType } from '../nfs-cluster-type.enum'; import { NfsFormClientComponent } from '../nfs-form-client/nfs-form-client.component'; @Component({ @@ -38,13 +39,14 @@ export class NfsFormComponent extends CdForm implements OnInit { isEdit = false; cluster_id: string = null; + clusterType: string = null; export_id: string = null; isNewDirectory = false; isNewBucket = false; isDefaultCluster = false; - allClusters: string[] = null; + allClusters: { cluster_id: string; cluster_type: string }[] = null; allDaemons = {}; icons = Icons; @@ -227,11 +229,13 @@ export class NfsFormComponent extends CdForm implements OnInit { res.sec_label_xattr = res.fsal.sec_label_xattr; } - this.daemonsSelections = _.map( - this.allDaemons[res.cluster_id], - (daemon) => new SelectOption(res.daemons.indexOf(daemon) !== -1, daemon, '') - ); - this.daemonsSelections = [...this.daemonsSelections]; + if (this.clusterType === NFSClusterType.user) { + this.daemonsSelections = _.map( + this.allDaemons[res.cluster_id], + (daemon) => new SelectOption(res.daemons.indexOf(daemon) !== -1, daemon, '') + ); + this.daemonsSelections = [...this.daemonsSelections]; + } res.protocolNfsv3 = res.protocols.indexOf(3) !== -1; res.protocolNfsv4 = res.protocols.indexOf(4) !== -1; @@ -259,25 +263,27 @@ export class NfsFormComponent extends CdForm implements OnInit { resolveDaemons(daemons: Record<string, any>) { daemons = _.sortBy(daemons, ['daemon_id']); + const clusters = _.groupBy(daemons, 
'cluster_id'); - this.allClusters = _(daemons) - .map((daemon) => daemon.cluster_id) - .sortedUniq() - .value(); - - _.forEach(this.allClusters, (cluster) => { - this.allDaemons[cluster] = []; + this.allClusters = []; + _.forIn(clusters, (cluster, cluster_id) => { + this.allClusters.push({ cluster_id: cluster_id, cluster_type: cluster[0].cluster_type }); + this.allDaemons[cluster_id] = []; }); _.forEach(daemons, (daemon) => { this.allDaemons[daemon.cluster_id].push(daemon.daemon_id); }); + if (this.isEdit) { + this.clusterType = _.find(this.allClusters, { cluster_id: this.cluster_id })?.cluster_type; + } + const hasOneCluster = _.isArray(this.allClusters) && this.allClusters.length === 1; - this.isDefaultCluster = hasOneCluster && this.allClusters[0] === '_default_'; + this.isDefaultCluster = hasOneCluster && this.allClusters[0].cluster_id === '_default_'; if (hasOneCluster) { this.nfsForm.patchValue({ - cluster_id: this.allClusters[0] + cluster_id: this.allClusters[0].cluster_id }); this.onClusterChange(); } @@ -467,11 +473,16 @@ export class NfsFormComponent extends CdForm implements OnInit { onClusterChange() { const cluster_id = this.nfsForm.getValue('cluster_id'); - this.daemonsSelections = _.map( - this.allDaemons[cluster_id], - (daemon) => new SelectOption(false, daemon, '') - ); - this.daemonsSelections = [...this.daemonsSelections]; + this.clusterType = _.find(this.allClusters, { cluster_id: cluster_id })?.cluster_type; + if (this.clusterType === NFSClusterType.user) { + this.daemonsSelections = _.map( + this.allDaemons[cluster_id], + (daemon) => new SelectOption(false, daemon, '') + ); + this.daemonsSelections = [...this.daemonsSelections]; + } else { + this.daemonsSelections = []; + } this.nfsForm.patchValue({ daemons: [] }); } @@ -493,6 +504,13 @@ export class NfsFormComponent extends CdForm implements OnInit { this.nfsForm.get('daemons').setValue(this.nfsForm.getValue('daemons')); } + onToggleAllDaemonsSelection() { + const cluster_id = 
this.nfsForm.getValue('cluster_id'); + const daemons = + this.nfsForm.getValue('daemons').length === 0 ? this.allDaemons[cluster_id] : []; + this.nfsForm.patchValue({ daemons: daemons }); + } + submitAction() { let action: Observable<any>; const requestModel = this._buildRequest(); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/crush-rule-form-modal/crush-rule-form-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/crush-rule-form-modal/crush-rule-form-modal.component.html index 546a5ec437a..7bf328df4eb 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/crush-rule-form-modal/crush-rule-form-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/crush-rule-form-modal/crush-rule-form-modal.component.html @@ -114,10 +114,9 @@ </div> <div class="modal-footer"> - <cd-submit-button (submitAction)="onSubmit()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="frm">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button [back]="activeModal.close"></cd-back-button> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="form" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/erasure-code-profile-form/erasure-code-profile-form-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/erasure-code-profile-form/erasure-code-profile-form-modal.component.html index 0ce41105e4e..92cecabc915 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/erasure-code-profile-form/erasure-code-profile-form-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/erasure-code-profile-form/erasure-code-profile-form-modal.component.html @@ -406,10 +406,9 @@ </div> <div class="modal-footer"> - <cd-submit-button (submitAction)="onSubmit()" - i18n="form action 
button|Example: Create Pool@@formActionButton" - [form]="frm">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button [back]="activeModal.close"></cd-back-button> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="form" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.html index a91c3ca4bbe..f62a7283feb 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.html @@ -597,13 +597,10 @@ </div> </div> <div class="card-footer"> - <div class="text-right"> - <cd-submit-button [form]="formDir" - i18n="form action button|Example: Create Pool@@formActionButton" - (submitAction)="submit()">{{ action | titlecase }} {{ resource | upperFirst }} - </cd-submit-button> - <cd-back-button></cd-back-button> - </div> + <cd-form-button-panel (submitActionEvent)="submit()" + [form]="form" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + wrappingClass="text-right"></cd-form-button-panel> </div> </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.ts index bf4c206e2f4..c1b6ae5db83 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.ts @@ -484,7 +484,7 @@ export class PoolFormComponent extends CdForm implements OnInit { return (ecpControl.valid || ecpControl.disabled) && ecp ? 
pgs / (ecp.k + ecp.m) : 0; } - private alignPgs(pgs = this.form.getValue('pgNum')) { + alignPgs(pgs = this.form.getValue('pgNum')) { this.setPgs(Math.round(this.calculatePgPower(pgs < 1 ? 1 : pgs))); } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html index 07aa631bf35..310ec3d171f 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html @@ -289,12 +289,10 @@ </div> <div class="card-footer"> - <div class="text-right"> - <cd-submit-button (submitAction)="submit()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="bucketForm">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button></cd-back-button> - </div> + <cd-form-button-panel (submitActionEvent)="submit()" + [form]="bucketForm" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + wrappingClass="text-right"></cd-form-button-panel> </div> </div> </form> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-capability-modal/rgw-user-capability-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-capability-modal/rgw-user-capability-modal.component.html index e07155637f1..24cf4ab5f54 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-capability-modal/rgw-user-capability-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-capability-modal/rgw-user-capability-modal.component.html @@ -61,10 +61,9 @@ </div> <div class="modal-footer"> - <cd-submit-button (submitAction)="onSubmit()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="formGroup">{{ action | titlecase }} {{ resource | upperFirst 
}}</cd-submit-button> - <cd-back-button [back]="activeModal.close"></cd-back-button> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="formGroup" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.html index 164f6c2b3e8..882f59a78a4 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.html @@ -414,10 +414,10 @@ <div class="col-12"> <button type="button" class="btn btn-light float-right tc_addCapButton" - [disabled]="hasAllCapabilities()" + [disabled]="capabilities | pipeFunction:hasAllCapabilities" i18n-ngbTooltip ngbTooltip="All capabilities are already added." 
- [disableTooltip]="!hasAllCapabilities()" + [disableTooltip]="!(capabilities | pipeFunction:hasAllCapabilities)" triggers="pointerenter:pointerleave" (click)="showCapabilityModal()"> <i [ngClass]="[icons.add]"></i> @@ -619,12 +619,10 @@ </div> <div class="card-footer"> - <div class="text-right"> - <cd-submit-button (submitAction)="onSubmit()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="userForm">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button></cd-back-button> - </div> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="userForm" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + wrappingClass="text-right"></cd-form-button-panel> </div> </div> </form> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.spec.ts index 987fd62d3a8..091e7d12453 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.spec.ts @@ -5,6 +5,7 @@ import { Router } from '@angular/router'; import { RouterTestingModule } from '@angular/router/testing'; import { NgbTooltipModule } from '@ng-bootstrap/ng-bootstrap'; +import { NgxPipeFunctionModule } from 'ngx-pipe-function'; import { ToastrModule } from 'ngx-toastr'; import { of as observableOf } from 'rxjs'; @@ -32,7 +33,8 @@ describe('RgwUserFormComponent', () => { RouterTestingModule, SharedModule, ToastrModule.forRoot(), - NgbTooltipModule + NgbTooltipModule, + NgxPipeFunctionModule ] }); @@ -322,7 +324,7 @@ describe('RgwUserFormComponent', () => { fixture.detectChanges(); - expect(component.hasAllCapabilities()).toBeTruthy(); + expect(component.hasAllCapabilities(component.capabilities)).toBeTruthy(); const capabilityButton = 
fixture.debugElement.nativeElement.querySelector('.tc_addCapButton'); expect(capabilityButton.disabled).toBeTruthy(); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.ts index 5fbbb9face9..0b3103c9f8d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.ts @@ -378,7 +378,7 @@ export class RgwUserFormComponent extends CdForm implements OnInit { // Add // Create an observable to add the capability when the form is submitted. this.submitObservables.push(this.rgwUserService.addCapability(uid, cap.type, cap.perm)); - this.capabilities.push(cap); + this.capabilities = [...this.capabilities, cap]; // Notify Angular CD } // Mark the form as dirty to be able to submit it. this.userForm.markAsDirty(); @@ -398,12 +398,13 @@ export class RgwUserFormComponent extends CdForm implements OnInit { ); // Remove the capability to update the UI. this.capabilities.splice(index, 1); + this.capabilities = [...this.capabilities]; // Notify Angular CD // Mark the form as dirty to be able to submit it. 
this.userForm.markAsDirty(); } - hasAllCapabilities() { - return !_.difference(RgwUserCapabilities.getAll(), _.map(this.capabilities, 'type')).length; + hasAllCapabilities(capabilities: RgwUserCapability[]) { + return !_.difference(RgwUserCapabilities.getAll(), _.map(capabilities, 'type')).length; } /** diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-s3-key-modal/rgw-user-s3-key-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-s3-key-modal/rgw-user-s3-key-modal.component.html index a6f73722dce..0f8edcaf5dd 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-s3-key-modal/rgw-user-s3-key-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-s3-key-modal/rgw-user-s3-key-modal.component.html @@ -119,11 +119,10 @@ </div> <div class="modal-footer"> - <cd-submit-button *ngIf="!viewing" - (submitAction)="onSubmit()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="formGroup">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button [back]="activeModal.close"></cd-back-button> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="formGroup" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + [showSubmit]="!viewing"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-subuser-modal/rgw-user-subuser-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-subuser-modal/rgw-user-subuser-modal.component.html index 7ad3d10962d..66c59cb3f2c 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-subuser-modal/rgw-user-subuser-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-subuser-modal/rgw-user-subuser-modal.component.html @@ -1,7 +1,6 @@ <cd-modal [modalRef]="bsModalRef"> <ng-container i18n="form title|Example: Create 
Pool@@formTitle" class="modal-title">{{ action | titlecase }} {{ resource | upperFirst }}</ng-container> - <ng-container class="modal-content"> <form #frm="ngForm" [formGroup]="formGroup" @@ -122,10 +121,9 @@ </div> <div class="modal-footer"> - <cd-submit-button (submitAction)="onSubmit()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="formGroup">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button [back]="bsModalRef.close"></cd-back-button> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="formGroup" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-swift-key-modal/rgw-user-swift-key-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-swift-key-modal/rgw-user-swift-key-modal.component.html index 1db2ef23caf..8121dbc31e3 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-swift-key-modal/rgw-user-swift-key-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-swift-key-modal/rgw-user-swift-key-modal.component.html @@ -51,7 +51,7 @@ </div> <div class="modal-footer"> - <cd-back-button [back]="activeModal.close"></cd-back-button> + <cd-back-button (backAction)="activeModal.close()"></cd-back-button> </div> </ng-container> </cd-modal> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts index 02f53923028..00ef8bd49e5 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts @@ -4,6 +4,7 @@ import { FormsModule, ReactiveFormsModule } from '@angular/forms'; import { RouterModule, Routes } from '@angular/router'; import { NgbNavModule, NgbTooltipModule } from 
'@ng-bootstrap/ng-bootstrap'; +import { NgxPipeFunctionModule } from 'ngx-pipe-function'; import { ActionLabels, URLVerbs } from '~/app/shared/constants/app.constants'; import { AuthGuardService } from '~/app/shared/services/auth-guard.service'; @@ -32,7 +33,8 @@ import { RgwUserSwiftKeyModalComponent } from './rgw-user-swift-key-modal/rgw-us PerformanceCounterModule, NgbNavModule, RouterModule, - NgbTooltipModule + NgbTooltipModule, + NgxPipeFunctionModule ], exports: [ Rgw501Component, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/auth.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/auth.module.ts index dc17a215bfc..56b92e26377 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/auth.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/auth.module.ts @@ -4,6 +4,7 @@ import { FormsModule, ReactiveFormsModule } from '@angular/forms'; import { RouterModule, Routes } from '@angular/router'; import { NgbNavModule, NgbPopoverModule } from '@ng-bootstrap/ng-bootstrap'; +import { NgxPipeFunctionModule } from 'ngx-pipe-function'; import { ActionLabels, URLVerbs } from '~/app/shared/constants/app.constants'; import { SharedModule } from '~/app/shared/shared.module'; @@ -26,6 +27,7 @@ import { UserTabsComponent } from './user-tabs/user-tabs.component'; SharedModule, NgbNavModule, NgbPopoverModule, + NgxPipeFunctionModule, RouterModule ], declarations: [ diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login-password-form/login-password-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login-password-form/login-password-form.component.html index 07c0b6d3bfd..2dc30df52e6 100755 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login-password-form/login-password-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login-password-form/login-password-form.component.html @@ -85,18 +85,11 @@ *ngIf="userForm.showError('confirmnewpassword', frm, 
'match')" i18n>Password confirmation doesn't match the new password.</span> </div> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + (backActionEvent)="onCancel()" + [form]="userForm" + [disabled]="userForm.invalid" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + wrappingClass="text-right"></cd-form-button-panel> </form> - <div class="form-footer"> - <cd-submit-button class="full-width" - btnClass="btn-block" - (submitAction)="onSubmit()" - [form]="userForm" - i18n="form action button|Example: Create Pool@@formActionButton"> - {{ action | titlecase }} {{ resource | upperFirst }} - </cd-submit-button> - <button class="btn btn-light" - (click)="onCancel()"> - <ng-container i18n>Cancel</ng-container> - </button> - </div> </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login-password-form/login-password-form.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login-password-form/login-password-form.component.scss index 25df0569e2b..15addd1e821 100755 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login-password-form/login-password-form.component.scss +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login-password-form/login-password-form.component.scss @@ -1,20 +1,36 @@ +@use 'sass:map'; @use './src/styles/vendor/variables' as vv; +$dark-secondary: darken(vv.$secondary, 4%); + ::ng-deep cd-login-password-form { h4 { margin: 0 0 30px; } + .form-group { + background-color: $dark-secondary; + border-left: 4px solid vv.$white; + + &:focus-within { + border-left: 4px solid map.get(vv.$theme-colors, 'accent'); + } + } + .btn-password, .btn-password:focus, .form-control, .form-control:focus { - background-color: vv.$gray-800; - color: vv.$white; + background-color: $dark-secondary; + border: 0; + box-shadow: none; + color: vv.$body-color-bright; + filter: none; + outline: none; } .form-control::placeholder { - color: vv.$gray-500; + color: vv.$gray-600; } .btn-password:focus { @@ 
-25,3 +41,28 @@ margin-left: 5px; } } + +// This will override the colors applied by chrome +@keyframes autofill { + to { + background-color: $dark-secondary; + color: vv.$body-color-bright; + } +} + +input:-webkit-autofill { + animation-fill-mode: both; + animation-name: autofill; + border-radius: 0; + box-shadow: 0 0 0 1000px $dark-secondary inset; + -webkit-text-fill-color: vv.$body-color-bright; + transition-property: none; +} + +.invalid-feedback { + padding-left: 9px; +} + +.is-invalid.cd-form-control { + border-color: transparent; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.html b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.html index 9ea2b29b2de..8565c3615c6 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.html @@ -58,7 +58,7 @@ <input type="submit" class="btn btn-accent px-5 py-2" [disabled]="loginForm.invalid" - value="Login" + value="Log in" i18n-value> </form> </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/role-form/role-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/role-form/role-form.component.html index 00260011115..ce395236545 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/role-form/role-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/role-form/role-form.component.html @@ -68,12 +68,10 @@ </div> <div class="card-footer"> - <div class="text-right"> - <cd-submit-button (submitAction)="submit()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="formDir">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button></cd-back-button> - </div> + <cd-form-button-panel (submitActionEvent)="submit()" + [form]="roleForm" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + 
wrappingClass="text-right"></cd-form-button-panel> </div> </div> </form> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/user-form/user-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/user-form/user-form.component.html index 067f854cf9d..098b15e97af 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/user-form/user-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/user-form/user-form.component.html @@ -240,12 +240,10 @@ </div> <div class="card-footer"> - <div class="text-right"> - <cd-submit-button (submitAction)="submit()" - i18n="form action button|Example: Create Pool@@formActionButton" - [form]="formDir">{{ action | titlecase }} {{ resource | upperFirst }}</cd-submit-button> - <cd-back-button></cd-back-button> - </div> + <cd-form-button-panel (submitActionEvent)="submit()" + [form]="userForm" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + wrappingClass="text-right"></cd-form-button-panel> </div> </div> </form> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/user-password-form/user-password-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/user-password-form/user-password-form.component.html index cbde4f7d655..64a679a32ba 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/user-password-form/user-password-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/user-password-form/user-password-form.component.html @@ -111,12 +111,10 @@ </div> <div class="card-footer"> - <cd-submit-button (submitAction)="onSubmit()" - [form]="userForm" - class="float-right" - i18n="form action button|Example: Create Pool@@formActionButton"> - {{ action | titlecase }} {{ resource | upperFirst }} - </cd-submit-button> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="userForm" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + 
wrappingClass="text-right"></cd-form-button-panel> </div> </div> </form> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/host.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/host.service.spec.ts index aea5f045ffc..babddca3e61 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/host.service.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/host.service.spec.ts @@ -49,4 +49,24 @@ describe('HostService', () => { expect(req.request.method).toBe('PUT'); expect(req.request.body).toEqual({ labels: ['foo', 'bar'] }); })); + + it('should call getInventory', () => { + service.getInventory('host-0').subscribe(); + let req = httpTesting.expectOne('api/host/host-0/inventory'); + expect(req.request.method).toBe('GET'); + + service.getInventory('host-0', true).subscribe(); + req = httpTesting.expectOne('api/host/host-0/inventory?refresh=true'); + expect(req.request.method).toBe('GET'); + }); + + it('should call inventoryList', () => { + service.inventoryList().subscribe(); + let req = httpTesting.expectOne('ui-api/host/inventory'); + expect(req.request.method).toBe('GET'); + + service.inventoryList(true).subscribe(); + req = httpTesting.expectOne('ui-api/host/inventory?refresh=true'); + expect(req.request.method).toBe('GET'); + }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/host.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/host.service.ts index 3b9e7068e77..5f34d96af4d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/host.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/host.service.ts @@ -1,9 +1,12 @@ -import { HttpClient } from '@angular/common/http'; +import { HttpClient, HttpParams } from '@angular/common/http'; import { Injectable } from '@angular/core'; -import { Observable } from 'rxjs'; -import { map } from 'rxjs/operators'; +import _ from 'lodash'; +import { Observable, of as observableOf } from 'rxjs'; +import { 
map, mergeMap, toArray } from 'rxjs/operators'; +import { InventoryDevice } from '~/app/ceph/cluster/inventory/inventory-devices/inventory-device.model'; +import { InventoryHost } from '~/app/ceph/cluster/inventory/inventory-host.model'; import { Daemon } from '../models/daemon.interface'; import { CdDevice } from '../models/devices'; import { SmartDataResponseV1 } from '../models/smart'; @@ -14,6 +17,7 @@ import { DeviceService } from '../services/device.service'; }) export class HostService { baseURL = 'api/host'; + baseUIURL = 'ui-api/host'; constructor(private http: HttpClient, private deviceService: DeviceService) {} @@ -44,10 +48,75 @@ export class HostService { } getLabels(): Observable<string[]> { - return this.http.get<string[]>('ui-api/host/labels'); + return this.http.get<string[]>(`${this.baseUIURL}/labels`); } update(hostname: string, labels: string[]) { return this.http.put(`${this.baseURL}/${hostname}`, { labels: labels }); } + + identifyDevice(hostname: string, device: string, duration: number) { + return this.http.post(`${this.baseURL}/${hostname}/identify_device`, { + device, + duration + }); + } + + private getInventoryParams(refresh?: boolean): HttpParams { + let params = new HttpParams(); + if (refresh) { + params = params.append('refresh', _.toString(refresh)); + } + return params; + } + + /** + * Get inventory of a host. + * + * @param hostname the host query. + * @param refresh true to ask the Orchestrator to refresh inventory. + */ + getInventory(hostname: string, refresh?: boolean): Observable<InventoryHost> { + const params = this.getInventoryParams(refresh); + return this.http.get<InventoryHost>(`${this.baseURL}/${hostname}/inventory`, { + params: params + }); + } + + /** + * Get inventories of all hosts. + * + * @param refresh true to ask the Orchestrator to refresh inventory. 
+ */ + inventoryList(refresh?: boolean): Observable<InventoryHost[]> { + const params = this.getInventoryParams(refresh); + return this.http.get<InventoryHost[]>(`${this.baseUIURL}/inventory`, { params: params }); + } + + /** + * Get device list via host inventories. + * + * @param hostname the host to query. undefined for all hosts. + * @param refresh true to ask the Orchestrator to refresh inventory. + */ + inventoryDeviceList(hostname?: string, refresh?: boolean): Observable<InventoryDevice[]> { + let observable; + if (hostname) { + observable = this.getInventory(hostname, refresh).pipe(toArray()); + } else { + observable = this.inventoryList(refresh); + } + return observable.pipe( + mergeMap((hosts: InventoryHost[]) => { + const devices = _.flatMap(hosts, (host) => { + return host.devices.map((device) => { + device.hostname = host.name; + device.uid = device.device_id ? device.device_id : `${device.hostname}-${device.path}`; + return device; + }); + }); + return observableOf(devices); + }) + ); + } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/orchestrator.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/orchestrator.service.spec.ts index 4dfc595bfee..f4c7e4390ca 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/orchestrator.service.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/orchestrator.service.spec.ts @@ -32,32 +32,4 @@ describe('OrchestratorService', () => { const req = httpTesting.expectOne(`${apiPath}/status`); expect(req.request.method).toBe('GET'); }); - - it('should call inventoryList with arguments', () => { - const inventoryPath = `${apiPath}/inventory`; - const tests: { args: any[]; expectedUrl: any }[] = [ - { - args: [], - expectedUrl: inventoryPath - }, - { - args: ['host0'], - expectedUrl: `${inventoryPath}?hostname=host0` - }, - { - args: [undefined, true], - expectedUrl: `${inventoryPath}?refresh=true` - }, - { - args: ['host0', true], - expectedUrl: 
`${inventoryPath}?hostname=host0&refresh=true` - } - ]; - - for (const test of tests) { - service.inventoryList(...test.args).subscribe(); - const req = httpTesting.expectOne(test.expectedUrl); - expect(req.request.method).toBe('GET'); - } - }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/orchestrator.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/orchestrator.service.ts index ddd0f75bb4d..20117158215 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/orchestrator.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/orchestrator.service.ts @@ -1,12 +1,9 @@ -import { HttpClient, HttpParams } from '@angular/common/http'; +import { HttpClient } from '@angular/common/http'; import { Injectable } from '@angular/core'; import _ from 'lodash'; -import { Observable, of as observableOf } from 'rxjs'; -import { mergeMap } from 'rxjs/operators'; +import { Observable } from 'rxjs'; -import { InventoryDevice } from '~/app/ceph/cluster/inventory/inventory-devices/inventory-device.model'; -import { InventoryHost } from '~/app/ceph/cluster/inventory/inventory-host.model'; import { OrchestratorFeature } from '../models/orchestrator.enum'; import { OrchestratorStatus } from '../models/orchestrator.interface'; @@ -46,38 +43,4 @@ export class OrchestratorService { } return false; } - - identifyDevice(hostname: string, device: string, duration: number) { - return this.http.post(`${this.url}/identify_device`, { - hostname, - device, - duration - }); - } - - inventoryList(hostname?: string, refresh?: boolean): Observable<InventoryHost[]> { - let params = new HttpParams(); - if (hostname) { - params = params.append('hostname', hostname); - } - if (refresh) { - params = params.append('refresh', _.toString(refresh)); - } - return this.http.get<InventoryHost[]>(`${this.url}/inventory`, { params: params }); - } - - inventoryDeviceList(hostname?: string, refresh?: boolean): Observable<InventoryDevice[]> { - return 
this.inventoryList(hostname, refresh).pipe( - mergeMap((hosts: InventoryHost[]) => { - const devices = _.flatMap(hosts, (host) => { - return host.devices.map((device) => { - device.hostname = host.name; - device.uid = device.device_id ? device.device_id : `${device.hostname}-${device.path}`; - return device; - }); - }); - return observableOf(devices); - }) - ); - } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/back-button/back-button.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/back-button/back-button.component.ts index 1af872b8d4d..a578f039402 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/back-button/back-button.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/back-button/back-button.component.ts @@ -1,5 +1,5 @@ import { Location } from '@angular/common'; -import { Component, Input } from '@angular/core'; +import { Component, EventEmitter, Input, Output } from '@angular/core'; import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; @@ -9,8 +9,16 @@ import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; styleUrls: ['./back-button.component.scss'] }) export class BackButtonComponent { + @Output() backAction = new EventEmitter(); + @Input() name: string = this.actionLabels.CANCEL; + constructor(private location: Location, private actionLabels: ActionLabelsI18n) {} - @Input() name: string = this.actionLabels.CANCEL; - @Input() back: Function = () => this.location.back(); + back() { + if (this.backAction.observers.length === 0) { + this.location.back(); + } else { + this.backAction.emit(); + } + } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts index cb27f473f97..5defbf36a3a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts +++ 
b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts @@ -26,6 +26,7 @@ import { CriticalConfirmationModalComponent } from './critical-confirmation-moda import { DateTimePickerComponent } from './date-time-picker/date-time-picker.component'; import { DocComponent } from './doc/doc.component'; import { DownloadButtonComponent } from './download-button/download-button.component'; +import { FormButtonPanelComponent } from './form-button-panel/form-button-panel.component'; import { FormModalComponent } from './form-modal/form-modal.component'; import { GrafanaComponent } from './grafana/grafana.component'; import { HelperComponent } from './helper/helper.component'; @@ -87,7 +88,8 @@ import { UsageBarComponent } from './usage-bar/usage-bar.component'; OrchestratorDocPanelComponent, DateTimePickerComponent, DocComponent, - DownloadButtonComponent + DownloadButtonComponent, + FormButtonPanelComponent ], providers: [], exports: [ @@ -111,7 +113,8 @@ import { UsageBarComponent } from './usage-bar/usage-bar.component'; OrchestratorDocPanelComponent, DateTimePickerComponent, DocComponent, - DownloadButtonComponent + DownloadButtonComponent, + FormButtonPanelComponent ] }) export class ComponentsModule {} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/confirmation-modal/confirmation-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/confirmation-modal/confirmation-modal.component.html index c7130ef6591..3e0d1d29900 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/confirmation-modal/confirmation-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/confirmation-modal/confirmation-modal.component.html @@ -12,14 +12,10 @@ </p> </div> <div class="modal-footer"> - <cd-submit-button [form]="confirmationForm" - (submitAction)="onSubmit(confirmationForm.value)"> - {{ buttonText }} - </cd-submit-button> - <cd-back-button [back]="boundCancel" - 
name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="onSubmit(confirmationForm.value)" + (backActionEvent)="boundCancel()" + [form]="confirmationForm" + [submitText]="buttonText"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/confirmation-modal/confirmation-modal.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/confirmation-modal/confirmation-modal.component.spec.ts index d44b646ca07..a76c5d378ed 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/confirmation-modal/confirmation-modal.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/confirmation-modal/confirmation-modal.component.spec.ts @@ -1,7 +1,6 @@ import { Component, NgModule, NO_ERRORS_SCHEMA, TemplateRef, ViewChild } from '@angular/core'; import { ComponentFixture, TestBed } from '@angular/core/testing'; import { ReactiveFormsModule } from '@angular/forms'; -import { By } from '@angular/platform-browser'; import { RouterTestingModule } from '@angular/router/testing'; import { NgbActiveModal, NgbModalModule, NgbModalRef } from '@ng-bootstrap/ng-bootstrap'; @@ -9,6 +8,7 @@ import { NgbActiveModal, NgbModalModule, NgbModalRef } from '@ng-bootstrap/ng-bo import { ModalService } from '~/app/shared/services/modal.service'; import { configureTestBed, FixtureHelper } from '~/testing/unit-test-helper'; import { BackButtonComponent } from '../back-button/back-button.component'; +import { FormButtonPanelComponent } from '../form-button-panel/form-button-panel.component'; import { ModalComponent } from '../modal/modal.component'; import { SubmitButtonComponent } from '../submit-button/submit-button.component'; import { ConfirmationModalComponent } from './confirmation-modal.component'; @@ -72,11 +72,12 @@ describe('ConfirmationModalComponent', () => { BackButtonComponent, MockComponent, ModalComponent, - 
SubmitButtonComponent + SubmitButtonComponent, + FormButtonPanelComponent ], schemas: [NO_ERRORS_SCHEMA], imports: [ReactiveFormsModule, MockModule, RouterTestingModule, NgbModalModule], - providers: [NgbActiveModal, SubmitButtonComponent] + providers: [NgbActiveModal, SubmitButtonComponent, FormButtonPanelComponent] }); beforeEach(() => { @@ -161,10 +162,7 @@ describe('ConfirmationModalComponent', () => { it('should use the correct submit action', () => { // In order to ignore the `ElementRef` usage of `SubmitButtonComponent` - spyOn( - fixture.debugElement.query(By.directive(SubmitButtonComponent)).componentInstance, - 'focusButton' - ); + spyOn(fh.getElementByCss('.tc_submitButton').componentInstance, 'focusButton'); fh.clickElement('.tc_submitButton'); expect(component.onSubmit).toHaveBeenCalledTimes(1); expect(component.activeModal.close).toHaveBeenCalledTimes(0); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component.html index 4d708145476..29b669b141f 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component.html @@ -42,15 +42,9 @@ </div> </div> <div class="modal-footer"> - <cd-submit-button #submitButton - [form]="deletionForm" - (submitAction)="callSubmitAction()"> - <ng-container *ngTemplateOutlet="deletionHeading"></ng-container> - </cd-submit-button> - <cd-back-button [back]="activeModal.close" - name="Cancel" - i18n-name> - </cd-back-button> + <cd-form-button-panel (submitActionEvent)="callSubmitAction()" + [form]="deletionForm" + [submitText]="(actionDescription | titlecase) + ' ' + itemDescription"></cd-form-button-panel> 
</div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-button-panel/form-button-panel.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-button-panel/form-button-panel.component.html new file mode 100644 index 00000000000..bef4bb8a748 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-button-panel/form-button-panel.component.html @@ -0,0 +1,10 @@ +<div [class]="wrappingClass"> + <cd-back-button class="m-2" + (backAction)="backAction()" + [name]="cancelText"></cd-back-button> + <cd-submit-button *ngIf="showSubmit" + (submitAction)="submitAction()" + [disabled]="disabled" + [form]="form" + data-cy="submitBtn">{{ submitText }}</cd-submit-button> +</div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-button-panel/form-button-panel.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-button-panel/form-button-panel.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-button-panel/form-button-panel.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-button-panel/form-button-panel.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-button-panel/form-button-panel.component.spec.ts new file mode 100644 index 00000000000..b8350485b3b --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-button-panel/form-button-panel.component.spec.ts @@ -0,0 +1,25 @@ +import { NO_ERRORS_SCHEMA } from '@angular/core'; +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { configureTestBed } from '~/testing/unit-test-helper'; +import { FormButtonPanelComponent } from './form-button-panel.component'; + +describe('FormButtonPanelComponent', () => { + let component: FormButtonPanelComponent; + 
let fixture: ComponentFixture<FormButtonPanelComponent>; + + configureTestBed({ + declarations: [FormButtonPanelComponent], + schemas: [NO_ERRORS_SCHEMA] + }); + + beforeEach(() => { + fixture = TestBed.createComponent(FormButtonPanelComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-button-panel/form-button-panel.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-button-panel/form-button-panel.component.ts new file mode 100644 index 00000000000..7684d191563 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-button-panel/form-button-panel.component.ts @@ -0,0 +1,55 @@ +import { Location } from '@angular/common'; +import { Component, EventEmitter, Input, Output } from '@angular/core'; +import { FormGroup, NgForm } from '@angular/forms'; + +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; +import { ModalService } from '~/app/shared/services/modal.service'; + +@Component({ + selector: 'cd-form-button-panel', + templateUrl: './form-button-panel.component.html', + styleUrls: ['./form-button-panel.component.scss'] +}) +export class FormButtonPanelComponent { + @Output() + submitActionEvent = new EventEmitter(); + @Output() + backActionEvent = new EventEmitter(); + + @Input() + form: FormGroup | NgForm; + @Input() + showSubmit = true; + @Input() + wrappingClass = ''; + @Input() + btnClass = ''; + @Input() + submitText: string = this.actionLabels.CREATE; + @Input() + cancelText: string = this.actionLabels.CANCEL; + @Input() + disabled = false; + + constructor( + private location: Location, + private actionLabels: ActionLabelsI18n, + private modalService: ModalService + ) {} + + submitAction() { + this.submitActionEvent.emit(); + } + + backAction() { + if (this.backActionEvent.observers.length === 0) { 
+ if (this.modalService.hasOpenModals()) { + this.modalService.dismissAll(); + } else { + this.location.back(); + } + } else { + this.backActionEvent.emit(); + } + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-modal/form-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-modal/form-modal.component.html index f1f4b7f573f..061747f1130 100755 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-modal/form-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-modal/form-modal.component.html @@ -60,11 +60,9 @@ </ng-container> </div> <div class="modal-footer"> - <cd-submit-button [form]="formGroup" - (submitAction)="onSubmitForm(formGroup.value)"> - {{ submitButtonText }} - </cd-submit-button> - <cd-back-button [back]="activeModal.close"></cd-back-button> + <cd-form-button-panel (submitActionEvent)="onSubmitForm(formGroup.value)" + [form]="formGroup" + [submitText]="submitButtonText"></cd-form-button-panel> </div> </form> </ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/constants/app.constants.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/constants/app.constants.ts index 16e323d3012..7747f146836 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/constants/app.constants.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/constants/app.constants.ts @@ -83,6 +83,10 @@ export class ActionLabelsI18n { REMOVE: string; EDIT: string; CANCEL: string; + PREVIEW: string; + MOVE: string; + NEXT: string; + BACK: string; CHANGE: string; COPY: string; CLONE: string; @@ -104,6 +108,7 @@ export class ActionLabelsI18n { ROLLBACK: string; SCRUB: string; SET: string; + SUBMIT: string; SHOW: string; TRASH: string; UNPROTECT: string; @@ -121,6 +126,7 @@ export class ActionLabelsI18n { /* Add an existing item to a container */ this.ADD = $localize`Add`; this.SET = $localize`Set`; + this.SUBMIT = 
$localize`Submit`; /* Remove an item from a container WITHOUT deleting it */ this.REMOVE = $localize`Remove`; @@ -130,6 +136,12 @@ export class ActionLabelsI18n { this.EDIT = $localize`Edit`; this.UPDATE = $localize`Update`; this.CANCEL = $localize`Cancel`; + this.PREVIEW = $localize`Preview`; + this.MOVE = $localize`Move`; + + /* Wizard wording */ + this.NEXT = $localize`Next`; + this.BACK = $localize`Back`; /* Non-standard actions */ this.CLONE = $localize`Clone`; @@ -174,6 +186,8 @@ export class SucceededActionLabelsI18n { REMOVED: string; EDITED: string; CANCELED: string; + PREVIEWED: string; + MOVED: string; COPIED: string; CLONED: string; DEEP_SCRUBBED: string; @@ -213,6 +227,8 @@ export class SucceededActionLabelsI18n { /* Make changes to an existing item */ this.EDITED = $localize`Edited`; this.CANCELED = $localize`Canceled`; + this.PREVIEWED = $localize`Previewed`; + this.MOVED = $localize`Moved`; /* Non-standard actions */ this.CLONED = $localize`Cloned`; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts index b32ee065589..ede8f2368b7 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/datatable.module.ts @@ -5,6 +5,7 @@ import { RouterModule } from '@angular/router'; import { NgbDropdownModule, NgbTooltipModule } from '@ng-bootstrap/ng-bootstrap'; import { NgxDatatableModule } from '@swimlane/ngx-datatable'; +import { NgxPipeFunctionModule } from 'ngx-pipe-function'; import { ComponentsModule } from '../components/components.module'; import { PipesModule } from '../pipes/pipes.module'; @@ -16,6 +17,7 @@ import { TableComponent } from './table/table.component'; imports: [ CommonModule, NgxDatatableModule, + NgxPipeFunctionModule, FormsModule, NgbDropdownModule, NgbTooltipModule, diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-actions/table-actions.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-actions/table-actions.component.html index c8061057154..0f7c1a9cd99 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-actions/table-actions.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-actions/table-actions.component.html @@ -1,14 +1,14 @@ <div class="btn-group"> - <ng-container *ngIf="getCurrentButton() as action"> + <ng-container *ngIf="currentAction"> <button type="button" - title="{{ useDisableDesc(action) }}" + title="{{ useDisableDesc(currentAction) }}" class="btn btn-{{btnColor}}" - [ngClass]="{'disabled': disableSelectionAction(action)}" - (click)="useClickAction(action)" - [routerLink]="useRouterLink(action)" - [preserveFragment]="action.preserveFragment ? '' : null"> - <i [ngClass]="[action.icon]"></i> - <span>{{ action.name }}</span> + [ngClass]="{'disabled': disableSelectionAction(currentAction)}" + (click)="useClickAction(currentAction)" + [routerLink]="useRouterLink(currentAction)" + [preserveFragment]="currentAction.preserveFragment ? 
'' : null"> + <i [ngClass]="[currentAction.icon]"></i> + <span>{{ currentAction.name }}</span> </button> </ng-container> <div class="btn-group" @@ -17,7 +17,7 @@ role="group" aria-label="Button group with nested dropdown"> <button class="btn btn-{{btnColor}} dropdown-toggle-split" - *ngIf="showDropDownActions()" + *ngIf="dropDownActions.length > 1" ngbDropdownToggle> <ng-container *ngIf="dropDownOnly">{{ dropDownOnly }} </ng-container> <span *ngIf="!dropDownOnly" @@ -27,7 +27,7 @@ ngbDropdownMenu> <ng-container *ngFor="let action of dropDownActions"> <button ngbDropdownItem - class="{{ toClassName(action['name']) }}" + class="{{ toClassName(action) }}" title="{{ useDisableDesc(action) }}" (click)="useClickAction(action)" [routerLink]="useRouterLink(action)" diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-actions/table-actions.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-actions/table-actions.component.spec.ts index d88a3be292c..81cc1b97207 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-actions/table-actions.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-actions/table-actions.component.spec.ts @@ -1,6 +1,8 @@ import { ComponentFixture, TestBed } from '@angular/core/testing'; import { RouterTestingModule } from '@angular/router/testing'; +import { NgxPipeFunctionModule } from 'ngx-pipe-function'; + import { ComponentsModule } from '~/app/shared/components/components.module'; import { CdTableAction } from '~/app/shared/models/cd-table-action'; import { CdTableSelection } from '~/app/shared/models/cd-table-selection'; @@ -21,7 +23,7 @@ describe('TableActionsComponent', () => { configureTestBed({ declarations: [TableActionsComponent], - imports: [ComponentsModule, RouterTestingModule] + imports: [ComponentsModule, NgxPipeFunctionModule, RouterTestingModule] }); beforeEach(() => { @@ -157,9 +159,9 @@ 
describe('TableActionsComponent', () => { }); it('should convert any name to a proper CSS class', () => { - expect(component.toClassName('Create')).toBe('create'); - expect(component.toClassName('Mark x down')).toBe('mark-x-down'); - expect(component.toClassName('?Su*per!')).toBe('super'); + expect(component.toClassName({ name: 'Create' } as CdTableAction)).toBe('create'); + expect(component.toClassName({ name: 'Mark x down' } as CdTableAction)).toBe('mark-x-down'); + expect(component.toClassName({ name: '?Su*per!' } as CdTableAction)).toBe('super'); }); describe('useDisableDesc', () => { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-actions/table-actions.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-actions/table-actions.component.ts index fc951233ac4..0497f930193 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-actions/table-actions.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-actions/table-actions.component.ts @@ -1,4 +1,4 @@ -import { Component, Input, OnInit } from '@angular/core'; +import { Component, Input, OnChanges, OnInit, SimpleChanges } from '@angular/core'; import _ from 'lodash'; @@ -12,7 +12,7 @@ import { Permission } from '~/app/shared/models/permissions'; templateUrl: './table-actions.component.html', styleUrls: ['./table-actions.component.scss'] }) -export class TableActionsComponent implements OnInit { +export class TableActionsComponent implements OnChanges, OnInit { @Input() permission: Permission; @Input() @@ -28,6 +28,7 @@ export class TableActionsComponent implements OnInit { @Input() dropDownOnly?: string; + currentAction?: CdTableAction; // Array with all visible actions dropDownActions: CdTableAction[] = []; @@ -35,11 +36,22 @@ export class TableActionsComponent implements OnInit { ngOnInit() { this.removeActionsWithNoPermissions(); + this.onSelectionChange(); + } + + ngOnChanges(changes: SimpleChanges) { + 
if (changes.selection) { + this.onSelectionChange(); + } + } + + onSelectionChange(): void { this.updateDropDownActions(); + this.updateCurrentAction(); } - toClassName(name: string): string { - return name + toClassName(action: CdTableAction): string { + return action.name .replace(/ /g, '-') .replace(/[^a-z-]/gi, '') .toLowerCase(); @@ -59,7 +71,7 @@ export class TableActionsComponent implements OnInit { ); } - private updateDropDownActions() { + private updateDropDownActions(): void { this.dropDownActions = this.tableActions.filter((action) => action.visible ? action.visible(this.selection) : action ); @@ -73,18 +85,17 @@ export class TableActionsComponent implements OnInit { * Default button conditions of actions: * - 'create' actions can be used with no or multiple selections * - 'update' and 'delete' actions can be used with one selection - * - * @returns {CdTableAction} */ - getCurrentButton(): CdTableAction { + private updateCurrentAction(): void { if (this.dropDownOnly) { - return undefined; + this.currentAction = undefined; + return; } let buttonAction = this.dropDownActions.find((tableAction) => this.showableAction(tableAction)); if (!buttonAction && this.dropDownActions.length > 0) { buttonAction = this.dropDownActions[0]; } - return buttonAction; + this.currentAction = buttonAction; } /** @@ -129,11 +140,6 @@ export class TableActionsComponent implements OnInit { ); } - showDropDownActions() { - this.updateDropDownActions(); - return this.dropDownActions.length > 1; - } - useClickAction(action: CdTableAction) { /** * In order to show tooltips for deactivated menu items, the class @@ -150,7 +156,6 @@ export class TableActionsComponent implements OnInit { const result = action.disable(this.selection); return _.isString(result) ? 
result : undefined; } - return undefined; } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-key-value/table-key-value.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-key-value/table-key-value.component.spec.ts index 7beb142ba48..150d4424105 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-key-value/table-key-value.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table-key-value/table-key-value.component.spec.ts @@ -4,6 +4,7 @@ import { RouterTestingModule } from '@angular/router/testing'; import { NgbDropdownModule, NgbTooltipModule } from '@ng-bootstrap/ng-bootstrap'; import { NgxDatatableModule } from '@swimlane/ngx-datatable'; +import { NgxPipeFunctionModule } from 'ngx-pipe-function'; import { ComponentsModule } from '~/app/shared/components/components.module'; import { CellTemplate } from '~/app/shared/enum/cell-template.enum'; @@ -27,7 +28,8 @@ describe('TableKeyValueComponent', () => { RouterTestingModule, NgbDropdownModule, PipesModule, - NgbTooltipModule + NgbTooltipModule, + NgxPipeFunctionModule ] }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.html index 0d5460a989e..71ba156cf3f 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.html @@ -269,7 +269,7 @@ <ng-template #classAddingTpl let-value="value"> - <span class="{{useCustomClass(value)}}">{{ value }}</span> + <span class="{{ value | pipeFunction:useCustomClass:this }}">{{ value }}</span> </ng-template> <ng-template #badgeTpl diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.spec.ts 
b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.spec.ts index f3d4f4e36c5..a2a329ab645 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.spec.ts @@ -7,6 +7,7 @@ import { RouterTestingModule } from '@angular/router/testing'; import { NgbDropdownModule, NgbTooltipModule } from '@ng-bootstrap/ng-bootstrap'; import { NgxDatatableModule } from '@swimlane/ngx-datatable'; import _ from 'lodash'; +import { NgxPipeFunctionModule } from 'ngx-pipe-function'; import { ComponentsModule } from '~/app/shared/components/components.module'; import { CellTemplate } from '~/app/shared/enum/cell-template.enum'; @@ -42,6 +43,7 @@ describe('TableComponent', () => { imports: [ BrowserAnimationsModule, NgxDatatableModule, + NgxPipeFunctionModule, FormsModule, ComponentsModule, RouterTestingModule, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/doc.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/doc.service.spec.ts index 5694998ace4..7c3bf24dd5d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/doc.service.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/doc.service.spec.ts @@ -22,7 +22,13 @@ describe('DocService', () => { it('should return full URL', () => { expect(service.urlGenerator('iscsi', 'foo')).toBe( - 'http://docs.ceph.com/docs/foo/mgr/dashboard/#enabling-iscsi-management' + 'https://docs.ceph.com/en/foo/mgr/dashboard/#enabling-iscsi-management' + ); + }); + + it('should return latest version URL for master', () => { + expect(service.urlGenerator('orch', 'master')).toBe( + 'https://docs.ceph.com/en/latest/mgr/orchestrator' ); }); @@ -60,7 +66,7 @@ describe('DocService', () => { nextSummary('foo'); expect(result).toEqual( - 'http://docs.ceph.com/docs/foo/mgr/dashboard/#enabling-prometheus-alerting' + 
'https://docs.ceph.com/en/foo/mgr/dashboard/#enabling-prometheus-alerting' ); expect(i).toBe(1); expect(subscriber.closed).toBe(true); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/doc.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/doc.service.ts index 09181f12b3c..4cbb4cf185b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/doc.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/doc.service.ts @@ -24,7 +24,8 @@ export class DocService { } urlGenerator(section: string, release = 'master'): string { - const domain = `http://docs.ceph.com/docs/${release}/`; + const docVersion = release === 'master' ? 'latest' : release; + const domain = `https://docs.ceph.com/en/${docVersion}/`; const domainCeph = `https://ceph.io/`; const sections = { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts index 528ad82c6b0..c39bb0c26b6 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts @@ -120,6 +120,10 @@ export class TaskMessageService { 'host/delete': this.newTaskMessage(this.commonOperations.delete, (metadata) => this.host(metadata) ), + 'host/identify_device': this.newTaskMessage( + new TaskMessageOperation($localize`Identifying`, $localize`identify`, $localize`Identified`), + (metadata) => $localize`device '${metadata.device}' on host '${metadata.hostname}'` + ), // OSD tasks 'osd/create': this.newTaskMessage( this.commonOperations.create, @@ -328,11 +332,6 @@ export class TaskMessageService { this.grafana.update_dashboards, () => ({}) ), - // Orchestrator tasks - 'orchestrator/identify_device': this.newTaskMessage( - new TaskMessageOperation($localize`Identifying`, $localize`identify`, $localize`Identified`), - (metadata) => 
$localize`device '${metadata.device}' on host '${metadata.hostname}'` - ), // Service tasks 'service/create': this.newTaskMessage(this.commonOperations.create, (metadata) => this.service(metadata) diff --git a/src/pybind/mgr/dashboard/frontend/src/testing/unit-test-helper.ts b/src/pybind/mgr/dashboard/frontend/src/testing/unit-test-helper.ts index 9e8442da185..28f88e1fac8 100644 --- a/src/pybind/mgr/dashboard/frontend/src/testing/unit-test-helper.ts +++ b/src/pybind/mgr/dashboard/frontend/src/testing/unit-test-helper.ts @@ -45,9 +45,18 @@ export function configureTestBed(configuration: any, entryComponents?: any) { export class PermissionHelper { tac: TableActionsComponent; permission: Permission; + selection: { single: object; multiple: object[] }; - constructor(permission: Permission) { + /** + * @param permission The permissions used by this test. + * @param selection The selection used by this test. Configure this if + * the table actions require a more complex selection object to perform + * a correct test run. + * Defaults to `{ single: {}, multiple: [{}, {}] }`. 
+ */ + constructor(permission: Permission, selection?: { single: object; multiple: object[] }) { this.permission = permission; + this.selection = _.defaultTo(selection, { single: {}, multiple: [{}, {}] }); } setPermissionsAndGetActions(tableActions: CdTableAction[]): any { @@ -91,11 +100,13 @@ export class PermissionHelper { testScenarios() { const result: any = {}; // 'multiple selections' - result.multiple = this.testScenario([{}, {}]); + result.multiple = this.testScenario(this.selection.multiple); // 'select executing item' - result.executing = this.testScenario([{ cdExecuting: 'someAction' }]); + result.executing = this.testScenario([ + _.merge({ cdExecuting: 'someAction' }, this.selection.single) + ]); // 'select non-executing item' - result.single = this.testScenario([{}]); + result.single = this.testScenario([this.selection.single]); // 'no selection' result.no = this.testScenario([]); @@ -104,12 +115,13 @@ export class PermissionHelper { private testScenario(selection: object[]) { this.setSelection(selection); - const btn = this.tac.getCurrentButton(); - return btn ? btn.name : ''; + const action: CdTableAction = this.tac.currentAction; + return action ? 
action.name : ''; } setSelection(selection: object[]) { this.tac.selection.selected = selection; + this.tac.onSelectionChange(); } } @@ -638,7 +650,7 @@ export class TableActionHelper { const tableActionElement = fixture.debugElement.query(By.directive(TableActionsComponent)); const toClassName = TestBed.inject(TableActionsComponent).toClassName; const getActionElement = (action: CdTableAction) => - tableActionElement.query(By.css(`[ngbDropdownItem].${toClassName(action.name)}`)); + tableActionElement.query(By.css(`[ngbDropdownItem].${toClassName(action)}`)); const actions = {}; tableActions.forEach((action) => { diff --git a/src/pybind/mgr/dashboard/module.py b/src/pybind/mgr/dashboard/module.py index 238b99f226d..e3f69664764 100644 --- a/src/pybind/mgr/dashboard/module.py +++ b/src/pybind/mgr/dashboard/module.py @@ -9,6 +9,8 @@ import errno import logging import os import socket +import ssl +import sys import tempfile import threading import time @@ -93,8 +95,8 @@ class CherryPyConfig(object): """ server_addr = self.get_localized_module_option( # type: ignore 'server_addr', get_default_addr()) - ssl = self.get_localized_module_option('ssl', True) # type: ignore - if not ssl: + use_ssl = self.get_localized_module_option('ssl', True) # type: ignore + if not use_ssl: server_port = self.get_localized_module_option('server_port', 8080) # type: ignore else: server_port = self.get_localized_module_option('ssl_server_port', 8443) # type: ignore @@ -104,7 +106,7 @@ class CherryPyConfig(object): 'no server_addr configured; ' 'try "ceph config set mgr mgr/{}/{}/server_addr <ip>"' .format(self.module_name, self.get_mgr_id())) # type: ignore - self.log.info('server: ssl=%s host=%s port=%d', 'yes' if ssl else 'no', # type: ignore + self.log.info('server: ssl=%s host=%s port=%d', 'yes' if use_ssl else 'no', # type: ignore server_addr, server_port) # Initialize custom handlers. 
@@ -141,7 +143,7 @@ class CherryPyConfig(object): 'tools.plugin_hooks_filter_request.on': True, } - if ssl: + if use_ssl: # SSL initialization cert = self.get_store("crt") # type: ignore if cert is not None: @@ -163,9 +165,18 @@ class CherryPyConfig(object): verify_tls_files(cert_fname, pkey_fname) + # Create custom SSL context to disable TLS 1.0 and 1.1. + context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) + context.load_cert_chain(cert_fname, pkey_fname) + if sys.version_info >= (3, 7): + context.minimum_version = ssl.TLSVersion.TLSv1_2 + else: + context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1 + config['server.ssl_module'] = 'builtin' config['server.ssl_certificate'] = cert_fname config['server.ssl_private_key'] = pkey_fname + config['server.ssl_context'] = context self.update_cherrypy_config(config) @@ -173,7 +184,7 @@ class CherryPyConfig(object): 'url_prefix', default='')) uri = "{0}://{1}:{2}{3}/".format( - 'https' if ssl else 'http', + 'https' if use_ssl else 'http', socket.getfqdn(server_addr if server_addr != '::' else ''), server_port, self.url_prefix diff --git a/src/pybind/mgr/dashboard/openapi.yaml b/src/pybind/mgr/dashboard/openapi.yaml index 0019cfb2d70..cd2a570a8be 100644 --- a/src/pybind/mgr/dashboard/openapi.yaml +++ b/src/pybind/mgr/dashboard/openapi.yaml @@ -3427,6 +3427,354 @@ paths: - jwt: [] tags: - Host + /api/host/{hostname}/identify_device: + post: + description: "\n Identify a device by switching on the device light for\ + \ N seconds.\n :param hostname: The hostname of the device to process.\n\ + \ :param device: The device identifier to process, e.g. 
``/dev/dm-0``\ + \ or\n ``ABC1234DEF567-1R1234_ABC8DE0Q``.\n :param duration:\ + \ The duration in seconds how long the LED should flash.\n " + parameters: + - in: path + name: hostname + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + properties: + device: + type: string + duration: + type: string + required: + - device + - duration + type: object + responses: + '201': + content: + application/vnd.ceph.api.v1.0+json: + type: object + description: Resource created. + '202': + content: + application/vnd.ceph.api.v1.0+json: + type: object + description: Operation is still executing. Please check the task queue. + '400': + description: Operation exception. Please check the response body for details. + '401': + description: Unauthenticated access. Please login first. + '403': + description: Unauthorized access. Please check your permissions. + '500': + description: Unexpected error. Please check the response body for the stack + trace. + security: + - jwt: [] + tags: + - Host + /api/host/{hostname}/inventory: + get: + parameters: + - description: Hostname + in: path + name: hostname + required: true + schema: + type: string + - allowEmptyValue: true + description: Trigger asynchronous refresh + in: query + name: refresh + schema: + type: string + responses: + '200': + content: + application/vnd.ceph.api.v1.0+json: + schema: + properties: + addr: + description: Host address + type: string + devices: + description: Host devices + items: + properties: + available: + description: If the device can be provisioned to an OSD + type: boolean + device_id: + description: Device's udev ID + type: string + human_readable_type: + description: Device type. 
ssd or hdd + type: string + lsm_data: + description: '' + properties: + errors: + description: '' + items: + type: string + type: array + health: + description: '' + type: string + ledSupport: + description: '' + properties: + FAILstatus: + description: '' + type: string + FAILsupport: + description: '' + type: string + IDENTstatus: + description: '' + type: string + IDENTsupport: + description: '' + type: string + required: + - IDENTsupport + - IDENTstatus + - FAILsupport + - FAILstatus + type: object + linkSpeed: + description: '' + type: string + mediaType: + description: '' + type: string + rpm: + description: '' + type: string + serialNum: + description: '' + type: string + transport: + description: '' + type: string + required: + - serialNum + - transport + - mediaType + - rpm + - linkSpeed + - health + - ledSupport + - errors + type: object + lvs: + description: '' + items: + properties: + block_uuid: + description: '' + type: string + cluster_fsid: + description: '' + type: string + cluster_name: + description: '' + type: string + name: + description: '' + type: string + osd_fsid: + description: '' + type: string + osd_id: + description: '' + type: string + osdspec_affinity: + description: '' + type: string + type: + description: '' + type: string + required: + - name + - osd_id + - cluster_name + - type + - osd_fsid + - cluster_fsid + - osdspec_affinity + - block_uuid + type: object + type: array + osd_ids: + description: Device OSD IDs + items: + type: integer + type: array + path: + description: Device path + type: string + rejected_reasons: + description: '' + items: + type: string + type: array + sys_api: + description: '' + properties: + human_readable_size: + description: '' + type: string + locked: + description: '' + type: integer + model: + description: '' + type: string + nr_requests: + description: '' + type: string + partitions: + description: '' + properties: + partition_name: + description: '' + properties: + holders: + description: '' + 
items: + type: string + type: array + human_readable_size: + description: '' + type: string + sectors: + description: '' + type: string + sectorsize: + description: '' + type: integer + size: + description: '' + type: integer + start: + description: '' + type: string + required: + - start + - sectors + - sectorsize + - size + - human_readable_size + - holders + type: object + required: + - partition_name + type: object + path: + description: '' + type: string + removable: + description: '' + type: string + rev: + description: '' + type: string + ro: + description: '' + type: string + rotational: + description: '' + type: string + sas_address: + description: '' + type: string + sas_device_handle: + description: '' + type: string + scheduler_mode: + description: '' + type: string + sectors: + description: '' + type: integer + sectorsize: + description: '' + type: string + size: + description: '' + type: integer + support_discard: + description: '' + type: string + vendor: + description: '' + type: string + required: + - removable + - ro + - vendor + - model + - rev + - sas_address + - sas_device_handle + - support_discard + - rotational + - nr_requests + - scheduler_mode + - partitions + - sectors + - sectorsize + - size + - human_readable_size + - path + - locked + type: object + required: + - rejected_reasons + - available + - path + - sys_api + - lvs + - human_readable_type + - device_id + - lsm_data + - osd_ids + type: object + type: array + labels: + description: Host labels + items: + type: string + type: array + name: + description: Hostname + type: string + required: + - name + - addr + - devices + - labels + type: object + description: OK + '400': + description: Operation exception. Please check the response body for details. + '401': + description: Unauthenticated access. Please login first. + '403': + description: Unauthorized access. Please check your permissions. + '500': + description: Unexpected error. 
Please check the response body for the stack + trace. + security: + - jwt: [] + summary: Get inventory of a host + tags: + - Host /api/host/{hostname}/smart: get: parameters: @@ -5352,86 +5700,6 @@ paths: summary: Status of NFS-Ganesha management feature tags: - NFS-Ganesha - /api/orchestrator/identify_device: - post: - description: "\n Identify a device by switching on the device light for\ - \ N seconds.\n :param hostname: The hostname of the device to process.\n\ - \ :param device: The device identifier to process, e.g. ``/dev/dm-0``\ - \ or\n ``ABC1234DEF567-1R1234_ABC8DE0Q``.\n :param duration:\ - \ The duration in seconds how long the LED should flash.\n " - parameters: [] - requestBody: - content: - application/json: - schema: - properties: - device: - type: string - duration: - type: string - hostname: - type: string - required: - - hostname - - device - - duration - type: object - responses: - '201': - content: - application/vnd.ceph.api.v1.0+json: - type: object - description: Resource created. - '202': - content: - application/vnd.ceph.api.v1.0+json: - type: object - description: Operation is still executing. Please check the task queue. - '400': - description: Operation exception. Please check the response body for details. - '401': - description: Unauthenticated access. Please login first. - '403': - description: Unauthorized access. Please check your permissions. - '500': - description: Unexpected error. Please check the response body for the stack - trace. - security: - - jwt: [] - tags: - - Orchestrator - /api/orchestrator/inventory: - get: - parameters: - - allowEmptyValue: true - in: query - name: hostname - schema: - type: string - - allowEmptyValue: true - in: query - name: refresh - schema: - type: string - responses: - '200': - content: - application/vnd.ceph.api.v1.0+json: - type: object - description: OK - '400': - description: Operation exception. Please check the response body for details. - '401': - description: Unauthenticated access. 
Please login first. - '403': - description: Unauthorized access. Please check your permissions. - '500': - description: Unexpected error. Please check the response body for the stack - trace. - security: - - jwt: [] - tags: - - OrchestratorInventory /api/orchestrator/status: get: parameters: [] @@ -9958,8 +10226,6 @@ tags: name: OSD - description: Orchestrator Management API name: Orchestrator -- description: Get Orchestrator Inventory Details - name: OrchestratorInventory - description: OSD Perf Counters Management API name: OsdPerfCounter - description: Perf Counters Management API diff --git a/src/pybind/mgr/dashboard/requirements-lint.txt b/src/pybind/mgr/dashboard/requirements-lint.txt index 85821253725..87beb5ce5a3 100644 --- a/src/pybind/mgr/dashboard/requirements-lint.txt +++ b/src/pybind/mgr/dashboard/requirements-lint.txt @@ -8,3 +8,4 @@ rstcheck==3.3.1; python_version >= '3' autopep8; python_version >= '3' pyfakefs; python_version >= '3' isort==5.5.3 +pytest diff --git a/src/pybind/mgr/dashboard/run-backend-api-tests.sh b/src/pybind/mgr/dashboard/run-backend-api-tests.sh index d37c4a3ee0f..ae35d7d5435 100755 --- a/src/pybind/mgr/dashboard/run-backend-api-tests.sh +++ b/src/pybind/mgr/dashboard/run-backend-api-tests.sh @@ -35,7 +35,7 @@ get_cmake_variable() { [ -z "$BUILD_DIR" ] && BUILD_DIR=build CURR_DIR=`pwd` -LOCAL_BUILD_DIR="$CURR_DIR/../../../../$BUILD_DIR" +LOCAL_BUILD_DIR=$(cd "$CURR_DIR/../../../../$BUILD_DIR"; pwd) setup_teuthology() { TEMP_DIR=`mktemp -d` @@ -83,7 +83,7 @@ on_tests_error() { local ret=$? if [[ -n "$JENKINS_HOME" && -z "$ON_TESTS_ERROR_RUN" ]]; then CEPH_OUT_DIR=${CEPH_OUT_DIR:-"$LOCAL_BUILD_DIR"/out} - display_log "mgr" 1000 + display_log "mgr" 1500 display_log "osd" 1000 ON_TESTS_ERROR_RUN=1 fi @@ -119,8 +119,7 @@ run_teuthology_tests() { local python_common_dir=$source_dir/src/python-common # In CI environment we set python paths inside build (where you find the required frontend build: "dist" dir). 
if [[ -n "$JENKINS_HOME" ]]; then - export PYBIND=$LOCAL_BUILD_DIR/src/pybind - pybind_dir=$PYBIND + pybind_dir+=":$LOCAL_BUILD_DIR/src/pybind" fi export PYTHONPATH=$source_dir/qa:$LOCAL_BUILD_DIR/lib/cython_modules/lib.3/:$pybind_dir:$python_common_dir:${COVERAGE_PATH} export RGW=${RGW:-1} diff --git a/src/pybind/mgr/dashboard/services/ceph_service.py b/src/pybind/mgr/dashboard/services/ceph_service.py index 42d511c1b50..fa97b33ea34 100644 --- a/src/pybind/mgr/dashboard/services/ceph_service.py +++ b/src/pybind/mgr/dashboard/services/ceph_service.py @@ -303,7 +303,7 @@ class CephService(object): CephService._get_smart_data_by_device(device)) else: msg = '[SMART] could not retrieve device list from daemon with type %s and ' +\ - 'with ID %d' + 'with ID %s' logger.debug(msg, daemon_type, daemon_id) return smart_data diff --git a/src/pybind/mgr/dashboard/services/rgw_client.py b/src/pybind/mgr/dashboard/services/rgw_client.py index 0013d671c74..2355bbbc1ff 100644 --- a/src/pybind/mgr/dashboard/services/rgw_client.py +++ b/src/pybind/mgr/dashboard/services/rgw_client.py @@ -163,7 +163,7 @@ def _parse_frontend_config(config): the first found option will be returned. Get more details about the configuration syntax here: - http://docs.ceph.com/docs/master/radosgw/frontends/ + http://docs.ceph.com/en/latest/radosgw/frontends/ https://civetweb.github.io/civetweb/UserManual.html :param config: The configuration string to parse. 
diff --git a/src/pybind/mgr/dashboard/tests/test_ceph_service.py b/src/pybind/mgr/dashboard/tests/test_ceph_service.py index 6c793631fd9..3433443b170 100644 --- a/src/pybind/mgr/dashboard/tests/test_ceph_service.py +++ b/src/pybind/mgr/dashboard/tests/test_ceph_service.py @@ -2,12 +2,12 @@ # pylint: disable=dangerous-default-value,too-many-public-methods from __future__ import absolute_import +import logging import unittest +from contextlib import contextmanager +from unittest import mock -try: - import mock -except ImportError: - import unittest.mock as mock +import pytest from ..services.ceph_service import CephService @@ -66,3 +66,44 @@ class CephServiceTest(unittest.TestCase): def test_get_pg_status_without_match(self): self.assertEqual(self.service.get_pool_pg_status('no-pool'), {}) + + +@contextmanager +def mock_smart_data(data): + devices = [{'devid': devid} for devid in data] + + def _get_smart_data(d): + return {d['devid']: data[d['devid']]} + + with mock.patch.object(CephService, '_get_smart_data_by_device', side_effect=_get_smart_data), \ + mock.patch.object(CephService, 'get_devices_by_host', return_value=devices), \ + mock.patch.object(CephService, 'get_devices_by_daemon', return_value=devices): + yield + + +@pytest.mark.parametrize( + "by,args,log", + [ + ('host', ('osd0',), 'from host osd0'), + ('daemon', ('osd', '1'), 'with ID 1') + ] +) +def test_get_smart_data(caplog, by, args, log): + # pylint: disable=protected-access + expected_data = { + 'aaa': {'device': {'name': '/dev/sda'}}, + 'bbb': {'device': {'name': '/dev/sdb'}}, + } + with mock_smart_data(expected_data): + smart_data = getattr(CephService, 'get_smart_data_by_{}'.format(by))(*args) + getattr(CephService, 'get_devices_by_{}'.format(by)).assert_called_with(*args) + CephService._get_smart_data_by_device.assert_called() + assert smart_data == expected_data + + with caplog.at_level(logging.DEBUG): + with mock_smart_data([]): + smart_data = getattr(CephService, 
'get_smart_data_by_{}'.format(by))(*args) + getattr(CephService, 'get_devices_by_{}'.format(by)).assert_called_with(*args) + CephService._get_smart_data_by_device.assert_not_called() + assert smart_data == {} + assert log in caplog.text diff --git a/src/pybind/mgr/dashboard/tests/test_host.py b/src/pybind/mgr/dashboard/tests/test_host.py index 9c2500cd078..050e28e8810 100644 --- a/src/pybind/mgr/dashboard/tests/test_host.py +++ b/src/pybind/mgr/dashboard/tests/test_host.py @@ -1,26 +1,55 @@ +import contextlib import unittest +from typing import List, Optional +from unittest import mock -try: - import mock -except ImportError: - from unittest import mock - -from orchestrator import HostSpec +from orchestrator import HostSpec, InventoryHost from .. import mgr -from ..controllers.host import Host, HostUi, get_hosts +from ..controllers.host import Host, HostUi, get_device_osd_map, get_hosts, get_inventories +from ..tools import NotificationQueue, TaskManager from . import ControllerTestCase # pylint: disable=no-name-in-module +@contextlib.contextmanager +def patch_orch(available: bool, hosts: Optional[List[HostSpec]] = None, + inventory: Optional[List[dict]] = None): + with mock.patch('dashboard.controllers.orchestrator.OrchClient.instance') as instance: + fake_client = mock.Mock() + fake_client.available.return_value = available + fake_client.get_missing_features.return_value = [] + + if hosts is not None: + fake_client.hosts.list.return_value = hosts + + if inventory is not None: + def _list_inventory(hosts=None, refresh=False): # pylint: disable=unused-argument + inv_hosts = [] + for inv_host in inventory: + if hosts is None or inv_host['name'] in hosts: + inv_hosts.append(InventoryHost.from_json(inv_host)) + return inv_hosts + fake_client.inventory.list.side_effect = _list_inventory + + instance.return_value = fake_client + yield fake_client + + class HostControllerTest(ControllerTestCase): URL_HOST = '/api/host' @classmethod def setup_server(cls): + 
NotificationQueue.start_queue() + TaskManager.init() # pylint: disable=protected-access Host._cp_config['tools.authenticate.on'] = False cls.setup_controllers([Host]) + @classmethod + def tearDownClass(cls): + NotificationQueue.stop() + @mock.patch('dashboard.controllers.host.get_hosts') def test_host_list(self, mock_get_hosts): hosts = [{ @@ -70,60 +99,96 @@ class HostControllerTest(ControllerTestCase): self.assertStatus(200) self.assertJsonBody(hosts) - @mock.patch('dashboard.controllers.orchestrator.OrchClient.instance') - def test_get_1(self, instance): + def test_get_1(self): mgr.list_servers.return_value = [] - fake_client = mock.Mock() - fake_client.available.return_value = False - instance.return_value = fake_client - - self._get('{}/node1'.format(self.URL_HOST)) - self.assertStatus(404) + with patch_orch(False): + self._get('{}/node1'.format(self.URL_HOST)) + self.assertStatus(404) - @mock.patch('dashboard.controllers.orchestrator.OrchClient.instance') - def test_get_2(self, instance): + def test_get_2(self): mgr.list_servers.return_value = [{'hostname': 'node1'}] - fake_client = mock.Mock() - fake_client.available.return_value = False - instance.return_value = fake_client - - self._get('{}/node1'.format(self.URL_HOST)) - self.assertStatus(200) - self.assertIn('labels', self.json_body()) + with patch_orch(False): + self._get('{}/node1'.format(self.URL_HOST)) + self.assertStatus(200) + self.assertIn('labels', self.json_body()) - @mock.patch('dashboard.controllers.orchestrator.OrchClient.instance') - def test_get_3(self, instance): + def test_get_3(self): mgr.list_servers.return_value = [] - fake_client = mock.Mock() - fake_client.available.return_value = True - fake_client.hosts.list.return_value = [HostSpec('node1')] - instance.return_value = fake_client + with patch_orch(True, hosts=[HostSpec('node1')]): + self._get('{}/node1'.format(self.URL_HOST)) + self.assertStatus(200) + self.assertIn('labels', self.json_body()) - 
self._get('{}/node1'.format(self.URL_HOST)) - self.assertStatus(200) - self.assertIn('labels', self.json_body()) - - @mock.patch('dashboard.controllers.orchestrator.OrchClient.instance') - def test_set_labels(self, instance): + def test_set_labels(self): mgr.list_servers.return_value = [] - - fake_client = mock.Mock() - fake_client.available.return_value = True - fake_client.get_missing_features.return_value = [] - fake_client.hosts.list.return_value = [ + orch_hosts = [ HostSpec('node0', labels=['aaa', 'bbb']) ] - fake_client.hosts.remove_label = mock.Mock() - fake_client.hosts.add_label = mock.Mock() - instance.return_value = fake_client + with patch_orch(True, hosts=orch_hosts) as fake_client: + fake_client.hosts.remove_label = mock.Mock() + fake_client.hosts.add_label = mock.Mock() + + self._put('{}/node0'.format(self.URL_HOST), {'labels': ['bbb', 'ccc']}) + self.assertStatus(200) + fake_client.hosts.remove_label.assert_called_once_with('node0', 'aaa') + fake_client.hosts.add_label.assert_called_once_with('node0', 'ccc') + + @mock.patch('dashboard.controllers.host.time') + def test_identify_device(self, mock_time): + url = '{}/host-0/identify_device'.format(self.URL_HOST) + with patch_orch(True) as fake_client: + payload = { + 'device': '/dev/sdz', + 'duration': '1' + } + self._task_post(url, payload) + self.assertStatus(200) + mock_time.sleep.assert_called() + calls = [ + mock.call('host-0', '/dev/sdz', 'ident', True), + mock.call('host-0', '/dev/sdz', 'ident', False), + ] + fake_client.blink_device_light.assert_has_calls(calls) + + @mock.patch('dashboard.controllers.host.get_inventories') + def test_inventory(self, mock_get_inventories): + inventory_url = '{}/host-0/inventory'.format(self.URL_HOST) + with patch_orch(True): + tests = [ + { + 'url': inventory_url, + 'inventories': [{'a': 'b'}], + 'refresh': None, + 'resp': {'a': 'b'} + }, + { + 'url': '{}?refresh=true'.format(inventory_url), + 'inventories': [{'a': 'b'}], + 'refresh': "true", + 'resp': {'a': 
'b'} + }, + { + 'url': inventory_url, + 'inventories': [], + 'refresh': None, + 'resp': {} + }, + ] + for test in tests: + mock_get_inventories.reset_mock() + mock_get_inventories.return_value = test['inventories'] + self._get(test['url']) + mock_get_inventories.assert_called_once_with(['host-0'], test['refresh']) + self.assertEqual(self.json_body(), test['resp']) + self.assertStatus(200) - self._put('{}/node0'.format(self.URL_HOST), {'labels': ['bbb', 'ccc']}) - self.assertStatus(200) - fake_client.hosts.remove_label.assert_called_once_with('node0', 'aaa') - fake_client.hosts.add_label.assert_called_once_with('node0', 'ccc') + # list without orchestrator service + with patch_orch(False): + self._get(inventory_url) + self.assertStatus(503) class HostUiControllerTest(ControllerTestCase): @@ -135,66 +200,179 @@ class HostUiControllerTest(ControllerTestCase): HostUi._cp_config['tools.authenticate.on'] = False cls.setup_controllers([HostUi]) - @mock.patch('dashboard.controllers.orchestrator.OrchClient.instance') - def test_labels(self, instance): - fake_client = mock.Mock() - fake_client.available.return_value = True - fake_client.hosts.list.return_value = [ + def test_labels(self): + orch_hosts = [ HostSpec('node1', labels=['foo']), HostSpec('node2', labels=['foo', 'bar']) ] - instance.return_value = fake_client - self._get('{}/labels'.format(self.URL_HOST)) - self.assertStatus(200) - labels = self.json_body() - labels.sort() - self.assertListEqual(labels, ['bar', 'foo']) + with patch_orch(True, hosts=orch_hosts): + self._get('{}/labels'.format(self.URL_HOST)) + self.assertStatus(200) + labels = self.json_body() + labels.sort() + self.assertListEqual(labels, ['bar', 'foo']) + + @mock.patch('dashboard.controllers.host.get_inventories') + def test_inventory(self, mock_get_inventories): + inventory_url = '{}/inventory'.format(self.URL_HOST) + with patch_orch(True): + tests = [ + { + 'url': inventory_url, + 'refresh': None + }, + { + 'url': 
'{}?refresh=true'.format(inventory_url), + 'refresh': "true" + }, + ] + for test in tests: + mock_get_inventories.reset_mock() + mock_get_inventories.return_value = [{'a': 'b'}] + self._get(test['url']) + mock_get_inventories.assert_called_once_with(None, test['refresh']) + self.assertEqual(self.json_body(), [{'a': 'b'}]) + self.assertStatus(200) + + # list without orchestrator service + with patch_orch(False): + self._get(inventory_url) + self.assertStatus(503) class TestHosts(unittest.TestCase): - @mock.patch('dashboard.controllers.orchestrator.OrchClient.instance') - def test_get_hosts(self, instance): + def test_get_hosts(self): mgr.list_servers.return_value = [{ 'hostname': 'node1' }, { 'hostname': 'localhost' }] - - fake_client = mock.Mock() - fake_client.available.return_value = True - fake_client.hosts.list.return_value = [ + orch_hosts = [ HostSpec('node1', labels=['foo', 'bar']), HostSpec('node2', labels=['bar']) ] - instance.return_value = fake_client - hosts = get_hosts() - self.assertEqual(len(hosts), 3) - checks = { - 'localhost': { - 'sources': { - 'ceph': True, - 'orchestrator': False + with patch_orch(True, hosts=orch_hosts): + hosts = get_hosts() + self.assertEqual(len(hosts), 3) + checks = { + 'localhost': { + 'sources': { + 'ceph': True, + 'orchestrator': False + }, + 'labels': [] }, - 'labels': [] + 'node1': { + 'sources': { + 'ceph': True, + 'orchestrator': True + }, + 'labels': ['bar', 'foo'] + }, + 'node2': { + 'sources': { + 'ceph': False, + 'orchestrator': True + }, + 'labels': ['bar'] + } + } + for host in hosts: + hostname = host['hostname'] + self.assertDictEqual(host['sources'], checks[hostname]['sources']) + self.assertListEqual(host['labels'], checks[hostname]['labels']) + + @mock.patch('dashboard.controllers.host.mgr.get') + def test_get_device_osd_map(self, mgr_get): + mgr_get.side_effect = lambda key: { + 'osd_metadata': { + '0': { + 'hostname': 'node0', + 'devices': 'nvme0n1,sdb', + }, + '1': { + 'hostname': 'node0', + 'devices': 
'nvme0n1,sdc', + }, + '2': { + 'hostname': 'node1', + 'devices': 'sda', + }, + '3': { + 'hostname': 'node2', + 'devices': '', + } + } + }[key] + + device_osd_map = get_device_osd_map() + mgr.get.assert_called_with('osd_metadata') + # sort OSD IDs to make assertDictEqual work + for devices in device_osd_map.values(): + for host in devices.keys(): + devices[host] = sorted(devices[host]) + self.assertDictEqual(device_osd_map, { + 'node0': { + 'nvme0n1': [0, 1], + 'sdb': [0], + 'sdc': [1], }, 'node1': { - 'sources': { - 'ceph': True, - 'orchestrator': True - }, - 'labels': ['bar', 'foo'] + 'sda': [2] + } + }) + + @mock.patch('dashboard.controllers.host.str_to_bool') + @mock.patch('dashboard.controllers.host.get_device_osd_map') + def test_get_inventories(self, mock_get_device_osd_map, mock_str_to_bool): + mock_get_device_osd_map.return_value = { + 'host-0': { + 'nvme0n1': [1, 2], + 'sdb': [1], + 'sdc': [2] }, - 'node2': { - 'sources': { - 'ceph': False, - 'orchestrator': True - }, - 'labels': ['bar'] + 'host-1': { + 'sdb': [3] } } - for host in hosts: - hostname = host['hostname'] - self.assertDictEqual(host['sources'], checks[hostname]['sources']) - self.assertListEqual(host['labels'], checks[hostname]['labels']) + inventory = [ + { + 'name': 'host-0', + 'addr': '1.2.3.4', + 'devices': [ + {'path': 'nvme0n1'}, + {'path': '/dev/sdb'}, + {'path': '/dev/sdc'}, + ] + }, + { + 'name': 'host-1', + 'addr': '1.2.3.5', + 'devices': [ + {'path': '/dev/sda'}, + {'path': 'sdb'}, + ] + } + ] + + with patch_orch(True, inventory=inventory) as orch_client: + mock_str_to_bool.return_value = True + + hosts = ['host-0', 'host-1'] + inventories = get_inventories(hosts, 'true') + mock_str_to_bool.assert_called_with('true') + orch_client.inventory.list.assert_called_once_with(hosts=hosts, refresh=True) + self.assertEqual(len(inventories), 2) + host0 = inventories[0] + self.assertEqual(host0['name'], 'host-0') + self.assertEqual(host0['addr'], '1.2.3.4') + 
self.assertEqual(host0['devices'][0]['osd_ids'], [1, 2]) + self.assertEqual(host0['devices'][1]['osd_ids'], [1]) + self.assertEqual(host0['devices'][2]['osd_ids'], [2]) + host1 = inventories[1] + self.assertEqual(host1['name'], 'host-1') + self.assertEqual(host1['addr'], '1.2.3.5') + self.assertEqual(host1['devices'][0]['osd_ids'], []) + self.assertEqual(host1['devices'][1]['osd_ids'], [3]) diff --git a/src/pybind/mgr/dashboard/tests/test_orchestrator.py b/src/pybind/mgr/dashboard/tests/test_orchestrator.py index 00102f36a58..d9ee85cf305 100644 --- a/src/pybind/mgr/dashboard/tests/test_orchestrator.py +++ b/src/pybind/mgr/dashboard/tests/test_orchestrator.py @@ -1,16 +1,10 @@ import inspect import unittest +from unittest import mock -try: - import mock -except ImportError: - from unittest import mock - -from orchestrator import InventoryHost from orchestrator import Orchestrator as OrchestratorBase -from .. import mgr -from ..controllers.orchestrator import Orchestrator, OrchestratorInventory, get_device_osd_map +from ..controllers.orchestrator import Orchestrator from ..services.orchestrator import OrchFeature from . 
import ControllerTestCase # pylint: disable=no-name-in-module @@ -23,9 +17,7 @@ class OrchestratorControllerTest(ControllerTestCase): def setup_server(cls): # pylint: disable=protected-access Orchestrator._cp_config['tools.authenticate.on'] = False - OrchestratorInventory._cp_config['tools.authenticate.on'] = False - cls.setup_controllers([Orchestrator, - OrchestratorInventory]) + cls.setup_controllers([Orchestrator]) @mock.patch('dashboard.controllers.orchestrator.OrchClient.instance') def test_status_get(self, instance): @@ -39,127 +31,8 @@ class OrchestratorControllerTest(ControllerTestCase): self.assertStatus(200) self.assertJsonBody(status) - def _set_inventory(self, mock_instance, inventory): - # pylint: disable=unused-argument - def _list_inventory(hosts=None, refresh=False): - inv_hosts = [] - for inv_host in inventory: - if hosts is None or inv_host['name'] in hosts: - inv_hosts.append(InventoryHost.from_json(inv_host)) - return inv_hosts - mock_instance.inventory.list.side_effect = _list_inventory - - @mock.patch('dashboard.controllers.orchestrator.get_device_osd_map') - @mock.patch('dashboard.controllers.orchestrator.OrchClient.instance') - def test_inventory_list(self, instance, get_dev_osd_map): - get_dev_osd_map.return_value = { - 'host-0': { - 'nvme0n1': [1, 2], - 'sdb': [1], - 'sdc': [2] - }, - 'host-1': { - 'sdb': [3] - } - } - inventory = [ - { - 'name': 'host-0', - 'addr': '1.2.3.4', - 'devices': [ - {'path': 'nvme0n1'}, - {'path': '/dev/sdb'}, - {'path': '/dev/sdc'}, - ] - }, - { - 'name': 'host-1', - 'addr': '1.2.3.5', - 'devices': [ - {'path': '/dev/sda'}, - {'path': 'sdb'}, - ] - } - ] - fake_client = mock.Mock() - fake_client.available.return_value = True - fake_client.get_missing_features.return_value = [] - self._set_inventory(fake_client, inventory) - instance.return_value = fake_client - - # list - self._get(self.URL_INVENTORY) - self.assertStatus(200) - resp = self.json_body() - self.assertEqual(len(resp), 2) - host0 = resp[0] - 
self.assertEqual(host0['name'], 'host-0') - self.assertEqual(host0['addr'], '1.2.3.4') - self.assertEqual(host0['devices'][0]['osd_ids'], [1, 2]) - self.assertEqual(host0['devices'][1]['osd_ids'], [1]) - self.assertEqual(host0['devices'][2]['osd_ids'], [2]) - host1 = resp[1] - self.assertEqual(host1['name'], 'host-1') - self.assertEqual(host1['addr'], '1.2.3.5') - self.assertEqual(host1['devices'][0]['osd_ids'], []) - self.assertEqual(host1['devices'][1]['osd_ids'], [3]) - - # list with existent hostname - self._get('{}?hostname=host-0'.format(self.URL_INVENTORY)) - self.assertStatus(200) - self.assertEqual(self.json_body()[0]['name'], 'host-0') - - # list with non-existent inventory - self._get('{}?hostname=host-10'.format(self.URL_INVENTORY)) - self.assertStatus(200) - self.assertJsonBody([]) - - # list without orchestrator service - fake_client.available.return_value = False - self._get(self.URL_INVENTORY) - self.assertStatus(503) - class TestOrchestrator(unittest.TestCase): - def test_get_device_osd_map(self): - mgr.get.side_effect = lambda key: { - 'osd_metadata': { - '0': { - 'hostname': 'node0', - 'devices': 'nvme0n1,sdb', - }, - '1': { - 'hostname': 'node0', - 'devices': 'nvme0n1,sdc', - }, - '2': { - 'hostname': 'node1', - 'devices': 'sda', - }, - '3': { - 'hostname': 'node2', - 'devices': '', - } - } - }[key] - - device_osd_map = get_device_osd_map() - mgr.get.assert_called_with('osd_metadata') - # sort OSD IDs to make assertDictEqual work - for devices in device_osd_map.values(): - for host in devices.keys(): - devices[host] = sorted(devices[host]) - self.assertDictEqual(device_osd_map, { - 'node0': { - 'nvme0n1': [0, 1], - 'sdb': [0], - 'sdc': [1], - }, - 'node1': { - 'sda': [2] - } - }) - def test_features_has_corresponding_methods(self): defined_methods = [v for k, v in inspect.getmembers( OrchFeature, lambda m: not inspect.isroutine(m)) if not k.startswith('_')] diff --git a/src/pybind/mgr/dashboard/tox.ini b/src/pybind/mgr/dashboard/tox.ini index 
8880c9180e3..1cda4c45acd 100644 --- a/src/pybind/mgr/dashboard/tox.ini +++ b/src/pybind/mgr/dashboard/tox.ini @@ -38,6 +38,7 @@ deps = passenv = PYTHONPATH setenv = + PYTHONPATH=$PYTHONPATH:../.. CFLAGS = -DXMLSEC_NO_SIZE_T PYTHONUNBUFFERED=1 PYTHONDONTWRITEBYTECODE=1 @@ -168,7 +169,7 @@ passenv = PYTHONPATH setenv = UNITTEST = true - PYTHONPATH=$PYTHONPATH:.. + PYTHONPATH=$PYTHONPATH:..:../.. OPENAPI_FILE=openapi.yaml check: OPENAPI_FILE_TMP={envtmpdir}/{env:OPENAPI_FILE} commands = diff --git a/src/pybind/mgr/devicehealth/module.py b/src/pybind/mgr/devicehealth/module.py index 6ce89127493..52d0e416617 100644 --- a/src/pybind/mgr/devicehealth/module.py +++ b/src/pybind/mgr/devicehealth/module.py @@ -17,7 +17,7 @@ DEVICE_HEALTH_IN_USE = 'DEVICE_HEALTH_IN_USE' DEVICE_HEALTH_TOOMANY = 'DEVICE_HEALTH_TOOMANY' HEALTH_MESSAGES = { DEVICE_HEALTH: '%d device(s) expected to fail soon', - DEVICE_HEALTH_IN_USE: '%d daemons(s) expected to fail soon and still contain data', + DEVICE_HEALTH_IN_USE: '%d daemon(s) expected to fail soon and still contain data', DEVICE_HEALTH_TOOMANY: 'Too many daemons are expected to fail soon', } diff --git a/src/pybind/mgr/mgr_module.py b/src/pybind/mgr/mgr_module.py index 263f683f7b0..cf0614d76ed 100644 --- a/src/pybind/mgr/mgr_module.py +++ b/src/pybind/mgr/mgr_module.py @@ -380,6 +380,7 @@ class Option(dict): (k, v) for k, v in vars().items() if k != 'self' and v is not None) + class Command(dict): """ Helper class to declare options for COMMANDS list. 
@@ -443,6 +444,7 @@ class CPlusPlusHandler(logging.Handler): if record.levelno >= self.level: self._module._ceph_log(self.format(record)) + class ClusterLogHandler(logging.Handler): def __init__(self, module_inst): super().__init__() @@ -463,6 +465,7 @@ class ClusterLogHandler(logging.Handler): level, self.format(record)) + class FileHandler(logging.FileHandler): def __init__(self, module_inst): path = module_inst.get_ceph_option("log_file") @@ -501,7 +504,6 @@ class MgrModuleLoggingMixin(object): self._root_logger.setLevel(logging.NOTSET) self._set_log_level(mgr_level, module_level, cluster_level) - def _unconfigure_logging(self): # remove existing handlers: rm_handlers = [ @@ -765,7 +767,6 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): # Keep a librados instance for those that need it. self._rados = None - def __del__(self): self._unconfigure_logging() @@ -1494,7 +1495,7 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): def update_progress_event(self, evid, desc, progress, add_to_ceph_s): return self._ceph_update_progress_event(str(evid), str(desc), - float(progress), + float(progress), bool(add_to_ceph_s)) def complete_progress_event(self, evid): diff --git a/src/pybind/mgr/mgr_util.py b/src/pybind/mgr/mgr_util.py index 4a2e1f22555..4513777970b 100644 --- a/src/pybind/mgr/mgr_util.py +++ b/src/pybind/mgr/mgr_util.py @@ -20,7 +20,7 @@ else: from threading import _Timer as Timer try: - from typing import Tuple, Any, Callable + from typing import Tuple, Any, Callable, Optional, Dict except ImportError: TYPE_CHECKING = False # just for type checking @@ -370,8 +370,8 @@ def get_default_addr(): with contextlib.closing(sock): sock.bind(("::1", 0)) return True - except (AttributeError, socket.error) as e: - return False + except (AttributeError, socket.error): + return False try: return get_default_addr.result # type: ignore @@ -385,22 +385,53 @@ class ServerConfigException(Exception): pass -def 
create_self_signed_cert(organisation='Ceph', common_name='mgr') -> Tuple[str, str]: +def create_self_signed_cert(organisation: str = 'Ceph', + common_name: str = 'mgr', + dname: Optional[Dict[str, str]] = None) -> Tuple[str, str]: """Returns self-signed PEM certificates valid for 10 years. - :return cert, pkey + + The optional dname parameter provides complete control of the cert/key + creation by supporting all valid RDNs via a dictionary. However, if dname + is not provided the default O and CN settings will be applied. + + :param organisation: String representing the Organisation(O) RDN (default='Ceph') + :param common_name: String representing the Common Name(CN) RDN (default='mgr') + :param dname: Optional dictionary containing RDNs to use for crt/key generation + + :return: ssl crt and key in utf-8 format + + :raises ValueError: if the dname parameter received contains invalid RDNs + """ from OpenSSL import crypto from uuid import uuid4 + # RDN = Relative Distinguished Name + valid_RDN_list = ['C', 'ST', 'L', 'O', 'OU', 'CN', 'emailAddress'] + # create a key pair pkey = crypto.PKey() pkey.generate_key(crypto.TYPE_RSA, 2048) + # Create a "subject" object + req = crypto.X509Req() + subj = req.get_subject() + + if dname: + # dname received, so check it contains valid RDNs + if not all(field in valid_RDN_list for field in dname): + raise ValueError("Invalid DNAME received. 
Valid DNAME fields are {}".format(', '.join(valid_RDN_list))) + else: + dname = {"O": organisation, "CN": common_name} + + # populate the subject with the dname settings + for k, v in dname.items(): + setattr(subj, k, v) + # create a self-signed cert cert = crypto.X509() - cert.get_subject().O = organisation - cert.get_subject().CN = common_name + cert.set_subject(req.get_subject()) cert.set_serial_number(int(uuid4())) cert.gmtime_adj_notBefore(0) cert.gmtime_adj_notAfter(10 * 365 * 24 * 60 * 60) # 10 years @@ -515,6 +546,7 @@ def verify_tls_files(cert_fname, pkey_fname): 'Private key {} and certificate {} do not match up: {}'.format( pkey_fname, cert_fname, str(e))) + def get_most_recent_rate(rates): """ Get most recent rate from rates @@ -565,6 +597,7 @@ def get_time_series_rates(data): return [(data2[0], _derivative(data1, data2)) for data1, data2 in _pairwise(data)] + def _filter_time_series(data): """ Filters time series data @@ -609,6 +642,7 @@ def _filter_time_series(data): filtered.append(data[-1]) return filtered + def _derivative(p1, p2): """ Derivative between two time series data points @@ -629,6 +663,7 @@ def _derivative(p1, p2): """ return (p2[1] - p1[1]) / float(p2[0] - p1[0]) + def _pairwise(iterable): it = iter(iterable) a = next(it, None) @@ -637,6 +672,7 @@ def _pairwise(iterable): yield (a, b) a = b + def to_pretty_timedelta(n): if n < datetime.timedelta(seconds=120): return str(n.seconds) + 's' diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 117d3d0b3d6..39dc20ecc2e 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -879,6 +879,7 @@ class Orchestrator(object): 'rbd-mirror': self.apply_rbd_mirror, 'rgw': self.apply_rgw, 'host': self.add_host, + 'cephadm-exporter': self.apply_cephadm_exporter, } def merge(ls, r): @@ -1092,11 +1093,11 @@ class Orchestrator(object): raise NotImplementedError() def add_grafana(self, spec: ServiceSpec) -> 
Completion[List[str]]: - """Create a new Node-Exporter service""" + """Create a new grafana service""" raise NotImplementedError() def apply_grafana(self, spec: ServiceSpec) -> Completion[str]: - """Update existing a Node-Exporter daemon(s)""" + """Update existing a grafana service""" raise NotImplementedError() def add_alertmanager(self, spec: ServiceSpec) -> Completion[List[str]]: @@ -1107,6 +1108,14 @@ class Orchestrator(object): """Update an existing AlertManager daemon(s)""" raise NotImplementedError() + def add_cephadm_exporter(self, spec: ServiceSpec) -> Completion[List[str]]: + """Create a new cephadm exporter daemon""" + raise NotImplementedError() + + def apply_cephadm_exporter(self, spec: ServiceSpec) -> Completion[str]: + """Update an existing cephadm exporter daemon""" + raise NotImplementedError() + def upgrade_check(self, image: Optional[str], version: Optional[str]) -> Completion[str]: raise NotImplementedError() diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index ea27c762a08..021229f529d 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -405,7 +405,7 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule, table._align['SIZE'] = 'r' table.left_padding_width = 0 table.right_padding_width = 2 - for host_ in completion.result: # type: InventoryHost + for host_ in sorted(completion.result, key=lambda h: h.name): # type: InventoryHost for d in host_.devices.devices: # type: Device led_ident = 'N/A' @@ -601,6 +601,18 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule, ukn(s.container_image_id)[0:12], ukn(s.container_id))) + remove_column = 'CONTAINER ID' + if table.get_string(fields=[remove_column], border=False, + header=False).count('<unknown>') == len(daemons): + try: + table.del_column(remove_column) + except AttributeError as e: + # del_column method was introduced in prettytable 2.0 + if str(e) != "del_column": + raise + 
table.field_names.remove(remove_column) + table._rows = [row[:-1] for row in table._rows] + return HandleCommandResult(stdout=table.get_string()) @_cli_write_command( @@ -809,7 +821,7 @@ Usage: @_cli_write_command( 'orch daemon add', - 'name=daemon_type,type=CephChoices,strings=mon|mgr|rbd-mirror|crash|alertmanager|grafana|node-exporter|prometheus,req=false ' + 'name=daemon_type,type=CephChoices,strings=mon|mgr|rbd-mirror|crash|alertmanager|grafana|node-exporter|prometheus|cephadm-exporter,req=false ' 'name=placement,type=CephString,req=false', 'Add daemon(s)') def _daemon_add_misc(self, @@ -854,6 +866,8 @@ Usage: completion = self.add_nfs(spec) elif daemon_type == 'iscsi': completion = self.add_iscsi(spec) + elif daemon_type == 'cephadm-exporter': + completion = self.add_cephadm_exporter(spec) else: raise OrchestratorValidationError(f'unknown daemon type `{daemon_type}`') @@ -1048,7 +1062,7 @@ Usage: @_cli_write_command( 'orch apply', - 'name=service_type,type=CephChoices,strings=mon|mgr|rbd-mirror|crash|alertmanager|grafana|node-exporter|prometheus,req=false ' + 'name=service_type,type=CephChoices,strings=mon|mgr|rbd-mirror|crash|alertmanager|grafana|node-exporter|prometheus|cephadm-exporter,req=false ' 'name=placement,type=CephString,req=false ' 'name=dry_run,type=CephBool,req=false ' 'name=format,type=CephChoices,strings=plain|json|json-pretty|yaml,req=false ' diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index f2abf00e4ab..71d451337d1 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -8,13 +8,13 @@ import re import socket import threading import time -from mgr_module import MgrModule, MgrStandbyModule, CommandResult, PG_STATES +from mgr_module import MgrModule, MgrStandbyModule, PG_STATES from mgr_util import get_default_addr, profile_method from rbd import RBD from collections import namedtuple try: from typing import Optional, Dict, Any, Set -except: +except ImportError: 
pass # Defaults for the Prometheus HTTP server. Can also set in config-key @@ -187,19 +187,28 @@ class MetricCollectionThread(threading.Thread): def __init__(self, module): # type: (Module) -> None self.mod = module + self.active = True + self.event = threading.Event() super(MetricCollectionThread, self).__init__(target=self.collect) def collect(self): self.mod.log.info('starting metric collection thread') - while True: + while self.active: self.mod.log.debug('collecting cache in thread') if self.mod.have_mon_connection(): start_time = time.time() - data = self.mod.collect() - duration = time.time() - start_time + try: + data = self.mod.collect() + except: + # Log any issues encountered during the data collection and continue + self.mod.log.exception("failed to collect metrics:") + self.event.wait(self.mod.scrape_interval) + continue + + duration = time.time() - start_time self.mod.log.debug('collecting cache in thread done') - + sleep_time = self.mod.scrape_interval - duration if sleep_time < 0: self.mod.log.warning( @@ -218,11 +227,14 @@ class MetricCollectionThread(threading.Thread): self.mod.collect_cache = data self.mod.collect_time = duration - time.sleep(sleep_time) + self.event.wait(sleep_time) else: self.mod.log.error('No MON connection') - time.sleep(self.mod.scrape_interval) + self.event.wait(self.mod.scrape_interval) + def stop(self): + self.active = False + self.event.set() class Module(MgrModule): COMMANDS = [ @@ -274,7 +286,7 @@ class Module(MgrModule): } # type: Dict[str, Any] global _global_instance _global_instance = self - MetricCollectionThread(_global_instance).start() + self.metrics_thread = MetricCollectionThread(_global_instance) def _setup_static_metrics(self): metrics = {} @@ -585,12 +597,10 @@ class Module(MgrModule): all_modules = {module.get('name'):module.get('can_run') for module in mgr_map['available_modules']} - ceph_release = None for mgr in all_mgrs: host_version = servers.get((mgr, 'mgr'), ('', '')) if mgr == active: _state = 1 - 
ceph_release = host_version[1].split()[-2] # e.g. nautilus else: _state = 0 @@ -601,7 +611,7 @@ class Module(MgrModule): self.metrics['mgr_status'].set(_state, ( 'mgr.{}'.format(mgr), )) - always_on_modules = mgr_map['always_on_modules'].get(ceph_release, []) + always_on_modules = mgr_map['always_on_modules'].get(self.release_name, []) active_modules = list(always_on_modules) active_modules.extend(mgr_map['modules']) @@ -1151,26 +1161,8 @@ class Module(MgrModule): if service['type'] != 'mgr': continue id_ = service['id'] - # get port for prometheus module at mgr with id_ - # TODO use get_config_prefix or get_config here once - # https://github.com/ceph/ceph/pull/20458 is merged - result = CommandResult("") - assert isinstance(_global_instance, Module) - _global_instance.send_command( - result, "mon", '', - json.dumps({ - "prefix": "config-key get", - 'key': "config/mgr/mgr/prometheus/{}/server_port".format(id_), - }), - "") - r, outb, outs = result.wait() - if r != 0: - _global_instance.log.error("Failed to retrieve port for mgr {}: {}".format(id_, outs)) - targets.append('{}:{}'.format(hostname, DEFAULT_PORT)) - else: - port = json.loads(outb) - targets.append('{}:{}'.format(hostname, port)) - + port = self._get_module_option('server_port', DEFAULT_PORT, id_) + targets.append(f'{hostname}:{port}') ret = [ { "targets": targets, @@ -1273,6 +1265,8 @@ class Module(MgrModule): (server_addr, server_port) ) + self.metrics_thread.start() + # Publish the URI that others may use to access the service we're # about to start serving self.set_uri('http://{0}:{1}/'.format( @@ -1292,9 +1286,13 @@ class Module(MgrModule): # wait for the shutdown event self.shutdown_event.wait() self.shutdown_event.clear() + # tell metrics collection thread to stop collecting new metrics + self.metrics_thread.stop() cherrypy.engine.stop() self.log.info('Engine stopped.') self.shutdown_rbd_stats() + # wait for the metrics collection thread to stop + self.metrics_thread.join() def shutdown(self): 
self.log.info('Stopping engine...') diff --git a/src/pybind/mgr/rbd_support/task.py b/src/pybind/mgr/rbd_support/task.py index c17ffa102f7..87d43eca15a 100644 --- a/src/pybind/mgr/rbd_support/task.py +++ b/src/pybind/mgr/rbd_support/task.py @@ -21,7 +21,9 @@ TASK_SEQUENCE = "sequence" TASK_ID = "id" TASK_REFS = "refs" TASK_MESSAGE = "message" +TASK_RETRY_ATTEMPTS = "retry_attempts" TASK_RETRY_TIME = "retry_time" +TASK_RETRY_MESSAGE = "retry_message" TASK_IN_PROGRESS = "in_progress" TASK_PROGRESS = "progress" TASK_CANCELED = "canceled" @@ -47,6 +49,7 @@ VALID_TASK_ACTIONS = [TASK_REF_ACTION_FLATTEN, TASK_REF_ACTION_MIGRATION_ABORT] TASK_RETRY_INTERVAL = timedelta(seconds=30) +TASK_MAX_RETRY_INTERVAL = timedelta(seconds=300) MAX_COMPLETED_TASKS = 50 @@ -71,11 +74,14 @@ class Task: self.task_id = task_id self.message = message self.refs = refs + self.retry_message = None + self.retry_attempts = 0 self.retry_time = None self.in_progress = False self.progress = 0.0 self.canceled = False self.failed = False + self.progress_posted = False def __str__(self): return self.to_json() @@ -98,6 +104,10 @@ class Task: TASK_MESSAGE: self.message, TASK_REFS: self.refs } + if self.retry_message: + d[TASK_RETRY_MESSAGE] = self.retry_message + if self.retry_attempts: + d[TASK_RETRY_ATTEMPTS] = self.retry_attempts if self.retry_time: d[TASK_RETRY_TIME] = self.retry_time.isoformat() if self.in_progress: @@ -364,7 +374,6 @@ class TaskHandler: else: task.in_progress = True self.in_progress_task = task - self.update_progress(task, 0) self.lock.release() try: @@ -386,6 +395,7 @@ class TaskHandler: except rados.ObjectNotFound as e: self.log.error("execute_task: {}".format(e)) if pool_valid: + task.retry_message = "{}".format(e) self.update_progress(task, 0) else: # pool DNE -- remove the task @@ -394,11 +404,15 @@ class TaskHandler: except (rados.Error, rbd.Error) as e: self.log.error("execute_task: {}".format(e)) + task.retry_message = "{}".format(e) self.update_progress(task, 0) finally: 
task.in_progress = False - task.retry_time = datetime.now() + TASK_RETRY_INTERVAL + task.retry_attempts += 1 + task.retry_time = datetime.now() + min( + TASK_RETRY_INTERVAL * task.retry_attempts, + TASK_MAX_RETRY_INTERVAL) def progress_callback(self, task, current, total): progress = float(current) / float(total) @@ -416,7 +430,12 @@ class TaskHandler: finally: self.lock.release() - self.throttled_update_progress(task, progress) + if not task.progress_posted: + # delayed creation of progress event until first callback + self.post_progress(task, progress) + else: + self.throttled_update_progress(task, progress) + return 0 def execute_flatten(self, ioctx, task): @@ -492,6 +511,10 @@ class TaskHandler: self.log.info("{}: task={}".format(task.failure_message, str(task))) def complete_progress(self, task): + if not task.progress_posted: + # ensure progress event exists before we complete/fail it + self.post_progress(task, 0) + self.log.debug("complete_progress: task={}".format(str(task))) try: if task.failed: @@ -503,7 +526,7 @@ class TaskHandler: # progress module is disabled pass - def update_progress(self, task, progress): + def _update_progress(self, task, progress): self.log.debug("update_progress: task={}, progress={}".format(str(task), progress)) try: refs = {"origin": "rbd_support"} @@ -515,6 +538,14 @@ class TaskHandler: # progress module is disabled pass + def post_progress(self, task, progress): + self._update_progress(task, progress) + task.progress_posted = True + + def update_progress(self, task, progress): + if task.progress_posted: + self._update_progress(task, progress) + @Throttle(timedelta(seconds=1)) def throttled_update_progress(self, task, progress): self.update_progress(task, progress) diff --git a/src/pybind/mgr/requirements.txt b/src/pybind/mgr/requirements.txt index 63840e252f8..88b9c1b855b 100644 --- a/src/pybind/mgr/requirements.txt +++ b/src/pybind/mgr/requirements.txt @@ -9,4 +9,3 @@ execnet remoto Jinja2 pyfakefs -urllib3==1.25.11 diff 
--git a/src/pybind/mgr/rook/module.py b/src/pybind/mgr/rook/module.py index 1d6707237ab..362319a8f69 100644 --- a/src/pybind/mgr/rook/module.py +++ b/src/pybind/mgr/rook/module.py @@ -374,7 +374,6 @@ class RookOrchestrator(MgrModule, orchestrator.Orchestrator): for p in pods: sd = orchestrator.DaemonDescription() sd.hostname = p['hostname'] - sd.container_id = p['name'] sd.daemon_type = p['labels']['app'].replace('rook-ceph-', '') status = { 'Pending': -1, @@ -397,6 +396,7 @@ class RookOrchestrator(MgrModule, orchestrator.Orchestrator): if service_name is not None and service_name != sd.service_name(): continue sd.container_image_name = p['container_image_name'] + sd.container_image_id = p['container_image_id'] sd.created = p['created'] sd.last_configured = p['created'] sd.last_deployed = p['created'] diff --git a/src/pybind/mgr/rook/rook_cluster.py b/src/pybind/mgr/rook/rook_cluster.py index f6f8cdc717d..c842b3579ca 100644 --- a/src/pybind/mgr/rook/rook_cluster.py +++ b/src/pybind/mgr/rook/rook_cluster.py @@ -197,7 +197,7 @@ class RookCluster(object): self.rook_pods = KubernetesResource(self.coreV1_api.list_namespaced_pod, namespace=self.rook_env.namespace, label_selector="rook_cluster={0}".format( - self.rook_env.cluster_name)) + self.rook_env.namespace)) self.nodes = KubernetesResource(self.coreV1_api.list_node) def rook_url(self, path): @@ -284,7 +284,7 @@ class RookCluster(object): rook_cluster=rook And MDS containers additionally have `rook_filesystem` label - Label filter is rook_cluster=<cluster name> + Label filter is rook_cluster=<cluster namespace> rook_file_system=<self.fs_name> """ def predicate(item): @@ -319,6 +319,7 @@ class RookCluster(object): pods = [i for i in self.rook_pods.items if predicate(i)] pods_summary = [] + prefix = 'sha256:' for p in pods: d = p.to_dict() @@ -329,12 +330,16 @@ class RookCluster(object): image_name = c['image'] break + image_id = d['status']['container_statuses'][0]['image_id'] + image_id = image_id.split(prefix)[1] if 
prefix in image_id else image_id + s = { "name": d['metadata']['name'], "hostname": d['spec']['node_name'], "labels": d['metadata']['labels'], 'phase': d['status']['phase'], 'container_image_name': image_name, + 'container_image_id': image_id, 'refreshed': refreshed, # these may get set below... 'started': None, diff --git a/src/pybind/mgr/tests/test_tls.py b/src/pybind/mgr/tests/test_tls.py new file mode 100644 index 00000000000..923d91917ca --- /dev/null +++ b/src/pybind/mgr/tests/test_tls.py @@ -0,0 +1,35 @@ +from mgr_util import create_self_signed_cert, verify_tls, ServerConfigException +from OpenSSL import crypto, SSL + +import unittest + + +class TLSchecks(unittest.TestCase): + + def test_defaults(self): + crt, key = create_self_signed_cert() + verify_tls(crt, key) + + def test_specific_dname(self): + crt, key = create_self_signed_cert(dname={'O': 'Ceph', 'OU': 'testsuite'}) + verify_tls(crt, key) + + def test_invalid_RDN(self): + self.assertRaises(ValueError, create_self_signed_cert, dname={'O': 'Ceph', 'Bogus': 'testsuite'}) + + def test_invalid_key(self): + crt, key = create_self_signed_cert() + + # fudge the key, to force an error to be detected during verify_tls + fudged = f"{key[:-35]}c0ffee==\n{key[-25:]}".encode('utf-8') + self.assertRaises(ServerConfigException, verify_tls, crt, fudged) + + def test_mismatched_tls(self): + crt, _ = create_self_signed_cert() + + # generate another key + new_key = crypto.PKey() + new_key.generate_key(crypto.TYPE_RSA, 2048) + new_key = crypto.dump_privatekey(crypto.FILETYPE_PEM, new_key).decode('utf-8') + + self.assertRaises(SSL.Error, verify_tls, crt, new_key) diff --git a/src/pybind/rados/c_rados.pxd b/src/pybind/rados/c_rados.pxd new file mode 100644 index 00000000000..75ee9a51298 --- /dev/null +++ b/src/pybind/rados/c_rados.pxd @@ -0,0 +1,295 @@ +# cython: embedsignature=True + +from libc.stdint cimport * +from ctime cimport time_t, timeval +from rados cimport rados_t, rados_config_t, rados_ioctx_t + +cdef extern 
from "err.h" nogil: + cdef int _MAX_ERRNO "MAX_ERRNO" + + +cdef extern from "rados/rados_types.h" nogil: + cdef char* _LIBRADOS_ALL_NSPACES "LIBRADOS_ALL_NSPACES" + cdef struct notify_ack_t: + unsigned long notifier_id + unsigned long cookie + char *payload + unsigned long payload_len + + cdef struct notify_timeout_t: + unsigned long notifier_id + unsigned long cookie + +cdef extern from "rados/librados.h" nogil: + enum: + _LIBRADOS_OP_FLAG_EXCL "LIBRADOS_OP_FLAG_EXCL" + _LIBRADOS_OP_FLAG_FAILOK "LIBRADOS_OP_FLAG_FAILOK" + _LIBRADOS_OP_FLAG_FADVISE_RANDOM "LIBRADOS_OP_FLAG_FADVISE_RANDOM" + _LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL "LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL" + _LIBRADOS_OP_FLAG_FADVISE_WILLNEED "LIBRADOS_OP_FLAG_FADVISE_WILLNEED" + _LIBRADOS_OP_FLAG_FADVISE_DONTNEED "LIBRADOS_OP_FLAG_FADVISE_DONTNEED" + _LIBRADOS_OP_FLAG_FADVISE_NOCACHE "LIBRADOS_OP_FLAG_FADVISE_NOCACHE" + + + enum: + _LIBRADOS_OPERATION_NOFLAG "LIBRADOS_OPERATION_NOFLAG" + _LIBRADOS_OPERATION_BALANCE_READS "LIBRADOS_OPERATION_BALANCE_READS" + _LIBRADOS_OPERATION_LOCALIZE_READS "LIBRADOS_OPERATION_LOCALIZE_READS" + _LIBRADOS_OPERATION_ORDER_READS_WRITES "LIBRADOS_OPERATION_ORDER_READS_WRITES" + _LIBRADOS_OPERATION_IGNORE_CACHE "LIBRADOS_OPERATION_IGNORE_CACHE" + _LIBRADOS_OPERATION_SKIPRWLOCKS "LIBRADOS_OPERATION_SKIPRWLOCKS" + _LIBRADOS_OPERATION_IGNORE_OVERLAY "LIBRADOS_OPERATION_IGNORE_OVERLAY" + _LIBRADOS_CREATE_EXCLUSIVE "LIBRADOS_CREATE_EXCLUSIVE" + _LIBRADOS_CREATE_IDEMPOTENT "LIBRADOS_CREATE_IDEMPOTENT" + + cdef uint64_t _LIBRADOS_SNAP_HEAD "LIBRADOS_SNAP_HEAD" + + ctypedef void* rados_xattrs_iter_t + ctypedef void* rados_omap_iter_t + ctypedef void* rados_list_ctx_t + ctypedef uint64_t rados_snap_t + ctypedef void *rados_write_op_t + ctypedef void *rados_read_op_t + ctypedef void *rados_completion_t + ctypedef void (*rados_callback_t)(rados_completion_t cb, void *arg) + ctypedef void (*rados_log_callback_t)(void *arg, const char *line, const char *who, + uint64_t sec, uint64_t nsec, 
uint64_t seq, const char *level, const char *msg) + ctypedef void (*rados_log_callback2_t)(void *arg, const char *line, const char *channel, const char *who, const char *name, + uint64_t sec, uint64_t nsec, uint64_t seq, const char *level, const char *msg) + ctypedef void (*rados_watchcb2_t)(void *arg, int64_t notify_id, + uint64_t handle, uint64_t notifier_id, + void *data, size_t data_len) + ctypedef void (*rados_watcherrcb_t)(void *pre, uint64_t cookie, int err) + + + cdef struct rados_cluster_stat_t: + uint64_t kb + uint64_t kb_used + uint64_t kb_avail + uint64_t num_objects + + cdef struct rados_pool_stat_t: + uint64_t num_bytes + uint64_t num_kb + uint64_t num_objects + uint64_t num_object_clones + uint64_t num_object_copies + uint64_t num_objects_missing_on_primary + uint64_t num_objects_unfound + uint64_t num_objects_degraded + uint64_t num_rd + uint64_t num_rd_kb + uint64_t num_wr + uint64_t num_wr_kb + + void rados_buffer_free(char *buf) + + void rados_version(int *major, int *minor, int *extra) + int rados_create2(rados_t *pcluster, const char *const clustername, + const char * const name, uint64_t flags) + int rados_create_with_context(rados_t *cluster, rados_config_t cct) + int rados_connect(rados_t cluster) + void rados_shutdown(rados_t cluster) + uint64_t rados_get_instance_id(rados_t cluster) + int rados_conf_read_file(rados_t cluster, const char *path) + int rados_conf_parse_argv_remainder(rados_t cluster, int argc, const char **argv, const char **remargv) + int rados_conf_parse_env(rados_t cluster, const char *var) + int rados_conf_set(rados_t cluster, char *option, const char *value) + int rados_conf_get(rados_t cluster, char *option, char *buf, size_t len) + + rados_t rados_ioctx_get_cluster(rados_ioctx_t io) + int rados_ioctx_pool_stat(rados_ioctx_t io, rados_pool_stat_t *stats) + int64_t rados_pool_lookup(rados_t cluster, const char *pool_name) + int rados_pool_reverse_lookup(rados_t cluster, int64_t id, char *buf, size_t maxlen) + int 
rados_pool_create(rados_t cluster, const char *pool_name) + int rados_pool_create_with_crush_rule(rados_t cluster, const char *pool_name, uint8_t crush_rule_num) + int rados_pool_create_with_auid(rados_t cluster, const char *pool_name, uint64_t auid) + int rados_pool_create_with_all(rados_t cluster, const char *pool_name, uint64_t auid, uint8_t crush_rule_num) + int rados_pool_get_base_tier(rados_t cluster, int64_t pool, int64_t *base_tier) + int rados_pool_list(rados_t cluster, char *buf, size_t len) + int rados_pool_delete(rados_t cluster, const char *pool_name) + int rados_inconsistent_pg_list(rados_t cluster, int64_t pool, char *buf, size_t len) + + int rados_cluster_stat(rados_t cluster, rados_cluster_stat_t *result) + int rados_cluster_fsid(rados_t cluster, char *buf, size_t len) + int rados_blocklist_add(rados_t cluster, char *client_address, uint32_t expire_seconds) + int rados_getaddrs(rados_t cluster, char** addrs) + int rados_application_enable(rados_ioctx_t io, const char *app_name, + int force) + void rados_set_pool_full_try(rados_ioctx_t io) + void rados_unset_pool_full_try(rados_ioctx_t io) + int rados_application_list(rados_ioctx_t io, char *values, + size_t *values_len) + int rados_application_metadata_get(rados_ioctx_t io, const char *app_name, + const char *key, char *value, + size_t *value_len) + int rados_application_metadata_set(rados_ioctx_t io, const char *app_name, + const char *key, const char *value) + int rados_application_metadata_remove(rados_ioctx_t io, + const char *app_name, const char *key) + int rados_application_metadata_list(rados_ioctx_t io, + const char *app_name, char *keys, + size_t *key_len, char *values, + size_t *value_len) + int rados_ping_monitor(rados_t cluster, const char *mon_id, char **outstr, size_t *outstrlen) + int rados_mon_command(rados_t cluster, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) + int 
rados_mgr_command(rados_t cluster, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) + int rados_mgr_command_target(rados_t cluster, + const char *name, + const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) + int rados_mon_command_target(rados_t cluster, const char *name, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) + int rados_osd_command(rados_t cluster, int osdid, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) + int rados_pg_command(rados_t cluster, const char *pgstr, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) + int rados_monitor_log(rados_t cluster, const char *level, rados_log_callback_t cb, void *arg) + int rados_monitor_log2(rados_t cluster, const char *level, rados_log_callback2_t cb, void *arg) + + int rados_wait_for_latest_osdmap(rados_t cluster) + + int rados_service_register(rados_t cluster, const char *service, const char *daemon, const char *metadata_dict) + int rados_service_update_status(rados_t cluster, const char *status_dict) + + int rados_ioctx_create(rados_t cluster, const char *pool_name, rados_ioctx_t *ioctx) + int rados_ioctx_create2(rados_t cluster, int64_t pool_id, rados_ioctx_t *ioctx) + void rados_ioctx_destroy(rados_ioctx_t io) + void rados_ioctx_locator_set_key(rados_ioctx_t io, const char *key) + void rados_ioctx_set_namespace(rados_ioctx_t io, const char * nspace) + + uint64_t rados_get_last_version(rados_ioctx_t io) + int rados_stat(rados_ioctx_t io, const char *o, uint64_t *psize, time_t *pmtime) + int rados_write(rados_ioctx_t io, const char *oid, const char *buf, size_t len, 
uint64_t off) + int rados_write_full(rados_ioctx_t io, const char *oid, const char *buf, size_t len) + int rados_writesame(rados_ioctx_t io, const char *oid, const char *buf, size_t data_len, size_t write_len, uint64_t off) + int rados_append(rados_ioctx_t io, const char *oid, const char *buf, size_t len) + int rados_read(rados_ioctx_t io, const char *oid, char *buf, size_t len, uint64_t off) + int rados_remove(rados_ioctx_t io, const char *oid) + int rados_trunc(rados_ioctx_t io, const char *oid, uint64_t size) + int rados_cmpext(rados_ioctx_t io, const char *o, const char *cmp_buf, size_t cmp_len, uint64_t off) + int rados_getxattr(rados_ioctx_t io, const char *o, const char *name, char *buf, size_t len) + int rados_setxattr(rados_ioctx_t io, const char *o, const char *name, const char *buf, size_t len) + int rados_rmxattr(rados_ioctx_t io, const char *o, const char *name) + int rados_getxattrs(rados_ioctx_t io, const char *oid, rados_xattrs_iter_t *iter) + int rados_getxattrs_next(rados_xattrs_iter_t iter, const char **name, const char **val, size_t *len) + void rados_getxattrs_end(rados_xattrs_iter_t iter) + + int rados_nobjects_list_open(rados_ioctx_t io, rados_list_ctx_t *ctx) + int rados_nobjects_list_next(rados_list_ctx_t ctx, const char **entry, const char **key, const char **nspace) + void rados_nobjects_list_close(rados_list_ctx_t ctx) + + int rados_ioctx_pool_requires_alignment2(rados_ioctx_t io, int * requires) + int rados_ioctx_pool_required_alignment2(rados_ioctx_t io, uint64_t * alignment) + + int rados_ioctx_snap_rollback(rados_ioctx_t io, const char * oid, const char * snapname) + int rados_ioctx_snap_create(rados_ioctx_t io, const char * snapname) + int rados_ioctx_snap_remove(rados_ioctx_t io, const char * snapname) + int rados_ioctx_snap_lookup(rados_ioctx_t io, const char * name, rados_snap_t * id) + int rados_ioctx_snap_get_name(rados_ioctx_t io, rados_snap_t id, char * name, int maxlen) + void rados_ioctx_snap_set_read(rados_ioctx_t io, 
rados_snap_t snap) + int rados_ioctx_snap_list(rados_ioctx_t io, rados_snap_t * snaps, int maxlen) + int rados_ioctx_snap_get_stamp(rados_ioctx_t io, rados_snap_t id, time_t * t) + int64_t rados_ioctx_get_id(rados_ioctx_t io) + int rados_ioctx_get_pool_name(rados_ioctx_t io, char *buf, unsigned maxlen) + + int rados_ioctx_selfmanaged_snap_create(rados_ioctx_t io, + rados_snap_t *snapid) + int rados_ioctx_selfmanaged_snap_remove(rados_ioctx_t io, + rados_snap_t snapid) + int rados_ioctx_selfmanaged_snap_set_write_ctx(rados_ioctx_t io, + rados_snap_t snap_seq, + rados_snap_t *snap, + int num_snaps) + int rados_ioctx_selfmanaged_snap_rollback(rados_ioctx_t io, const char *oid, + rados_snap_t snapid) + + int rados_lock_exclusive(rados_ioctx_t io, const char * oid, const char * name, + const char * cookie, const char * desc, + timeval * duration, uint8_t flags) + int rados_lock_shared(rados_ioctx_t io, const char * o, const char * name, + const char * cookie, const char * tag, const char * desc, + timeval * duration, uint8_t flags) + int rados_unlock(rados_ioctx_t io, const char * o, const char * name, const char * cookie) + + rados_write_op_t rados_create_write_op() + void rados_release_write_op(rados_write_op_t write_op) + + rados_read_op_t rados_create_read_op() + void rados_release_read_op(rados_read_op_t read_op) + + int rados_aio_create_completion2(void * cb_arg, rados_callback_t cb_complete, rados_completion_t * pc) + void rados_aio_release(rados_completion_t c) + int rados_aio_stat(rados_ioctx_t io, const char *oid, rados_completion_t completion, uint64_t *psize, time_t *pmtime) + int rados_aio_write(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * buf, size_t len, uint64_t off) + int rados_aio_append(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * buf, size_t len) + int rados_aio_write_full(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * buf, size_t len) + int 
rados_aio_writesame(rados_ioctx_t io, const char *oid, rados_completion_t completion, const char *buf, size_t data_len, size_t write_len, uint64_t off) + int rados_aio_remove(rados_ioctx_t io, const char * oid, rados_completion_t completion) + int rados_aio_read(rados_ioctx_t io, const char * oid, rados_completion_t completion, char * buf, size_t len, uint64_t off) + int rados_aio_flush(rados_ioctx_t io) + int rados_aio_cmpext(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *cmp_buf, size_t cmp_len, uint64_t off) + int rados_aio_rmxattr(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *name) + + int rados_aio_get_return_value(rados_completion_t c) + int rados_aio_wait_for_complete_and_cb(rados_completion_t c) + int rados_aio_wait_for_complete(rados_completion_t c) + int rados_aio_is_complete(rados_completion_t c) + + int rados_exec(rados_ioctx_t io, const char * oid, const char * cls, const char * method, + const char * in_buf, size_t in_len, char * buf, size_t out_len) + int rados_aio_exec(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * cls, const char * method, + const char * in_buf, size_t in_len, char * buf, size_t out_len) + int rados_aio_setxattr(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *name, const char *buf, size_t len) + + int rados_write_op_operate(rados_write_op_t write_op, rados_ioctx_t io, const char * oid, time_t * mtime, int flags) + int rados_aio_write_op_operate(rados_write_op_t write_op, rados_ioctx_t io, rados_completion_t completion, const char *oid, time_t *mtime, int flags) + void rados_write_op_omap_set(rados_write_op_t write_op, const char * const* keys, const char * const* vals, const size_t * lens, size_t num) + void rados_write_op_omap_rm_keys(rados_write_op_t write_op, const char * const* keys, size_t keys_len) + void rados_write_op_omap_clear(rados_write_op_t write_op) + void 
rados_write_op_omap_rm_range2(rados_write_op_t write_op, const char *key_begin, size_t key_begin_len, const char *key_end, size_t key_end_len) + void rados_write_op_set_flags(rados_write_op_t write_op, int flags) + void rados_write_op_setxattr(rados_write_op_t write_op, const char *name, const char *value, size_t value_len) + void rados_write_op_rmxattr(rados_write_op_t write_op, const char *name) + + void rados_write_op_create(rados_write_op_t write_op, int exclusive, const char *category) + void rados_write_op_append(rados_write_op_t write_op, const char *buffer, size_t len) + void rados_write_op_write_full(rados_write_op_t write_op, const char *buffer, size_t len) + void rados_write_op_assert_version(rados_write_op_t write_op, uint64_t ver) + void rados_write_op_write(rados_write_op_t write_op, const char *buffer, size_t len, uint64_t offset) + void rados_write_op_remove(rados_write_op_t write_op) + void rados_write_op_truncate(rados_write_op_t write_op, uint64_t offset) + void rados_write_op_zero(rados_write_op_t write_op, uint64_t offset, uint64_t len) + void rados_write_op_exec(rados_write_op_t write_op, const char *cls, const char *method, const char *in_buf, size_t in_len, int *prval) + void rados_write_op_writesame(rados_write_op_t write_op, const char *buffer, size_t data_len, size_t write_len, uint64_t offset) + void rados_read_op_omap_get_vals2(rados_read_op_t read_op, const char * start_after, const char * filter_prefix, uint64_t max_return, rados_omap_iter_t * iter, unsigned char *pmore, int * prval) + void rados_read_op_omap_get_keys2(rados_read_op_t read_op, const char * start_after, uint64_t max_return, rados_omap_iter_t * iter, unsigned char *pmore, int * prval) + void rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op, const char * const* keys, size_t keys_len, rados_omap_iter_t * iter, int * prval) + int rados_read_op_operate(rados_read_op_t read_op, rados_ioctx_t io, const char * oid, int flags) + int 
rados_aio_read_op_operate(rados_read_op_t read_op, rados_ioctx_t io, rados_completion_t completion, const char *oid, int flags) + void rados_read_op_set_flags(rados_read_op_t read_op, int flags) + int rados_omap_get_next(rados_omap_iter_t iter, const char * const* key, const char * const* val, size_t * len) + void rados_omap_get_end(rados_omap_iter_t iter) + int rados_notify2(rados_ioctx_t io, const char * o, const char *buf, int buf_len, uint64_t timeout_ms, char **reply_buffer, size_t *reply_buffer_len) + int rados_aio_notify(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * buf, int len, uint64_t timeout_ms, char **reply_buffer, size_t *reply_buffer_len) + int rados_decode_notify_response(char *reply_buffer, size_t reply_buffer_len, notify_ack_t **acks, size_t *nr_acks, notify_timeout_t **timeouts, size_t *nr_timeouts) + void rados_free_notify_response(notify_ack_t *acks, size_t nr_acks, notify_timeout_t *timeouts) + int rados_notify_ack(rados_ioctx_t io, const char *o, uint64_t notify_id, uint64_t cookie, const char *buf, int buf_len) + int rados_watch3(rados_ioctx_t io, const char *o, uint64_t *cookie, rados_watchcb2_t watchcb, rados_watcherrcb_t watcherrcb, uint32_t timeout, void *arg) + int rados_watch_check(rados_ioctx_t io, uint64_t cookie) + int rados_unwatch2(rados_ioctx_t io, uint64_t cookie) + int rados_watch_flush(rados_t cluster) diff --git a/src/pybind/rados/ctime.pxd b/src/pybind/rados/ctime.pxd new file mode 100644 index 00000000000..3e16ce4c207 --- /dev/null +++ b/src/pybind/rados/ctime.pxd @@ -0,0 +1,11 @@ +# cython: embedsignature=True + +cdef extern from "time.h": + ctypedef long int time_t + ctypedef long int suseconds_t + + +cdef extern from "sys/time.h": + cdef struct timeval: + time_t tv_sec + suseconds_t tv_usec diff --git a/src/pybind/rados/mock_rados.pxi b/src/pybind/rados/mock_rados.pxi new file mode 100644 index 00000000000..dcd79ed66ad --- /dev/null +++ b/src/pybind/rados/mock_rados.pxi @@ -0,0 +1,449 @@ 
+# cython: embedsignature=True + +from libc.stdint cimport * +from ctime cimport time_t, timeval + +# mirrors the structure of c_rados, but instead *defines* the rados functions + +# err.h +cdef: + int _MAX_ERRNO "MAX_ERRNO" + +# rados/rados_types.h +cdef: + char* _LIBRADOS_ALL_NSPACES = "\001" + struct notify_ack_t: + unsigned long notifier_id + unsigned long cookie + char *payload + unsigned long payload_len + + struct notify_timeout_t: + unsigned long notifier_id + unsigned long cookie + +# rados/librados.h +cdef nogil: + enum: + _LIBRADOS_OP_FLAG_EXCL "LIBRADOS_OP_FLAG_EXCL" + _LIBRADOS_OP_FLAG_FAILOK "LIBRADOS_OP_FLAG_FAILOK" + _LIBRADOS_OP_FLAG_FADVISE_RANDOM "LIBRADOS_OP_FLAG_FADVISE_RANDOM" + _LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL "LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL" + _LIBRADOS_OP_FLAG_FADVISE_WILLNEED "LIBRADOS_OP_FLAG_FADVISE_WILLNEED" + _LIBRADOS_OP_FLAG_FADVISE_DONTNEED "LIBRADOS_OP_FLAG_FADVISE_DONTNEED" + _LIBRADOS_OP_FLAG_FADVISE_NOCACHE "LIBRADOS_OP_FLAG_FADVISE_NOCACHE" + + + enum: + _LIBRADOS_OPERATION_NOFLAG "LIBRADOS_OPERATION_NOFLAG" + _LIBRADOS_OPERATION_BALANCE_READS "LIBRADOS_OPERATION_BALANCE_READS" + _LIBRADOS_OPERATION_LOCALIZE_READS "LIBRADOS_OPERATION_LOCALIZE_READS" + _LIBRADOS_OPERATION_ORDER_READS_WRITES "LIBRADOS_OPERATION_ORDER_READS_WRITES" + _LIBRADOS_OPERATION_IGNORE_CACHE "LIBRADOS_OPERATION_IGNORE_CACHE" + _LIBRADOS_OPERATION_SKIPRWLOCKS "LIBRADOS_OPERATION_SKIPRWLOCKS" + _LIBRADOS_OPERATION_IGNORE_OVERLAY "LIBRADOS_OPERATION_IGNORE_OVERLAY" + _LIBRADOS_CREATE_EXCLUSIVE "LIBRADOS_CREATE_EXCLUSIVE" + _LIBRADOS_CREATE_IDEMPOTENT "LIBRADOS_CREATE_IDEMPOTENT" + + uint64_t _LIBRADOS_SNAP_HEAD "LIBRADOS_SNAP_HEAD" + + ctypedef void* rados_xattrs_iter_t + ctypedef void* rados_omap_iter_t + ctypedef void* rados_list_ctx_t + ctypedef uint64_t rados_snap_t + ctypedef void *rados_write_op_t + ctypedef void *rados_read_op_t + ctypedef void *rados_completion_t + ctypedef void (*rados_callback_t)(rados_completion_t cb, void *arg) + ctypedef 
void (*rados_log_callback_t)(void *arg, const char *line, const char *who, + uint64_t sec, uint64_t nsec, uint64_t seq, const char *level, const char *msg) + ctypedef void (*rados_log_callback2_t)(void *arg, const char *line, const char *channel, const char *who, const char *name, + uint64_t sec, uint64_t nsec, uint64_t seq, const char *level, const char *msg) + ctypedef void (*rados_watchcb2_t)(void *arg, int64_t notify_id, + uint64_t handle, uint64_t notifier_id, + void *data, size_t data_len) + ctypedef void (*rados_watcherrcb_t)(void *pre, uint64_t cookie, int err) + + + struct rados_cluster_stat_t: + uint64_t kb + uint64_t kb_used + uint64_t kb_avail + uint64_t num_objects + + struct rados_pool_stat_t: + uint64_t num_bytes + uint64_t num_kb + uint64_t num_objects + uint64_t num_object_clones + uint64_t num_object_copies + uint64_t num_objects_missing_on_primary + uint64_t num_objects_unfound + uint64_t num_objects_degraded + uint64_t num_rd + uint64_t num_rd_kb + uint64_t num_wr + uint64_t num_wr_kb + + void rados_buffer_free(char *buf): + pass + + void rados_version(int *major, int *minor, int *extra): + pass + + int rados_create2(rados_t *pcluster, const char *const clustername, + const char * const name, uint64_t flags): + pass + + int rados_create_with_context(rados_t *cluster, rados_config_t cct): + pass + int rados_connect(rados_t cluster): + pass + void rados_shutdown(rados_t cluster): + pass + cdef uint64_t rados_get_instance_id(rados_t cluster): + pass + int rados_conf_read_file(rados_t cluster, const char *path): + pass + int rados_conf_parse_argv_remainder(rados_t cluster, int argc, const char **argv, const char **remargv): + pass + int rados_conf_parse_env(rados_t cluster, const char *var): + pass + int rados_conf_set(rados_t cluster, char *option, const char *value): + pass + int rados_conf_get(rados_t cluster, char *option, char *buf, size_t len): + pass + + rados_t rados_ioctx_get_cluster(rados_ioctx_t io): + pass + int 
rados_ioctx_pool_stat(rados_ioctx_t io, rados_pool_stat_t *stats): + pass + int64_t rados_pool_lookup(rados_t cluster, const char *pool_name): + pass + int rados_pool_reverse_lookup(rados_t cluster, int64_t id, char *buf, size_t maxlen): + pass + int rados_pool_create(rados_t cluster, const char *pool_name): + pass + int rados_pool_create_with_crush_rule(rados_t cluster, const char *pool_name, uint8_t crush_rule_num): + pass + int rados_pool_create_with_auid(rados_t cluster, const char *pool_name, uint64_t auid): + pass + int rados_pool_create_with_all(rados_t cluster, const char *pool_name, uint64_t auid, uint8_t crush_rule_num): + pass + int rados_pool_get_base_tier(rados_t cluster, int64_t pool, int64_t *base_tier): + pass + int rados_pool_list(rados_t cluster, char *buf, size_t len): + pass + int rados_pool_delete(rados_t cluster, const char *pool_name): + pass + int rados_inconsistent_pg_list(rados_t cluster, int64_t pool, char *buf, size_t len): + pass + + int rados_cluster_stat(rados_t cluster, rados_cluster_stat_t *result): + pass + int rados_cluster_fsid(rados_t cluster, char *buf, size_t len): + pass + int rados_blocklist_add(rados_t cluster, char *client_address, uint32_t expire_seconds): + pass + int rados_getaddrs(rados_t cluster, char** addrs): + pass + int rados_application_enable(rados_ioctx_t io, const char *app_name, + int force): + pass + void rados_set_pool_full_try(rados_ioctx_t io): + pass + void rados_unset_pool_full_try(rados_ioctx_t io): + pass + int rados_application_list(rados_ioctx_t io, char *values, + size_t *values_len): + pass + int rados_application_metadata_get(rados_ioctx_t io, const char *app_name, + const char *key, char *value, + size_t *value_len): + pass + int rados_application_metadata_set(rados_ioctx_t io, const char *app_name, + const char *key, const char *value): + pass + int rados_application_metadata_remove(rados_ioctx_t io, + const char *app_name, const char *key): + pass + int 
rados_application_metadata_list(rados_ioctx_t io, + const char *app_name, char *keys, + size_t *key_len, char *values, + size_t *value_len): + pass + int rados_ping_monitor(rados_t cluster, const char *mon_id, char **outstr, size_t *outstrlen): + pass + int rados_mon_command(rados_t cluster, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen): + pass + int rados_mgr_command(rados_t cluster, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen): + pass + int rados_mgr_command_target(rados_t cluster, + const char *name, + const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen): + pass + int rados_mon_command_target(rados_t cluster, const char *name, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen): + pass + int rados_osd_command(rados_t cluster, int osdid, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen): + pass + int rados_pg_command(rados_t cluster, const char *pgstr, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen): + pass + int rados_monitor_log(rados_t cluster, const char *level, rados_log_callback_t cb, void *arg): + pass + int rados_monitor_log2(rados_t cluster, const char *level, rados_log_callback2_t cb, void *arg): + pass + + int rados_wait_for_latest_osdmap(rados_t cluster): + pass + + int rados_service_register(rados_t cluster, const char *service, const char *daemon, const char *metadata_dict): + pass + int rados_service_update_status(rados_t cluster, const char *status_dict): + pass + + int rados_ioctx_create(rados_t cluster, const char 
*pool_name, rados_ioctx_t *ioctx): + pass + int rados_ioctx_create2(rados_t cluster, int64_t pool_id, rados_ioctx_t *ioctx): + pass + void rados_ioctx_destroy(rados_ioctx_t io): + pass + void rados_ioctx_locator_set_key(rados_ioctx_t io, const char *key): + pass + void rados_ioctx_set_namespace(rados_ioctx_t io, const char * nspace): + pass + + cdef uint64_t rados_get_last_version(rados_ioctx_t io): + pass + int rados_stat(rados_ioctx_t io, const char *o, uint64_t *psize, time_t *pmtime): + pass + int rados_write(rados_ioctx_t io, const char *oid, const char *buf, size_t len, uint64_t off): + pass + int rados_write_full(rados_ioctx_t io, const char *oid, const char *buf, size_t len): + pass + int rados_writesame(rados_ioctx_t io, const char *oid, const char *buf, size_t data_len, size_t write_len, uint64_t off): + pass + int rados_append(rados_ioctx_t io, const char *oid, const char *buf, size_t len): + pass + int rados_read(rados_ioctx_t io, const char *oid, char *buf, size_t len, uint64_t off): + pass + int rados_remove(rados_ioctx_t io, const char *oid): + pass + int rados_trunc(rados_ioctx_t io, const char *oid, uint64_t size): + pass + int rados_cmpext(rados_ioctx_t io, const char *o, const char *cmp_buf, size_t cmp_len, uint64_t off): + pass + int rados_getxattr(rados_ioctx_t io, const char *o, const char *name, char *buf, size_t len): + pass + int rados_setxattr(rados_ioctx_t io, const char *o, const char *name, const char *buf, size_t len): + pass + int rados_rmxattr(rados_ioctx_t io, const char *o, const char *name): + pass + int rados_getxattrs(rados_ioctx_t io, const char *oid, rados_xattrs_iter_t *iter): + pass + int rados_getxattrs_next(rados_xattrs_iter_t iter, const char **name, const char **val, size_t *len): + pass + void rados_getxattrs_end(rados_xattrs_iter_t iter): + pass + + int rados_nobjects_list_open(rados_ioctx_t io, rados_list_ctx_t *ctx): + pass + int rados_nobjects_list_next(rados_list_ctx_t ctx, const char **entry, const char **key, 
const char **nspace): + pass + void rados_nobjects_list_close(rados_list_ctx_t ctx): + pass + + int rados_ioctx_pool_requires_alignment2(rados_ioctx_t io, int * requires): + pass + int rados_ioctx_pool_required_alignment2(rados_ioctx_t io, uint64_t * alignment): + pass + + int rados_ioctx_snap_rollback(rados_ioctx_t io, const char * oid, const char * snapname): + pass + int rados_ioctx_snap_create(rados_ioctx_t io, const char * snapname): + pass + int rados_ioctx_snap_remove(rados_ioctx_t io, const char * snapname): + pass + int rados_ioctx_snap_lookup(rados_ioctx_t io, const char * name, rados_snap_t * id): + pass + int rados_ioctx_snap_get_name(rados_ioctx_t io, rados_snap_t id, char * name, int maxlen): + pass + void rados_ioctx_snap_set_read(rados_ioctx_t io, rados_snap_t snap): + pass + int rados_ioctx_snap_list(rados_ioctx_t io, rados_snap_t * snaps, int maxlen): + pass + int rados_ioctx_snap_get_stamp(rados_ioctx_t io, rados_snap_t id, time_t * t): + pass + int64_t rados_ioctx_get_id(rados_ioctx_t io): + pass + int rados_ioctx_get_pool_name(rados_ioctx_t io, char *buf, unsigned maxlen): + pass + + int rados_ioctx_selfmanaged_snap_create(rados_ioctx_t io, + rados_snap_t *snapid): + pass + int rados_ioctx_selfmanaged_snap_remove(rados_ioctx_t io, + rados_snap_t snapid): + pass + int rados_ioctx_selfmanaged_snap_set_write_ctx(rados_ioctx_t io, + rados_snap_t snap_seq, + rados_snap_t *snap, + int num_snaps): + pass + int rados_ioctx_selfmanaged_snap_rollback(rados_ioctx_t io, const char *oid, + rados_snap_t snapid): + pass + + int rados_lock_exclusive(rados_ioctx_t io, const char * oid, const char * name, + const char * cookie, const char * desc, + timeval * duration, uint8_t flags): + pass + int rados_lock_shared(rados_ioctx_t io, const char * o, const char * name, + const char * cookie, const char * tag, const char * desc, + timeval * duration, uint8_t flags): + pass + int rados_unlock(rados_ioctx_t io, const char * o, const char * name, const char * cookie): 
+ pass + + rados_write_op_t rados_create_write_op(): + pass + void rados_release_write_op(rados_write_op_t write_op): + pass + + rados_read_op_t rados_create_read_op(): + pass + void rados_release_read_op(rados_read_op_t read_op): + pass + + int rados_aio_create_completion2(void * cb_arg, rados_callback_t cb_complete, rados_completion_t * pc): + pass + void rados_aio_release(rados_completion_t c): + pass + int rados_aio_stat(rados_ioctx_t io, const char *oid, rados_completion_t completion, uint64_t *psize, time_t *pmtime): + pass + int rados_aio_write(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * buf, size_t len, uint64_t off): + pass + int rados_aio_append(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * buf, size_t len): + pass + int rados_aio_write_full(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * buf, size_t len): + pass + int rados_aio_writesame(rados_ioctx_t io, const char *oid, rados_completion_t completion, const char *buf, size_t data_len, size_t write_len, uint64_t off): + pass + int rados_aio_remove(rados_ioctx_t io, const char * oid, rados_completion_t completion): + pass + int rados_aio_read(rados_ioctx_t io, const char * oid, rados_completion_t completion, char * buf, size_t len, uint64_t off): + pass + int rados_aio_flush(rados_ioctx_t io): + pass + int rados_aio_cmpext(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *cmp_buf, size_t cmp_len, uint64_t off): + pass + int rados_aio_rmxattr(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *name): + pass + + int rados_aio_get_return_value(rados_completion_t c): + pass + int rados_aio_wait_for_complete_and_cb(rados_completion_t c): + pass + int rados_aio_wait_for_complete(rados_completion_t c): + pass + int rados_aio_is_complete(rados_completion_t c): + pass + + int rados_exec(rados_ioctx_t io, const char * oid, const char * cls, const char * method, + 
const char * in_buf, size_t in_len, char * buf, size_t out_len): + pass + int rados_aio_exec(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * cls, const char * method, + const char * in_buf, size_t in_len, char * buf, size_t out_len): + pass + int rados_aio_setxattr(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *name, const char *buf, size_t len): + pass + int rados_write_op_operate(rados_write_op_t write_op, rados_ioctx_t io, const char * oid, time_t * mtime, int flags): + pass + int rados_aio_write_op_operate(rados_write_op_t write_op, rados_ioctx_t io, rados_completion_t completion, const char *oid, time_t *mtime, int flags): + pass + void rados_write_op_omap_set(rados_write_op_t write_op, const char * const* keys, const char * const* vals, const size_t * lens, size_t num): + pass + void rados_write_op_omap_rm_keys(rados_write_op_t write_op, const char * const* keys, size_t keys_len): + pass + void rados_write_op_omap_clear(rados_write_op_t write_op): + pass + void rados_write_op_omap_rm_range2(rados_write_op_t write_op, const char *key_begin, size_t key_begin_len, const char *key_end, size_t key_end_len): + pass + void rados_write_op_set_flags(rados_write_op_t write_op, int flags): + pass + void rados_write_op_setxattr(rados_write_op_t write_op, const char *name, const char *value, size_t value_len): + pass + void rados_write_op_rmxattr(rados_write_op_t write_op, const char *name): + pass + + void rados_write_op_create(rados_write_op_t write_op, int exclusive, const char *category): + pass + void rados_write_op_append(rados_write_op_t write_op, const char *buffer, size_t len): + pass + void rados_write_op_write_full(rados_write_op_t write_op, const char *buffer, size_t len): + pass + void rados_write_op_assert_version(rados_write_op_t write_op, uint64_t ver): + pass + void rados_write_op_write(rados_write_op_t write_op, const char *buffer, size_t len, uint64_t offset): + pass + void 
rados_write_op_remove(rados_write_op_t write_op): + pass + void rados_write_op_truncate(rados_write_op_t write_op, uint64_t offset): + pass + void rados_write_op_zero(rados_write_op_t write_op, uint64_t offset, uint64_t len): + pass + void rados_write_op_exec(rados_write_op_t write_op, const char *cls, const char *method, const char *in_buf, size_t in_len, int *prval): + pass + void rados_write_op_writesame(rados_write_op_t write_op, const char *buffer, size_t data_len, size_t write_len, uint64_t offset): + pass + void rados_read_op_omap_get_vals2(rados_read_op_t read_op, const char * start_after, const char * filter_prefix, uint64_t max_return, rados_omap_iter_t * iter, unsigned char *pmore, int * prval): + pass + void rados_read_op_omap_get_keys2(rados_read_op_t read_op, const char * start_after, uint64_t max_return, rados_omap_iter_t * iter, unsigned char *pmore, int * prval): + pass + void rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op, const char * const* keys, size_t keys_len, rados_omap_iter_t * iter, int * prval): + pass + int rados_read_op_operate(rados_read_op_t read_op, rados_ioctx_t io, const char * oid, int flags): + pass + int rados_aio_read_op_operate(rados_read_op_t read_op, rados_ioctx_t io, rados_completion_t completion, const char *oid, int flags): + pass + void rados_read_op_set_flags(rados_read_op_t read_op, int flags): + pass + int rados_omap_get_next(rados_omap_iter_t iter, const char * const* key, const char * const* val, size_t * len): + pass + void rados_omap_get_end(rados_omap_iter_t iter): + pass + int rados_notify2(rados_ioctx_t io, const char * o, const char *buf, int buf_len, uint64_t timeout_ms, char **reply_buffer, size_t *reply_buffer_len): + pass + int rados_aio_notify(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * buf, int len, uint64_t timeout_ms, char **reply_buffer, size_t *reply_buffer_len): + pass + int rados_decode_notify_response(char *reply_buffer, size_t reply_buffer_len, 
notify_ack_t **acks, size_t *nr_acks, notify_timeout_t **timeouts, size_t *nr_timeouts): + pass + void rados_free_notify_response(notify_ack_t *acks, size_t nr_acks, notify_timeout_t *timeouts): + pass + int rados_notify_ack(rados_ioctx_t io, const char *o, uint64_t notify_id, uint64_t cookie, const char *buf, int buf_len): + pass + int rados_watch3(rados_ioctx_t io, const char *o, uint64_t *cookie, rados_watchcb2_t watchcb, rados_watcherrcb_t watcherrcb, uint32_t timeout, void *arg): + pass + int rados_watch_check(rados_ioctx_t io, uint64_t cookie): + pass + int rados_unwatch2(rados_ioctx_t io, uint64_t cookie): + pass + int rados_watch_flush(rados_t cluster): + pass diff --git a/src/pybind/rados/rados.pxd b/src/pybind/rados/rados.pxd index 14f6b503d1c..fec534e8cc7 100644 --- a/src/pybind/rados/rados.pxd +++ b/src/pybind/rados/rados.pxd @@ -4,11 +4,16 @@ # # Copyright 2016 Mehdi Abaakouk <sileht@redhat.com> - -cdef extern from "rados/librados.h" nogil: - ctypedef void* rados_t - ctypedef void* rados_config_t - ctypedef void* rados_ioctx_t +IF BUILD_DOC: + cdef: + ctypedef void* rados_t + ctypedef void* rados_config_t + ctypedef void* rados_ioctx_t +ELSE: + cdef extern from "rados/librados.h" nogil: + ctypedef void* rados_t + ctypedef void* rados_config_t + ctypedef void* rados_ioctx_t cdef class Rados(object): diff --git a/src/pybind/rados/rados.pyx b/src/pybind/rados/rados.pyx index ea66bd37a51..bdab1339dbc 100644 --- a/src/pybind/rados/rados.pyx +++ b/src/pybind/rados/rados.pyx @@ -18,6 +18,10 @@ from cpython.pycapsule cimport * from libc cimport errno from libc.stdint cimport * from libc.stdlib cimport malloc, realloc, free +IF BUILD_DOC: + include "mock_rados.pxi" +ELSE: + from c_rados cimport * import threading import time @@ -37,307 +41,6 @@ cdef extern from "Python.h": int _PyBytes_Resize(PyObject **string, Py_ssize_t newsize) except -1 void PyEval_InitThreads() - -cdef extern from "time.h": - ctypedef long int time_t - ctypedef long int suseconds_t - - 
-cdef extern from "sys/time.h": - cdef struct timeval: - time_t tv_sec - suseconds_t tv_usec - - -cdef extern from "err.h" nogil: - cdef int _MAX_ERRNO "MAX_ERRNO" - - -cdef extern from "rados/rados_types.h" nogil: - cdef char* _LIBRADOS_ALL_NSPACES "LIBRADOS_ALL_NSPACES" - cdef struct notify_ack_t: - unsigned long notifier_id - unsigned long cookie - char *payload - unsigned long payload_len - - cdef struct notify_timeout_t: - unsigned long notifier_id - unsigned long cookie - -cdef extern from "rados/librados.h" nogil: - enum: - _LIBRADOS_OP_FLAG_EXCL "LIBRADOS_OP_FLAG_EXCL" - _LIBRADOS_OP_FLAG_FAILOK "LIBRADOS_OP_FLAG_FAILOK" - _LIBRADOS_OP_FLAG_FADVISE_RANDOM "LIBRADOS_OP_FLAG_FADVISE_RANDOM" - _LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL "LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL" - _LIBRADOS_OP_FLAG_FADVISE_WILLNEED "LIBRADOS_OP_FLAG_FADVISE_WILLNEED" - _LIBRADOS_OP_FLAG_FADVISE_DONTNEED "LIBRADOS_OP_FLAG_FADVISE_DONTNEED" - _LIBRADOS_OP_FLAG_FADVISE_NOCACHE "LIBRADOS_OP_FLAG_FADVISE_NOCACHE" - - - enum: - _LIBRADOS_OPERATION_NOFLAG "LIBRADOS_OPERATION_NOFLAG" - _LIBRADOS_OPERATION_BALANCE_READS "LIBRADOS_OPERATION_BALANCE_READS" - _LIBRADOS_OPERATION_LOCALIZE_READS "LIBRADOS_OPERATION_LOCALIZE_READS" - _LIBRADOS_OPERATION_ORDER_READS_WRITES "LIBRADOS_OPERATION_ORDER_READS_WRITES" - _LIBRADOS_OPERATION_IGNORE_CACHE "LIBRADOS_OPERATION_IGNORE_CACHE" - _LIBRADOS_OPERATION_SKIPRWLOCKS "LIBRADOS_OPERATION_SKIPRWLOCKS" - _LIBRADOS_OPERATION_IGNORE_OVERLAY "LIBRADOS_OPERATION_IGNORE_OVERLAY" - _LIBRADOS_CREATE_EXCLUSIVE "LIBRADOS_CREATE_EXCLUSIVE" - _LIBRADOS_CREATE_IDEMPOTENT "LIBRADOS_CREATE_IDEMPOTENT" - - cdef uint64_t _LIBRADOS_SNAP_HEAD "LIBRADOS_SNAP_HEAD" - - ctypedef void* rados_xattrs_iter_t - ctypedef void* rados_omap_iter_t - ctypedef void* rados_list_ctx_t - ctypedef uint64_t rados_snap_t - ctypedef void *rados_write_op_t - ctypedef void *rados_read_op_t - ctypedef void *rados_completion_t - ctypedef void (*rados_callback_t)(rados_completion_t cb, void *arg) - 
ctypedef void (*rados_log_callback_t)(void *arg, const char *line, const char *who, - uint64_t sec, uint64_t nsec, uint64_t seq, const char *level, const char *msg) - ctypedef void (*rados_log_callback2_t)(void *arg, const char *line, const char *channel, const char *who, const char *name, - uint64_t sec, uint64_t nsec, uint64_t seq, const char *level, const char *msg) - ctypedef void (*rados_watchcb2_t)(void *arg, int64_t notify_id, - uint64_t handle, uint64_t notifier_id, - void *data, size_t data_len) - ctypedef void (*rados_watcherrcb_t)(void *pre, uint64_t cookie, int err) - - - cdef struct rados_cluster_stat_t: - uint64_t kb - uint64_t kb_used - uint64_t kb_avail - uint64_t num_objects - - cdef struct rados_pool_stat_t: - uint64_t num_bytes - uint64_t num_kb - uint64_t num_objects - uint64_t num_object_clones - uint64_t num_object_copies - uint64_t num_objects_missing_on_primary - uint64_t num_objects_unfound - uint64_t num_objects_degraded - uint64_t num_rd - uint64_t num_rd_kb - uint64_t num_wr - uint64_t num_wr_kb - - void rados_buffer_free(char *buf) - - void rados_version(int *major, int *minor, int *extra) - int rados_create2(rados_t *pcluster, const char *const clustername, - const char * const name, uint64_t flags) - int rados_create_with_context(rados_t *cluster, rados_config_t cct) - int rados_connect(rados_t cluster) - void rados_shutdown(rados_t cluster) - uint64_t rados_get_instance_id(rados_t cluster) - int rados_conf_read_file(rados_t cluster, const char *path) - int rados_conf_parse_argv_remainder(rados_t cluster, int argc, const char **argv, const char **remargv) - int rados_conf_parse_env(rados_t cluster, const char *var) - int rados_conf_set(rados_t cluster, char *option, const char *value) - int rados_conf_get(rados_t cluster, char *option, char *buf, size_t len) - - rados_t rados_ioctx_get_cluster(rados_ioctx_t io) - int rados_ioctx_pool_stat(rados_ioctx_t io, rados_pool_stat_t *stats) - int64_t rados_pool_lookup(rados_t cluster, const 
char *pool_name) - int rados_pool_reverse_lookup(rados_t cluster, int64_t id, char *buf, size_t maxlen) - int rados_pool_create(rados_t cluster, const char *pool_name) - int rados_pool_create_with_crush_rule(rados_t cluster, const char *pool_name, uint8_t crush_rule_num) - int rados_pool_create_with_auid(rados_t cluster, const char *pool_name, uint64_t auid) - int rados_pool_create_with_all(rados_t cluster, const char *pool_name, uint64_t auid, uint8_t crush_rule_num) - int rados_pool_get_base_tier(rados_t cluster, int64_t pool, int64_t *base_tier) - int rados_pool_list(rados_t cluster, char *buf, size_t len) - int rados_pool_delete(rados_t cluster, const char *pool_name) - int rados_inconsistent_pg_list(rados_t cluster, int64_t pool, char *buf, size_t len) - - int rados_cluster_stat(rados_t cluster, rados_cluster_stat_t *result) - int rados_cluster_fsid(rados_t cluster, char *buf, size_t len) - int rados_blocklist_add(rados_t cluster, char *client_address, uint32_t expire_seconds) - int rados_getaddrs(rados_t cluster, char** addrs) - int rados_application_enable(rados_ioctx_t io, const char *app_name, - int force) - void rados_set_osdmap_full_try(rados_ioctx_t io) - void rados_unset_osdmap_full_try(rados_ioctx_t io) - int rados_application_list(rados_ioctx_t io, char *values, - size_t *values_len) - int rados_application_metadata_get(rados_ioctx_t io, const char *app_name, - const char *key, char *value, - size_t *value_len) - int rados_application_metadata_set(rados_ioctx_t io, const char *app_name, - const char *key, const char *value) - int rados_application_metadata_remove(rados_ioctx_t io, - const char *app_name, const char *key) - int rados_application_metadata_list(rados_ioctx_t io, - const char *app_name, char *keys, - size_t *key_len, char *values, - size_t *value_len) - int rados_ping_monitor(rados_t cluster, const char *mon_id, char **outstr, size_t *outstrlen) - int rados_mon_command(rados_t cluster, const char **cmd, size_t cmdlen, - const char 
*inbuf, size_t inbuflen, - char **outbuf, size_t *outbuflen, - char **outs, size_t *outslen) - int rados_mgr_command(rados_t cluster, const char **cmd, size_t cmdlen, - const char *inbuf, size_t inbuflen, - char **outbuf, size_t *outbuflen, - char **outs, size_t *outslen) - int rados_mgr_command_target(rados_t cluster, - const char *name, - const char **cmd, size_t cmdlen, - const char *inbuf, size_t inbuflen, - char **outbuf, size_t *outbuflen, - char **outs, size_t *outslen) - int rados_mon_command_target(rados_t cluster, const char *name, const char **cmd, size_t cmdlen, - const char *inbuf, size_t inbuflen, - char **outbuf, size_t *outbuflen, - char **outs, size_t *outslen) - int rados_osd_command(rados_t cluster, int osdid, const char **cmd, size_t cmdlen, - const char *inbuf, size_t inbuflen, - char **outbuf, size_t *outbuflen, - char **outs, size_t *outslen) - int rados_pg_command(rados_t cluster, const char *pgstr, const char **cmd, size_t cmdlen, - const char *inbuf, size_t inbuflen, - char **outbuf, size_t *outbuflen, - char **outs, size_t *outslen) - int rados_monitor_log(rados_t cluster, const char *level, rados_log_callback_t cb, void *arg) - int rados_monitor_log2(rados_t cluster, const char *level, rados_log_callback2_t cb, void *arg) - - int rados_wait_for_latest_osdmap(rados_t cluster) - - int rados_service_register(rados_t cluster, const char *service, const char *daemon, const char *metadata_dict) - int rados_service_update_status(rados_t cluster, const char *status_dict) - - int rados_ioctx_create(rados_t cluster, const char *pool_name, rados_ioctx_t *ioctx) - int rados_ioctx_create2(rados_t cluster, int64_t pool_id, rados_ioctx_t *ioctx) - void rados_ioctx_destroy(rados_ioctx_t io) - void rados_ioctx_locator_set_key(rados_ioctx_t io, const char *key) - void rados_ioctx_set_namespace(rados_ioctx_t io, const char * nspace) - - uint64_t rados_get_last_version(rados_ioctx_t io) - int rados_stat(rados_ioctx_t io, const char *o, uint64_t *psize, 
time_t *pmtime) - int rados_write(rados_ioctx_t io, const char *oid, const char *buf, size_t len, uint64_t off) - int rados_write_full(rados_ioctx_t io, const char *oid, const char *buf, size_t len) - int rados_writesame(rados_ioctx_t io, const char *oid, const char *buf, size_t data_len, size_t write_len, uint64_t off) - int rados_append(rados_ioctx_t io, const char *oid, const char *buf, size_t len) - int rados_read(rados_ioctx_t io, const char *oid, char *buf, size_t len, uint64_t off) - int rados_remove(rados_ioctx_t io, const char *oid) - int rados_trunc(rados_ioctx_t io, const char *oid, uint64_t size) - int rados_cmpext(rados_ioctx_t io, const char *o, const char *cmp_buf, size_t cmp_len, uint64_t off) - int rados_getxattr(rados_ioctx_t io, const char *o, const char *name, char *buf, size_t len) - int rados_setxattr(rados_ioctx_t io, const char *o, const char *name, const char *buf, size_t len) - int rados_rmxattr(rados_ioctx_t io, const char *o, const char *name) - int rados_getxattrs(rados_ioctx_t io, const char *oid, rados_xattrs_iter_t *iter) - int rados_getxattrs_next(rados_xattrs_iter_t iter, const char **name, const char **val, size_t *len) - void rados_getxattrs_end(rados_xattrs_iter_t iter) - - int rados_nobjects_list_open(rados_ioctx_t io, rados_list_ctx_t *ctx) - int rados_nobjects_list_next(rados_list_ctx_t ctx, const char **entry, const char **key, const char **nspace) - void rados_nobjects_list_close(rados_list_ctx_t ctx) - - int rados_ioctx_pool_requires_alignment2(rados_ioctx_t io, int * requires) - int rados_ioctx_pool_required_alignment2(rados_ioctx_t io, uint64_t * alignment) - - int rados_ioctx_snap_rollback(rados_ioctx_t io, const char * oid, const char * snapname) - int rados_ioctx_snap_create(rados_ioctx_t io, const char * snapname) - int rados_ioctx_snap_remove(rados_ioctx_t io, const char * snapname) - int rados_ioctx_snap_lookup(rados_ioctx_t io, const char * name, rados_snap_t * id) - int rados_ioctx_snap_get_name(rados_ioctx_t io, 
rados_snap_t id, char * name, int maxlen) - void rados_ioctx_snap_set_read(rados_ioctx_t io, rados_snap_t snap) - int rados_ioctx_snap_list(rados_ioctx_t io, rados_snap_t * snaps, int maxlen) - int rados_ioctx_snap_get_stamp(rados_ioctx_t io, rados_snap_t id, time_t * t) - int64_t rados_ioctx_get_id(rados_ioctx_t io) - int rados_ioctx_get_pool_name(rados_ioctx_t io, char *buf, unsigned maxlen) - - int rados_ioctx_selfmanaged_snap_create(rados_ioctx_t io, - rados_snap_t *snapid) - int rados_ioctx_selfmanaged_snap_remove(rados_ioctx_t io, - rados_snap_t snapid) - int rados_ioctx_selfmanaged_snap_set_write_ctx(rados_ioctx_t io, - rados_snap_t snap_seq, - rados_snap_t *snap, - int num_snaps) - int rados_ioctx_selfmanaged_snap_rollback(rados_ioctx_t io, const char *oid, - rados_snap_t snapid) - - int rados_lock_exclusive(rados_ioctx_t io, const char * oid, const char * name, - const char * cookie, const char * desc, - timeval * duration, uint8_t flags) - int rados_lock_shared(rados_ioctx_t io, const char * o, const char * name, - const char * cookie, const char * tag, const char * desc, - timeval * duration, uint8_t flags) - int rados_unlock(rados_ioctx_t io, const char * o, const char * name, const char * cookie) - - rados_write_op_t rados_create_write_op() - void rados_release_write_op(rados_write_op_t write_op) - - rados_read_op_t rados_create_read_op() - void rados_release_read_op(rados_read_op_t read_op) - - int rados_aio_create_completion2(void * cb_arg, rados_callback_t cb_complete, rados_completion_t * pc) - void rados_aio_release(rados_completion_t c) - int rados_aio_stat(rados_ioctx_t io, const char *oid, rados_completion_t completion, uint64_t *psize, time_t *pmtime) - int rados_aio_write(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * buf, size_t len, uint64_t off) - int rados_aio_append(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * buf, size_t len) - int rados_aio_write_full(rados_ioctx_t io, 
const char * oid, rados_completion_t completion, const char * buf, size_t len) - int rados_aio_writesame(rados_ioctx_t io, const char *oid, rados_completion_t completion, const char *buf, size_t data_len, size_t write_len, uint64_t off) - int rados_aio_remove(rados_ioctx_t io, const char * oid, rados_completion_t completion) - int rados_aio_read(rados_ioctx_t io, const char * oid, rados_completion_t completion, char * buf, size_t len, uint64_t off) - int rados_aio_flush(rados_ioctx_t io) - int rados_aio_cmpext(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *cmp_buf, size_t cmp_len, uint64_t off) - int rados_aio_rmxattr(rados_ioctx_t io, const char *o, rados_completion_t completion, const char *name) - - int rados_aio_get_return_value(rados_completion_t c) - int rados_aio_wait_for_complete_and_cb(rados_completion_t c) - int rados_aio_wait_for_complete(rados_completion_t c) - int rados_aio_is_complete(rados_completion_t c) - - int rados_exec(rados_ioctx_t io, const char * oid, const char * cls, const char * method, - const char * in_buf, size_t in_len, char * buf, size_t out_len) - int rados_aio_exec(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * cls, const char * method, - const char * in_buf, size_t in_len, char * buf, size_t out_len) - - int rados_write_op_operate(rados_write_op_t write_op, rados_ioctx_t io, const char * oid, time_t * mtime, int flags) - int rados_aio_write_op_operate(rados_write_op_t write_op, rados_ioctx_t io, rados_completion_t completion, const char *oid, time_t *mtime, int flags) - void rados_write_op_omap_set(rados_write_op_t write_op, const char * const* keys, const char * const* vals, const size_t * lens, size_t num) - void rados_write_op_omap_rm_keys(rados_write_op_t write_op, const char * const* keys, size_t keys_len) - void rados_write_op_omap_clear(rados_write_op_t write_op) - void rados_write_op_set_flags(rados_write_op_t write_op, int flags) - void 
rados_write_op_setxattr(rados_write_op_t write_op, const char *name, const char *value, size_t value_len) - void rados_write_op_rmxattr(rados_write_op_t write_op, const char *name) - - void rados_write_op_create(rados_write_op_t write_op, int exclusive, const char *category) - void rados_write_op_append(rados_write_op_t write_op, const char *buffer, size_t len) - void rados_write_op_write_full(rados_write_op_t write_op, const char *buffer, size_t len) - void rados_write_op_assert_version(rados_write_op_t write_op, uint64_t ver) - void rados_write_op_write(rados_write_op_t write_op, const char *buffer, size_t len, uint64_t offset) - void rados_write_op_remove(rados_write_op_t write_op) - void rados_write_op_truncate(rados_write_op_t write_op, uint64_t offset) - void rados_write_op_zero(rados_write_op_t write_op, uint64_t offset, uint64_t len) - void rados_write_op_exec(rados_write_op_t write_op, const char *cls, const char *method, const char *in_buf, size_t in_len, int *prval) - void rados_write_op_writesame(rados_write_op_t write_op, const char *buffer, size_t data_len, size_t write_len, uint64_t offset) - void rados_read_op_omap_get_vals2(rados_read_op_t read_op, const char * start_after, const char * filter_prefix, uint64_t max_return, rados_omap_iter_t * iter, unsigned char *pmore, int * prval) - void rados_read_op_omap_get_keys2(rados_read_op_t read_op, const char * start_after, uint64_t max_return, rados_omap_iter_t * iter, unsigned char *pmore, int * prval) - void rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op, const char * const* keys, size_t keys_len, rados_omap_iter_t * iter, int * prval) - int rados_read_op_operate(rados_read_op_t read_op, rados_ioctx_t io, const char * oid, int flags) - int rados_aio_read_op_operate(rados_read_op_t read_op, rados_ioctx_t io, rados_completion_t completion, const char *oid, int flags) - void rados_read_op_set_flags(rados_read_op_t read_op, int flags) - int rados_omap_get_next(rados_omap_iter_t iter, const 
char * const* key, const char * const* val, size_t * len) - void rados_omap_get_end(rados_omap_iter_t iter) - int rados_notify2(rados_ioctx_t io, const char * o, const char *buf, int buf_len, uint64_t timeout_ms, char **reply_buffer, size_t *reply_buffer_len) - int rados_aio_notify(rados_ioctx_t io, const char * oid, rados_completion_t completion, const char * buf, int len, uint64_t timeout_ms, char **reply_buffer, size_t *reply_buffer_len) - int rados_decode_notify_response(char *reply_buffer, size_t reply_buffer_len, notify_ack_t **acks, size_t *nr_acks, notify_timeout_t **timeouts, size_t *nr_timeouts) - void rados_free_notify_response(notify_ack_t *acks, size_t nr_acks, notify_timeout_t *timeouts) - int rados_notify_ack(rados_ioctx_t io, const char *o, uint64_t notify_id, uint64_t cookie, const char *buf, int buf_len) - int rados_watch3(rados_ioctx_t io, const char *o, uint64_t *cookie, rados_watchcb2_t watchcb, rados_watcherrcb_t watcherrcb, uint32_t timeout, void *arg) - int rados_watch_check(rados_ioctx_t io, uint64_t cookie) - int rados_unwatch2(rados_ioctx_t io, uint64_t cookie) - int rados_watch_flush(rados_t cluster) - - LIBRADOS_OP_FLAG_EXCL = _LIBRADOS_OP_FLAG_EXCL LIBRADOS_OP_FLAG_FAILOK = _LIBRADOS_OP_FLAG_FAILOK LIBRADOS_OP_FLAG_FADVISE_RANDOM = _LIBRADOS_OP_FLAG_FADVISE_RANDOM @@ -2835,6 +2538,41 @@ cdef class Ioctx(object): raise make_ex(ret, "error executing %s::%s on %s" % (cls, method, object_name)) return completion + def aio_setxattr(self, object_name: str, xattr_name: str, xattr_value: bytes, + oncomplete: Optional[Callable] = None) -> Completion: + """ + Asynchronously set an extended attribute on an object + + :param object_name: the name of the object to set xattr to + :param xattr_name: which extended attribute to set + :param xattr_value: the value of the extended attribute + :param oncomplete: what to do when the setxttr completes + + :raises: :class:`Error` + :returns: completion object + """ + object_name_raw = cstr(object_name, 
'object_name') + xattr_name_raw = cstr(xattr_name , 'xattr_name') + + cdef: + Completion completion + char* _object_name = object_name_raw + char* _xattr_name = xattr_name_raw + char* _xattr_value = xattr_value + size_t xattr_value_len = len(xattr_value) + + completion = self.__get_completion(oncomplete, None) + self.__track_completion(completion) + with nogil: + ret = rados_aio_setxattr(self.io, _object_name, + completion.rados_comp, + _xattr_name, _xattr_value, xattr_value_len) + + if ret < 0: + completion._cleanup() + raise make_ex(ret, "Failed to set xattr %r" % xattr_name) + return completion + def aio_remove(self, object_name: str, oncomplete: Optional[Callable] = None, onsafe: Optional[Callable] = None) -> Completion: @@ -4047,6 +3785,26 @@ returned %d, but should return zero on success." % (self.name, ret)) with nogil: rados_write_op_omap_clear(_write_op.write_op) + def remove_omap_range2(self, write_op: WriteOp, key_begin: str, key_end: str): + """ + Remove key/value pairs from an object whose keys are in the range + [key_begin, key_end) + :param write_op: write operation object + :param key_begin: the lower bound of the key range to remove + :param key_end: the upper bound of the key range to remove + """ + key_begin_raw = cstr(key_begin, 'key_begin') + key_end_raw = cstr(key_end, 'key_end') + cdef: + WriteOp _write_op = write_op + char* _key_begin = key_begin_raw + size_t key_begin_len = len(key_begin) + char* _key_end = key_end_raw + size_t key_end_len = len(key_end) + with nogil: + rados_write_op_omap_rm_range2(_write_op.write_op, _key_begin, key_begin_len, + _key_end, key_end_len) + def lock_exclusive(self, key: str, name: str, cookie: str, desc: str = "", duration: Optional[int] = None, flags: int = 0): @@ -4172,14 +3930,14 @@ returned %d, but should return zero on success." 
% (self.name, ret)) Set global osdmap_full_try label to true """ with nogil: - rados_set_osdmap_full_try(self.io) + rados_set_pool_full_try(self.io) def unset_osdmap_full_try(self): """ Unset """ with nogil: - rados_unset_osdmap_full_try(self.io) + rados_unset_pool_full_try(self.io) def application_enable(self, app_name: str, force: bool = False): """ diff --git a/src/pybind/rados/setup.py b/src/pybind/rados/setup.py index 5341ae99217..0d1047825b3 100755 --- a/src/pybind/rados/setup.py +++ b/src/pybind/rados/setup.py @@ -13,7 +13,6 @@ from itertools import filterfalse, takewhile import os import shutil -import subprocess import sys import tempfile import textwrap @@ -133,10 +132,12 @@ def check_sanity(): shutil.rmtree(tmp_dir) -if 'BUILD_DOC' in os.environ.keys(): - pass +if 'BUILD_DOC' in os.environ or 'READTHEDOCS' in os.environ: + ext_args = {} + cython_constants = dict(BUILD_DOC=True) elif check_sanity(): - pass + ext_args = get_python_flags(['rados']) + cython_constants = dict(BUILD_DOC=False) else: sys.exit(1) @@ -186,12 +187,13 @@ setup( Extension( "rados", [source], - **get_python_flags(['rados']) + **ext_args ) ], # use "3str" when Cython 3.0 is available compiler_directives={'language_level': sys.version_info.major}, - build_dir=os.environ.get("CYTHON_BUILD_DIR", None) + compile_time_env=cython_constants, + build_dir=os.environ.get("CYTHON_BUILD_DIR", None), ), classifiers=[ 'Intended Audience :: Developers', diff --git a/src/pybind/rbd/c_rbd.pxd b/src/pybind/rbd/c_rbd.pxd new file mode 100644 index 00000000000..ab1a8a76081 --- /dev/null +++ b/src/pybind/rbd/c_rbd.pxd @@ -0,0 +1,692 @@ +# cython: embedsignature=True + +from libc.stdint cimport * +from ctime cimport time_t, timespec + +cdef extern from "rados/librados.h": + enum: + _LIBRADOS_SNAP_HEAD "LIBRADOS_SNAP_HEAD" + +cdef extern from "rbd/librbd.h": + ctypedef int (*librbd_progress_fn_t)(uint64_t offset, uint64_t total, void* ptr) + +cdef extern from "rbd/librbd.h" nogil: + enum: + 
_RBD_FEATURE_LAYERING "RBD_FEATURE_LAYERING" + _RBD_FEATURE_STRIPINGV2 "RBD_FEATURE_STRIPINGV2" + _RBD_FEATURE_EXCLUSIVE_LOCK "RBD_FEATURE_EXCLUSIVE_LOCK" + _RBD_FEATURE_OBJECT_MAP "RBD_FEATURE_OBJECT_MAP" + _RBD_FEATURE_FAST_DIFF "RBD_FEATURE_FAST_DIFF" + _RBD_FEATURE_DEEP_FLATTEN "RBD_FEATURE_DEEP_FLATTEN" + _RBD_FEATURE_JOURNALING "RBD_FEATURE_JOURNALING" + _RBD_FEATURE_DATA_POOL "RBD_FEATURE_DATA_POOL" + _RBD_FEATURE_OPERATIONS "RBD_FEATURE_OPERATIONS" + _RBD_FEATURE_MIGRATING "RBD_FEATURE_MIGRATING" + _RBD_FEATURE_NON_PRIMARY "RBD_FEATURE_NON_PRIMARY" + + _RBD_FEATURES_INCOMPATIBLE "RBD_FEATURES_INCOMPATIBLE" + _RBD_FEATURES_RW_INCOMPATIBLE "RBD_FEATURES_RW_INCOMPATIBLE" + _RBD_FEATURES_MUTABLE "RBD_FEATURES_MUTABLE" + _RBD_FEATURES_SINGLE_CLIENT "RBD_FEATURES_SINGLE_CLIENT" + _RBD_FEATURES_ALL "RBD_FEATURES_ALL" + + _RBD_OPERATION_FEATURE_CLONE_PARENT "RBD_OPERATION_FEATURE_CLONE_PARENT" + _RBD_OPERATION_FEATURE_CLONE_CHILD "RBD_OPERATION_FEATURE_CLONE_CHILD" + _RBD_OPERATION_FEATURE_GROUP "RBD_OPERATION_FEATURE_GROUP" + _RBD_OPERATION_FEATURE_SNAP_TRASH "RBD_OPERATION_FEATURE_SNAP_TRASH" + + _RBD_FLAG_OBJECT_MAP_INVALID "RBD_FLAG_OBJECT_MAP_INVALID" + _RBD_FLAG_FAST_DIFF_INVALID "RBD_FLAG_FAST_DIFF_INVALID" + + _RBD_IMAGE_OPTION_FORMAT "RBD_IMAGE_OPTION_FORMAT" + _RBD_IMAGE_OPTION_FEATURES "RBD_IMAGE_OPTION_FEATURES" + _RBD_IMAGE_OPTION_ORDER "RBD_IMAGE_OPTION_ORDER" + _RBD_IMAGE_OPTION_STRIPE_UNIT "RBD_IMAGE_OPTION_STRIPE_UNIT" + _RBD_IMAGE_OPTION_STRIPE_COUNT "RBD_IMAGE_OPTION_STRIPE_COUNT" + _RBD_IMAGE_OPTION_DATA_POOL "RBD_IMAGE_OPTION_DATA_POOL" + + RBD_MAX_BLOCK_NAME_SIZE + RBD_MAX_IMAGE_NAME_SIZE + + _RBD_SNAP_CREATE_SKIP_QUIESCE "RBD_SNAP_CREATE_SKIP_QUIESCE" + _RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR "RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR" + + _RBD_SNAP_REMOVE_UNPROTECT "RBD_SNAP_REMOVE_UNPROTECT" + _RBD_SNAP_REMOVE_FLATTEN "RBD_SNAP_REMOVE_FLATTEN" + _RBD_SNAP_REMOVE_FORCE "RBD_SNAP_REMOVE_FORCE" + + _RBD_WRITE_ZEROES_FLAG_THICK_PROVISION 
"RBD_WRITE_ZEROES_FLAG_THICK_PROVISION" + + ctypedef void* rados_t + ctypedef void* rados_ioctx_t + ctypedef void* rbd_image_t + ctypedef void* rbd_image_options_t + ctypedef void* rbd_pool_stats_t + ctypedef void *rbd_completion_t + + ctypedef struct rbd_image_info_t: + uint64_t size + uint64_t obj_size + uint64_t num_objs + int order + char block_name_prefix[RBD_MAX_BLOCK_NAME_SIZE] + uint64_t parent_pool + char parent_name[RBD_MAX_IMAGE_NAME_SIZE] + + ctypedef struct rbd_snap_info_t: + uint64_t id + uint64_t size + char *name + + ctypedef struct rbd_snap_group_namespace_t: + int64_t group_pool + char *group_name + char *group_snap_name + + ctypedef enum rbd_snap_mirror_state_t: + _RBD_SNAP_MIRROR_STATE_PRIMARY "RBD_SNAP_MIRROR_STATE_PRIMARY" + _RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED "RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED" + _RBD_SNAP_MIRROR_STATE_NON_PRIMARY "RBD_SNAP_MIRROR_STATE_NON_PRIMARY" + _RBD_SNAP_MIRROR_STATE_NON_PRIMARY_DEMOTED "RBD_SNAP_MIRROR_STATE_NON_PRIMARY_DEMOTED" + + ctypedef struct rbd_snap_mirror_namespace_t: + rbd_snap_mirror_state_t state + size_t mirror_peer_uuids_count + char *mirror_peer_uuids + bint complete + char *primary_mirror_uuid + uint64_t primary_snap_id + uint64_t last_copied_object_number + + ctypedef struct rbd_group_info_t: + char *name + int64_t pool + + ctypedef struct rbd_image_spec_t: + char *id + char *name + + ctypedef struct rbd_linked_image_spec_t: + int64_t pool_id + char *pool_name + char *pool_namespace + char *image_id + char *image_name + bint trash + + ctypedef enum rbd_snap_namespace_type_t: + _RBD_SNAP_NAMESPACE_TYPE_USER "RBD_SNAP_NAMESPACE_TYPE_USER" + _RBD_SNAP_NAMESPACE_TYPE_GROUP "RBD_SNAP_NAMESPACE_TYPE_GROUP" + _RBD_SNAP_NAMESPACE_TYPE_TRASH "RBD_SNAP_NAMESPACE_TYPE_TRASH" + _RBD_SNAP_NAMESPACE_TYPE_MIRROR "RBD_SNAP_NAMESPACE_TYPE_MIRROR" + + ctypedef struct rbd_snap_spec_t: + uint64_t id + rbd_snap_namespace_type_t namespace_type + char *name + + ctypedef enum rbd_mirror_mode_t: + 
_RBD_MIRROR_MODE_DISABLED "RBD_MIRROR_MODE_DISABLED" + _RBD_MIRROR_MODE_IMAGE "RBD_MIRROR_MODE_IMAGE" + _RBD_MIRROR_MODE_POOL "RBD_MIRROR_MODE_POOL" + + ctypedef enum rbd_mirror_peer_direction_t: + _RBD_MIRROR_PEER_DIRECTION_RX "RBD_MIRROR_PEER_DIRECTION_RX" + _RBD_MIRROR_PEER_DIRECTION_TX "RBD_MIRROR_PEER_DIRECTION_TX" + _RBD_MIRROR_PEER_DIRECTION_RX_TX "RBD_MIRROR_PEER_DIRECTION_RX_TX" + + ctypedef struct rbd_mirror_peer_site_t: + char *uuid + rbd_mirror_peer_direction_t direction + char *site_name + char *mirror_uuid + char *client_name + time_t last_seen + + cdef char* _RBD_MIRROR_PEER_ATTRIBUTE_NAME_MON_HOST "RBD_MIRROR_PEER_ATTRIBUTE_NAME_MON_HOST" + cdef char* _RBD_MIRROR_PEER_ATTRIBUTE_NAME_KEY "RBD_MIRROR_PEER_ATTRIBUTE_NAME_KEY" + + ctypedef enum rbd_mirror_image_mode_t: + _RBD_MIRROR_IMAGE_MODE_JOURNAL "RBD_MIRROR_IMAGE_MODE_JOURNAL" + _RBD_MIRROR_IMAGE_MODE_SNAPSHOT "RBD_MIRROR_IMAGE_MODE_SNAPSHOT" + + ctypedef enum rbd_mirror_image_state_t: + _RBD_MIRROR_IMAGE_DISABLING "RBD_MIRROR_IMAGE_DISABLING" + _RBD_MIRROR_IMAGE_ENABLED "RBD_MIRROR_IMAGE_ENABLED" + _RBD_MIRROR_IMAGE_DISABLED "RBD_MIRROR_IMAGE_DISABLED" + + ctypedef struct rbd_mirror_image_info_t: + char *global_id + rbd_mirror_image_state_t state + bint primary + + ctypedef enum rbd_mirror_image_status_state_t: + _MIRROR_IMAGE_STATUS_STATE_UNKNOWN "MIRROR_IMAGE_STATUS_STATE_UNKNOWN" + _MIRROR_IMAGE_STATUS_STATE_ERROR "MIRROR_IMAGE_STATUS_STATE_ERROR" + _MIRROR_IMAGE_STATUS_STATE_SYNCING "MIRROR_IMAGE_STATUS_STATE_SYNCING" + _MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY "MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY" + _MIRROR_IMAGE_STATUS_STATE_REPLAYING "MIRROR_IMAGE_STATUS_STATE_REPLAYING" + _MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY "MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY" + _MIRROR_IMAGE_STATUS_STATE_STOPPED "MIRROR_IMAGE_STATUS_STATE_STOPPED" + + ctypedef struct rbd_mirror_image_site_status_t: + char *mirror_uuid + rbd_mirror_image_status_state_t state + char *description + time_t last_update + 
bint up + + ctypedef struct rbd_mirror_image_global_status_t: + char *name + rbd_mirror_image_info_t info + uint32_t site_statuses_count + rbd_mirror_image_site_status_t *site_statuses + + ctypedef enum rbd_lock_mode_t: + _RBD_LOCK_MODE_EXCLUSIVE "RBD_LOCK_MODE_EXCLUSIVE" + _RBD_LOCK_MODE_SHARED "RBD_LOCK_MODE_SHARED" + + ctypedef enum rbd_trash_image_source_t: + _RBD_TRASH_IMAGE_SOURCE_USER "RBD_TRASH_IMAGE_SOURCE_USER", + _RBD_TRASH_IMAGE_SOURCE_MIRRORING "RBD_TRASH_IMAGE_SOURCE_MIRRORING", + _RBD_TRASH_IMAGE_SOURCE_MIGRATION "RBD_TRASH_IMAGE_SOURCE_MIGRATION" + _RBD_TRASH_IMAGE_SOURCE_REMOVING "RBD_TRASH_IMAGE_SOURCE_REMOVING" + + ctypedef struct rbd_trash_image_info_t: + char *id + char *name + rbd_trash_image_source_t source + time_t deletion_time + time_t deferment_end_time + + ctypedef struct rbd_image_watcher_t: + char *addr + int64_t id + uint64_t cookie + + ctypedef enum rbd_group_image_state_t: + _RBD_GROUP_IMAGE_STATE_ATTACHED "RBD_GROUP_IMAGE_STATE_ATTACHED" + _RBD_GROUP_IMAGE_STATE_INCOMPLETE "RBD_GROUP_IMAGE_STATE_INCOMPLETE" + + ctypedef struct rbd_group_image_info_t: + char *name + int64_t pool + rbd_group_image_state_t state + + ctypedef enum rbd_group_snap_state_t: + _RBD_GROUP_SNAP_STATE_INCOMPLETE "RBD_GROUP_SNAP_STATE_INCOMPLETE" + _RBD_GROUP_SNAP_STATE_COMPLETE "RBD_GROUP_SNAP_STATE_COMPLETE" + + ctypedef struct rbd_group_snap_info_t: + char *name + rbd_group_snap_state_t state + + ctypedef enum rbd_image_migration_state_t: + _RBD_IMAGE_MIGRATION_STATE_UNKNOWN "RBD_IMAGE_MIGRATION_STATE_UNKNOWN" + _RBD_IMAGE_MIGRATION_STATE_ERROR "RBD_IMAGE_MIGRATION_STATE_ERROR" + _RBD_IMAGE_MIGRATION_STATE_PREPARING "RBD_IMAGE_MIGRATION_STATE_PREPARING" + _RBD_IMAGE_MIGRATION_STATE_PREPARED "RBD_IMAGE_MIGRATION_STATE_PREPARED" + _RBD_IMAGE_MIGRATION_STATE_EXECUTING "RBD_IMAGE_MIGRATION_STATE_EXECUTING" + _RBD_IMAGE_MIGRATION_STATE_EXECUTED "RBD_IMAGE_MIGRATION_STATE_EXECUTED" + _RBD_IMAGE_MIGRATION_STATE_ABORTING "RBD_IMAGE_MIGRATION_STATE_ABORTING" + + 
ctypedef struct rbd_image_migration_status_t: + int64_t source_pool_id + char *source_pool_namespace + char *source_image_name + char *source_image_id + int64_t dest_pool_id + char *dest_pool_namespace + char *dest_image_name + char *dest_image_id + rbd_image_migration_state_t state + char *state_description + + ctypedef enum rbd_config_source_t: + _RBD_CONFIG_SOURCE_CONFIG "RBD_CONFIG_SOURCE_CONFIG" + _RBD_CONFIG_SOURCE_POOL "RBD_CONFIG_SOURCE_POOL" + _RBD_CONFIG_SOURCE_IMAGE "RBD_CONFIG_SOURCE_IMAGE" + + ctypedef struct rbd_config_option_t: + char *name + char *value + rbd_config_source_t source + + ctypedef enum rbd_pool_stat_option_t: + _RBD_POOL_STAT_OPTION_IMAGES "RBD_POOL_STAT_OPTION_IMAGES" + _RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES "RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES" + _RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES "RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES" + _RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS "RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS" + _RBD_POOL_STAT_OPTION_TRASH_IMAGES "RBD_POOL_STAT_OPTION_TRASH_IMAGES" + _RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES "RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES" + _RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES "RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES" + _RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS "RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS" + + ctypedef void (*rbd_callback_t)(rbd_completion_t cb, void *arg) + + void rbd_version(int *major, int *minor, int *extra) + + void rbd_image_spec_list_cleanup(rbd_image_spec_t *image, size_t num_images) + void rbd_linked_image_spec_cleanup(rbd_linked_image_spec_t *image) + void rbd_linked_image_spec_list_cleanup(rbd_linked_image_spec_t *images, + size_t num_images) + void rbd_snap_spec_cleanup(rbd_snap_spec_t *snap) + + void rbd_image_options_create(rbd_image_options_t* opts) + void rbd_image_options_destroy(rbd_image_options_t opts) + int rbd_image_options_set_string(rbd_image_options_t opts, int optname, + const char* optval) + int 
rbd_image_options_set_uint64(rbd_image_options_t opts, int optname, + uint64_t optval) + int rbd_image_options_get_string(rbd_image_options_t opts, int optname, + char* optval, size_t maxlen) + int rbd_image_options_get_uint64(rbd_image_options_t opts, int optname, + uint64_t* optval) + int rbd_image_options_unset(rbd_image_options_t opts, int optname) + void rbd_image_options_clear(rbd_image_options_t opts) + int rbd_image_options_is_empty(rbd_image_options_t opts) + + int rbd_list(rados_ioctx_t io, char *names, size_t *size) + int rbd_list2(rados_ioctx_t io, rbd_image_spec_t *images, + size_t *num_images) + int rbd_create(rados_ioctx_t io, const char *name, uint64_t size, + int *order) + int rbd_create4(rados_ioctx_t io, const char *name, uint64_t size, + rbd_image_options_t opts) + int rbd_clone3(rados_ioctx_t p_ioctx, const char *p_name, + const char *p_snapname, rados_ioctx_t c_ioctx, + const char *c_name, rbd_image_options_t c_opts) + int rbd_remove_with_progress(rados_ioctx_t io, const char *name, + librbd_progress_fn_t cb, void *cbdata) + int rbd_rename(rados_ioctx_t src_io_ctx, const char *srcname, + const char *destname) + + int rbd_trash_move(rados_ioctx_t io, const char *name, uint64_t delay) + int rbd_trash_get(rados_ioctx_t io, const char *id, + rbd_trash_image_info_t *info) + void rbd_trash_get_cleanup(rbd_trash_image_info_t *info) + int rbd_trash_list(rados_ioctx_t io, rbd_trash_image_info_t *trash_entries, + size_t *num_entries) + void rbd_trash_list_cleanup(rbd_trash_image_info_t *trash_entries, + size_t num_entries) + int rbd_trash_purge(rados_ioctx_t io, time_t expire_ts, float threshold) + int rbd_trash_remove_with_progress(rados_ioctx_t io, const char *id, + int force, librbd_progress_fn_t cb, + void *cbdata) + int rbd_trash_restore(rados_ioctx_t io, const char *id, const char *name) + + int rbd_migration_prepare(rados_ioctx_t io_ctx, const char *image_name, + rados_ioctx_t dest_io_ctx, + const char *dest_image_name, + rbd_image_options_t 
opts) + int rbd_migration_prepare_import(const char *source_spec, + rados_ioctx_t dest_io_ctx, + const char *dest_image_name, + rbd_image_options_t opts) + int rbd_migration_execute_with_progress(rados_ioctx_t io_ctx, + const char *image_name, + librbd_progress_fn_t cb, + void *cbdata) + int rbd_migration_commit_with_progress(rados_ioctx_t io_ctx, + const char *image_name, + librbd_progress_fn_t cb, + void *cbdata) + int rbd_migration_abort_with_progress(rados_ioctx_t io_ctx, + const char *image_name, + librbd_progress_fn_t cb, void *cbdata) + int rbd_migration_status(rados_ioctx_t io_ctx, const char *image_name, + rbd_image_migration_status_t *status, + size_t status_size) + void rbd_migration_status_cleanup(rbd_image_migration_status_t *status) + + int rbd_mirror_site_name_get(rados_t cluster, char *name, size_t *max_len) + int rbd_mirror_site_name_set(rados_t cluster, const char *name) + + int rbd_mirror_mode_get(rados_ioctx_t io, rbd_mirror_mode_t *mirror_mode) + int rbd_mirror_mode_set(rados_ioctx_t io, rbd_mirror_mode_t mirror_mode) + + int rbd_mirror_uuid_get(rados_ioctx_t io_ctx, char *mirror_uuid, + size_t *max_len) + + int rbd_mirror_peer_bootstrap_create(rados_ioctx_t io_ctx, char *token, + size_t *max_len) + int rbd_mirror_peer_bootstrap_import( + rados_ioctx_t io_ctx, rbd_mirror_peer_direction_t direction, + const char *token) + + int rbd_mirror_peer_site_add( + rados_ioctx_t io, char *uuid, size_t uuid_max_length, + rbd_mirror_peer_direction_t direction, const char *site_name, + const char *client_name) + int rbd_mirror_peer_site_remove(rados_ioctx_t io, const char *uuid) + int rbd_mirror_peer_site_list( + rados_ioctx_t io_ctx, rbd_mirror_peer_site_t *peers,int *max_peers) + void rbd_mirror_peer_site_list_cleanup( + rbd_mirror_peer_site_t *peers, int max_peers) + + int rbd_mirror_peer_site_set_name( + rados_ioctx_t io_ctx, const char *uuid, const char *site_name) + int rbd_mirror_peer_site_set_client_name( + rados_ioctx_t io_ctx, const char *uuid, 
const char *client_name) + + int rbd_mirror_peer_site_get_attributes( + rados_ioctx_t io_ctx, const char *uuid, char *keys, size_t *max_key_len, + char *values, size_t *max_val_length, size_t *key_value_count) + int rbd_mirror_peer_site_set_attributes( + rados_ioctx_t io_ctx, const char *uuid, const char *keys, + const char *values, size_t count) + + int rbd_mirror_image_global_status_list( + rados_ioctx_t io, const char *start_id, size_t max, char **image_ids, + rbd_mirror_image_global_status_t *images, size_t *len) + void rbd_mirror_image_global_status_list_cleanup( + char **image_ids, rbd_mirror_image_global_status_t *images, size_t len) + int rbd_mirror_image_status_summary(rados_ioctx_t io, + rbd_mirror_image_status_state_t *states, + int *counts, size_t *maxlen) + int rbd_mirror_image_instance_id_list(rados_ioctx_t io_ctx, + const char *start_id, + size_t max, char **image_ids, + char **instance_ids, + size_t *len) + void rbd_mirror_image_instance_id_list_cleanup(char **image_ids, + char **instance_ids, + size_t len) + int rbd_mirror_image_info_list(rados_ioctx_t io_ctx, + rbd_mirror_image_mode_t *mode_filter, + const char *start_id, size_t max, + char **image_ids, + rbd_mirror_image_mode_t *mode_entries, + rbd_mirror_image_info_t *info_entries, + size_t *num_entries) + void rbd_mirror_image_info_list_cleanup(char **image_ids, + rbd_mirror_image_info_t *info_entries, + size_t num_entries) + + int rbd_pool_metadata_get(rados_ioctx_t io_ctx, const char *key, + char *value, size_t *val_len) + int rbd_pool_metadata_set(rados_ioctx_t io_ctx, const char *key, + const char *value) + int rbd_pool_metadata_remove(rados_ioctx_t io_ctx, const char *key) + int rbd_pool_metadata_list(rados_ioctx_t io_ctx, const char *start, + uint64_t max, char *keys, size_t *key_len, + char *values, size_t *vals_len) + + int rbd_config_pool_list(rados_ioctx_t io_ctx, rbd_config_option_t *options, + int *max_options) + void rbd_config_pool_list_cleanup(rbd_config_option_t *options, + int 
max_options) + + int rbd_open(rados_ioctx_t io, const char *name, + rbd_image_t *image, const char *snap_name) + int rbd_open_by_id(rados_ioctx_t io, const char *image_id, + rbd_image_t *image, const char *snap_name) + int rbd_open_read_only(rados_ioctx_t io, const char *name, + rbd_image_t *image, const char *snap_name) + int rbd_open_by_id_read_only(rados_ioctx_t io, const char *image_id, + rbd_image_t *image, const char *snap_name) + int rbd_aio_open(rados_ioctx_t io, const char *name, rbd_image_t *image, + const char *snap_name, rbd_completion_t c) + int rbd_aio_open_by_id(rados_ioctx_t io, const char *id, rbd_image_t *image, + const char *snap_name, rbd_completion_t c) + int rbd_aio_open_read_only(rados_ioctx_t io, const char *name, + rbd_image_t *image, const char *snap_name, + rbd_completion_t c) + int rbd_aio_open_by_id_read_only(rados_ioctx_t io, const char *id, + rbd_image_t *image, const char *snap_name, + rbd_completion_t c) + int rbd_features_to_string(uint64_t features, char *str_features, size_t *size) + int rbd_features_from_string(const char *str_features, uint64_t *features) + int rbd_close(rbd_image_t image) + int rbd_aio_close(rbd_image_t image, rbd_completion_t c) + int rbd_resize2(rbd_image_t image, uint64_t size, bint allow_shrink, + librbd_progress_fn_t cb, void *cbdata) + int rbd_stat(rbd_image_t image, rbd_image_info_t *info, size_t infosize) + int rbd_get_old_format(rbd_image_t image, uint8_t *old) + int rbd_get_size(rbd_image_t image, uint64_t *size) + int rbd_get_features(rbd_image_t image, uint64_t *features) + int rbd_update_features(rbd_image_t image, uint64_t features, + uint8_t enabled) + int rbd_get_op_features(rbd_image_t image, uint64_t *op_features) + int rbd_get_stripe_unit(rbd_image_t image, uint64_t *stripe_unit) + int rbd_get_stripe_count(rbd_image_t image, uint64_t *stripe_count) + int rbd_get_create_timestamp(rbd_image_t image, timespec *timestamp) + int rbd_get_access_timestamp(rbd_image_t image, timespec *timestamp) + 
int rbd_get_modify_timestamp(rbd_image_t image, timespec *timestamp) + int rbd_get_overlap(rbd_image_t image, uint64_t *overlap) + int rbd_get_name(rbd_image_t image, char *name, size_t *name_len) + int rbd_get_id(rbd_image_t image, char *id, size_t id_len) + int rbd_get_block_name_prefix(rbd_image_t image, char *prefix, + size_t prefix_len) + int64_t rbd_get_data_pool_id(rbd_image_t image) + int rbd_get_parent(rbd_image_t image, + rbd_linked_image_spec_t *parent_image, + rbd_snap_spec_t *parent_snap) + int rbd_get_migration_source_spec(rbd_image_t image, + char* source_spec, size_t* max_len) + int rbd_get_flags(rbd_image_t image, uint64_t *flags) + int rbd_get_group(rbd_image_t image, rbd_group_info_t *group_info, + size_t group_info_size) + + ssize_t rbd_read2(rbd_image_t image, uint64_t ofs, size_t len, + char *buf, int op_flags) + ssize_t rbd_write2(rbd_image_t image, uint64_t ofs, size_t len, + const char *buf, int op_flags) + int rbd_discard(rbd_image_t image, uint64_t ofs, uint64_t len) + int rbd_write_zeroes(rbd_image_t image, uint64_t ofs, uint64_t len, + int zero_flags, int op_flags) + int rbd_copy3(rbd_image_t src, rados_ioctx_t dest_io_ctx, + const char *destname, rbd_image_options_t dest_opts) + int rbd_deep_copy(rbd_image_t src, rados_ioctx_t dest_io_ctx, + const char *destname, rbd_image_options_t dest_opts) + int rbd_snap_list(rbd_image_t image, rbd_snap_info_t *snaps, + int *max_snaps) + void rbd_snap_list_end(rbd_snap_info_t *snaps) + int rbd_snap_create2(rbd_image_t image, const char *snapname, uint32_t flags, + librbd_progress_fn_t cb, void *cbdata) + int rbd_snap_remove(rbd_image_t image, const char *snapname) + int rbd_snap_remove2(rbd_image_t image, const char *snapname, uint32_t flags, + librbd_progress_fn_t cb, void *cbdata) + int rbd_snap_remove_by_id(rbd_image_t image, uint64_t snap_id) + int rbd_snap_rollback(rbd_image_t image, const char *snapname) + int rbd_snap_rename(rbd_image_t image, const char *snapname, + const char* 
dstsnapsname) + int rbd_snap_protect(rbd_image_t image, const char *snap_name) + int rbd_snap_unprotect(rbd_image_t image, const char *snap_name) + int rbd_snap_is_protected(rbd_image_t image, const char *snap_name, + int *is_protected) + int rbd_snap_exists(rbd_image_t image, const char *snapname, bint *exists) + int rbd_snap_get_limit(rbd_image_t image, uint64_t *limit) + int rbd_snap_set_limit(rbd_image_t image, uint64_t limit) + int rbd_snap_get_timestamp(rbd_image_t image, uint64_t snap_id, timespec *timestamp) + int rbd_snap_set(rbd_image_t image, const char *snapname) + int rbd_snap_set_by_id(rbd_image_t image, uint64_t snap_id) + int rbd_snap_get_name(rbd_image_t image, uint64_t snap_id, + char *snapname, size_t *name_len) + int rbd_snap_get_id(rbd_image_t image, const char *snapname, + uint64_t *snap_id) + int rbd_snap_get_namespace_type(rbd_image_t image, + uint64_t snap_id, + rbd_snap_namespace_type_t *namespace_type) + int rbd_snap_get_group_namespace(rbd_image_t image, uint64_t snap_id, + rbd_snap_group_namespace_t *group_info, + size_t snap_group_namespace_size) + void rbd_snap_group_namespace_cleanup(rbd_snap_group_namespace_t *group_spec, + size_t snap_group_namespace_size) + int rbd_snap_get_trash_namespace(rbd_image_t image, uint64_t snap_id, + char *original_name, size_t max_length) + int rbd_snap_get_mirror_namespace( + rbd_image_t image, uint64_t snap_id, + rbd_snap_mirror_namespace_t *mirror_ns, + size_t snap_mirror_namespace_size) + void rbd_snap_mirror_namespace_cleanup( + rbd_snap_mirror_namespace_t *mirror_ns, + size_t snap_mirror_namespace_size) + + int rbd_flatten_with_progress(rbd_image_t image, librbd_progress_fn_t cb, + void *cbdata) + int rbd_sparsify(rbd_image_t image, size_t sparse_size) + int rbd_rebuild_object_map(rbd_image_t image, librbd_progress_fn_t cb, + void *cbdata) + int rbd_list_children3(rbd_image_t image, rbd_linked_image_spec_t *children, + size_t *max_children) + int rbd_list_descendants(rbd_image_t image, + 
rbd_linked_image_spec_t *descendants, + size_t *max_descendants) + + ssize_t rbd_list_lockers(rbd_image_t image, int *exclusive, + char *tag, size_t *tag_len, + char *clients, size_t *clients_len, + char *cookies, size_t *cookies_len, + char *addrs, size_t *addrs_len) + int rbd_lock_exclusive(rbd_image_t image, const char *cookie) + int rbd_lock_shared(rbd_image_t image, const char *cookie, + const char *tag) + int rbd_unlock(rbd_image_t image, const char *cookie) + int rbd_break_lock(rbd_image_t image, const char *client, + const char *cookie) + + int rbd_is_exclusive_lock_owner(rbd_image_t image, int *is_owner) + int rbd_lock_acquire(rbd_image_t image, rbd_lock_mode_t lock_mode) + int rbd_lock_release(rbd_image_t image) + int rbd_lock_get_owners(rbd_image_t image, rbd_lock_mode_t *lock_mode, + char **lock_owners, size_t *max_lock_owners) + void rbd_lock_get_owners_cleanup(char **lock_owners, + size_t lock_owner_count) + int rbd_lock_break(rbd_image_t image, rbd_lock_mode_t lock_mode, + char *lock_owner) + + # We use -9000 to propagate Python exceptions. We use except? to make sure + # things still work as intended if -9000 happens to be a valid errno value + # somewhere. + int rbd_diff_iterate2(rbd_image_t image, const char *fromsnapname, + uint64_t ofs, uint64_t len, + uint8_t include_parent, uint8_t whole_object, + int (*cb)(uint64_t, size_t, int, void *) + nogil except? -9000, + void *arg) except? 
-9000 + + int rbd_flush(rbd_image_t image) + int rbd_invalidate_cache(rbd_image_t image) + + int rbd_mirror_image_enable2(rbd_image_t image, + rbd_mirror_image_mode_t mode) + int rbd_mirror_image_disable(rbd_image_t image, bint force) + int rbd_mirror_image_promote(rbd_image_t image, bint force) + int rbd_mirror_image_demote(rbd_image_t image) + int rbd_mirror_image_resync(rbd_image_t image) + int rbd_mirror_image_create_snapshot2(rbd_image_t image, uint32_t flags, + uint64_t *snap_id) + int rbd_aio_mirror_image_create_snapshot(rbd_image_t image, uint32_t flags, + uint64_t *snap_id, + rbd_completion_t c) + int rbd_mirror_image_get_info(rbd_image_t image, + rbd_mirror_image_info_t *mirror_image_info, + size_t info_size) + void rbd_mirror_image_get_info_cleanup( + rbd_mirror_image_info_t *mirror_image_info) + int rbd_aio_mirror_image_get_info( + rbd_image_t image, rbd_mirror_image_info_t *mirror_image_info, + size_t info_size, rbd_completion_t c) + int rbd_mirror_image_get_mode(rbd_image_t image, + rbd_mirror_image_mode_t *mode) + int rbd_aio_mirror_image_get_mode(rbd_image_t image, + rbd_mirror_image_mode_t *mode, + rbd_completion_t c) + int rbd_mirror_image_get_global_status( + rbd_image_t image, + rbd_mirror_image_global_status_t *mirror_image_global_status, + size_t status_size) + void rbd_mirror_image_global_status_cleanup( + rbd_mirror_image_global_status_t *mirror_image_global_status) + int rbd_mirror_image_get_instance_id(rbd_image_t image, char *instance_id, + size_t *id_max_length) + + int rbd_aio_write2(rbd_image_t image, uint64_t off, size_t len, + const char *buf, rbd_completion_t c, int op_flags) + int rbd_aio_read2(rbd_image_t image, uint64_t off, size_t len, + char *buf, rbd_completion_t c, int op_flags) + int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len, + rbd_completion_t c) + int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off, uint64_t len, + rbd_completion_t c, int zero_flags, int op_flags) + + int 
rbd_aio_create_completion(void *cb_arg, rbd_callback_t complete_cb, + rbd_completion_t *c) + int rbd_aio_is_complete(rbd_completion_t c) + int rbd_aio_wait_for_complete(rbd_completion_t c) + ssize_t rbd_aio_get_return_value(rbd_completion_t c) + void rbd_aio_release(rbd_completion_t c) + int rbd_aio_flush(rbd_image_t image, rbd_completion_t c) + + int rbd_metadata_get(rbd_image_t image, const char *key, char *value, + size_t *val_len) + int rbd_metadata_set(rbd_image_t image, const char *key, const char *value) + int rbd_metadata_remove(rbd_image_t image, const char *key) + int rbd_metadata_list(rbd_image_t image, const char *start, uint64_t max, + char *keys, size_t *key_len, char *values, + size_t *vals_len) + int rbd_group_create(rados_ioctx_t p, const char *name) + int rbd_group_remove(rados_ioctx_t p, const char *name) + int rbd_group_list(rados_ioctx_t p, char *names, size_t *size) + int rbd_group_rename(rados_ioctx_t p, const char *src, const char *dest) + void rbd_group_info_cleanup(rbd_group_info_t *group_info, + size_t group_info_size) + int rbd_group_image_add(rados_ioctx_t group_p, const char *group_name, + rados_ioctx_t image_p, const char *image_name) + int rbd_group_image_remove(rados_ioctx_t group_p, const char *group_name, + rados_ioctx_t image_p, const char *image_name) + + int rbd_group_image_list(rados_ioctx_t group_p, + const char *group_name, + rbd_group_image_info_t *images, + size_t group_image_info_size, + size_t *image_size) + void rbd_group_image_list_cleanup(rbd_group_image_info_t *images, + size_t group_image_info_size, size_t len) + + int rbd_group_snap_create(rados_ioctx_t group_p, const char *group_name, + const char *snap_name) + + int rbd_group_snap_remove(rados_ioctx_t group_p, const char *group_name, + const char *snap_name) + + int rbd_group_snap_rename(rados_ioctx_t group_p, const char *group_name, + const char *old_snap_name, + const char *new_snap_name) + + int rbd_group_snap_list(rados_ioctx_t group_p, + const char 
*group_name, + rbd_group_snap_info_t *snaps, + size_t group_snap_info_size, + size_t *snaps_size) + + void rbd_group_snap_list_cleanup(rbd_group_snap_info_t *snaps, + size_t group_snap_info_size, size_t len) + int rbd_group_snap_rollback(rados_ioctx_t group_p, const char *group_name, + const char *snap_name) + + int rbd_watchers_list(rbd_image_t image, rbd_image_watcher_t *watchers, + size_t *max_watchers) + void rbd_watchers_list_cleanup(rbd_image_watcher_t *watchers, + size_t num_watchers) + + int rbd_config_image_list(rbd_image_t image, rbd_config_option_t *options, + int *max_options) + void rbd_config_image_list_cleanup(rbd_config_option_t *options, + int max_options) + + int rbd_namespace_create(rados_ioctx_t io, const char *namespace_name) + int rbd_namespace_remove(rados_ioctx_t io, const char *namespace_name) + int rbd_namespace_list(rados_ioctx_t io, char *namespace_names, + size_t *size) + int rbd_namespace_exists(rados_ioctx_t io, const char *namespace_name, + bint *exists) + + int rbd_pool_init(rados_ioctx_t, bint force) + + void rbd_pool_stats_create(rbd_pool_stats_t *stats) + void rbd_pool_stats_destroy(rbd_pool_stats_t stats) + int rbd_pool_stats_option_add_uint64(rbd_pool_stats_t stats, + int stat_option, uint64_t* stat_val) + int rbd_pool_stats_get(rados_ioctx_t io, rbd_pool_stats_t stats) diff --git a/src/pybind/rbd/ctime.pxd b/src/pybind/rbd/ctime.pxd new file mode 100644 index 00000000000..ca3ed1c02fe --- /dev/null +++ b/src/pybind/rbd/ctime.pxd @@ -0,0 +1,7 @@ +# cython: embedsignature=True + +cdef extern from "time.h": + ctypedef long int time_t + cdef struct timespec: + time_t tv_sec + long tv_nsec diff --git a/src/pybind/rbd/mock_rbd.pxi b/src/pybind/rbd/mock_rbd.pxi new file mode 100644 index 00000000000..c972ba7b97c --- /dev/null +++ b/src/pybind/rbd/mock_rbd.pxi @@ -0,0 +1,881 @@ +# cython: embedsignature=True + +from libc.stdint cimport * +from ctime cimport time_t, timespec + +cdef nogil: + enum: + _LIBRADOS_SNAP_HEAD 
"LIBRADOS_SNAP_HEAD" + +cdef: + ctypedef int (*librbd_progress_fn_t)(uint64_t offset, uint64_t total, void* ptr) + +cdef nogil: + enum: + _RBD_FEATURE_LAYERING "RBD_FEATURE_LAYERING" + _RBD_FEATURE_STRIPINGV2 "RBD_FEATURE_STRIPINGV2" + _RBD_FEATURE_EXCLUSIVE_LOCK "RBD_FEATURE_EXCLUSIVE_LOCK" + _RBD_FEATURE_OBJECT_MAP "RBD_FEATURE_OBJECT_MAP" + _RBD_FEATURE_FAST_DIFF "RBD_FEATURE_FAST_DIFF" + _RBD_FEATURE_DEEP_FLATTEN "RBD_FEATURE_DEEP_FLATTEN" + _RBD_FEATURE_JOURNALING "RBD_FEATURE_JOURNALING" + _RBD_FEATURE_DATA_POOL "RBD_FEATURE_DATA_POOL" + _RBD_FEATURE_OPERATIONS "RBD_FEATURE_OPERATIONS" + _RBD_FEATURE_MIGRATING "RBD_FEATURE_MIGRATING" + _RBD_FEATURE_NON_PRIMARY "RBD_FEATURE_NON_PRIMARY" + + _RBD_FEATURES_INCOMPATIBLE "RBD_FEATURES_INCOMPATIBLE" + _RBD_FEATURES_RW_INCOMPATIBLE "RBD_FEATURES_RW_INCOMPATIBLE" + _RBD_FEATURES_MUTABLE "RBD_FEATURES_MUTABLE" + _RBD_FEATURES_SINGLE_CLIENT "RBD_FEATURES_SINGLE_CLIENT" + _RBD_FEATURES_ALL "RBD_FEATURES_ALL" + + _RBD_OPERATION_FEATURE_CLONE_PARENT "RBD_OPERATION_FEATURE_CLONE_PARENT" + _RBD_OPERATION_FEATURE_CLONE_CHILD "RBD_OPERATION_FEATURE_CLONE_CHILD" + _RBD_OPERATION_FEATURE_GROUP "RBD_OPERATION_FEATURE_GROUP" + _RBD_OPERATION_FEATURE_SNAP_TRASH "RBD_OPERATION_FEATURE_SNAP_TRASH" + + _RBD_FLAG_OBJECT_MAP_INVALID "RBD_FLAG_OBJECT_MAP_INVALID" + _RBD_FLAG_FAST_DIFF_INVALID "RBD_FLAG_FAST_DIFF_INVALID" + + _RBD_IMAGE_OPTION_FORMAT "RBD_IMAGE_OPTION_FORMAT" + _RBD_IMAGE_OPTION_FEATURES "RBD_IMAGE_OPTION_FEATURES" + _RBD_IMAGE_OPTION_ORDER "RBD_IMAGE_OPTION_ORDER" + _RBD_IMAGE_OPTION_STRIPE_UNIT "RBD_IMAGE_OPTION_STRIPE_UNIT" + _RBD_IMAGE_OPTION_STRIPE_COUNT "RBD_IMAGE_OPTION_STRIPE_COUNT" + _RBD_IMAGE_OPTION_DATA_POOL "RBD_IMAGE_OPTION_DATA_POOL" + + RBD_MAX_BLOCK_NAME_SIZE + RBD_MAX_IMAGE_NAME_SIZE + + _RBD_SNAP_CREATE_SKIP_QUIESCE "RBD_SNAP_CREATE_SKIP_QUIESCE" + _RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR "RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR" + + _RBD_SNAP_REMOVE_UNPROTECT "RBD_SNAP_REMOVE_UNPROTECT" + 
_RBD_SNAP_REMOVE_FLATTEN "RBD_SNAP_REMOVE_FLATTEN" + _RBD_SNAP_REMOVE_FORCE "RBD_SNAP_REMOVE_FORCE" + + _RBD_WRITE_ZEROES_FLAG_THICK_PROVISION "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION" + + ctypedef void* rados_t + ctypedef void* rados_ioctx_t + ctypedef void* rbd_image_t + ctypedef void* rbd_image_options_t + ctypedef void* rbd_pool_stats_t + ctypedef void *rbd_completion_t + + ctypedef struct rbd_image_info_t: + uint64_t size + uint64_t obj_size + uint64_t num_objs + int order + char block_name_prefix[RBD_MAX_BLOCK_NAME_SIZE] + uint64_t parent_pool + char parent_name[RBD_MAX_IMAGE_NAME_SIZE] + + ctypedef struct rbd_snap_info_t: + uint64_t id + uint64_t size + char *name + + ctypedef struct rbd_snap_group_namespace_t: + int64_t group_pool + char *group_name + char *group_snap_name + + ctypedef enum rbd_snap_mirror_state_t: + _RBD_SNAP_MIRROR_STATE_PRIMARY "RBD_SNAP_MIRROR_STATE_PRIMARY" + _RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED "RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED" + _RBD_SNAP_MIRROR_STATE_NON_PRIMARY "RBD_SNAP_MIRROR_STATE_NON_PRIMARY" + _RBD_SNAP_MIRROR_STATE_NON_PRIMARY_DEMOTED "RBD_SNAP_MIRROR_STATE_NON_PRIMARY_DEMOTED" + + ctypedef struct rbd_snap_mirror_namespace_t: + rbd_snap_mirror_state_t state + size_t mirror_peer_uuids_count + char *mirror_peer_uuids + bint complete + char *primary_mirror_uuid + uint64_t primary_snap_id + uint64_t last_copied_object_number + + ctypedef struct rbd_group_info_t: + char *name + int64_t pool + + ctypedef struct rbd_image_spec_t: + char *id + char *name + + ctypedef struct rbd_linked_image_spec_t: + int64_t pool_id + char *pool_name + char *pool_namespace + char *image_id + char *image_name + bint trash + + ctypedef enum rbd_snap_namespace_type_t: + _RBD_SNAP_NAMESPACE_TYPE_USER "RBD_SNAP_NAMESPACE_TYPE_USER" + _RBD_SNAP_NAMESPACE_TYPE_GROUP "RBD_SNAP_NAMESPACE_TYPE_GROUP" + _RBD_SNAP_NAMESPACE_TYPE_TRASH "RBD_SNAP_NAMESPACE_TYPE_TRASH" + _RBD_SNAP_NAMESPACE_TYPE_MIRROR "RBD_SNAP_NAMESPACE_TYPE_MIRROR" + + ctypedef struct 
rbd_snap_spec_t: + uint64_t id + rbd_snap_namespace_type_t namespace_type + char *name + + ctypedef enum rbd_mirror_mode_t: + _RBD_MIRROR_MODE_DISABLED "RBD_MIRROR_MODE_DISABLED" + _RBD_MIRROR_MODE_IMAGE "RBD_MIRROR_MODE_IMAGE" + _RBD_MIRROR_MODE_POOL "RBD_MIRROR_MODE_POOL" + + ctypedef enum rbd_mirror_peer_direction_t: + _RBD_MIRROR_PEER_DIRECTION_RX "RBD_MIRROR_PEER_DIRECTION_RX" + _RBD_MIRROR_PEER_DIRECTION_TX "RBD_MIRROR_PEER_DIRECTION_TX" + _RBD_MIRROR_PEER_DIRECTION_RX_TX "RBD_MIRROR_PEER_DIRECTION_RX_TX" + + ctypedef struct rbd_mirror_peer_site_t: + char *uuid + rbd_mirror_peer_direction_t direction + char *site_name + char *mirror_uuid + char *client_name + time_t last_seen + + cdef char* _RBD_MIRROR_PEER_ATTRIBUTE_NAME_MON_HOST = "mon_host" + cdef char* _RBD_MIRROR_PEER_ATTRIBUTE_NAME_KEY = "key" + + ctypedef enum rbd_mirror_image_mode_t: + _RBD_MIRROR_IMAGE_MODE_JOURNAL "RBD_MIRROR_IMAGE_MODE_JOURNAL" + _RBD_MIRROR_IMAGE_MODE_SNAPSHOT "RBD_MIRROR_IMAGE_MODE_SNAPSHOT" + + ctypedef enum rbd_mirror_image_state_t: + _RBD_MIRROR_IMAGE_DISABLING "RBD_MIRROR_IMAGE_DISABLING" + _RBD_MIRROR_IMAGE_ENABLED "RBD_MIRROR_IMAGE_ENABLED" + _RBD_MIRROR_IMAGE_DISABLED "RBD_MIRROR_IMAGE_DISABLED" + + ctypedef struct rbd_mirror_image_info_t: + char *global_id + rbd_mirror_image_state_t state + bint primary + + ctypedef enum rbd_mirror_image_status_state_t: + _MIRROR_IMAGE_STATUS_STATE_UNKNOWN "MIRROR_IMAGE_STATUS_STATE_UNKNOWN" + _MIRROR_IMAGE_STATUS_STATE_ERROR "MIRROR_IMAGE_STATUS_STATE_ERROR" + _MIRROR_IMAGE_STATUS_STATE_SYNCING "MIRROR_IMAGE_STATUS_STATE_SYNCING" + _MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY "MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY" + _MIRROR_IMAGE_STATUS_STATE_REPLAYING "MIRROR_IMAGE_STATUS_STATE_REPLAYING" + _MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY "MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY" + _MIRROR_IMAGE_STATUS_STATE_STOPPED "MIRROR_IMAGE_STATUS_STATE_STOPPED" + + ctypedef struct rbd_mirror_image_site_status_t: + char *mirror_uuid + 
rbd_mirror_image_status_state_t state + char *description + time_t last_update + bint up + + ctypedef struct rbd_mirror_image_global_status_t: + char *name + rbd_mirror_image_info_t info + uint32_t site_statuses_count + rbd_mirror_image_site_status_t *site_statuses + + ctypedef enum rbd_lock_mode_t: + _RBD_LOCK_MODE_EXCLUSIVE "RBD_LOCK_MODE_EXCLUSIVE" + _RBD_LOCK_MODE_SHARED "RBD_LOCK_MODE_SHARED" + + ctypedef enum rbd_trash_image_source_t: + _RBD_TRASH_IMAGE_SOURCE_USER "RBD_TRASH_IMAGE_SOURCE_USER", + _RBD_TRASH_IMAGE_SOURCE_MIRRORING "RBD_TRASH_IMAGE_SOURCE_MIRRORING", + _RBD_TRASH_IMAGE_SOURCE_MIGRATION "RBD_TRASH_IMAGE_SOURCE_MIGRATION" + _RBD_TRASH_IMAGE_SOURCE_REMOVING "RBD_TRASH_IMAGE_SOURCE_REMOVING" + + ctypedef struct rbd_trash_image_info_t: + char *id + char *name + rbd_trash_image_source_t source + time_t deletion_time + time_t deferment_end_time + + ctypedef struct rbd_image_watcher_t: + char *addr + int64_t id + uint64_t cookie + + ctypedef enum rbd_group_image_state_t: + _RBD_GROUP_IMAGE_STATE_ATTACHED "RBD_GROUP_IMAGE_STATE_ATTACHED" + _RBD_GROUP_IMAGE_STATE_INCOMPLETE "RBD_GROUP_IMAGE_STATE_INCOMPLETE" + + ctypedef struct rbd_group_image_info_t: + char *name + int64_t pool + rbd_group_image_state_t state + + ctypedef enum rbd_group_snap_state_t: + _RBD_GROUP_SNAP_STATE_INCOMPLETE "RBD_GROUP_SNAP_STATE_INCOMPLETE" + _RBD_GROUP_SNAP_STATE_COMPLETE "RBD_GROUP_SNAP_STATE_COMPLETE" + + ctypedef struct rbd_group_snap_info_t: + char *name + rbd_group_snap_state_t state + + ctypedef enum rbd_image_migration_state_t: + _RBD_IMAGE_MIGRATION_STATE_UNKNOWN "RBD_IMAGE_MIGRATION_STATE_UNKNOWN" + _RBD_IMAGE_MIGRATION_STATE_ERROR "RBD_IMAGE_MIGRATION_STATE_ERROR" + _RBD_IMAGE_MIGRATION_STATE_PREPARING "RBD_IMAGE_MIGRATION_STATE_PREPARING" + _RBD_IMAGE_MIGRATION_STATE_PREPARED "RBD_IMAGE_MIGRATION_STATE_PREPARED" + _RBD_IMAGE_MIGRATION_STATE_EXECUTING "RBD_IMAGE_MIGRATION_STATE_EXECUTING" + _RBD_IMAGE_MIGRATION_STATE_EXECUTED "RBD_IMAGE_MIGRATION_STATE_EXECUTED" + 
_RBD_IMAGE_MIGRATION_STATE_ABORTING "RBD_IMAGE_MIGRATION_STATE_ABORTING" + + ctypedef struct rbd_image_migration_status_t: + int64_t source_pool_id + char *source_pool_namespace + char *source_image_name + char *source_image_id + int64_t dest_pool_id + char *dest_pool_namespace + char *dest_image_name + char *dest_image_id + rbd_image_migration_state_t state + char *state_description + + ctypedef enum rbd_config_source_t: + _RBD_CONFIG_SOURCE_CONFIG "RBD_CONFIG_SOURCE_CONFIG" + _RBD_CONFIG_SOURCE_POOL "RBD_CONFIG_SOURCE_POOL" + _RBD_CONFIG_SOURCE_IMAGE "RBD_CONFIG_SOURCE_IMAGE" + + ctypedef struct rbd_config_option_t: + char *name + char *value + rbd_config_source_t source + + ctypedef enum rbd_pool_stat_option_t: + _RBD_POOL_STAT_OPTION_IMAGES "RBD_POOL_STAT_OPTION_IMAGES" + _RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES "RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES" + _RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES "RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES" + _RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS "RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS" + _RBD_POOL_STAT_OPTION_TRASH_IMAGES "RBD_POOL_STAT_OPTION_TRASH_IMAGES" + _RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES "RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES" + _RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES "RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES" + _RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS "RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS" + + ctypedef void (*rbd_callback_t)(rbd_completion_t cb, void *arg) + + void rbd_version(int *major, int *minor, int *extra): + pass + void rbd_image_spec_list_cleanup(rbd_image_spec_t *image, size_t num_images): + pass + void rbd_linked_image_spec_cleanup(rbd_linked_image_spec_t *image): + pass + void rbd_linked_image_spec_list_cleanup(rbd_linked_image_spec_t *images, + size_t num_images): + pass + void rbd_snap_spec_cleanup(rbd_snap_spec_t *snap): + pass + void rbd_image_options_create(rbd_image_options_t* opts): + pass + void rbd_image_options_destroy(rbd_image_options_t opts): 
+ pass + int rbd_image_options_set_string(rbd_image_options_t opts, int optname, + const char* optval): + pass + int rbd_image_options_set_uint64(rbd_image_options_t opts, int optname, + uint64_t optval): + pass + int rbd_image_options_get_string(rbd_image_options_t opts, int optname, + char* optval, size_t maxlen): + pass + int rbd_image_options_get_uint64(rbd_image_options_t opts, int optname, + uint64_t* optval): + pass + int rbd_image_options_unset(rbd_image_options_t opts, int optname): + pass + void rbd_image_options_clear(rbd_image_options_t opts): + pass + int rbd_image_options_is_empty(rbd_image_options_t opts): + pass + + int rbd_list(rados_ioctx_t io, char *names, size_t *size): + pass + int rbd_list2(rados_ioctx_t io, rbd_image_spec_t *images, + size_t *num_images): + pass + int rbd_create(rados_ioctx_t io, const char *name, uint64_t size, + int *order): + pass + int rbd_create4(rados_ioctx_t io, const char *name, uint64_t size, + rbd_image_options_t opts): + pass + int rbd_clone3(rados_ioctx_t p_ioctx, const char *p_name, + const char *p_snapname, rados_ioctx_t c_ioctx, + const char *c_name, rbd_image_options_t c_opts): + pass + int rbd_remove_with_progress(rados_ioctx_t io, const char *name, + librbd_progress_fn_t cb, void *cbdata): + pass + int rbd_rename(rados_ioctx_t src_io_ctx, const char *srcname, + const char *destname): + pass + + int rbd_trash_move(rados_ioctx_t io, const char *name, uint64_t delay): + pass + int rbd_trash_get(rados_ioctx_t io, const char *id, + rbd_trash_image_info_t *info): + pass + void rbd_trash_get_cleanup(rbd_trash_image_info_t *info): + pass + int rbd_trash_list(rados_ioctx_t io, rbd_trash_image_info_t *trash_entries, + size_t *num_entries): + pass + void rbd_trash_list_cleanup(rbd_trash_image_info_t *trash_entries, + size_t num_entries): + pass + int rbd_trash_purge(rados_ioctx_t io, time_t expire_ts, float threshold): + pass + int rbd_trash_remove_with_progress(rados_ioctx_t io, const char *id, + int force, 
librbd_progress_fn_t cb, + void *cbdata): + pass + int rbd_trash_restore(rados_ioctx_t io, const char *id, const char *name): + pass + + int rbd_migration_prepare(rados_ioctx_t io_ctx, const char *image_name, + rados_ioctx_t dest_io_ctx, + const char *dest_image_name, + rbd_image_options_t opts): + pass + int rbd_migration_prepare_import(const char *source_spec, + rados_ioctx_t dest_io_ctx, + const char *dest_image_name, + rbd_image_options_t opts): + pass + int rbd_migration_execute_with_progress(rados_ioctx_t io_ctx, + const char *image_name, + librbd_progress_fn_t cb, + void *cbdata): + pass + int rbd_migration_commit_with_progress(rados_ioctx_t io_ctx, + const char *image_name, + librbd_progress_fn_t cb, + void *cbdata): + pass + int rbd_migration_abort_with_progress(rados_ioctx_t io_ctx, + const char *image_name, + librbd_progress_fn_t cb, void *cbdata): + pass + int rbd_migration_status(rados_ioctx_t io_ctx, const char *image_name, + rbd_image_migration_status_t *status, + size_t status_size): + pass + void rbd_migration_status_cleanup(rbd_image_migration_status_t *status): + pass + + int rbd_mirror_site_name_get(rados_t cluster, char *name, size_t *max_len): + pass + int rbd_mirror_site_name_set(rados_t cluster, const char *name): + pass + + int rbd_mirror_mode_get(rados_ioctx_t io, rbd_mirror_mode_t *mirror_mode): + pass + int rbd_mirror_mode_set(rados_ioctx_t io, rbd_mirror_mode_t mirror_mode): + pass + + int rbd_mirror_uuid_get(rados_ioctx_t io_ctx, char *mirror_uuid, + size_t *max_len): + pass + + int rbd_mirror_peer_bootstrap_create(rados_ioctx_t io_ctx, char *token, + size_t *max_len): + pass + int rbd_mirror_peer_bootstrap_import( + rados_ioctx_t io_ctx, rbd_mirror_peer_direction_t direction, + const char *token): + pass + + int rbd_mirror_peer_site_add( + rados_ioctx_t io, char *uuid, size_t uuid_max_length, + rbd_mirror_peer_direction_t direction, const char *site_name, + const char *client_name): + pass + int 
rbd_mirror_peer_site_remove(rados_ioctx_t io, const char *uuid): + pass + int rbd_mirror_peer_site_list( + rados_ioctx_t io_ctx, rbd_mirror_peer_site_t *peers,int *max_peers): + pass + void rbd_mirror_peer_site_list_cleanup( + rbd_mirror_peer_site_t *peers, int max_peers): + pass + + int rbd_mirror_peer_site_set_name( + rados_ioctx_t io_ctx, const char *uuid, const char *site_name): + pass + int rbd_mirror_peer_site_set_client_name( + rados_ioctx_t io_ctx, const char *uuid, const char *client_name): + pass + + int rbd_mirror_peer_site_get_attributes( + rados_ioctx_t io_ctx, const char *uuid, char *keys, size_t *max_key_len, + char *values, size_t *max_val_length, size_t *key_value_count): + pass + int rbd_mirror_peer_site_set_attributes( + rados_ioctx_t io_ctx, const char *uuid, const char *keys, + const char *values, size_t count): + pass + + int rbd_mirror_image_global_status_list( + rados_ioctx_t io, const char *start_id, size_t max, char **image_ids, + rbd_mirror_image_global_status_t *images, size_t *len): + pass + void rbd_mirror_image_global_status_list_cleanup( + char **image_ids, rbd_mirror_image_global_status_t *images, size_t len): + pass + int rbd_mirror_image_status_summary(rados_ioctx_t io, + rbd_mirror_image_status_state_t *states, + int *counts, size_t *maxlen): + pass + int rbd_mirror_image_instance_id_list(rados_ioctx_t io_ctx, + const char *start_id, + size_t max, char **image_ids, + char **instance_ids, + size_t *len): + pass + void rbd_mirror_image_instance_id_list_cleanup(char **image_ids, + char **instance_ids, + size_t len): + pass + int rbd_mirror_image_info_list(rados_ioctx_t io_ctx, + rbd_mirror_image_mode_t *mode_filter, + const char *start_id, size_t max, + char **image_ids, + rbd_mirror_image_mode_t *mode_entries, + rbd_mirror_image_info_t *info_entries, + size_t *num_entries): + pass + void rbd_mirror_image_info_list_cleanup(char **image_ids, + rbd_mirror_image_info_t *info_entries, + size_t num_entries): + pass + + int 
rbd_pool_metadata_get(rados_ioctx_t io_ctx, const char *key, + char *value, size_t *val_len): + pass + int rbd_pool_metadata_set(rados_ioctx_t io_ctx, const char *key, + const char *value): + pass + int rbd_pool_metadata_remove(rados_ioctx_t io_ctx, const char *key): + pass + int rbd_pool_metadata_list(rados_ioctx_t io_ctx, const char *start, + uint64_t max, char *keys, size_t *key_len, + char *values, size_t *vals_len): + pass + + int rbd_config_pool_list(rados_ioctx_t io_ctx, rbd_config_option_t *options, + int *max_options): + pass + void rbd_config_pool_list_cleanup(rbd_config_option_t *options, + int max_options): + pass + + int rbd_open(rados_ioctx_t io, const char *name, + rbd_image_t *image, const char *snap_name): + pass + int rbd_open_by_id(rados_ioctx_t io, const char *image_id, + rbd_image_t *image, const char *snap_name): + pass + int rbd_open_read_only(rados_ioctx_t io, const char *name, + rbd_image_t *image, const char *snap_name): + pass + int rbd_open_by_id_read_only(rados_ioctx_t io, const char *image_id, + rbd_image_t *image, const char *snap_name): + pass + int rbd_aio_open(rados_ioctx_t io, const char *name, rbd_image_t *image, + const char *snap_name, rbd_completion_t c): + pass + int rbd_aio_open_by_id(rados_ioctx_t io, const char *id, rbd_image_t *image, + const char *snap_name, rbd_completion_t c): + pass + int rbd_aio_open_read_only(rados_ioctx_t io, const char *name, + rbd_image_t *image, const char *snap_name, + rbd_completion_t c): + pass + int rbd_aio_open_by_id_read_only(rados_ioctx_t io, const char *id, + rbd_image_t *image, const char *snap_name, + rbd_completion_t c): + pass + int rbd_features_to_string(uint64_t features, char *str_features, size_t *size): + pass + int rbd_features_from_string(const char *str_features, uint64_t *features): + pass + int rbd_close(rbd_image_t image): + pass + int rbd_aio_close(rbd_image_t image, rbd_completion_t c): + pass + int rbd_resize2(rbd_image_t image, uint64_t size, bint allow_shrink, + 
librbd_progress_fn_t cb, void *cbdata): + pass + int rbd_stat(rbd_image_t image, rbd_image_info_t *info, size_t infosize): + pass + int rbd_get_old_format(rbd_image_t image, uint8_t *old): + pass + int rbd_get_size(rbd_image_t image, uint64_t *size): + pass + int rbd_get_features(rbd_image_t image, uint64_t *features): + pass + int rbd_update_features(rbd_image_t image, uint64_t features, + uint8_t enabled): + pass + int rbd_get_op_features(rbd_image_t image, uint64_t *op_features): + pass + int rbd_get_stripe_unit(rbd_image_t image, uint64_t *stripe_unit): + pass + int rbd_get_stripe_count(rbd_image_t image, uint64_t *stripe_count): + pass + int rbd_get_create_timestamp(rbd_image_t image, timespec *timestamp): + pass + int rbd_get_access_timestamp(rbd_image_t image, timespec *timestamp): + pass + int rbd_get_modify_timestamp(rbd_image_t image, timespec *timestamp): + pass + int rbd_get_overlap(rbd_image_t image, uint64_t *overlap): + pass + int rbd_get_name(rbd_image_t image, char *name, size_t *name_len): + pass + int rbd_get_id(rbd_image_t image, char *id, size_t id_len): + pass + int rbd_get_block_name_prefix(rbd_image_t image, char *prefix, + size_t prefix_len): + pass + int64_t rbd_get_data_pool_id(rbd_image_t image): + pass + int rbd_get_parent(rbd_image_t image, + rbd_linked_image_spec_t *parent_image, + rbd_snap_spec_t *parent_snap): + pass + int rbd_get_migration_source_spec(rbd_image_t image, + char* source_spec, size_t* max_len): + pass + int rbd_get_flags(rbd_image_t image, uint64_t *flags): + pass + int rbd_get_group(rbd_image_t image, rbd_group_info_t *group_info, + size_t group_info_size): + pass + + ssize_t rbd_read2(rbd_image_t image, uint64_t ofs, size_t len, + char *buf, int op_flags): + pass + ssize_t rbd_write2(rbd_image_t image, uint64_t ofs, size_t len, + const char *buf, int op_flags): + pass + int rbd_discard(rbd_image_t image, uint64_t ofs, uint64_t len): + pass + int rbd_write_zeroes(rbd_image_t image, uint64_t ofs, uint64_t len, + int 
zero_flags, int op_flags): + pass + int rbd_copy3(rbd_image_t src, rados_ioctx_t dest_io_ctx, + const char *destname, rbd_image_options_t dest_opts): + pass + int rbd_deep_copy(rbd_image_t src, rados_ioctx_t dest_io_ctx, + const char *destname, rbd_image_options_t dest_opts): + pass + int rbd_snap_list(rbd_image_t image, rbd_snap_info_t *snaps, + int *max_snaps): + pass + void rbd_snap_list_end(rbd_snap_info_t *snaps): + pass + int rbd_snap_create2(rbd_image_t image, const char *snapname, uint32_t flags, + librbd_progress_fn_t cb, void *cbdata): + pass + int rbd_snap_remove(rbd_image_t image, const char *snapname): + pass + int rbd_snap_remove2(rbd_image_t image, const char *snapname, uint32_t flags, + librbd_progress_fn_t cb, void *cbdata): + pass + int rbd_snap_remove_by_id(rbd_image_t image, uint64_t snap_id): + pass + int rbd_snap_rollback(rbd_image_t image, const char *snapname): + pass + int rbd_snap_rename(rbd_image_t image, const char *snapname, + const char* dstsnapsname): + pass + int rbd_snap_protect(rbd_image_t image, const char *snap_name): + pass + int rbd_snap_unprotect(rbd_image_t image, const char *snap_name): + pass + int rbd_snap_is_protected(rbd_image_t image, const char *snap_name, + int *is_protected): + pass + int rbd_snap_exists(rbd_image_t image, const char *snapname, bint *exists): + pass + int rbd_snap_get_limit(rbd_image_t image, uint64_t *limit): + pass + int rbd_snap_set_limit(rbd_image_t image, uint64_t limit): + pass + int rbd_snap_get_timestamp(rbd_image_t image, uint64_t snap_id, timespec *timestamp): + pass + int rbd_snap_set(rbd_image_t image, const char *snapname): + pass + int rbd_snap_set_by_id(rbd_image_t image, uint64_t snap_id): + pass + int rbd_snap_get_name(rbd_image_t image, uint64_t snap_id, + char *snapname, size_t *name_len): + pass + int rbd_snap_get_id(rbd_image_t image, const char *snapname, + uint64_t *snap_id): + pass + int rbd_snap_get_namespace_type(rbd_image_t image, + uint64_t snap_id, + 
rbd_snap_namespace_type_t *namespace_type): + pass + int rbd_snap_get_group_namespace(rbd_image_t image, uint64_t snap_id, + rbd_snap_group_namespace_t *group_info, + size_t snap_group_namespace_size): + pass + void rbd_snap_group_namespace_cleanup(rbd_snap_group_namespace_t *group_spec, + size_t snap_group_namespace_size): + pass + int rbd_snap_get_trash_namespace(rbd_image_t image, uint64_t snap_id, + char *original_name, size_t max_length): + pass + int rbd_snap_get_mirror_namespace( + rbd_image_t image, uint64_t snap_id, + rbd_snap_mirror_namespace_t *mirror_ns, + size_t snap_mirror_namespace_size): + pass + void rbd_snap_mirror_namespace_cleanup( + rbd_snap_mirror_namespace_t *mirror_ns, + size_t snap_mirror_namespace_size): + pass + + int rbd_flatten_with_progress(rbd_image_t image, librbd_progress_fn_t cb, + void *cbdata): + pass + int rbd_sparsify(rbd_image_t image, size_t sparse_size): + pass + int rbd_rebuild_object_map(rbd_image_t image, librbd_progress_fn_t cb, + void *cbdata): + pass + int rbd_list_children3(rbd_image_t image, rbd_linked_image_spec_t *children, + size_t *max_children): + pass + int rbd_list_descendants(rbd_image_t image, + rbd_linked_image_spec_t *descendants, + size_t *max_descendants): + pass + + ssize_t rbd_list_lockers(rbd_image_t image, int *exclusive, + char *tag, size_t *tag_len, + char *clients, size_t *clients_len, + char *cookies, size_t *cookies_len, + char *addrs, size_t *addrs_len): + pass + int rbd_lock_exclusive(rbd_image_t image, const char *cookie): + pass + int rbd_lock_shared(rbd_image_t image, const char *cookie, + const char *tag): + pass + int rbd_unlock(rbd_image_t image, const char *cookie): + pass + int rbd_break_lock(rbd_image_t image, const char *client, + const char *cookie): + pass + + int rbd_is_exclusive_lock_owner(rbd_image_t image, int *is_owner): + pass + int rbd_lock_acquire(rbd_image_t image, rbd_lock_mode_t lock_mode): + pass + int rbd_lock_release(rbd_image_t image): + pass + int 
rbd_lock_get_owners(rbd_image_t image, rbd_lock_mode_t *lock_mode, + char **lock_owners, size_t *max_lock_owners): + pass + void rbd_lock_get_owners_cleanup(char **lock_owners, + size_t lock_owner_count): + pass + int rbd_lock_break(rbd_image_t image, rbd_lock_mode_t lock_mode, + char *lock_owner): + pass + + # We use -9000 to propagate Python exceptions. We use except? to make sure + # things still work as intended if -9000 happens to be a valid errno value + # somewhere. + int rbd_diff_iterate2(rbd_image_t image, const char *fromsnapname, + uint64_t ofs, uint64_t len, + uint8_t include_parent, uint8_t whole_object, + int (*cb)(uint64_t, size_t, int, void *) + nogil except? -9000, + void *arg) except? -9000: + pass + + int rbd_flush(rbd_image_t image): + pass + int rbd_invalidate_cache(rbd_image_t image): + pass + + int rbd_mirror_image_enable2(rbd_image_t image, + rbd_mirror_image_mode_t mode): + pass + int rbd_mirror_image_disable(rbd_image_t image, bint force): + pass + int rbd_mirror_image_promote(rbd_image_t image, bint force): + pass + int rbd_mirror_image_demote(rbd_image_t image): + pass + int rbd_mirror_image_resync(rbd_image_t image): + pass + int rbd_mirror_image_create_snapshot2(rbd_image_t image, uint32_t flags, + uint64_t *snap_id): + pass + int rbd_aio_mirror_image_create_snapshot(rbd_image_t image, uint32_t flags, + uint64_t *snap_id, + rbd_completion_t c): + pass + int rbd_mirror_image_get_info(rbd_image_t image, + rbd_mirror_image_info_t *mirror_image_info, + size_t info_size): + pass + void rbd_mirror_image_get_info_cleanup( + rbd_mirror_image_info_t *mirror_image_info): + pass + int rbd_aio_mirror_image_get_info( + rbd_image_t image, rbd_mirror_image_info_t *mirror_image_info, + size_t info_size, rbd_completion_t c): + pass + int rbd_mirror_image_get_mode(rbd_image_t image, + rbd_mirror_image_mode_t *mode): + pass + int rbd_aio_mirror_image_get_mode(rbd_image_t image, + rbd_mirror_image_mode_t *mode, + rbd_completion_t c): + pass + int 
rbd_mirror_image_get_global_status( + rbd_image_t image, + rbd_mirror_image_global_status_t *mirror_image_global_status, + size_t status_size): + pass + void rbd_mirror_image_global_status_cleanup( + rbd_mirror_image_global_status_t *mirror_image_global_status): + pass + int rbd_mirror_image_get_instance_id(rbd_image_t image, char *instance_id, + size_t *id_max_length): + pass + int rbd_aio_write2(rbd_image_t image, uint64_t off, size_t len, + const char *buf, rbd_completion_t c, int op_flags): + pass + int rbd_aio_read2(rbd_image_t image, uint64_t off, size_t len, + char *buf, rbd_completion_t c, int op_flags): + pass + int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len, + rbd_completion_t c): + pass + int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off, uint64_t len, + rbd_completion_t c, int zero_flags, int op_flags): + pass + int rbd_aio_create_completion(void *cb_arg, rbd_callback_t complete_cb, + rbd_completion_t *c): + pass + int rbd_aio_is_complete(rbd_completion_t c): + pass + int rbd_aio_wait_for_complete(rbd_completion_t c): + pass + ssize_t rbd_aio_get_return_value(rbd_completion_t c): + pass + void rbd_aio_release(rbd_completion_t c): + pass + int rbd_aio_flush(rbd_image_t image, rbd_completion_t c): + pass + + int rbd_metadata_get(rbd_image_t image, const char *key, char *value, + size_t *val_len): + pass + int rbd_metadata_set(rbd_image_t image, const char *key, const char *value): + pass + int rbd_metadata_remove(rbd_image_t image, const char *key): + pass + int rbd_metadata_list(rbd_image_t image, const char *start, uint64_t max, + char *keys, size_t *key_len, char *values, + size_t *vals_len): + pass + int rbd_group_create(rados_ioctx_t p, const char *name): + pass + int rbd_group_remove(rados_ioctx_t p, const char *name): + pass + int rbd_group_list(rados_ioctx_t p, char *names, size_t *size): + pass + int rbd_group_rename(rados_ioctx_t p, const char *src, const char *dest): + pass + void 
rbd_group_info_cleanup(rbd_group_info_t *group_info, + size_t group_info_size): + pass + int rbd_group_image_add(rados_ioctx_t group_p, const char *group_name, + rados_ioctx_t image_p, const char *image_name): + pass + int rbd_group_image_remove(rados_ioctx_t group_p, const char *group_name, + rados_ioctx_t image_p, const char *image_name): + pass + int rbd_group_image_list(rados_ioctx_t group_p, + const char *group_name, + rbd_group_image_info_t *images, + size_t group_image_info_size, + size_t *image_size): + pass + void rbd_group_image_list_cleanup(rbd_group_image_info_t *images, + size_t group_image_info_size, size_t len): + pass + int rbd_group_snap_create(rados_ioctx_t group_p, const char *group_name, + const char *snap_name): + pass + int rbd_group_snap_remove(rados_ioctx_t group_p, const char *group_name, + const char *snap_name): + pass + int rbd_group_snap_rename(rados_ioctx_t group_p, const char *group_name, + const char *old_snap_name, + const char *new_snap_name): + pass + int rbd_group_snap_list(rados_ioctx_t group_p, + const char *group_name, + rbd_group_snap_info_t *snaps, + size_t group_snap_info_size, + size_t *snaps_size): + pass + void rbd_group_snap_list_cleanup(rbd_group_snap_info_t *snaps, + size_t group_snap_info_size, size_t len): + pass + int rbd_group_snap_rollback(rados_ioctx_t group_p, const char *group_name, + const char *snap_name): + pass + int rbd_watchers_list(rbd_image_t image, rbd_image_watcher_t *watchers, + size_t *max_watchers): + pass + void rbd_watchers_list_cleanup(rbd_image_watcher_t *watchers, + size_t num_watchers): + pass + int rbd_config_image_list(rbd_image_t image, rbd_config_option_t *options, + int *max_options): + pass + void rbd_config_image_list_cleanup(rbd_config_option_t *options, + int max_options): + pass + int rbd_namespace_create(rados_ioctx_t io, const char *namespace_name): + pass + int rbd_namespace_remove(rados_ioctx_t io, const char *namespace_name): + pass + int rbd_namespace_list(rados_ioctx_t io, 
char *namespace_names, + size_t *size): + pass + int rbd_namespace_exists(rados_ioctx_t io, const char *namespace_name, + bint *exists): + pass + int rbd_pool_init(rados_ioctx_t io, bint force): + pass + void rbd_pool_stats_create(rbd_pool_stats_t *stats): + pass + void rbd_pool_stats_destroy(rbd_pool_stats_t stats): + pass + int rbd_pool_stats_option_add_uint64(rbd_pool_stats_t stats, + int stat_option, uint64_t* stat_val): + pass + int rbd_pool_stats_get(rados_ioctx_t io, rbd_pool_stats_t stats): + pass diff --git a/src/pybind/rbd/rbd.pyx b/src/pybind/rbd/rbd.pyx index dd3188323e0..ba0d108efb9 100644 --- a/src/pybind/rbd/rbd.pyx +++ b/src/pybind/rbd/rbd.pyx @@ -32,7 +32,11 @@ from datetime import datetime from itertools import chain import time -cimport rados +IF BUILD_DOC: + include "mock_rbd.pxi" +ELSE: + from c_rbd cimport * + cimport rados cdef extern from "Python.h": @@ -44,703 +48,10 @@ cdef extern from "Python.h": char* PyBytes_AsString(PyObject *string) except NULL int _PyBytes_Resize(PyObject **string, Py_ssize_t newsize) except -1 -cdef extern from "time.h": - ctypedef long int time_t - cdef struct timespec: - time_t tv_sec - long tv_nsec - cdef extern from "<errno.h>" nogil: enum: _ECANCELED "ECANCELED" -cdef extern from "rados/librados.h": - enum: - _LIBRADOS_SNAP_HEAD "LIBRADOS_SNAP_HEAD" - -cdef extern from "rbd/librbd.h": - ctypedef int (*librbd_progress_fn_t)(uint64_t offset, uint64_t total, void* ptr) - -cdef extern from "rbd/librbd.h" nogil: - enum: - _RBD_FEATURE_LAYERING "RBD_FEATURE_LAYERING" - _RBD_FEATURE_STRIPINGV2 "RBD_FEATURE_STRIPINGV2" - _RBD_FEATURE_EXCLUSIVE_LOCK "RBD_FEATURE_EXCLUSIVE_LOCK" - _RBD_FEATURE_OBJECT_MAP "RBD_FEATURE_OBJECT_MAP" - _RBD_FEATURE_FAST_DIFF "RBD_FEATURE_FAST_DIFF" - _RBD_FEATURE_DEEP_FLATTEN "RBD_FEATURE_DEEP_FLATTEN" - _RBD_FEATURE_JOURNALING "RBD_FEATURE_JOURNALING" - _RBD_FEATURE_DATA_POOL "RBD_FEATURE_DATA_POOL" - _RBD_FEATURE_OPERATIONS "RBD_FEATURE_OPERATIONS" - _RBD_FEATURE_MIGRATING 
"RBD_FEATURE_MIGRATING" - _RBD_FEATURE_NON_PRIMARY "RBD_FEATURE_NON_PRIMARY" - - _RBD_FEATURES_INCOMPATIBLE "RBD_FEATURES_INCOMPATIBLE" - _RBD_FEATURES_RW_INCOMPATIBLE "RBD_FEATURES_RW_INCOMPATIBLE" - _RBD_FEATURES_MUTABLE "RBD_FEATURES_MUTABLE" - _RBD_FEATURES_SINGLE_CLIENT "RBD_FEATURES_SINGLE_CLIENT" - _RBD_FEATURES_ALL "RBD_FEATURES_ALL" - - _RBD_OPERATION_FEATURE_CLONE_PARENT "RBD_OPERATION_FEATURE_CLONE_PARENT" - _RBD_OPERATION_FEATURE_CLONE_CHILD "RBD_OPERATION_FEATURE_CLONE_CHILD" - _RBD_OPERATION_FEATURE_GROUP "RBD_OPERATION_FEATURE_GROUP" - _RBD_OPERATION_FEATURE_SNAP_TRASH "RBD_OPERATION_FEATURE_SNAP_TRASH" - - _RBD_FLAG_OBJECT_MAP_INVALID "RBD_FLAG_OBJECT_MAP_INVALID" - _RBD_FLAG_FAST_DIFF_INVALID "RBD_FLAG_FAST_DIFF_INVALID" - - _RBD_IMAGE_OPTION_FORMAT "RBD_IMAGE_OPTION_FORMAT" - _RBD_IMAGE_OPTION_FEATURES "RBD_IMAGE_OPTION_FEATURES" - _RBD_IMAGE_OPTION_ORDER "RBD_IMAGE_OPTION_ORDER" - _RBD_IMAGE_OPTION_STRIPE_UNIT "RBD_IMAGE_OPTION_STRIPE_UNIT" - _RBD_IMAGE_OPTION_STRIPE_COUNT "RBD_IMAGE_OPTION_STRIPE_COUNT" - _RBD_IMAGE_OPTION_DATA_POOL "RBD_IMAGE_OPTION_DATA_POOL" - - RBD_MAX_BLOCK_NAME_SIZE - RBD_MAX_IMAGE_NAME_SIZE - - _RBD_SNAP_CREATE_SKIP_QUIESCE "RBD_SNAP_CREATE_SKIP_QUIESCE" - _RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR "RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR" - - _RBD_SNAP_REMOVE_UNPROTECT "RBD_SNAP_REMOVE_UNPROTECT" - _RBD_SNAP_REMOVE_FLATTEN "RBD_SNAP_REMOVE_FLATTEN" - _RBD_SNAP_REMOVE_FORCE "RBD_SNAP_REMOVE_FORCE" - - _RBD_WRITE_ZEROES_FLAG_THICK_PROVISION "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION" - - ctypedef void* rados_t - ctypedef void* rados_ioctx_t - ctypedef void* rbd_image_t - ctypedef void* rbd_image_options_t - ctypedef void* rbd_pool_stats_t - ctypedef void *rbd_completion_t - - ctypedef struct rbd_image_info_t: - uint64_t size - uint64_t obj_size - uint64_t num_objs - int order - char block_name_prefix[RBD_MAX_BLOCK_NAME_SIZE] - uint64_t parent_pool - char parent_name[RBD_MAX_IMAGE_NAME_SIZE] - - ctypedef struct rbd_snap_info_t: - 
uint64_t id - uint64_t size - char *name - - ctypedef struct rbd_snap_group_namespace_t: - int64_t group_pool - char *group_name - char *group_snap_name - - ctypedef enum rbd_snap_mirror_state_t: - _RBD_SNAP_MIRROR_STATE_PRIMARY "RBD_SNAP_MIRROR_STATE_PRIMARY" - _RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED "RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED" - _RBD_SNAP_MIRROR_STATE_NON_PRIMARY "RBD_SNAP_MIRROR_STATE_NON_PRIMARY" - _RBD_SNAP_MIRROR_STATE_NON_PRIMARY_DEMOTED "RBD_SNAP_MIRROR_STATE_NON_PRIMARY_DEMOTED" - - ctypedef struct rbd_snap_mirror_namespace_t: - rbd_snap_mirror_state_t state - size_t mirror_peer_uuids_count - char *mirror_peer_uuids - bint complete - char *primary_mirror_uuid - uint64_t primary_snap_id - uint64_t last_copied_object_number - - ctypedef struct rbd_group_info_t: - char *name - int64_t pool - - ctypedef struct rbd_image_spec_t: - char *id - char *name - - ctypedef struct rbd_linked_image_spec_t: - int64_t pool_id - char *pool_name - char *pool_namespace - char *image_id - char *image_name - bint trash - - ctypedef enum rbd_snap_namespace_type_t: - _RBD_SNAP_NAMESPACE_TYPE_USER "RBD_SNAP_NAMESPACE_TYPE_USER" - _RBD_SNAP_NAMESPACE_TYPE_GROUP "RBD_SNAP_NAMESPACE_TYPE_GROUP" - _RBD_SNAP_NAMESPACE_TYPE_TRASH "RBD_SNAP_NAMESPACE_TYPE_TRASH" - _RBD_SNAP_NAMESPACE_TYPE_MIRROR "RBD_SNAP_NAMESPACE_TYPE_MIRROR" - - ctypedef struct rbd_snap_spec_t: - uint64_t id - rbd_snap_namespace_type_t namespace_type - char *name - - ctypedef enum rbd_mirror_mode_t: - _RBD_MIRROR_MODE_DISABLED "RBD_MIRROR_MODE_DISABLED" - _RBD_MIRROR_MODE_IMAGE "RBD_MIRROR_MODE_IMAGE" - _RBD_MIRROR_MODE_POOL "RBD_MIRROR_MODE_POOL" - - ctypedef enum rbd_mirror_peer_direction_t: - _RBD_MIRROR_PEER_DIRECTION_RX "RBD_MIRROR_PEER_DIRECTION_RX" - _RBD_MIRROR_PEER_DIRECTION_TX "RBD_MIRROR_PEER_DIRECTION_TX" - _RBD_MIRROR_PEER_DIRECTION_RX_TX "RBD_MIRROR_PEER_DIRECTION_RX_TX" - - ctypedef struct rbd_mirror_peer_site_t: - char *uuid - rbd_mirror_peer_direction_t direction - char *site_name - char 
*mirror_uuid - char *client_name - time_t last_seen - - cdef char* _RBD_MIRROR_PEER_ATTRIBUTE_NAME_MON_HOST "RBD_MIRROR_PEER_ATTRIBUTE_NAME_MON_HOST" - cdef char* _RBD_MIRROR_PEER_ATTRIBUTE_NAME_KEY "RBD_MIRROR_PEER_ATTRIBUTE_NAME_KEY" - - ctypedef enum rbd_mirror_image_mode_t: - _RBD_MIRROR_IMAGE_MODE_JOURNAL "RBD_MIRROR_IMAGE_MODE_JOURNAL" - _RBD_MIRROR_IMAGE_MODE_SNAPSHOT "RBD_MIRROR_IMAGE_MODE_SNAPSHOT" - - ctypedef enum rbd_mirror_image_state_t: - _RBD_MIRROR_IMAGE_DISABLING "RBD_MIRROR_IMAGE_DISABLING" - _RBD_MIRROR_IMAGE_ENABLED "RBD_MIRROR_IMAGE_ENABLED" - _RBD_MIRROR_IMAGE_DISABLED "RBD_MIRROR_IMAGE_DISABLED" - - ctypedef struct rbd_mirror_image_info_t: - char *global_id - rbd_mirror_image_state_t state - bint primary - - ctypedef enum rbd_mirror_image_status_state_t: - _MIRROR_IMAGE_STATUS_STATE_UNKNOWN "MIRROR_IMAGE_STATUS_STATE_UNKNOWN" - _MIRROR_IMAGE_STATUS_STATE_ERROR "MIRROR_IMAGE_STATUS_STATE_ERROR" - _MIRROR_IMAGE_STATUS_STATE_SYNCING "MIRROR_IMAGE_STATUS_STATE_SYNCING" - _MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY "MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY" - _MIRROR_IMAGE_STATUS_STATE_REPLAYING "MIRROR_IMAGE_STATUS_STATE_REPLAYING" - _MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY "MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY" - _MIRROR_IMAGE_STATUS_STATE_STOPPED "MIRROR_IMAGE_STATUS_STATE_STOPPED" - - ctypedef struct rbd_mirror_image_site_status_t: - char *mirror_uuid - rbd_mirror_image_status_state_t state - char *description - time_t last_update - bint up - - ctypedef struct rbd_mirror_image_global_status_t: - char *name - rbd_mirror_image_info_t info - uint32_t site_statuses_count - rbd_mirror_image_site_status_t *site_statuses - - ctypedef enum rbd_lock_mode_t: - _RBD_LOCK_MODE_EXCLUSIVE "RBD_LOCK_MODE_EXCLUSIVE" - _RBD_LOCK_MODE_SHARED "RBD_LOCK_MODE_SHARED" - - ctypedef enum rbd_trash_image_source_t: - _RBD_TRASH_IMAGE_SOURCE_USER "RBD_TRASH_IMAGE_SOURCE_USER", - _RBD_TRASH_IMAGE_SOURCE_MIRRORING "RBD_TRASH_IMAGE_SOURCE_MIRRORING", - 
_RBD_TRASH_IMAGE_SOURCE_MIGRATION "RBD_TRASH_IMAGE_SOURCE_MIGRATION" - _RBD_TRASH_IMAGE_SOURCE_REMOVING "RBD_TRASH_IMAGE_SOURCE_REMOVING" - - ctypedef struct rbd_trash_image_info_t: - char *id - char *name - rbd_trash_image_source_t source - time_t deletion_time - time_t deferment_end_time - - ctypedef struct rbd_image_watcher_t: - char *addr - int64_t id - uint64_t cookie - - ctypedef enum rbd_group_image_state_t: - _RBD_GROUP_IMAGE_STATE_ATTACHED "RBD_GROUP_IMAGE_STATE_ATTACHED" - _RBD_GROUP_IMAGE_STATE_INCOMPLETE "RBD_GROUP_IMAGE_STATE_INCOMPLETE" - - ctypedef struct rbd_group_image_info_t: - char *name - int64_t pool - rbd_group_image_state_t state - - ctypedef enum rbd_group_snap_state_t: - _RBD_GROUP_SNAP_STATE_INCOMPLETE "RBD_GROUP_SNAP_STATE_INCOMPLETE" - _RBD_GROUP_SNAP_STATE_COMPLETE "RBD_GROUP_SNAP_STATE_COMPLETE" - - ctypedef struct rbd_group_snap_info_t: - char *name - rbd_group_snap_state_t state - - ctypedef enum rbd_image_migration_state_t: - _RBD_IMAGE_MIGRATION_STATE_UNKNOWN "RBD_IMAGE_MIGRATION_STATE_UNKNOWN" - _RBD_IMAGE_MIGRATION_STATE_ERROR "RBD_IMAGE_MIGRATION_STATE_ERROR" - _RBD_IMAGE_MIGRATION_STATE_PREPARING "RBD_IMAGE_MIGRATION_STATE_PREPARING" - _RBD_IMAGE_MIGRATION_STATE_PREPARED "RBD_IMAGE_MIGRATION_STATE_PREPARED" - _RBD_IMAGE_MIGRATION_STATE_EXECUTING "RBD_IMAGE_MIGRATION_STATE_EXECUTING" - _RBD_IMAGE_MIGRATION_STATE_EXECUTED "RBD_IMAGE_MIGRATION_STATE_EXECUTED" - _RBD_IMAGE_MIGRATION_STATE_ABORTING "RBD_IMAGE_MIGRATION_STATE_ABORTING" - - ctypedef struct rbd_image_migration_status_t: - int64_t source_pool_id - char *source_pool_namespace - char *source_image_name - char *source_image_id - int64_t dest_pool_id - char *dest_pool_namespace - char *dest_image_name - char *dest_image_id - rbd_image_migration_state_t state - char *state_description - - ctypedef enum rbd_config_source_t: - _RBD_CONFIG_SOURCE_CONFIG "RBD_CONFIG_SOURCE_CONFIG" - _RBD_CONFIG_SOURCE_POOL "RBD_CONFIG_SOURCE_POOL" - _RBD_CONFIG_SOURCE_IMAGE 
"RBD_CONFIG_SOURCE_IMAGE" - - ctypedef struct rbd_config_option_t: - char *name - char *value - rbd_config_source_t source - - ctypedef enum rbd_pool_stat_option_t: - _RBD_POOL_STAT_OPTION_IMAGES "RBD_POOL_STAT_OPTION_IMAGES" - _RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES "RBD_POOL_STAT_OPTION_IMAGE_PROVISIONED_BYTES" - _RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES "RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES" - _RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS "RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS" - _RBD_POOL_STAT_OPTION_TRASH_IMAGES "RBD_POOL_STAT_OPTION_TRASH_IMAGES" - _RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES "RBD_POOL_STAT_OPTION_TRASH_PROVISIONED_BYTES" - _RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES "RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES" - _RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS "RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS" - - ctypedef void (*rbd_callback_t)(rbd_completion_t cb, void *arg) - - void rbd_version(int *major, int *minor, int *extra) - - void rbd_image_spec_list_cleanup(rbd_image_spec_t *image, size_t num_images) - void rbd_linked_image_spec_cleanup(rbd_linked_image_spec_t *image) - void rbd_linked_image_spec_list_cleanup(rbd_linked_image_spec_t *images, - size_t num_images) - void rbd_snap_spec_cleanup(rbd_snap_spec_t *snap) - - void rbd_image_options_create(rbd_image_options_t* opts) - void rbd_image_options_destroy(rbd_image_options_t opts) - int rbd_image_options_set_string(rbd_image_options_t opts, int optname, - const char* optval) - int rbd_image_options_set_uint64(rbd_image_options_t opts, int optname, - uint64_t optval) - int rbd_image_options_get_string(rbd_image_options_t opts, int optname, - char* optval, size_t maxlen) - int rbd_image_options_get_uint64(rbd_image_options_t opts, int optname, - uint64_t* optval) - int rbd_image_options_unset(rbd_image_options_t opts, int optname) - void rbd_image_options_clear(rbd_image_options_t opts) - int rbd_image_options_is_empty(rbd_image_options_t opts) - - int rbd_list(rados_ioctx_t 
io, char *names, size_t *size) - int rbd_list2(rados_ioctx_t io, rbd_image_spec_t *images, - size_t *num_images) - int rbd_create(rados_ioctx_t io, const char *name, uint64_t size, - int *order) - int rbd_create4(rados_ioctx_t io, const char *name, uint64_t size, - rbd_image_options_t opts) - int rbd_clone3(rados_ioctx_t p_ioctx, const char *p_name, - const char *p_snapname, rados_ioctx_t c_ioctx, - const char *c_name, rbd_image_options_t c_opts) - int rbd_remove_with_progress(rados_ioctx_t io, const char *name, - librbd_progress_fn_t cb, void *cbdata) - int rbd_rename(rados_ioctx_t src_io_ctx, const char *srcname, - const char *destname) - - int rbd_trash_move(rados_ioctx_t io, const char *name, uint64_t delay) - int rbd_trash_get(rados_ioctx_t io, const char *id, - rbd_trash_image_info_t *info) - void rbd_trash_get_cleanup(rbd_trash_image_info_t *info) - int rbd_trash_list(rados_ioctx_t io, rbd_trash_image_info_t *trash_entries, - size_t *num_entries) - void rbd_trash_list_cleanup(rbd_trash_image_info_t *trash_entries, - size_t num_entries) - int rbd_trash_purge(rados_ioctx_t io, time_t expire_ts, float threshold) - int rbd_trash_remove_with_progress(rados_ioctx_t io, const char *id, - int force, librbd_progress_fn_t cb, - void *cbdata) - int rbd_trash_restore(rados_ioctx_t io, const char *id, const char *name) - - int rbd_migration_prepare(rados_ioctx_t io_ctx, const char *image_name, - rados_ioctx_t dest_io_ctx, - const char *dest_image_name, - rbd_image_options_t opts) - int rbd_migration_prepare_import(const char *source_spec, - rados_ioctx_t dest_io_ctx, - const char *dest_image_name, - rbd_image_options_t opts) - int rbd_migration_execute_with_progress(rados_ioctx_t io_ctx, - const char *image_name, - librbd_progress_fn_t cb, - void *cbdata) - int rbd_migration_commit_with_progress(rados_ioctx_t io_ctx, - const char *image_name, - librbd_progress_fn_t cb, - void *cbdata) - int rbd_migration_abort_with_progress(rados_ioctx_t io_ctx, - const char *image_name, 
- librbd_progress_fn_t cb, void *cbdata) - int rbd_migration_status(rados_ioctx_t io_ctx, const char *image_name, - rbd_image_migration_status_t *status, - size_t status_size) - void rbd_migration_status_cleanup(rbd_image_migration_status_t *status) - - int rbd_mirror_site_name_get(rados_t cluster, char *name, size_t *max_len) - int rbd_mirror_site_name_set(rados_t cluster, const char *name) - - int rbd_mirror_mode_get(rados_ioctx_t io, rbd_mirror_mode_t *mirror_mode) - int rbd_mirror_mode_set(rados_ioctx_t io, rbd_mirror_mode_t mirror_mode) - - int rbd_mirror_uuid_get(rados_ioctx_t io_ctx, char *mirror_uuid, - size_t *max_len) - - int rbd_mirror_peer_bootstrap_create(rados_ioctx_t io_ctx, char *token, - size_t *max_len) - int rbd_mirror_peer_bootstrap_import( - rados_ioctx_t io_ctx, rbd_mirror_peer_direction_t direction, - const char *token) - - int rbd_mirror_peer_site_add( - rados_ioctx_t io, char *uuid, size_t uuid_max_length, - rbd_mirror_peer_direction_t direction, const char *site_name, - const char *client_name) - int rbd_mirror_peer_site_remove(rados_ioctx_t io, const char *uuid) - int rbd_mirror_peer_site_list( - rados_ioctx_t io_ctx, rbd_mirror_peer_site_t *peers,int *max_peers) - void rbd_mirror_peer_site_list_cleanup( - rbd_mirror_peer_site_t *peers, int max_peers) - - int rbd_mirror_peer_site_set_name( - rados_ioctx_t io_ctx, const char *uuid, const char *site_name) - int rbd_mirror_peer_site_set_client_name( - rados_ioctx_t io_ctx, const char *uuid, const char *client_name) - - int rbd_mirror_peer_site_get_attributes( - rados_ioctx_t io_ctx, const char *uuid, char *keys, size_t *max_key_len, - char *values, size_t *max_val_length, size_t *key_value_count) - int rbd_mirror_peer_site_set_attributes( - rados_ioctx_t io_ctx, const char *uuid, const char *keys, - const char *values, size_t count) - - int rbd_mirror_image_global_status_list( - rados_ioctx_t io, const char *start_id, size_t max, char **image_ids, - rbd_mirror_image_global_status_t *images, 
size_t *len) - void rbd_mirror_image_global_status_list_cleanup( - char **image_ids, rbd_mirror_image_global_status_t *images, size_t len) - int rbd_mirror_image_status_summary(rados_ioctx_t io, - rbd_mirror_image_status_state_t *states, - int *counts, size_t *maxlen) - int rbd_mirror_image_instance_id_list(rados_ioctx_t io_ctx, - const char *start_id, - size_t max, char **image_ids, - char **instance_ids, - size_t *len) - void rbd_mirror_image_instance_id_list_cleanup(char **image_ids, - char **instance_ids, - size_t len) - int rbd_mirror_image_info_list(rados_ioctx_t io_ctx, - rbd_mirror_image_mode_t *mode_filter, - const char *start_id, size_t max, - char **image_ids, - rbd_mirror_image_mode_t *mode_entries, - rbd_mirror_image_info_t *info_entries, - size_t *num_entries) - void rbd_mirror_image_info_list_cleanup(char **image_ids, - rbd_mirror_image_info_t *info_entries, - size_t num_entries) - - int rbd_pool_metadata_get(rados_ioctx_t io_ctx, const char *key, - char *value, size_t *val_len) - int rbd_pool_metadata_set(rados_ioctx_t io_ctx, const char *key, - const char *value) - int rbd_pool_metadata_remove(rados_ioctx_t io_ctx, const char *key) - int rbd_pool_metadata_list(rados_ioctx_t io_ctx, const char *start, - uint64_t max, char *keys, size_t *key_len, - char *values, size_t *vals_len) - - int rbd_config_pool_list(rados_ioctx_t io_ctx, rbd_config_option_t *options, - int *max_options) - void rbd_config_pool_list_cleanup(rbd_config_option_t *options, - int max_options) - - int rbd_open(rados_ioctx_t io, const char *name, - rbd_image_t *image, const char *snap_name) - int rbd_open_by_id(rados_ioctx_t io, const char *image_id, - rbd_image_t *image, const char *snap_name) - int rbd_open_read_only(rados_ioctx_t io, const char *name, - rbd_image_t *image, const char *snap_name) - int rbd_open_by_id_read_only(rados_ioctx_t io, const char *image_id, - rbd_image_t *image, const char *snap_name) - int rbd_aio_open(rados_ioctx_t io, const char *name, rbd_image_t 
*image, - const char *snap_name, rbd_completion_t c) - int rbd_aio_open_by_id(rados_ioctx_t io, const char *id, rbd_image_t *image, - const char *snap_name, rbd_completion_t c) - int rbd_aio_open_read_only(rados_ioctx_t io, const char *name, - rbd_image_t *image, const char *snap_name, - rbd_completion_t c) - int rbd_aio_open_by_id_read_only(rados_ioctx_t io, const char *id, - rbd_image_t *image, const char *snap_name, - rbd_completion_t c) - int rbd_features_to_string(uint64_t features, char *str_features, size_t *size) - int rbd_features_from_string(const char *str_features, uint64_t *features) - int rbd_close(rbd_image_t image) - int rbd_aio_close(rbd_image_t image, rbd_completion_t c) - int rbd_resize2(rbd_image_t image, uint64_t size, bint allow_shrink, - librbd_progress_fn_t cb, void *cbdata) - int rbd_stat(rbd_image_t image, rbd_image_info_t *info, size_t infosize) - int rbd_get_old_format(rbd_image_t image, uint8_t *old) - int rbd_get_size(rbd_image_t image, uint64_t *size) - int rbd_get_features(rbd_image_t image, uint64_t *features) - int rbd_update_features(rbd_image_t image, uint64_t features, - uint8_t enabled) - int rbd_get_op_features(rbd_image_t image, uint64_t *op_features) - int rbd_get_stripe_unit(rbd_image_t image, uint64_t *stripe_unit) - int rbd_get_stripe_count(rbd_image_t image, uint64_t *stripe_count) - int rbd_get_create_timestamp(rbd_image_t image, timespec *timestamp) - int rbd_get_access_timestamp(rbd_image_t image, timespec *timestamp) - int rbd_get_modify_timestamp(rbd_image_t image, timespec *timestamp) - int rbd_get_overlap(rbd_image_t image, uint64_t *overlap) - int rbd_get_name(rbd_image_t image, char *name, size_t *name_len) - int rbd_get_id(rbd_image_t image, char *id, size_t id_len) - int rbd_get_block_name_prefix(rbd_image_t image, char *prefix, - size_t prefix_len) - int64_t rbd_get_data_pool_id(rbd_image_t image) - int rbd_get_parent(rbd_image_t image, - rbd_linked_image_spec_t *parent_image, - rbd_snap_spec_t *parent_snap) 
- int rbd_get_migration_source_spec(rbd_image_t image, - char* source_spec, size_t* max_len) - int rbd_get_flags(rbd_image_t image, uint64_t *flags) - int rbd_get_group(rbd_image_t image, rbd_group_info_t *group_info, - size_t group_info_size) - - ssize_t rbd_read2(rbd_image_t image, uint64_t ofs, size_t len, - char *buf, int op_flags) - ssize_t rbd_write2(rbd_image_t image, uint64_t ofs, size_t len, - const char *buf, int op_flags) - int rbd_discard(rbd_image_t image, uint64_t ofs, uint64_t len) - int rbd_write_zeroes(rbd_image_t image, uint64_t ofs, uint64_t len, - int zero_flags, int op_flags) - int rbd_copy3(rbd_image_t src, rados_ioctx_t dest_io_ctx, - const char *destname, rbd_image_options_t dest_opts) - int rbd_deep_copy(rbd_image_t src, rados_ioctx_t dest_io_ctx, - const char *destname, rbd_image_options_t dest_opts) - int rbd_snap_list(rbd_image_t image, rbd_snap_info_t *snaps, - int *max_snaps) - void rbd_snap_list_end(rbd_snap_info_t *snaps) - int rbd_snap_create2(rbd_image_t image, const char *snapname, uint32_t flags, - librbd_progress_fn_t cb, void *cbdata) - int rbd_snap_remove(rbd_image_t image, const char *snapname) - int rbd_snap_remove2(rbd_image_t image, const char *snapname, uint32_t flags, - librbd_progress_fn_t cb, void *cbdata) - int rbd_snap_remove_by_id(rbd_image_t image, uint64_t snap_id) - int rbd_snap_rollback(rbd_image_t image, const char *snapname) - int rbd_snap_rename(rbd_image_t image, const char *snapname, - const char* dstsnapsname) - int rbd_snap_protect(rbd_image_t image, const char *snap_name) - int rbd_snap_unprotect(rbd_image_t image, const char *snap_name) - int rbd_snap_is_protected(rbd_image_t image, const char *snap_name, - int *is_protected) - int rbd_snap_exists(rbd_image_t image, const char *snapname, bint *exists) - int rbd_snap_get_limit(rbd_image_t image, uint64_t *limit) - int rbd_snap_set_limit(rbd_image_t image, uint64_t limit) - int rbd_snap_get_timestamp(rbd_image_t image, uint64_t snap_id, timespec 
*timestamp) - int rbd_snap_set(rbd_image_t image, const char *snapname) - int rbd_snap_set_by_id(rbd_image_t image, uint64_t snap_id) - int rbd_snap_get_name(rbd_image_t image, uint64_t snap_id, - char *snapname, size_t *name_len) - int rbd_snap_get_id(rbd_image_t image, const char *snapname, - uint64_t *snap_id) - int rbd_snap_get_namespace_type(rbd_image_t image, - uint64_t snap_id, - rbd_snap_namespace_type_t *namespace_type) - int rbd_snap_get_group_namespace(rbd_image_t image, uint64_t snap_id, - rbd_snap_group_namespace_t *group_info, - size_t snap_group_namespace_size) - void rbd_snap_group_namespace_cleanup(rbd_snap_group_namespace_t *group_spec, - size_t snap_group_namespace_size) - int rbd_snap_get_trash_namespace(rbd_image_t image, uint64_t snap_id, - char *original_name, size_t max_length) - int rbd_snap_get_mirror_namespace( - rbd_image_t image, uint64_t snap_id, - rbd_snap_mirror_namespace_t *mirror_ns, - size_t snap_mirror_namespace_size) - void rbd_snap_mirror_namespace_cleanup( - rbd_snap_mirror_namespace_t *mirror_ns, - size_t snap_mirror_namespace_size) - - int rbd_flatten_with_progress(rbd_image_t image, librbd_progress_fn_t cb, - void *cbdata) - int rbd_sparsify(rbd_image_t image, size_t sparse_size) - int rbd_rebuild_object_map(rbd_image_t image, librbd_progress_fn_t cb, - void *cbdata) - int rbd_list_children3(rbd_image_t image, rbd_linked_image_spec_t *children, - size_t *max_children) - int rbd_list_descendants(rbd_image_t image, - rbd_linked_image_spec_t *descendants, - size_t *max_descendants) - - ssize_t rbd_list_lockers(rbd_image_t image, int *exclusive, - char *tag, size_t *tag_len, - char *clients, size_t *clients_len, - char *cookies, size_t *cookies_len, - char *addrs, size_t *addrs_len) - int rbd_lock_exclusive(rbd_image_t image, const char *cookie) - int rbd_lock_shared(rbd_image_t image, const char *cookie, - const char *tag) - int rbd_unlock(rbd_image_t image, const char *cookie) - int rbd_break_lock(rbd_image_t image, const 
char *client, - const char *cookie) - - int rbd_is_exclusive_lock_owner(rbd_image_t image, int *is_owner) - int rbd_lock_acquire(rbd_image_t image, rbd_lock_mode_t lock_mode) - int rbd_lock_release(rbd_image_t image) - int rbd_lock_get_owners(rbd_image_t image, rbd_lock_mode_t *lock_mode, - char **lock_owners, size_t *max_lock_owners) - void rbd_lock_get_owners_cleanup(char **lock_owners, - size_t lock_owner_count) - int rbd_lock_break(rbd_image_t image, rbd_lock_mode_t lock_mode, - char *lock_owner) - - # We use -9000 to propagate Python exceptions. We use except? to make sure - # things still work as intended if -9000 happens to be a valid errno value - # somewhere. - int rbd_diff_iterate2(rbd_image_t image, const char *fromsnapname, - uint64_t ofs, uint64_t len, - uint8_t include_parent, uint8_t whole_object, - int (*cb)(uint64_t, size_t, int, void *) - nogil except? -9000, - void *arg) except? -9000 - - int rbd_flush(rbd_image_t image) - int rbd_invalidate_cache(rbd_image_t image) - - int rbd_mirror_image_enable2(rbd_image_t image, - rbd_mirror_image_mode_t mode) - int rbd_mirror_image_disable(rbd_image_t image, bint force) - int rbd_mirror_image_promote(rbd_image_t image, bint force) - int rbd_mirror_image_demote(rbd_image_t image) - int rbd_mirror_image_resync(rbd_image_t image) - int rbd_mirror_image_create_snapshot2(rbd_image_t image, uint32_t flags, - uint64_t *snap_id) - int rbd_aio_mirror_image_create_snapshot(rbd_image_t image, uint32_t flags, - uint64_t *snap_id, - rbd_completion_t c) - int rbd_mirror_image_get_info(rbd_image_t image, - rbd_mirror_image_info_t *mirror_image_info, - size_t info_size) - void rbd_mirror_image_get_info_cleanup( - rbd_mirror_image_info_t *mirror_image_info) - int rbd_aio_mirror_image_get_info( - rbd_image_t image, rbd_mirror_image_info_t *mirror_image_info, - size_t info_size, rbd_completion_t c) - int rbd_mirror_image_get_mode(rbd_image_t image, - rbd_mirror_image_mode_t *mode) - int 
rbd_aio_mirror_image_get_mode(rbd_image_t image, - rbd_mirror_image_mode_t *mode, - rbd_completion_t c) - int rbd_mirror_image_get_global_status( - rbd_image_t image, - rbd_mirror_image_global_status_t *mirror_image_global_status, - size_t status_size) - void rbd_mirror_image_global_status_cleanup( - rbd_mirror_image_global_status_t *mirror_image_global_status) - int rbd_mirror_image_get_instance_id(rbd_image_t image, char *instance_id, - size_t *id_max_length) - - int rbd_aio_write2(rbd_image_t image, uint64_t off, size_t len, - const char *buf, rbd_completion_t c, int op_flags) - int rbd_aio_read2(rbd_image_t image, uint64_t off, size_t len, - char *buf, rbd_completion_t c, int op_flags) - int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len, - rbd_completion_t c) - int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off, uint64_t len, - rbd_completion_t c, int zero_flags, int op_flags) - - int rbd_aio_create_completion(void *cb_arg, rbd_callback_t complete_cb, - rbd_completion_t *c) - int rbd_aio_is_complete(rbd_completion_t c) - int rbd_aio_wait_for_complete(rbd_completion_t c) - ssize_t rbd_aio_get_return_value(rbd_completion_t c) - void rbd_aio_release(rbd_completion_t c) - int rbd_aio_flush(rbd_image_t image, rbd_completion_t c) - - int rbd_metadata_get(rbd_image_t image, const char *key, char *value, - size_t *val_len) - int rbd_metadata_set(rbd_image_t image, const char *key, const char *value) - int rbd_metadata_remove(rbd_image_t image, const char *key) - int rbd_metadata_list(rbd_image_t image, const char *start, uint64_t max, - char *keys, size_t *key_len, char *values, - size_t *vals_len) - int rbd_group_create(rados_ioctx_t p, const char *name) - int rbd_group_remove(rados_ioctx_t p, const char *name) - int rbd_group_list(rados_ioctx_t p, char *names, size_t *size) - int rbd_group_rename(rados_ioctx_t p, const char *src, const char *dest) - void rbd_group_info_cleanup(rbd_group_info_t *group_info, - size_t group_info_size) - int 
rbd_group_image_add(rados_ioctx_t group_p, const char *group_name, - rados_ioctx_t image_p, const char *image_name) - int rbd_group_image_remove(rados_ioctx_t group_p, const char *group_name, - rados_ioctx_t image_p, const char *image_name) - - int rbd_group_image_list(rados_ioctx_t group_p, - const char *group_name, - rbd_group_image_info_t *images, - size_t group_image_info_size, - size_t *image_size) - void rbd_group_image_list_cleanup(rbd_group_image_info_t *images, - size_t group_image_info_size, size_t len) - - int rbd_group_snap_create(rados_ioctx_t group_p, const char *group_name, - const char *snap_name) - - int rbd_group_snap_remove(rados_ioctx_t group_p, const char *group_name, - const char *snap_name) - - int rbd_group_snap_rename(rados_ioctx_t group_p, const char *group_name, - const char *old_snap_name, - const char *new_snap_name) - - int rbd_group_snap_list(rados_ioctx_t group_p, - const char *group_name, - rbd_group_snap_info_t *snaps, - size_t group_snap_info_size, - size_t *snaps_size) - - void rbd_group_snap_list_cleanup(rbd_group_snap_info_t *snaps, - size_t group_snap_info_size, size_t len) - int rbd_group_snap_rollback(rados_ioctx_t group_p, const char *group_name, - const char *snap_name) - - int rbd_watchers_list(rbd_image_t image, rbd_image_watcher_t *watchers, - size_t *max_watchers) - void rbd_watchers_list_cleanup(rbd_image_watcher_t *watchers, - size_t num_watchers) - - int rbd_config_image_list(rbd_image_t image, rbd_config_option_t *options, - int *max_options) - void rbd_config_image_list_cleanup(rbd_config_option_t *options, - int max_options) - - int rbd_namespace_create(rados_ioctx_t io, const char *namespace_name) - int rbd_namespace_remove(rados_ioctx_t io, const char *namespace_name) - int rbd_namespace_list(rados_ioctx_t io, char *namespace_names, - size_t *size) - int rbd_namespace_exists(rados_ioctx_t io, const char *namespace_name, - bint *exists) - - int rbd_pool_init(rados_ioctx_t, bint force) - - void 
rbd_pool_stats_create(rbd_pool_stats_t *stats) - void rbd_pool_stats_destroy(rbd_pool_stats_t stats) - int rbd_pool_stats_option_add_uint64(rbd_pool_stats_t stats, - int stat_option, uint64_t* stat_val) - int rbd_pool_stats_get(rados_ioctx_t io, rbd_pool_stats_t stats) ECANCELED = _ECANCELED @@ -1040,11 +351,18 @@ cdef make_ex(ret, msg, exception_map=errno_to_exception): return OSError(msg, errno=ret) -cdef rados_t convert_rados(rados.Rados rados) except? NULL: - return <rados_t>rados.cluster +IF BUILD_DOC: + cdef rados_t convert_rados(rados) nogil: + return <rados_t>0 + + cdef rados_ioctx_t convert_ioctx(ioctx) nogil: + return <rados_ioctx_t>0 +ELSE: + cdef rados_t convert_rados(rados.Rados rados) except? NULL: + return <rados_t>rados.cluster -cdef rados_ioctx_t convert_ioctx(rados.Ioctx ioctx) except? NULL: - return <rados_ioctx_t>ioctx.io + cdef rados_ioctx_t convert_ioctx(rados.Ioctx ioctx) except? NULL: + return <rados_ioctx_t>ioctx.io cdef int progress_callback(uint64_t offset, uint64_t total, void* ptr) with gil: return (<object>ptr)(offset, total) diff --git a/src/pybind/rbd/setup.py b/src/pybind/rbd/setup.py index 60c47c1a9c9..73ea4555f46 100755 --- a/src/pybind/rbd/setup.py +++ b/src/pybind/rbd/setup.py @@ -136,10 +136,16 @@ def check_sanity(): shutil.rmtree(tmp_dir) -if 'BUILD_DOC' in os.environ.keys(): - pass +if 'BUILD_DOC' in os.environ or 'READTHEDOCS' in os.environ: + ext_args = {} + cython_constants = dict(BUILD_DOC=True) + cythonize_args = dict(compile_time_env=cython_constants) elif check_sanity(): - pass + ext_args = get_python_flags(['rados', 'rbd']) + cython_constants = dict(BUILD_DOC=False) + include_path = [os.path.join(os.path.dirname(__file__), "..", "rados")] + cythonize_args = dict(compile_time_env=cython_constants, + include_path=include_path) else: sys.exit(1) @@ -190,14 +196,12 @@ setup( Extension( "rbd", [source], - **get_python_flags(['rbd', 'rados']) + **ext_args ) ], compiler_directives={'language_level': sys.version_info.major}, 
build_dir=os.environ.get("CYTHON_BUILD_DIR", None), - include_path=[ - os.path.join(os.path.dirname(__file__), "..", "rados") - ] + **cythonize_args ), classifiers=[ 'Intended Audience :: Developers', diff --git a/src/pybind/rgw/c_rgw.pxd b/src/pybind/rgw/c_rgw.pxd new file mode 100644 index 00000000000..988b67b0ef9 --- /dev/null +++ b/src/pybind/rgw/c_rgw.pxd @@ -0,0 +1,137 @@ +# cython: embedsignature=True + +from libc.stdint cimport * +from libcpp cimport bool +from cstat cimport stat + + +cdef extern from "rados/librgw.h" nogil: + ctypedef void* librgw_t + + int librgw_create(librgw_t *rgw, int argc, char **argv) + void librgw_shutdown(librgw_t rgw) + + +cdef extern from "rados/rgw_file.h" nogil: + enum: + RGW_FS_TYPE_FILE + RGW_FS_TYPE_DIRECTORY + + RGW_LOOKUP_FLAG_CREATE + + RGW_SETATTR_MODE + RGW_SETATTR_UID + RGW_SETATTR_GID + RGW_SETATTR_MTIME + RGW_SETATTR_ATIME + RGW_SETATTR_SIZE + RGW_SETATTR_CTIME + + RGW_READDIR_FLAG_NONE + RGW_READDIR_FLAG_DOTDOT + + RGW_OPEN_FLAG_CREATE + RGW_OPEN_FLAG_V3 # ops have v3 semantics + RGW_OPEN_FLAG_STATELESS # alias it + + RGW_CLOSE_FLAG_RELE + + + ctypedef void *rgw_fh_hk + cdef struct rgw_file_handle: + pass + + cdef struct rgw_fs: + librgw_t rgw + void *fs_private + void *root_fh + + # mount info hypothetical--emulate Unix, support at least UUID-length fsid + cdef struct rgw_statvfs: + uint64_t f_bsize # file system block size + uint64_t f_frsize # fragment size + uint64_t f_blocks # size of fs in f_frsize units + uint64_t f_bfree # free blocks + uint64_t f_bavail # free blocks for unprivileged users + uint64_t f_files # inodes + uint64_t f_ffree # free inodes + uint64_t f_favail # free inodes for unprivileged users + uint64_t f_fsid[2] # /* file system ID + uint64_t f_flag # mount flags + uint64_t f_namemax # maximum filename length + + void rgwfile_version(int *major, int *minor, int *extra) + + int rgw_lookup(rgw_fs *fs, + rgw_file_handle *parent_fh, const char *path, + rgw_file_handle **fh, stat* st, uint32_t 
st_mask, + uint32_t flags) + + int rgw_lookup_handle(rgw_fs *fs, rgw_fh_hk *fh_hk, + rgw_file_handle **fh, uint32_t flags) + + int rgw_fh_rele(rgw_fs *fs, rgw_file_handle *fh, + uint32_t flags) + + int rgw_mount(librgw_t rgw, const char *uid, const char *key, + const char *secret, rgw_fs **fs, uint32_t flags) + + int rgw_umount(rgw_fs *fs, uint32_t flags) + + int rgw_statfs(rgw_fs *fs, rgw_file_handle *parent_fh, + rgw_statvfs *vfs_st, uint32_t flags) + + int rgw_create(rgw_fs *fs, rgw_file_handle *parent_fh, + const char *name, stat *st, uint32_t mask, + rgw_file_handle **fh, uint32_t posix_flags, + uint32_t flags) + + int rgw_mkdir(rgw_fs *fs, + rgw_file_handle *parent_fh, + const char *name, stat *st, uint32_t mask, + rgw_file_handle **fh, uint32_t flags) + + int rgw_rename(rgw_fs *fs, + rgw_file_handle *olddir, const char* old_name, + rgw_file_handle *newdir, const char* new_name, + uint32_t flags) + + int rgw_unlink(rgw_fs *fs, + rgw_file_handle *parent_fh, const char* path, + uint32_t flags) + + int rgw_readdir(rgw_fs *fs, + rgw_file_handle *parent_fh, uint64_t *offset, + bool (*cb)(const char *name, void *arg, uint64_t offset, stat *st, uint32_t st_mask, uint32_t flags) nogil except? -9000, + void *cb_arg, bool *eof, uint32_t flags) except? 
-9000 + + int rgw_getattr(rgw_fs *fs, + rgw_file_handle *fh, stat *st, + uint32_t flags) + + int rgw_setattr(rgw_fs *fs, rgw_file_handle *fh, stat *st, + uint32_t mask, uint32_t flags) + + int rgw_truncate(rgw_fs *fs, rgw_file_handle *fh, uint64_t size, uint32_t flags) + + int rgw_open(rgw_fs *fs, rgw_file_handle *parent_fh, + uint32_t posix_flags, uint32_t flags) + + int rgw_close(rgw_fs *fs, rgw_file_handle *fh, + uint32_t flags) + + int rgw_read(rgw_fs *fs, + rgw_file_handle *fh, uint64_t offset, + size_t length, size_t *bytes_read, void *buffer, + uint32_t flags) + + int rgw_write(rgw_fs *fs, + rgw_file_handle *fh, uint64_t offset, + size_t length, size_t *bytes_written, void *buffer, + uint32_t flags) + + int rgw_fsync(rgw_fs *fs, rgw_file_handle *fh, + uint32_t flags) + + int rgw_commit(rgw_fs *fs, rgw_file_handle *fh, + uint64_t offset, uint64_t length, uint32_t flags) diff --git a/src/pybind/rgw/cstat.pxd b/src/pybind/rgw/cstat.pxd new file mode 100644 index 00000000000..f7da7c6def2 --- /dev/null +++ b/src/pybind/rgw/cstat.pxd @@ -0,0 +1,20 @@ +cdef extern from "time.h": + ctypedef long int time_t + + +cdef extern from "sys/stat.h": + cdef struct stat: + unsigned long st_dev + unsigned long st_ino + unsigned long st_nlink + unsigned int st_mode + unsigned int st_uid + unsigned int st_gid + int __pad0 + unsigned long st_rdev + long int st_size + long int st_blksize + long int st_blocks + time_t st_atime + time_t st_mtime + time_t st_ctime diff --git a/src/pybind/rgw/mock_rgw.pxi b/src/pybind/rgw/mock_rgw.pxi new file mode 100644 index 00000000000..ca893a5bb8a --- /dev/null +++ b/src/pybind/rgw/mock_rgw.pxi @@ -0,0 +1,156 @@ +# cython: embedsignature=True + +cdef nogil: + ctypedef void* librgw_t + + int librgw_create(librgw_t *rgw, int argc, char **argv): + pass + void librgw_shutdown(librgw_t rgw): + pass + + +cdef nogil: + enum: + RGW_FS_TYPE_FILE + RGW_FS_TYPE_DIRECTORY + + RGW_LOOKUP_FLAG_CREATE + + RGW_SETATTR_MODE + RGW_SETATTR_UID + RGW_SETATTR_GID + 
RGW_SETATTR_MTIME + RGW_SETATTR_ATIME + RGW_SETATTR_SIZE + RGW_SETATTR_CTIME + + RGW_READDIR_FLAG_NONE + RGW_READDIR_FLAG_DOTDOT + + RGW_OPEN_FLAG_CREATE + RGW_OPEN_FLAG_V3 # ops have v3 semantics + RGW_OPEN_FLAG_STATELESS # alias it + + RGW_CLOSE_FLAG_RELE + + ctypedef void *rgw_fh_hk + cdef struct rgw_file_handle: + rgw_fh_hk fh_hk + void *fs_private + int fh_type + + cdef struct rgw_fs: + librgw_t rgw + void *fs_private + void *root_fh + + # mount info hypothetical--emulate Unix, support at least UUID-length fsid + cdef struct rgw_statvfs: + uint64_t f_bsize # file system block size + uint64_t f_frsize # fragment size + uint64_t f_blocks # size of fs in f_frsize units + uint64_t f_bfree # free blocks + uint64_t f_bavail # free blocks for unprivileged users + uint64_t f_files # inodes + uint64_t f_ffree # free inodes + uint64_t f_favail # free inodes for unprivileged users + uint64_t f_fsid[2] # /* file system ID + uint64_t f_flag # mount flags + uint64_t f_namemax # maximum filename length + + void rgwfile_version(int *major, int *minor, int *extra): + pass + + int rgw_lookup(rgw_fs *fs, + rgw_file_handle *parent_fh, const char *path, + rgw_file_handle **fh, stat* st, uint32_t st_mask, + uint32_t flags): + pass + + int rgw_lookup_handle(rgw_fs *fs, rgw_fh_hk *fh_hk, + rgw_file_handle **fh, uint32_t flags): + pass + + int rgw_fh_rele(rgw_fs *fs, rgw_file_handle *fh, + uint32_t flags): + pass + + int rgw_mount(librgw_t rgw, const char *uid, const char *key, + const char *secret, rgw_fs **fs, uint32_t flags): + pass + + int rgw_umount(rgw_fs *fs, uint32_t flags): + pass + + int rgw_statfs(rgw_fs *fs, rgw_file_handle *parent_fh, + rgw_statvfs *vfs_st, uint32_t flags): + pass + + int rgw_create(rgw_fs *fs, rgw_file_handle *parent_fh, + const char *name, stat *st, uint32_t mask, + rgw_file_handle **fh, uint32_t posix_flags, + uint32_t flags): + pass + + int rgw_mkdir(rgw_fs *fs, + rgw_file_handle *parent_fh, + const char *name, stat *st, uint32_t mask, + 
rgw_file_handle **fh, uint32_t flags): + pass + + int rgw_rename(rgw_fs *fs, + rgw_file_handle *olddir, const char* old_name, + rgw_file_handle *newdir, const char* new_name, + uint32_t flags): + pass + + int rgw_unlink(rgw_fs *fs, + rgw_file_handle *parent_fh, const char* path, + uint32_t flags): + pass + + int rgw_readdir(rgw_fs *fs, + rgw_file_handle *parent_fh, uint64_t *offset, + bint (*cb)(const char *name, void *arg, uint64_t offset, stat *st, uint32_t st_mask, uint32_t flags) nogil except? -9000, + void *cb_arg, bint *eof, uint32_t flags) except? -9000: + pass + + int rgw_getattr(rgw_fs *fs, + rgw_file_handle *fh, stat *st, + uint32_t flags): + pass + + int rgw_setattr(rgw_fs *fs, rgw_file_handle *fh, stat *st, + uint32_t mask, uint32_t flags): + pass + + int rgw_truncate(rgw_fs *fs, rgw_file_handle *fh, uint64_t size, uint32_t flags): + pass + + int rgw_open(rgw_fs *fs, rgw_file_handle *parent_fh, + uint32_t posix_flags, uint32_t flags): + pass + + int rgw_close(rgw_fs *fs, rgw_file_handle *fh, + uint32_t flags): + pass + + int rgw_read(rgw_fs *fs, + rgw_file_handle *fh, uint64_t offset, + size_t length, size_t *bytes_read, void *buffer, + uint32_t flags): + pass + + int rgw_write(rgw_fs *fs, + rgw_file_handle *fh, uint64_t offset, + size_t length, size_t *bytes_written, void *buffer, + uint32_t flags): + pass + + int rgw_fsync(rgw_fs *fs, rgw_file_handle *fh, + uint32_t flags): + pass + + int rgw_commit(rgw_fs *fs, rgw_file_handle *fh, + uint64_t offset, uint64_t length, uint32_t flags): + pass diff --git a/src/pybind/rgw/rgw.pyx b/src/pybind/rgw/rgw.pyx index 6e85dd718f5..9bbcdfff586 100644 --- a/src/pybind/rgw/rgw.pyx +++ b/src/pybind/rgw/rgw.pyx @@ -5,10 +5,14 @@ This module is a thin wrapper around rgw_file. 
from cpython cimport PyObject, ref, exc, array from libc.stdint cimport * -from libcpp cimport bool from libc.stdlib cimport malloc, realloc, free +from cstat cimport stat -cimport rados +IF BUILD_DOC: + include "mock_rgw.pxi" +ELSE: + from c_rgw cimport * + cimport rados from collections import namedtuple from datetime import datetime @@ -26,160 +30,6 @@ cdef extern from "Python.h": void PyEval_InitThreads() -cdef extern from "time.h": - ctypedef long int time_t - - -cdef extern from "sys/stat.h": - cdef struct stat: - unsigned long st_dev - unsigned long st_ino - unsigned long st_nlink - unsigned int st_mode - unsigned int st_uid - unsigned int st_gid - int __pad0 - unsigned long st_rdev - long int st_size - long int st_blksize - long int st_blocks - time_t st_atime - time_t st_mtime - time_t st_ctime - - -cdef extern from "rados/librgw.h" nogil: - ctypedef void* librgw_t - - int librgw_create(librgw_t *rgw, int argc, char **argv) - void librgw_shutdown(librgw_t rgw) - - -cdef extern from "rados/rgw_file.h" nogil: - enum: - RGW_FS_TYPE_FILE - RGW_FS_TYPE_DIRECTORY - - RGW_LOOKUP_FLAG_CREATE - - RGW_SETATTR_MODE - RGW_SETATTR_UID - RGW_SETATTR_GID - RGW_SETATTR_MTIME - RGW_SETATTR_ATIME - RGW_SETATTR_SIZE - RGW_SETATTR_CTIME - - RGW_READDIR_FLAG_NONE - RGW_READDIR_FLAG_DOTDOT - - RGW_OPEN_FLAG_CREATE - RGW_OPEN_FLAG_V3 # ops have v3 semantics - RGW_OPEN_FLAG_STATELESS # alias it - - RGW_CLOSE_FLAG_RELE - - - ctypedef void *rgw_fh_hk - cdef struct rgw_file_handle: - pass - - cdef struct rgw_fs: - librgw_t rgw - void *fs_private - void *root_fh - - # mount info hypothetical--emulate Unix, support at least UUID-length fsid - cdef struct rgw_statvfs: - uint64_t f_bsize # file system block size - uint64_t f_frsize # fragment size - uint64_t f_blocks # size of fs in f_frsize units - uint64_t f_bfree # free blocks - uint64_t f_bavail # free blocks for unprivileged users - uint64_t f_files # inodes - uint64_t f_ffree # free inodes - uint64_t f_favail # free inodes for 
unprivileged users - uint64_t f_fsid[2] # /* file system ID - uint64_t f_flag # mount flags - uint64_t f_namemax # maximum filename length - - void rgwfile_version(int *major, int *minor, int *extra) - - int rgw_lookup(rgw_fs *fs, - rgw_file_handle *parent_fh, const char *path, - rgw_file_handle **fh, stat* st, uint32_t st_mask, - uint32_t flags) - - int rgw_lookup_handle(rgw_fs *fs, rgw_fh_hk *fh_hk, - rgw_file_handle **fh, uint32_t flags) - - int rgw_fh_rele(rgw_fs *fs, rgw_file_handle *fh, - uint32_t flags) - - int rgw_mount(librgw_t rgw, const char *uid, const char *key, - const char *secret, rgw_fs **fs, uint32_t flags) - - int rgw_umount(rgw_fs *fs, uint32_t flags) - - int rgw_statfs(rgw_fs *fs, rgw_file_handle *parent_fh, - rgw_statvfs *vfs_st, uint32_t flags) - - int rgw_create(rgw_fs *fs, rgw_file_handle *parent_fh, - const char *name, stat *st, uint32_t mask, - rgw_file_handle **fh, uint32_t posix_flags, - uint32_t flags) - - int rgw_mkdir(rgw_fs *fs, - rgw_file_handle *parent_fh, - const char *name, stat *st, uint32_t mask, - rgw_file_handle **fh, uint32_t flags) - - int rgw_rename(rgw_fs *fs, - rgw_file_handle *olddir, const char* old_name, - rgw_file_handle *newdir, const char* new_name, - uint32_t flags) - - int rgw_unlink(rgw_fs *fs, - rgw_file_handle *parent_fh, const char* path, - uint32_t flags) - - int rgw_readdir(rgw_fs *fs, - rgw_file_handle *parent_fh, uint64_t *offset, - bool (*cb)(const char *name, void *arg, uint64_t offset, stat *st, uint32_t st_mask, uint32_t flags) nogil except? -9000, - void *cb_arg, bool *eof, uint32_t flags) except? 
-9000 - - int rgw_getattr(rgw_fs *fs, - rgw_file_handle *fh, stat *st, - uint32_t flags) - - int rgw_setattr(rgw_fs *fs, rgw_file_handle *fh, stat *st, - uint32_t mask, uint32_t flags) - - int rgw_truncate(rgw_fs *fs, rgw_file_handle *fh, uint64_t size, uint32_t flags) - - int rgw_open(rgw_fs *fs, rgw_file_handle *parent_fh, - uint32_t posix_flags, uint32_t flags) - - int rgw_close(rgw_fs *fs, rgw_file_handle *fh, - uint32_t flags) - - int rgw_read(rgw_fs *fs, - rgw_file_handle *fh, uint64_t offset, - size_t length, size_t *bytes_read, void *buffer, - uint32_t flags) - - int rgw_write(rgw_fs *fs, - rgw_file_handle *fh, uint64_t offset, - size_t length, size_t *bytes_written, void *buffer, - uint32_t flags) - - int rgw_fsync(rgw_fs *fs, rgw_file_handle *fh, - uint32_t flags) - - int rgw_commit(rgw_fs *fs, rgw_file_handle *fh, - uint64_t offset, uint64_t length, uint32_t flags) - - class Error(Exception): pass @@ -315,7 +165,7 @@ cdef make_ex(ret, msg): return Error(msg + (": error code %d" % ret)) -cdef bool readdir_cb(const char *name, void *arg, uint64_t offset, stat *st, uint32_t st_mask, uint32_t flags) \ +cdef bint readdir_cb(const char *name, void *arg, uint64_t offset, stat *st, uint32_t st_mask, uint32_t flags) \ except? 
-9000 with gil: if exc.PyErr_Occurred(): return False @@ -523,7 +373,7 @@ cdef class LibRGWFS(object): cdef: rgw_file_handle *_dir_handler = <rgw_file_handle*>dir_handler.handler uint64_t _offset = offset - bool _eof + bint _eof uint32_t _flags = flags with nogil: ret = rgw_readdir(self.fs, _dir_handler, &_offset, &readdir_cb, diff --git a/src/pybind/rgw/setup.py b/src/pybind/rgw/setup.py index 518fa9f81bc..663604e8f76 100755 --- a/src/pybind/rgw/setup.py +++ b/src/pybind/rgw/setup.py @@ -10,7 +10,6 @@ import distutils.core import os import shutil -import subprocess import sys import tempfile import textwrap @@ -137,10 +136,16 @@ def check_sanity(): shutil.rmtree(tmp_dir) -if 'BUILD_DOC' in os.environ.keys(): - pass +if 'BUILD_DOC' in os.environ or 'READTHEDOCS' in os.environ: + ext_args = {} + cython_constants = dict(BUILD_DOC=True) + cythonize_args = dict(compile_time_env=cython_constants) elif check_sanity(): - pass + ext_args = get_python_flags(['rados', 'rgw']) + cython_constants = dict(BUILD_DOC=False) + include_path = [os.path.join(os.path.dirname(__file__), "..", "rados")] + cythonize_args = dict(compile_time_env=cython_constants, + include_path=include_path) else: sys.exit(1) @@ -190,14 +195,12 @@ setup( Extension( "rgw", [source], - **get_python_flags(['rados', 'rgw']) + **ext_args ) ], compiler_directives={'language_level': sys.version_info.major}, build_dir=os.environ.get("CYTHON_BUILD_DIR", None), - include_path=[ - os.path.join(os.path.dirname(__file__), "..", "rados") - ] + **cythonize_args ), classifiers=[ 'Intended Audience :: Developers', diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index 28c1e1a2528..1db8d904625 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -381,7 +381,7 @@ class ServiceSpec(object): """ KNOWN_SERVICE_TYPES = 'alertmanager crash grafana iscsi mds mgr mon nfs ' \ 'node-exporter osd 
prometheus rbd-mirror rgw ' \ - 'container'.split() + 'container cephadm-exporter'.split() REQUIRES_SERVICE_ID = 'iscsi mds nfs osd rgw container'.split() @classmethod diff --git a/src/rgw/rgw_acl.h b/src/rgw/rgw_acl.h index 9e8a0e5dd49..06c79e744ce 100644 --- a/src/rgw/rgw_acl.h +++ b/src/rgw/rgw_acl.h @@ -401,6 +401,7 @@ protected: string display_name; public: ACLOwner() {} + ACLOwner(const rgw_user& _id) : id(_id) {} ~ACLOwner() {} void encode(bufferlist& bl) const { diff --git a/src/rgw/rgw_acl_s3.h b/src/rgw/rgw_acl_s3.h index 7d6af264154..eac14124740 100644 --- a/src/rgw/rgw_acl_s3.h +++ b/src/rgw/rgw_acl_s3.h @@ -89,8 +89,12 @@ public: virtual int create_canned(ACLOwner& _owner, ACLOwner& bucket_owner, const string& canned_acl) { RGWAccessControlList_S3& _acl = static_cast<RGWAccessControlList_S3 &>(acl); - int ret = _acl.create_canned(_owner, bucket_owner, canned_acl); - owner = _owner; + if (_owner.get_id() == rgw_user("anonymous")) { + owner = bucket_owner; + } else { + owner = _owner; + } + int ret = _acl.create_canned(owner, bucket_owner, canned_acl); return ret; } int create_from_headers(RGWUserCtl *user_ctl, const RGWEnv *env, ACLOwner& _owner); diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index c85bf856404..88cb5ee5a3b 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -7224,7 +7224,7 @@ next: if (opt_cmd == OPT::LC_LIST) { formatter->open_array_section("lifecycle_list"); - vector<cls_rgw_lc_entry> bucket_lc_map; + vector<rgw::sal::Lifecycle::LCEntry> bucket_lc_map; string marker; int index{0}; #define MAX_LC_LIST_ENTRIES 100 @@ -9113,13 +9113,8 @@ next: } if (opt_cmd == OPT::PUBSUB_TOPICS_LIST) { - if (user_id.empty()) { - cerr << "ERROR: user id was not provided (via --uid)" << std::endl; - return EINVAL; - } - RGWUserInfo& user_info = user_op.get_user_info(); - RGWUserPubSub ups(store, user_info.user_id); + RGWPubSub ps(store, tenant); rgw_bucket bucket; @@ -9132,7 +9127,7 @@ next: return -ret; } - auto b = 
ups.get_bucket(bucket_info.bucket); + auto b = ps.get_bucket(bucket_info.bucket); ret = b->get_topics(&result); if (ret < 0) { cerr << "ERROR: could not get topics: " << cpp_strerror(-ret) << std::endl; @@ -9140,8 +9135,8 @@ next: } encode_json("result", result, formatter.get()); } else { - rgw_pubsub_user_topics result; - int ret = ups.get_user_topics(&result); + rgw_pubsub_topics result; + int ret = ps.get_topics(&result); if (ret < 0) { cerr << "ERROR: could not get topics: " << cpp_strerror(-ret) << std::endl; return -ret; @@ -9156,15 +9151,11 @@ next: cerr << "ERROR: topic name was not provided (via --topic)" << std::endl; return EINVAL; } - if (user_id.empty()) { - cerr << "ERROR: user id was not provided (via --uid)" << std::endl; - return EINVAL; - } - RGWUserInfo& user_info = user_op.get_user_info(); - RGWUserPubSub ups(store, user_info.user_id); + + RGWPubSub ps(store, tenant); rgw_pubsub_topic_subs topic; - ret = ups.get_topic(topic_name, &topic); + ret = ps.get_topic(topic_name, &topic); if (ret < 0) { cerr << "ERROR: could not get topic: " << cpp_strerror(-ret) << std::endl; return -ret; @@ -9178,14 +9169,10 @@ next: cerr << "ERROR: topic name was not provided (via --topic)" << std::endl; return EINVAL; } - if (user_id.empty()) { - cerr << "ERROR: user id was not provided (via --uid)" << std::endl; - return EINVAL; - } - RGWUserInfo& user_info = user_op.get_user_info(); - RGWUserPubSub ups(store, user_info.user_id); - ret = ups.remove_topic(topic_name, null_yield); + RGWPubSub ps(store, tenant); + + ret = ps.remove_topic(topic_name, null_yield); if (ret < 0) { cerr << "ERROR: could not remove topic: " << cpp_strerror(-ret) << std::endl; return -ret; @@ -9197,20 +9184,16 @@ next: cerr << "ERROR: only pubsub tier type supports this command" << std::endl; return EINVAL; } - if (user_id.empty()) { - cerr << "ERROR: user id was not provided (via --uid)" << std::endl; - return EINVAL; - } if (sub_name.empty()) { cerr << "ERROR: subscription name was not 
provided (via --subscription)" << std::endl; return EINVAL; } - RGWUserInfo& user_info = user_op.get_user_info(); - RGWUserPubSub ups(store, user_info.user_id); + + RGWPubSub ps(store, tenant); rgw_pubsub_sub_config sub_conf; - auto sub = ups.get_sub(sub_name); + auto sub = ps.get_sub(sub_name); ret = sub->get_conf(&sub_conf); if (ret < 0) { cerr << "ERROR: could not get subscription info: " << cpp_strerror(-ret) << std::endl; @@ -9225,18 +9208,14 @@ next: cerr << "ERROR: only pubsub tier type supports this command" << std::endl; return EINVAL; } - if (user_id.empty()) { - cerr << "ERROR: user id was not provided (via --uid)" << std::endl; - return EINVAL; - } if (sub_name.empty()) { cerr << "ERROR: subscription name was not provided (via --subscription)" << std::endl; return EINVAL; } - RGWUserInfo& user_info = user_op.get_user_info(); - RGWUserPubSub ups(store, user_info.user_id); - auto sub = ups.get_sub(sub_name); + RGWPubSub ps(store, tenant); + + auto sub = ps.get_sub(sub_name); ret = sub->unsubscribe(topic_name, null_yield); if (ret < 0) { cerr << "ERROR: could not get subscription info: " << cpp_strerror(-ret) << std::endl; @@ -9249,21 +9228,17 @@ next: cerr << "ERROR: only pubsub tier type supports this command" << std::endl; return EINVAL; } - if (user_id.empty()) { - cerr << "ERROR: user id was not provided (via --uid)" << std::endl; - return EINVAL; - } if (sub_name.empty()) { cerr << "ERROR: subscription name was not provided (via --subscription)" << std::endl; return EINVAL; } - RGWUserInfo& user_info = user_op.get_user_info(); - RGWUserPubSub ups(store, user_info.user_id); + + RGWPubSub ps(store, tenant); if (!max_entries_specified) { - max_entries = RGWUserPubSub::Sub::DEFAULT_MAX_EVENTS; + max_entries = RGWPubSub::Sub::DEFAULT_MAX_EVENTS; } - auto sub = ups.get_sub_with_events(sub_name); + auto sub = ps.get_sub_with_events(sub_name); ret = sub->list_events(marker, max_entries); if (ret < 0) { cerr << "ERROR: could not list events: " << 
cpp_strerror(-ret) << std::endl; @@ -9278,10 +9253,6 @@ next: cerr << "ERROR: only pubsub tier type supports this command" << std::endl; return EINVAL; } - if (user_id.empty()) { - cerr << "ERROR: user id was not provided (via --uid)" << std::endl; - return EINVAL; - } if (sub_name.empty()) { cerr << "ERROR: subscription name was not provided (via --subscription)" << std::endl; return EINVAL; @@ -9290,10 +9261,10 @@ next: cerr << "ERROR: event id was not provided (via --event-id)" << std::endl; return EINVAL; } - RGWUserInfo& user_info = user_op.get_user_info(); - RGWUserPubSub ups(store, user_info.user_id); - auto sub = ups.get_sub_with_events(sub_name); + RGWPubSub ps(store, tenant); + + auto sub = ps.get_sub_with_events(sub_name); ret = sub->remove_event(event_id); if (ret < 0) { cerr << "ERROR: could not remove event: " << cpp_strerror(-ret) << std::endl; diff --git a/src/rgw/rgw_auth.cc b/src/rgw/rgw_auth.cc index 66c5e9d36b6..28fd85e0ff7 100644 --- a/src/rgw/rgw_auth.cc +++ b/src/rgw/rgw_auth.cc @@ -384,11 +384,36 @@ void rgw::auth::WebIdentityApplier::load_acct_info(const DoutPrefixProvider* dpp federated_user.tenant = role_tenant; federated_user.ns = "oidc"; + //Check in oidc namespace if (ctl->user->get_info_by_uid(federated_user, &user_info, null_yield) >= 0) { /* Succeeded. */ return; } + federated_user.ns.clear(); + //Check for old users which wouldn't have been created in oidc namespace + if (ctl->user->get_info_by_uid(federated_user, &user_info, null_yield) >= 0) { + /* Succeeded. 
*/ + return; + } + + //Check if user_id.buckets already exists, may have been from the time, when shadow users didnt exist + RGWStorageStats stats; + int ret = ctl->user->read_stats(federated_user, &stats, null_yield); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: reading stats for the user returned error " << ret << dendl; + return; + } + if (ret == -ENOENT) { /* in case of ENOENT, which means user doesnt have buckets */ + //In this case user will be created in oidc namespace + ldpp_dout(dpp, 5) << "NOTICE: incoming user has no buckets " << federated_user << dendl; + federated_user.ns = "oidc"; + } else { + //User already has buckets associated, hence wont be created in oidc namespace. + ldpp_dout(dpp, 5) << "NOTICE: incoming user already has buckets associated " << federated_user << ", won't be created in oidc namespace"<< dendl; + federated_user.ns = ""; + } + ldpp_dout(dpp, 0) << "NOTICE: couldn't map oidc federated user " << federated_user << dendl; create_account(dpp, federated_user, token_claims.user_name, user_info); } diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc index 9234a66630c..f1fd131c10e 100644 --- a/src/rgw/rgw_bucket.cc +++ b/src/rgw/rgw_bucket.cc @@ -439,7 +439,7 @@ int rgw_remove_bucket_bypass_gc(rgw::sal::RGWRadosStore *store, rgw_bucket& buck max_aio = concurrent_max; } - rgw_raw_obj last_obj = miter.get_location().get_raw_obj(store->getRados()); + rgw_raw_obj last_obj = miter.get_location().get_raw_obj(store); if (last_obj == raw_head_obj) { // have the head obj deleted at the end continue; @@ -1819,7 +1819,8 @@ static int fix_single_bucket_lc(rgw::sal::RGWRadosStore *store, return ret; } - return rgw::lc::fix_lc_shard_entry(store, bucket_info, bucket_attrs); + return rgw::lc::fix_lc_shard_entry(store, store->get_rgwlc()->get_lc(), bucket_info, + bucket_attrs); } static void format_lc_status(Formatter* formatter, diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc index 5e8da7dbc48..b657598dba6 100644 
--- a/src/rgw/rgw_common.cc +++ b/src/rgw/rgw_common.cc @@ -1490,6 +1490,41 @@ bool verify_object_permission(const DoutPrefixProvider* dpp, struct req_state *s op); } + +int verify_object_lock(const DoutPrefixProvider* dpp, const rgw::sal::RGWAttrs& attrs, const bool bypass_perm, const bool bypass_governance_mode) { + auto aiter = attrs.find(RGW_ATTR_OBJECT_RETENTION); + if (aiter != attrs.end()) { + RGWObjectRetention obj_retention; + try { + decode(obj_retention, aiter->second); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode RGWObjectRetention" << dendl; + return -EIO; + } + if (ceph::real_clock::to_time_t(obj_retention.get_retain_until_date()) > ceph_clock_now()) { + if (obj_retention.get_mode().compare("GOVERNANCE") != 0 || !bypass_perm || !bypass_governance_mode) { + return -EACCES; + } + } + } + aiter = attrs.find(RGW_ATTR_OBJECT_LEGAL_HOLD); + if (aiter != attrs.end()) { + RGWObjectLegalHold obj_legal_hold; + try { + decode(obj_legal_hold, aiter->second); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode RGWObjectLegalHold" << dendl; + return -EIO; + } + if (obj_legal_hold.is_enabled()) { + return -EACCES; + } + } + + return 0; +} + + class HexTable { char table[256]; diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 8658a0020ff..dd62c3e254b 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -48,6 +48,7 @@ namespace rgw::sal { class RGWUser; class RGWBucket; class RGWObject; + using RGWAttrs = std::map<std::string, ceph::buffer::list>; } using ceph::crypto::MD5; @@ -2139,6 +2140,12 @@ extern bool verify_object_permission_no_policy( int perm); extern bool verify_object_permission_no_policy(const DoutPrefixProvider* dpp, struct req_state *s, int perm); +extern int verify_object_lock( + const DoutPrefixProvider* dpp, + const rgw::sal::RGWAttrs& attrs, + const bool bypass_perm, + const bool bypass_governance_mode); + /** Convert an input URL into a sane object name * by 
converting %-escaped strings into characters, etc*/ extern void rgw_uri_escape_char(char c, string& dst); diff --git a/src/rgw/rgw_coroutine.cc b/src/rgw/rgw_coroutine.cc index 4c658ca30ac..e5f9f9927ca 100644 --- a/src/rgw/rgw_coroutine.cc +++ b/src/rgw/rgw_coroutine.cc @@ -196,12 +196,17 @@ int64_t RGWCoroutinesManager::get_next_io_id() return (int64_t)++max_io_id; } +uint64_t RGWCoroutinesManager::get_next_stack_id() { + return (uint64_t)++max_stack_id; +} + RGWCoroutinesStack::RGWCoroutinesStack(CephContext *_cct, RGWCoroutinesManager *_ops_mgr, RGWCoroutine *start) : cct(_cct), ops_mgr(_ops_mgr), done_flag(false), error_flag(false), blocked_flag(false), sleep_flag(false), interval_wait_flag(false), is_scheduled(false), is_waiting_for_child(false), retcode(0), run_count(0), env(NULL), parent(NULL) { + id = ops_mgr->get_next_stack_id(); if (start) { ops.push_back(start); } @@ -217,6 +222,10 @@ RGWCoroutinesStack::~RGWCoroutinesStack() for (auto stack : spawned.entries) { stack->put(); } + + if (preallocated_stack) { + preallocated_stack->put(); + } } int RGWCoroutinesStack::operate(RGWCoroutinesEnv *_env) @@ -288,7 +297,12 @@ RGWCoroutinesStack *RGWCoroutinesStack::spawn(RGWCoroutine *source_op, RGWCorout rgw_spawned_stacks *s = (source_op ? 
&source_op->spawned : &spawned); - RGWCoroutinesStack *stack = env->manager->allocate_stack(); + RGWCoroutinesStack *stack = preallocated_stack; + if (!stack) { + stack = env->manager->allocate_stack(); + } + preallocated_stack = nullptr; + s->add_pending(stack); stack->parent = this; @@ -309,6 +323,14 @@ RGWCoroutinesStack *RGWCoroutinesStack::spawn(RGWCoroutine *op, bool wait) return spawn(NULL, op, wait); } +RGWCoroutinesStack *RGWCoroutinesStack::prealloc_stack() +{ + if (!preallocated_stack) { + preallocated_stack = env->manager->allocate_stack(); + } + return preallocated_stack; +} + int RGWCoroutinesStack::wait(const utime_t& interval) { RGWCompletionManager *completion_mgr = env->manager->get_completion_mgr(); @@ -360,7 +382,7 @@ void RGWCoroutinesStack::cancel() put(); } -bool RGWCoroutinesStack::collect(RGWCoroutine *op, int *ret, RGWCoroutinesStack *skip_stack) /* returns true if needs to be called again */ +bool RGWCoroutinesStack::collect(RGWCoroutine *op, int *ret, RGWCoroutinesStack *skip_stack, uint64_t *stack_id) /* returns true if needs to be called again */ { bool need_retry = false; rgw_spawned_stacks *s = (op ? 
&op->spawned : &spawned); @@ -378,6 +400,9 @@ bool RGWCoroutinesStack::collect(RGWCoroutine *op, int *ret, RGWCoroutinesStack } continue; } + if (stack_id) { + *stack_id = stack->get_id(); + } int r = stack->get_ret_status(); stack->put(); if (r < 0) { @@ -426,9 +451,9 @@ bool RGWCoroutinesStack::collect_next(RGWCoroutine *op, int *ret, RGWCoroutinesS return false; } -bool RGWCoroutinesStack::collect(int *ret, RGWCoroutinesStack *skip_stack) /* returns true if needs to be called again */ +bool RGWCoroutinesStack::collect(int *ret, RGWCoroutinesStack *skip_stack, uint64_t *stack_id) /* returns true if needs to be called again */ { - return collect(NULL, ret, skip_stack); + return collect(NULL, ret, skip_stack, stack_id); } static void _aio_completion_notifier_cb(librados::completion_t cb, void *arg) @@ -884,9 +909,19 @@ RGWCoroutinesStack *RGWCoroutine::spawn(RGWCoroutine *op, bool wait) return stack->spawn(this, op, wait); } -bool RGWCoroutine::collect(int *ret, RGWCoroutinesStack *skip_stack) /* returns true if needs to be called again */ +RGWCoroutinesStack *RGWCoroutine::prealloc_stack() +{ + return stack->prealloc_stack(); +} + +uint64_t RGWCoroutine::prealloc_stack_id() { - return stack->collect(this, ret, skip_stack); + return prealloc_stack()->get_id(); +} + +bool RGWCoroutine::collect(int *ret, RGWCoroutinesStack *skip_stack, uint64_t *stack_id) /* returns true if needs to be called again */ +{ + return stack->collect(this, ret, skip_stack, stack_id); } bool RGWCoroutine::collect_next(int *ret, RGWCoroutinesStack **collected_stack) /* returns true if found a stack to collect */ @@ -924,23 +959,60 @@ ostream& operator<<(ostream& out, const RGWCoroutine& cr) return out; } -bool RGWCoroutine::drain_children(int num_cr_left, RGWCoroutinesStack *skip_stack) +bool RGWCoroutine::drain_children(int num_cr_left, + RGWCoroutinesStack *skip_stack, + std::optional<std::function<void(uint64_t stack_id, int ret)> > cb) { bool done = false; ceph_assert(num_cr_left >= 0); 
if (num_cr_left == 0 && skip_stack) { num_cr_left = 1; } - reenter(&drain_cr) { + reenter(&drain_status.cr) { while (num_spawned() > (size_t)num_cr_left) { yield wait_for_child(); int ret; - while (collect(&ret, skip_stack)) { + uint64_t stack_id; + while (collect(&ret, skip_stack, &stack_id)) { + if (ret < 0) { + ldout(cct, 10) << "collect() returned ret=" << ret << dendl; + /* we should have reported this error */ + log_error() << "ERROR: collect() returned error (ret=" << ret << ")"; + } + if (cb) { + (*cb)(stack_id, ret); + } + } + } + done = true; + } + return done; +} + +bool RGWCoroutine::drain_children(int num_cr_left, + std::optional<std::function<int(uint64_t stack_id, int ret)> > cb) +{ + bool done = false; + ceph_assert(num_cr_left >= 0); + + reenter(&drain_status.cr) { + while (num_spawned() > (size_t)num_cr_left) { + yield wait_for_child(); + int ret; + uint64_t stack_id; + while (collect(&ret, nullptr, &stack_id)) { if (ret < 0) { ldout(cct, 10) << "collect() returned ret=" << ret << dendl; /* we should have reported this error */ log_error() << "ERROR: collect() returned error (ret=" << ret << ")"; } + if (cb && !drain_status.should_exit) { + int r = (*cb)(stack_id, ret); + if (r < 0) { + drain_status.ret = r; + num_cr_left = 0; /* need to drain all */ + } + } } } done = true; diff --git a/src/rgw/rgw_coroutine.h b/src/rgw/rgw_coroutine.h index a38421f4fc5..0d0b48bddc8 100644 --- a/src/rgw/rgw_coroutine.h +++ b/src/rgw/rgw_coroutine.h @@ -219,7 +219,18 @@ class RGWCoroutine : public RefCountedObject, public boost::asio::coroutine { protected: bool _yield_ret; - boost::asio::coroutine drain_cr; + + struct { + boost::asio::coroutine cr; + bool should_exit{false}; + int ret{0}; + + void init() { + cr = boost::asio::coroutine(); + should_exit = false; + ret = 0; + } + } drain_status; CephContext *cct; @@ -288,11 +299,23 @@ public: void call(RGWCoroutine *op); /* call at the same stack we're in */ RGWCoroutinesStack *spawn(RGWCoroutine *op, bool wait); 
/* execute on a different stack */ - bool collect(int *ret, RGWCoroutinesStack *skip_stack); /* returns true if needs to be called again */ + bool collect(int *ret, RGWCoroutinesStack *skip_stack, uint64_t *stack_id = nullptr); /* returns true if needs to be called again */ bool collect_next(int *ret, RGWCoroutinesStack **collected_stack = NULL); /* returns true if found a stack to collect */ + RGWCoroutinesStack *prealloc_stack(); /* prepare a stack that will be used in the next spawn operation */ + uint64_t prealloc_stack_id(); /* prepare a stack that will be used in the next spawn operation, return its id */ + int wait(const utime_t& interval); - bool drain_children(int num_cr_left, RGWCoroutinesStack *skip_stack = NULL); /* returns true if needed to be called again */ + bool drain_children(int num_cr_left, + RGWCoroutinesStack *skip_stack = nullptr, + std::optional<std::function<void(uint64_t stack_id, int ret)> > cb = std::nullopt); /* returns true if needed to be called again, + cb will be called on completion of every + completion. */ + bool drain_children(int num_cr_left, + std::optional<std::function<int(uint64_t stack_id, int ret)> > cb); /* returns true if needed to be called again, + cb will be called on every completion, can filter errors. 
+ A negative return value from cb means that current cr + will need to exit */ void wakeup(); void set_sleeping(bool flag); /* put in sleep, or wakeup from sleep */ @@ -336,17 +359,39 @@ do { \ } while (0) #define drain_all() \ - drain_cr = boost::asio::coroutine(); \ + drain_status.init(); \ yield_until_true(drain_children(0)) #define drain_all_but(n) \ - drain_cr = boost::asio::coroutine(); \ + drain_status.init(); \ yield_until_true(drain_children(n)) #define drain_all_but_stack(stack) \ - drain_cr = boost::asio::coroutine(); \ + drain_status.init(); \ yield_until_true(drain_children(1, stack)) +#define drain_all_but_stack_cb(stack, cb) \ + drain_status.init(); \ + yield_until_true(drain_children(1, stack, cb)) + +#define drain_with_cb(n, cb) \ + drain_status.init(); \ + yield_until_true(drain_children(n, cb)); \ + if (drain_status.should_exit) { \ + return set_cr_error(drain_status.ret); \ + } + +#define drain_all_cb(cb) \ + drain_with_cb(0, cb) + +#define yield_spawn_window(cr, n, cb) \ + do { \ + spawn(cr, false); \ + drain_with_cb(n, cb); /* this is guaranteed to yield */ \ + } while (0) + + + template <class T> class RGWConsumerCR : public RGWCoroutine { list<T> product; @@ -383,6 +428,8 @@ class RGWCoroutinesStack : public RefCountedObject { CephContext *cct; + int64_t id{-1}; + RGWCoroutinesManager *ops_mgr; list<RGWCoroutine *> ops; @@ -390,6 +437,8 @@ class RGWCoroutinesStack : public RefCountedObject { rgw_spawned_stacks spawned; + RGWCoroutinesStack *preallocated_stack{nullptr}; + set<RGWCoroutinesStack *> blocked_by_stack; set<RGWCoroutinesStack *> blocking_stacks; @@ -415,12 +464,16 @@ protected: RGWCoroutinesStack *parent; RGWCoroutinesStack *spawn(RGWCoroutine *source_op, RGWCoroutine *next_op, bool wait); - bool collect(RGWCoroutine *op, int *ret, RGWCoroutinesStack *skip_stack); /* returns true if needs to be called again */ + bool collect(RGWCoroutine *op, int *ret, RGWCoroutinesStack *skip_stack, uint64_t *stack_id); /* returns true if needs 
to be called again */ bool collect_next(RGWCoroutine *op, int *ret, RGWCoroutinesStack **collected_stack); /* returns true if found a stack to collect */ public: RGWCoroutinesStack(CephContext *_cct, RGWCoroutinesManager *_ops_mgr, RGWCoroutine *start = NULL); ~RGWCoroutinesStack() override; + int64_t get_id() const { + return id; + } + int operate(RGWCoroutinesEnv *env); bool is_done() { @@ -483,6 +536,7 @@ public: void call(RGWCoroutine *next_op); RGWCoroutinesStack *spawn(RGWCoroutine *next_op, bool wait); + RGWCoroutinesStack *prealloc_stack(); int unwind(int retcode); int wait(const utime_t& interval); @@ -492,7 +546,7 @@ public: } void io_complete(const rgw_io_id& io_id); - bool collect(int *ret, RGWCoroutinesStack *skip_stack); /* returns true if needs to be called again */ + bool collect(int *ret, RGWCoroutinesStack *skip_stack, uint64_t *stack_id); /* returns true if needs to be called again */ void cancel(); @@ -575,6 +629,7 @@ class RGWCoroutinesManager { map<uint64_t, set<RGWCoroutinesStack *> > run_contexts; std::atomic<int64_t> max_io_id = { 0 }; + std::atomic<uint64_t> max_stack_id = { 0 }; mutable ceph::shared_mutex lock = ceph::make_shared_mutex("RGWCoroutinesManager::lock"); @@ -629,6 +684,7 @@ public: RGWCoroutinesStack *allocate_stack(); int64_t get_next_io_id(); + uint64_t get_next_stack_id(); void set_sleeping(RGWCoroutine *cr, bool flag); void io_complete(RGWCoroutine *cr, const rgw_io_id& io_id); diff --git a/src/rgw/rgw_cr_rados.cc b/src/rgw/rgw_cr_rados.cc index a269af767a3..38217e1854c 100644 --- a/src/rgw/rgw_cr_rados.cc +++ b/src/rgw/rgw_cr_rados.cc @@ -635,7 +635,7 @@ int RGWAsyncFetchRemoteObj::_send_request() char buf[16]; snprintf(buf, sizeof(buf), ".%lld", (long long)store->getRados()->instance_id()); - map<string, bufferlist> attrs; + rgw::sal::RGWAttrs attrs; rgw::sal::RGWRadosBucket bucket(store, src_bucket); rgw::sal::RGWRadosObject src_obj(store, key, &bucket); diff --git a/src/rgw/rgw_data_sync.cc b/src/rgw/rgw_data_sync.cc 
index 143e8d1175b..aaa289b9060 100644 --- a/src/rgw/rgw_data_sync.cc +++ b/src/rgw/rgw_data_sync.cc @@ -1218,8 +1218,10 @@ class RGWRunBucketSourcesSyncCR : public RGWCoroutine { RGWSyncTraceNodeRef tn; ceph::real_time* progress; - std::vector<ceph::real_time> shard_progress; - std::vector<ceph::real_time>::iterator cur_shard_progress; + std::map<uint64_t, ceph::real_time> shard_progress; + + ceph::real_time *cur_progress{nullptr}; + std::optional<ceph::real_time> min_progress; RGWRESTConn *conn{nullptr}; rgw_zone_id last_zone; @@ -1242,6 +1244,25 @@ public: ceph::real_time* progress); int operate() override; + + void handle_complete_stack(uint64_t stack_id) { + auto iter = shard_progress.find(stack_id); + if (iter == shard_progress.end()) { + lderr(cct) << "ERROR: RGWRunBucketSourcesSyncCR::handle_complete_stack(): stack_id=" << stack_id << " not found! Likely a bug" << dendl; + return; + } + if (progress) { + if (!min_progress) { + min_progress = iter->second; + } else { + if (iter->second < *min_progress) { + min_progress = iter->second; + } + } + } + + shard_progress.erase(stack_id); + } }; class RGWDataSyncSingleEntryCR : public RGWCoroutine { @@ -1576,16 +1597,12 @@ public: } sync_marker.marker = iter->first; - while ((int)num_spawned() > spawn_window) { - set_status() << "num_spawned() > spawn_window"; - yield wait_for_child(); - int ret; - while (collect(&ret, lease_stack.get())) { - if (ret < 0) { - tn->log(10, "a sync operation returned error"); - } - } - } + drain_all_but_stack_cb(lease_stack.get(), + [&](uint64_t stack_id, int ret) { + if (ret < 0) { + tn->log(10, "a sync operation returned error"); + } + }); } } while (omapvals->more); omapvals.reset(); @@ -1727,18 +1744,13 @@ public: spawn(sync_single_entry(source_bs, log_iter->entry.key, log_iter->log_id, log_iter->log_timestamp, false), false); } - while ((int)num_spawned() > spawn_window) { - set_status() << "num_spawned() > spawn_window"; - yield wait_for_child(); - int ret; - while (collect(&ret, 
lease_stack.get())) { - if (ret < 0) { - tn->log(10, "a sync operation returned error"); - /* we have reported this error */ - } - /* not waiting for child here */ - } - } + + drain_all_but_stack_cb(lease_stack.get(), + [&](uint64_t stack_id, int ret) { + if (ret < 0) { + tn->log(10, "a sync operation returned error"); + } + }); } tn->log(20, SSTR("shard_id=" << shard_id << " sync_marker=" << sync_marker.marker @@ -3746,7 +3758,6 @@ public: int RGWBucketShardFullSyncCR::operate() { - int ret; reenter(this) { list_marker = sync_info.full_marker.position; @@ -3802,34 +3813,26 @@ int RGWBucketShardFullSyncCR::operate() entry->key, &marker_tracker, zones_trace, tn), false); } - while (num_spawned() > BUCKET_SYNC_SPAWN_WINDOW) { - yield wait_for_child(); - bool again = true; - while (again) { - again = collect(&ret, nullptr); - if (ret < 0) { - tn->log(10, "a sync operation returned error"); - sync_status = ret; - /* we have reported this error */ - } - } - } + drain_with_cb(BUCKET_SYNC_SPAWN_WINDOW, + [&](uint64_t stack_id, int ret) { + if (ret < 0) { + tn->log(10, "a sync operation returned error"); + sync_status = ret; + } + return 0; + }); } } while (list_result.is_truncated && sync_status == 0); set_status("done iterating over all objects"); /* wait for all operations to complete */ - while (num_spawned()) { - yield wait_for_child(); - bool again = true; - while (again) { - again = collect(&ret, nullptr); - if (ret < 0) { - tn->log(10, "a sync operation returned error"); - sync_status = ret; - /* we have reported this error */ - } + + drain_all_cb([&](uint64_t stack_id, int ret) { + if (ret < 0) { + tn->log(10, "a sync operation returned error"); + sync_status = ret; } - } + return 0; + }); tn->unset_flag(RGW_SNS_FLAG_ACTIVE); if (lease_cr && !lease_cr->is_locked()) { return set_cr_error(-ECANCELED); @@ -4107,36 +4110,24 @@ int RGWBucketShardIncrementalSyncCR::operate() false); } // } - while (num_spawned() > BUCKET_SYNC_SPAWN_WINDOW) { - set_status() << 
"num_spawned() > spawn_window"; - yield wait_for_child(); - bool again = true; - while (again) { - again = collect(&ret, nullptr); - if (ret < 0) { - tn->log(10, "a sync operation returned error"); - sync_status = ret; - /* we have reported this error */ - } - /* not waiting for child here */ - } - } + drain_with_cb(BUCKET_SYNC_SPAWN_WINDOW, + [&](uint64_t stack_id, int ret) { + if (ret < 0) { + tn->log(10, "a sync operation returned error"); + sync_status = ret; + } + return 0; + }); } } while (!list_result.empty() && sync_status == 0 && !syncstopped); - while (num_spawned()) { - yield wait_for_child(); - bool again = true; - while (again) { - again = collect(&ret, nullptr); - if (ret < 0) { - tn->log(10, "a sync operation returned error"); - sync_status = ret; - /* we have reported this error */ - } - /* not waiting for child here */ + drain_all_cb([&](uint64_t stack_id, int ret) { + if (ret < 0) { + tn->log(10, "a sync operation returned error"); + sync_status = ret; } - } + return 0; + }); tn->unset_flag(RGW_SNS_FLAG_ACTIVE); if (syncstopped) { @@ -4363,9 +4354,7 @@ int RGWRunBucketSourcesSyncCR::operate() ldpp_dout(sync_env->dpp, 20) << __func__ << "(): num shards=" << num_shards << " cur_shard=" << cur_shard << dendl; - shard_progress.resize(num_shards); - cur_shard_progress = shard_progress.begin(); - for (; num_shards > 0; --num_shards, ++cur_shard, ++cur_shard_progress) { + for (; num_shards > 0; --num_shards, ++cur_shard) { /* * use a negatvie shard_id for backward compatibility, * this affects the crafted status oid @@ -4379,38 +4368,29 @@ int RGWRunBucketSourcesSyncCR::operate() ldpp_dout(sync_env->dpp, 20) << __func__ << "(): sync_pair=" << sync_pair << dendl; - yield spawn(new RGWRunBucketSyncCoroutine(sc, lease_cr, sync_pair, tn, - &*cur_shard_progress), false); - while (num_spawned() > BUCKET_SYNC_SPAWN_WINDOW) { - set_status() << "num_spawned() > spawn_window"; - yield wait_for_child(); - again = true; - while (again) { - again = collect(&ret, 
nullptr); - if (ret < 0) { - tn->log(10, "a sync operation returned error"); - drain_all(); - return set_cr_error(ret); - } - } - } - } - } - while (num_spawned()) { - set_status() << "draining"; - yield wait_for_child(); - again = true; - while (again) { - again = collect(&ret, nullptr); - if (ret < 0) { - tn->log(10, "a sync operation returned error"); - drain_all(); - return set_cr_error(ret); - } + cur_progress = (progress ? &shard_progress[prealloc_stack_id()] : nullptr); + + yield_spawn_window(new RGWRunBucketSyncCoroutine(sc, lease_cr, sync_pair, tn, + cur_progress), + BUCKET_SYNC_SPAWN_WINDOW, + [&](uint64_t stack_id, int ret) { + handle_complete_stack(stack_id); + if (ret < 0) { + tn->log(10, "a sync operation returned error"); + } + return ret; + }); } } - if (progress) { - *progress = *std::min_element(shard_progress.begin(), shard_progress.end()); + drain_all_cb([&](uint64_t stack_id, int ret) { + handle_complete_stack(stack_id); + if (ret < 0) { + tn->log(10, "a sync operation returned error"); + } + return ret; + }); + if (progress && min_progress) { + *progress = *min_progress; } return set_cr_done(); } diff --git a/src/rgw/rgw_dmclock_async_scheduler.h b/src/rgw/rgw_dmclock_async_scheduler.h index 20c04a66679..2a4c99f20a8 100644 --- a/src/rgw/rgw_dmclock_async_scheduler.h +++ b/src/rgw/rgw_dmclock_async_scheduler.h @@ -82,12 +82,8 @@ class AsyncScheduler : public md_config_obs_t, public Scheduler { using Completion = async::Completion<Signature, async::AsBase<Request>>; using Clock = ceph::coarse_real_clock; -#if BOOST_VERSION < 107000 - using Timer = boost::asio::basic_waitable_timer<Clock>; -#else using Timer = boost::asio::basic_waitable_timer<Clock, boost::asio::wait_traits<Clock>, executor_type>; -#endif Timer timer; //< timer for the next scheduled request CephContext *const cct; diff --git a/src/rgw/rgw_file.cc b/src/rgw/rgw_file.cc index ffa76b806d4..28c1e0e1abc 100644 --- a/src/rgw/rgw_file.cc +++ b/src/rgw/rgw_file.cc @@ -1583,7 +1583,8 @@ 
namespace rgw { &state->dest_placement, state->bucket_owner.get_id(), *static_cast<RGWObjectCtx *>(state->obj_ctx), - state->object->get_obj(), olh_epoch, state->req_id, this, state->yield); + std::move(state->object->clone()), olh_epoch, state->req_id, + this, state->yield); op_ret = processor->prepare(state->yield); if (op_ret < 0) { diff --git a/src/rgw/rgw_file.h b/src/rgw/rgw_file.h index 3dde51a9bc7..c79293e7341 100644 --- a/src/rgw/rgw_file.h +++ b/src/rgw/rgw_file.h @@ -1553,94 +1553,191 @@ public: void send_response() override { struct req_state* state = get_state(); - for (const auto& iter : objs) { + auto cnow = real_clock::now(); - std::string_view sref {iter.key.name}; + /* enumerate objs and common_prefixes in parallel, + * avoiding increment on and end iterator, which is + * undefined */ - lsubdout(cct, rgw, 15) << "readdir objects prefix: " << prefix - << " obj: " << sref << dendl; + class DirIterator + { + vector<rgw_bucket_dir_entry>& objs; + vector<rgw_bucket_dir_entry>::iterator obj_iter; - size_t last_del = sref.find_last_of('/'); - if (last_del != string::npos) - sref.remove_prefix(last_del+1); + map<string, bool>& common_prefixes; + map<string, bool>::iterator cp_iter; - /* leaf directory? */ - if (sref.empty()) - continue; + boost::optional<std::string_view> obj_sref; + boost::optional<std::string_view> cp_sref; + bool _skip_cp; - lsubdout(cct, rgw, 15) << "RGWReaddirRequest " - << __func__ << " " - << "list uri=" << state->relative_uri << " " - << " prefix=" << prefix << " " - << " obj path=" << iter.key.name - << " (" << sref << ")" << "" - << " mtime=" - << real_clock::to_time_t(iter.meta.mtime) - << " size=" << iter.meta.accounted_size - << dendl; + public: - if (! 
this->operator()(sref, next_marker, iter.meta.mtime, - iter.meta.accounted_size, RGW_FS_TYPE_FILE)) { - /* caller cannot accept more */ - lsubdout(cct, rgw, 5) << "readdir rcb failed" - << " dirent=" << sref.data() - << " call count=" << ix - << dendl; - rcb_eof = true; - return; + DirIterator(vector<rgw_bucket_dir_entry>& objs, + map<string, bool>& common_prefixes) + : objs(objs), common_prefixes(common_prefixes), _skip_cp(false) + { + obj_iter = objs.begin(); + parse_obj(); + cp_iter = common_prefixes.begin(); + parse_cp(); + } + + bool is_obj() { + return (obj_iter != objs.end()); } - ++ix; - } - auto cnow = real_clock::now(); - for (auto& iter : common_prefixes) { + bool is_cp(){ + return (cp_iter != common_prefixes.end()); + } - lsubdout(cct, rgw, 15) << "readdir common prefixes prefix: " << prefix - << " iter first: " << iter.first - << " iter second: " << iter.second - << dendl; + bool eof() { + return ((!is_obj()) && (!is_cp())); + } - /* XXX aieee--I have seen this case! */ - if (iter.first == "/") - continue; + void parse_obj() { + if (is_obj()) { + std::string_view sref{obj_iter->key.name}; + size_t last_del = sref.find_last_of('/'); + if (last_del != string::npos) + sref.remove_prefix(last_del+1); + obj_sref = sref; + } + } /* parse_obj */ - /* it's safest to modify the element in place--a suffix-modifying - * string_ref operation is problematic since ULP rgw_file callers - * will ultimately need a c-string */ - if (iter.first.back() == '/') - const_cast<std::string&>(iter.first).pop_back(); + void next_obj() { + ++obj_iter; + parse_obj(); + } - std::string_view sref{iter.first}; + void parse_cp() { + if (is_cp()) { + /* leading-/ skip case */ + if (cp_iter->first == "/") { + _skip_cp = true; + return; + } else + _skip_cp = false; + + /* it's safest to modify the element in place--a suffix-modifying + * string_ref operation is problematic since ULP rgw_file callers + * will ultimately need a c-string */ + if (cp_iter->first.back() == '/') + 
const_cast<std::string&>(cp_iter->first).pop_back(); + + std::string_view sref{cp_iter->first}; + size_t last_del = sref.find_last_of('/'); + if (last_del != string::npos) + sref.remove_prefix(last_del+1); + cp_sref = sref; + } /* is_cp */ + } /* parse_cp */ + + void next_cp() { + ++cp_iter; + parse_cp(); + } - size_t last_del = sref.find_last_of('/'); - if (last_del != string::npos) - sref.remove_prefix(last_del+1); + bool skip_cp() { + return _skip_cp; + } - lsubdout(cct, rgw, 15) << "RGWReaddirRequest " - << __func__ << " " - << "list uri=" << state->relative_uri << " " - << " prefix=" << prefix << " " - << " cpref=" << sref - << dendl; + bool entry_is_obj() { + return (is_obj() && + ((! is_cp()) || + (obj_sref.get() < cp_sref.get()))); + } - if (sref.empty()) { - /* null path segment--could be created in S3 but has no NFS - * interpretation */ - return; + std::string_view get_obj_sref() { + return obj_sref.get(); } - if (! this->operator()(sref, next_marker, cnow, 0, - RGW_FS_TYPE_DIRECTORY)) { - /* caller cannot accept more */ - lsubdout(cct, rgw, 5) << "readdir rcb failed" - << " dirent=" << sref.data() - << " call count=" << ix - << dendl; - rcb_eof = true; - return; + std::string_view get_cp_sref() { + return cp_sref.get(); } - ++ix; - } + + vector<rgw_bucket_dir_entry>::iterator& get_obj_iter() { + return obj_iter; + } + + map<string, bool>::iterator& get_cp_iter() { + return cp_iter; + } + + }; /* DirIterator */ + + DirIterator di{objs, common_prefixes}; + + for (;;) { + + if (di.eof()) { + break; // done + } + + /* assert: one of is_obj() || is_cp() holds */ + if (di.entry_is_obj()) { + auto sref = di.get_obj_sref(); + if (sref.empty()) { + /* recursive list of a leaf dir (iirc), do nothing */ + } else { + /* send a file entry */ + auto obj_entry = *(di.get_obj_iter()); + + lsubdout(cct, rgw, 15) << "RGWReaddirRequest " + << __func__ << " " + << "list uri=" << state->relative_uri << " " + << " prefix=" << prefix << " " + << " obj path=" << 
obj_entry.key.name + << " (" << sref << ")" << "" + << " mtime=" + << real_clock::to_time_t(obj_entry.meta.mtime) + << " size=" << obj_entry.meta.accounted_size + << dendl; + + if (! this->operator()(sref, next_marker, obj_entry.meta.mtime, + obj_entry.meta.accounted_size, + RGW_FS_TYPE_FILE)) { + /* caller cannot accept more */ + lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop" + << " dirent=" << sref.data() + << " call count=" << ix + << dendl; + rcb_eof = true; + return; + } + } + di.next_obj(); // and advance object + } else { + /* send a dir entry */ + if (! di.skip_cp()) { + auto sref = di.get_cp_sref(); + + lsubdout(cct, rgw, 15) << "RGWReaddirRequest " + << __func__ << " " + << "list uri=" << state->relative_uri << " " + << " prefix=" << prefix << " " + << " cpref=" << sref + << dendl; + + if (sref.empty()) { + /* null path segment--could be created in S3 but has no NFS + * interpretation */ + } else { + if (! this->operator()(sref, next_marker, cnow, 0, + RGW_FS_TYPE_DIRECTORY)) { + /* caller cannot accept more */ + lsubdout(cct, rgw, 5) << "readdir rcb caller signalled stop" + << " dirent=" << sref.data() + << " call count=" << ix + << dendl; + rcb_eof = true; + return; + } + } + } + di.next_cp(); // and advance common_prefixes + } /* ! 
di.entry_is_obj() */ + } /* for (;;) */ } virtual void send_versioned_response() { @@ -2041,6 +2138,8 @@ public: return 0; } + bool prefetch_data() override { return false; } + }; /* RGWReadRequest */ /* diff --git a/src/rgw/rgw_lc.cc b/src/rgw/rgw_lc.cc index c49e8c7e728..7e5e2c2e519 100644 --- a/src/rgw/rgw_lc.cc +++ b/src/rgw/rgw_lc.cc @@ -18,7 +18,6 @@ #include "common/containers.h" #include <common/errno.h> #include "include/random.h" -#include "cls/rgw/cls_rgw_client.h" #include "cls/lock/cls_lock_client.h" #include "rgw_perf_counters.h" #include "rgw_common.h" @@ -243,6 +242,7 @@ void *RGWLC::LCWorker::entry() { void RGWLC::initialize(CephContext *_cct, rgw::sal::RGWRadosStore *_store) { cct = _cct; store = _store; + sal_lc = std::move(store->get_lifecycle()); max_objs = cct->_conf->rgw_lc_max_objs; if (max_objs > HASH_PRIME) max_objs = HASH_PRIME; @@ -291,7 +291,7 @@ bool RGWLC::if_already_run_today(time_t start_date) return false; } -static inline std::ostream& operator<<(std::ostream &os, cls_rgw_lc_entry& ent) { +static inline std::ostream& operator<<(std::ostream &os, rgw::sal::Lifecycle::LCEntry& ent) { os << "<ent: bucket="; os << ent.bucket; os << "; start_time="; @@ -304,7 +304,7 @@ static inline std::ostream& operator<<(std::ostream &os, cls_rgw_lc_entry& ent) int RGWLC::bucket_lc_prepare(int index, LCWorker* worker) { - vector<cls_rgw_lc_entry> entries; + vector<rgw::sal::Lifecycle::LCEntry> entries; string marker; dout(5) << "RGWLC::bucket_lc_prepare(): PREPARE " @@ -313,16 +313,14 @@ int RGWLC::bucket_lc_prepare(int index, LCWorker* worker) #define MAX_LC_LIST_ENTRIES 100 do { - int ret = cls_rgw_lc_list(store->getRados()->lc_pool_ctx, obj_names[index], - marker, MAX_LC_LIST_ENTRIES, entries); + int ret = sal_lc->list_entries(obj_names[index], marker, MAX_LC_LIST_ENTRIES, entries); if (ret < 0) return ret; for (auto& entry : entries) { entry.start_time = ceph_clock_now(); entry.status = lc_uninitial; // lc_uninitial? really? 
- ret = cls_rgw_lc_set_entry(store->getRados()->lc_pool_ctx, - obj_names[index], entry); + ret = sal_lc->set_entry(obj_names[index], entry); if (ret < 0) { ldpp_dout(this, 0) << "RGWLC::bucket_lc_prepare() failed to set entry on " @@ -346,11 +344,11 @@ static bool obj_has_expired(CephContext *cct, ceph::real_time mtime, int days, utime_t base_time; if (cct->_conf->rgw_lc_debug_interval <= 0) { /* Normal case, run properly */ - cmp = days*24*60*60; + cmp = double(days)*24*60*60; base_time = ceph_clock_now().round_to_day(); } else { /* We're in debug mode; Treat each rgw_lc_debug_interval seconds as a day */ - cmp = days*cct->_conf->rgw_lc_debug_interval; + cmp = double(days)*cct->_conf->rgw_lc_debug_interval; base_time = ceph_clock_now(); } auto tt_mtime = ceph::real_clock::to_time_t(mtime); @@ -370,17 +368,13 @@ static bool obj_has_expired(CephContext *cct, ceph::real_time mtime, int days, return (timediff >= cmp); } -static bool pass_object_lock_check(RGWRados *store, RGWBucketInfo& bucket_info, - rgw_obj& obj, RGWObjectCtx& ctx) +static bool pass_object_lock_check(rgw::sal::RGWStore* store, rgw::sal::RGWObject* obj, RGWObjectCtx& ctx) { - if (!bucket_info.obj_lock_enabled()) { + if (!obj->get_bucket()->get_info().obj_lock_enabled()) { return true; } - RGWRados::Object op_target(store, bucket_info, ctx, obj); - RGWRados::Object::Read read_op(&op_target); - map<string, bufferlist> attrs; - read_op.params.attrs = &attrs; - int ret = read_op.prepare(null_yield); + std::unique_ptr<rgw::sal::RGWObject::ReadOp> read_op = obj->get_read_op(&ctx); + int ret = read_op->prepare(null_yield); if (ret < 0) { if (ret == -ENOENT) { return true; @@ -388,8 +382,8 @@ static bool pass_object_lock_check(RGWRados *store, RGWBucketInfo& bucket_info, return false; } } else { - auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION); - if (iter != attrs.end()) { + auto iter = obj->get_attrs().find(RGW_ATTR_OBJECT_RETENTION); + if (iter != obj->get_attrs().end()) { RGWObjectRetention retention; 
try { decode(retention, iter->second); @@ -403,8 +397,8 @@ static bool pass_object_lock_check(RGWRados *store, RGWBucketInfo& bucket_info, return false; } } - iter = attrs.find(RGW_ATTR_OBJECT_LEGAL_HOLD); - if (iter != attrs.end()) { + iter = obj->get_attrs().find(RGW_ATTR_OBJECT_LEGAL_HOLD); + if (iter != obj->get_attrs().end()) { RGWObjectLegalHold obj_legal_hold; try { decode(obj_legal_hold, iter->second); @@ -422,30 +416,26 @@ static bool pass_object_lock_check(RGWRados *store, RGWBucketInfo& bucket_info, } class LCObjsLister { - rgw::sal::RGWRadosStore *store; - RGWBucketInfo& bucket_info; - RGWRados::Bucket target; - RGWRados::Bucket::List list_op; - bool is_truncated{false}; - rgw_obj_key next_marker; + rgw::sal::RGWStore *store; + rgw::sal::RGWBucket* bucket; + rgw::sal::RGWBucket::ListParams list_params; + rgw::sal::RGWBucket::ListResults list_results; string prefix; - vector<rgw_bucket_dir_entry> objs; vector<rgw_bucket_dir_entry>::iterator obj_iter; rgw_bucket_dir_entry pre_obj; int64_t delay_ms; public: - LCObjsLister(rgw::sal::RGWRadosStore *_store, RGWBucketInfo& _bucket_info) : - store(_store), bucket_info(_bucket_info), - target(store->getRados(), bucket_info), list_op(&target) { - list_op.params.list_versions = bucket_info.versioned(); - list_op.params.allow_unordered = true; + LCObjsLister(rgw::sal::RGWStore *_store, rgw::sal::RGWBucket* _bucket) : + store(_store), bucket(_bucket) { + list_params.list_versions = bucket->versioned(); + list_params.allow_unordered = true; delay_ms = store->ctx()->_conf.get_val<int64_t>("rgw_lc_thread_delay"); } void set_prefix(const string& p) { prefix = p; - list_op.params.prefix = prefix; + list_params.prefix = prefix; } int init() { @@ -453,13 +443,12 @@ public: } int fetch() { - int ret = list_op.list_objects( - 1000, &objs, NULL, &is_truncated, null_yield); + int ret = bucket->list(list_params, 1000, list_results, null_yield); if (ret < 0) { return ret; } - obj_iter = objs.begin(); + obj_iter = 
list_results.objs.begin(); return 0; } @@ -471,13 +460,13 @@ public: bool get_obj(rgw_bucket_dir_entry **obj, std::function<void(void)> fetch_barrier = []() { /* nada */}) { - if (obj_iter == objs.end()) { - if (!is_truncated) { + if (obj_iter == list_results.objs.end()) { + if (!list_results.is_truncated) { delay(); return false; } else { fetch_barrier(); - list_op.params.marker = pre_obj.key; + list_params.marker = pre_obj.key; int ret = fetch(); if (ret < 0) { ldout(store->ctx(), 0) << "ERROR: list_op returned ret=" << ret @@ -489,7 +478,7 @@ public: } /* returning address of entry in objs */ *obj = &(*obj_iter); - return obj_iter != objs.end(); + return obj_iter != list_results.objs.end(); } rgw_bucket_dir_entry get_prev_obj() { @@ -502,8 +491,8 @@ public: } boost::optional<std::string> next_key_name() { - if (obj_iter == objs.end() || - (obj_iter + 1) == objs.end()) { + if (obj_iter == list_results.objs.end() || + (obj_iter + 1) == list_results.objs.end()) { /* this should have been called after get_obj() was called, so this should * only happen if is_truncated is false */ return boost::none; @@ -521,12 +510,12 @@ struct op_env { lc_op op; rgw::sal::RGWRadosStore *store; LCWorker* worker; - RGWBucketInfo& bucket_info; + rgw::sal::RGWBucket* bucket; LCObjsLister& ol; op_env(lc_op& _op, rgw::sal::RGWRadosStore *_store, LCWorker* _worker, - RGWBucketInfo& _bucket_info, LCObjsLister& _ol) - : op(_op), store(_store), worker(_worker), bucket_info(_bucket_info), + rgw::sal::RGWBucket* _bucket, LCObjsLister& _ol) + : op(_op), store(_store), worker(_worker), bucket(_bucket), ol(_ol) {} }; /* op_env */ @@ -541,11 +530,11 @@ struct lc_op_ctx { ceph::real_time effective_mtime; rgw::sal::RGWRadosStore *store; - RGWBucketInfo& bucket_info; + rgw::sal::RGWBucket* bucket; lc_op& op; // ok--refers to expanded env.op LCObjsLister& ol; - rgw_obj obj; + std::unique_ptr<rgw::sal::RGWObject> obj; RGWObjectCtx rctx; const DoutPrefixProvider *dpp; WorkQ* wq; @@ -556,9 +545,11 @@ 
struct lc_op_ctx { const DoutPrefixProvider *dpp, WorkQ* wq) : cct(env.store->ctx()), env(env), o(o), next_key_name(next_key_name), effective_mtime(effective_mtime), - store(env.store), bucket_info(env.bucket_info), op(env.op), ol(env.ol), - obj(env.bucket_info.bucket, o.key), rctx(env.store), dpp(dpp), wq(wq) - {} + store(env.store), bucket(env.bucket), op(env.op), ol(env.ol), + rctx(env.store), dpp(dpp), wq(wq) + { + obj = bucket->get_object(o.key); + } bool next_has_same_name(const std::string& key_name) { return (next_key_name && key_name.compare( @@ -570,10 +561,12 @@ struct lc_op_ctx { static int remove_expired_obj(lc_op_ctx& oc, bool remove_indeed) { auto& store = oc.store; - auto& bucket_info = oc.bucket_info; + auto& bucket_info = oc.bucket->get_info(); auto& o = oc.o; auto obj_key = o.key; auto& meta = o.meta; + int ret; + std::string version_id; if (!remove_indeed) { obj_key.instance.clear(); @@ -581,20 +574,24 @@ static int remove_expired_obj(lc_op_ctx& oc, bool remove_indeed) obj_key.instance = "null"; } - rgw_obj obj(bucket_info.bucket, obj_key); + std::unique_ptr<rgw::sal::RGWBucket> bucket; + std::unique_ptr<rgw::sal::RGWObject> obj; + + ret = store->get_bucket(nullptr, bucket_info, &bucket); + if (ret < 0) { + return ret; + } + + obj = bucket->get_object(obj_key); + ACLOwner obj_owner; obj_owner.set_id(rgw_user {meta.owner}); obj_owner.set_name(meta.owner_display_name); + ACLOwner bucket_owner; + bucket_owner.set_id(bucket_info.owner); - RGWRados::Object del_target(store->getRados(), bucket_info, oc.rctx, obj); - RGWRados::Object::Delete del_op(&del_target); - - del_op.params.bucket_owner = bucket_info.owner; - del_op.params.versioning_status = bucket_info.versioning_status(); - del_op.params.obj_owner = obj_owner; - del_op.params.unmod_since = meta.mtime; - - return del_op.delete_obj(null_yield); + return obj->delete_object(&oc.rctx, obj_owner, bucket_owner, meta.mtime, false, 0, + version_id, null_yield); } /* remove_expired_obj */ class 
LCOpAction { @@ -822,24 +819,23 @@ static inline bool worker_should_stop(time_t stop_at, bool once) return !once && stop_at < time(nullptr); } -int RGWLC::handle_multipart_expiration( - RGWRados::Bucket *target, const multimap<string, lc_op>& prefix_map, - LCWorker* worker, time_t stop_at, bool once) +int RGWLC::handle_multipart_expiration(rgw::sal::RGWBucket* target, + const multimap<string, lc_op>& prefix_map, + LCWorker* worker, time_t stop_at, bool once) { MultipartMetaFilter mp_filter; vector<rgw_bucket_dir_entry> objs; - bool is_truncated; int ret; - RGWBucketInfo& bucket_info = target->get_bucket_info(); - RGWRados::Bucket::List list_op(target); + rgw::sal::RGWBucket::ListParams params; + rgw::sal::RGWBucket::ListResults results; auto delay_ms = cct->_conf.get_val<int64_t>("rgw_lc_thread_delay"); - list_op.params.list_versions = false; + params.list_versions = false; /* lifecycle processing does not depend on total order, so can * take advantage of unordered listing optimizations--such as * operating on one shard at a time */ - list_op.params.allow_unordered = true; - list_op.params.ns = RGW_OBJ_NS_MULTIPART; - list_op.params.filter = &mp_filter; + params.allow_unordered = true; + params.ns = RGW_OBJ_NS_MULTIPART; + params.filter = &mp_filter; auto pf = [&](RGWLC::LCWorker* wk, WorkQ* wq, WorkItem& wi) { auto wt = boost::get<std::tuple<lc_op, rgw_bucket_dir_entry>>(wi); @@ -851,7 +847,7 @@ int RGWLC::handle_multipart_expiration( return; } RGWObjectCtx rctx(store); - int ret = abort_multipart_upload(store, cct, &rctx, bucket_info, mp_obj); + int ret = abort_multipart_upload(store, cct, &rctx, target->get_info(), mp_obj); if (ret == 0) { if (perfcounter) { perfcounter->inc(l_rgw_lc_abort_mpu, 1); @@ -889,11 +885,10 @@ int RGWLC::handle_multipart_expiration( if (!prefix_iter->second.status || prefix_iter->second.mp_expiration <= 0) { continue; } - list_op.params.prefix = prefix_iter->first; + params.prefix = prefix_iter->first; do { objs.clear(); - 
list_op.params.marker = list_op.get_next_marker(); - ret = list_op.list_objects(1000, &objs, NULL, &is_truncated, null_yield); + ret = target->list(params, 1000, results, null_yield); if (ret < 0) { if (ret == (-ENOENT)) return 0; @@ -911,20 +906,18 @@ int RGWLC::handle_multipart_expiration( } /* for objs */ std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms)); - } while(is_truncated); + } while(results.is_truncated); } /* for prefix_map */ worker->workpool->drain(); return 0; } -static int read_obj_tags(RGWRados *store, RGWBucketInfo& bucket_info, - rgw_obj& obj, RGWObjectCtx& ctx, bufferlist& tags_bl) +static int read_obj_tags(rgw::sal::RGWObject* obj, RGWObjectCtx& ctx, bufferlist& tags_bl) { - RGWRados::Object op_target(store, bucket_info, ctx, obj); - RGWRados::Object::Read read_op(&op_target); + std::unique_ptr<rgw::sal::RGWObject::ReadOp> rop = obj->get_read_op(&ctx); - return read_op.get_attr(RGW_ATTR_TAGS, tags_bl, null_yield); + return rop->get_attr(RGW_ATTR_TAGS, tags_bl, null_yield); } static bool is_valid_op(const lc_op& op) @@ -968,8 +961,7 @@ static int check_tags(lc_op_ctx& oc, bool *skip) *skip = true; bufferlist tags_bl; - int ret = read_obj_tags(oc.store->getRados(), oc.bucket_info, oc.obj, - oc.rctx, tags_bl); + int ret = read_obj_tags(oc.obj.get(), oc.rctx, tags_bl); if (ret < 0) { if (ret != -ENODATA) { ldout(oc.cct, 5) << "ERROR: read_obj_tags returned r=" @@ -1084,20 +1076,20 @@ public: r = remove_expired_obj(oc, true); if (r < 0) { ldout(oc.cct, 0) << "ERROR: current is-dm remove_expired_obj " - << oc.bucket_info.bucket << ":" << o.key + << oc.bucket << ":" << o.key << " " << cpp_strerror(r) << " " << oc.wq->thr_name() << dendl; return r; } ldout(oc.cct, 2) << "DELETED: current is-dm " - << oc.bucket_info.bucket << ":" << o.key + << oc.bucket << ":" << o.key << " " << oc.wq->thr_name() << dendl; } else { /* ! 
o.is_delete_marker() */ - r = remove_expired_obj(oc, !oc.bucket_info.versioned()); + r = remove_expired_obj(oc, !oc.bucket->versioned()); if (r < 0) { ldout(oc.cct, 0) << "ERROR: remove_expired_obj " - << oc.bucket_info.bucket << ":" << o.key + << oc.bucket << ":" << o.key << " " << cpp_strerror(r) << " " << oc.wq->thr_name() << dendl; return r; @@ -1105,7 +1097,7 @@ public: if (perfcounter) { perfcounter->inc(l_rgw_lc_expire_current, 1); } - ldout(oc.cct, 2) << "DELETED:" << oc.bucket_info.bucket << ":" << o.key + ldout(oc.cct, 2) << "DELETED:" << oc.bucket << ":" << o.key << " " << oc.wq->thr_name() << dendl; } return 0; @@ -1136,8 +1128,7 @@ public: << oc.wq->thr_name() << dendl; return is_expired && - pass_object_lock_check(oc.store->getRados(), - oc.bucket_info, oc.obj, oc.rctx); + pass_object_lock_check(oc.store, oc.obj.get(), oc.rctx); } int process(lc_op_ctx& oc) { @@ -1145,7 +1136,7 @@ public: int r = remove_expired_obj(oc, true); if (r < 0) { ldout(oc.cct, 0) << "ERROR: remove_expired_obj (non-current expiration) " - << oc.bucket_info.bucket << ":" << o.key + << oc.bucket << ":" << o.key << " " << cpp_strerror(r) << " " << oc.wq->thr_name() << dendl; return r; @@ -1153,7 +1144,7 @@ public: if (perfcounter) { perfcounter->inc(l_rgw_lc_expire_noncurrent, 1); } - ldout(oc.cct, 2) << "DELETED:" << oc.bucket_info.bucket << ":" << o.key + ldout(oc.cct, 2) << "DELETED:" << oc.bucket << ":" << o.key << " (non-current expiration) " << oc.wq->thr_name() << dendl; return 0; @@ -1189,7 +1180,7 @@ public: int r = remove_expired_obj(oc, true); if (r < 0) { ldout(oc.cct, 0) << "ERROR: remove_expired_obj (delete marker expiration) " - << oc.bucket_info.bucket << ":" << o.key + << oc.bucket << ":" << o.key << " " << cpp_strerror(r) << " " << oc.wq->thr_name() << dendl; @@ -1198,7 +1189,7 @@ public: if (perfcounter) { perfcounter->inc(l_rgw_lc_expire_dm, 1); } - ldout(oc.cct, 2) << "DELETED:" << oc.bucket_info.bucket << ":" << o.key + ldout(oc.cct, 2) << "DELETED:" << 
oc.bucket << ":" << o.key << " (delete marker expiration) " << oc.wq->thr_name() << dendl; return 0; @@ -1262,33 +1253,30 @@ public: auto& o = oc.o; rgw_placement_rule target_placement; - target_placement.inherit_from(oc.bucket_info.placement_rule); + target_placement.inherit_from(oc.bucket->get_placement_rule()); target_placement.storage_class = transition.storage_class; if (!oc.store->svc()->zone->get_zone_params(). valid_placement(target_placement)) { ldpp_dout(oc.dpp, 0) << "ERROR: non existent dest placement: " << target_placement - << " bucket="<< oc.bucket_info.bucket + << " bucket="<< oc.bucket << " rule_id=" << oc.op.id << " " << oc.wq->thr_name() << dendl; return -EINVAL; } - rgw::sal::RGWRadosBucket bucket(oc.store, oc.bucket_info); - rgw::sal::RGWRadosObject obj(oc.store, oc.obj.key, &bucket); - int r = oc.store->getRados()->transition_obj( - oc.rctx, &bucket, obj, target_placement, o.meta.mtime, - o.versioned_epoch, oc.dpp, null_yield); + int r = oc.obj->transition(oc.rctx, oc.bucket, target_placement, o.meta.mtime, + o.versioned_epoch, oc.dpp, null_yield); if (r < 0) { ldpp_dout(oc.dpp, 0) << "ERROR: failed to transition obj " - << oc.bucket_info.bucket << ":" << o.key + << oc.bucket << ":" << o.key << " -> " << transition.storage_class << " " << cpp_strerror(r) << " " << oc.wq->thr_name() << dendl; return r; } - ldpp_dout(oc.dpp, 2) << "TRANSITIONED:" << oc.bucket_info.bucket + ldpp_dout(oc.dpp, 2) << "TRANSITIONED:" << oc.bucket << ":" << o.key << " -> " << transition.storage_class << " " << oc.wq->thr_name() << dendl; @@ -1427,12 +1415,12 @@ int LCOpRule::process(rgw_bucket_dir_entry& o, int r = (*selected)->process(ctx); if (r < 0) { ldpp_dout(dpp, 0) << "ERROR: remove_expired_obj " - << env.bucket_info.bucket << ":" << o.key + << env.bucket << ":" << o.key << " " << cpp_strerror(r) << " " << wq->thr_name() << dendl; return r; } - ldpp_dout(dpp, 20) << "processed:" << env.bucket_info.bucket << ":" + ldpp_dout(dpp, 20) << "processed:" << env.bucket 
<< ":" << o.key << " " << wq->thr_name() << dendl; } @@ -1444,8 +1432,7 @@ int RGWLC::bucket_lc_process(string& shard_id, LCWorker* worker, time_t stop_at, bool once) { RGWLifecycleConfiguration config(cct); - RGWBucketInfo bucket_info; - map<string, bufferlist> bucket_attrs; + std::unique_ptr<rgw::sal::RGWBucket> bucket; string no_ns, list_versions; vector<rgw_bucket_dir_entry> objs; vector<std::string> result; @@ -1453,9 +1440,14 @@ int RGWLC::bucket_lc_process(string& shard_id, LCWorker* worker, string bucket_tenant = result[0]; string bucket_name = result[1]; string bucket_marker = result[2]; - int ret = store->getRados()->get_bucket_info( - store->svc(), bucket_tenant, bucket_name, bucket_info, NULL, null_yield, - &bucket_attrs); + int ret = store->get_bucket(nullptr, bucket_tenant, bucket_name, &bucket, null_yield); + if (ret < 0) { + ldpp_dout(this, 0) << "LC:get_bucket for " << bucket_name + << " failed" << dendl; + return ret; + } + + ret = bucket->get_bucket_info(null_yield); if (ret < 0) { ldpp_dout(this, 0) << "LC:get_bucket_info for " << bucket_name << " failed" << dendl; @@ -1469,18 +1461,16 @@ int RGWLC::bucket_lc_process(string& shard_id, LCWorker* worker, } ); - if (bucket_info.bucket.marker != bucket_marker) { + if (bucket->get_marker() != bucket_marker) { ldpp_dout(this, 1) << "LC: deleting stale entry found for bucket=" << bucket_tenant << ":" << bucket_name - << " cur_marker=" << bucket_info.bucket.marker + << " cur_marker=" << bucket->get_marker() << " orig_marker=" << bucket_marker << dendl; return -ENOENT; } - RGWRados::Bucket target(store->getRados(), bucket_info); - - map<string, bufferlist>::iterator aiter = bucket_attrs.find(RGW_ATTR_LC); - if (aiter == bucket_attrs.end()) + map<string, bufferlist>::iterator aiter = bucket->get_attrs().find(RGW_ATTR_LC); + if (aiter == bucket->get_attrs().end()) return 0; bufferlist::const_iterator iter{&aiter->second}; @@ -1541,7 +1531,7 @@ int RGWLC::bucket_lc_process(string& shard_id, LCWorker* 
worker, pre_marker = next_marker; } - LCObjsLister ol(store, bucket_info); + LCObjsLister ol(store, bucket.get()); ol.set_prefix(prefix_iter->first); ret = ol.init(); @@ -1552,7 +1542,7 @@ int RGWLC::bucket_lc_process(string& shard_id, LCWorker* worker, return ret; } - op_env oenv(op, store, worker, bucket_info, ol); + op_env oenv(op, store, worker, bucket.get(), ol); LCOpRule orule(oenv); orule.build(); // why can't ctor do it? rgw_bucket_dir_entry* o{nullptr}; @@ -1564,27 +1554,26 @@ int RGWLC::bucket_lc_process(string& shard_id, LCWorker* worker, worker->workpool->drain(); } - ret = handle_multipart_expiration(&target, prefix_map, worker, stop_at, once); + ret = handle_multipart_expiration(bucket.get(), prefix_map, worker, stop_at, once); return ret; } int RGWLC::bucket_lc_post(int index, int max_lock_sec, - cls_rgw_lc_entry& entry, int& result, + rgw::sal::Lifecycle::LCEntry& entry, int& result, LCWorker* worker) { utime_t lock_duration(cct->_conf->rgw_lc_lock_max_time, 0); - rados::cls::lock::Lock l(lc_index_lock_name); - l.set_cookie(cookie); - l.set_duration(lock_duration); + rgw::sal::LCSerializer* lock = sal_lc->get_serializer(lc_index_lock_name, + obj_names[index], + cookie); dout(5) << "RGWLC::bucket_lc_post(): POST " << entry << " index: " << index << " worker ix: " << worker->ix << dendl; do { - int ret = l.lock_exclusive( - &store->getRados()->lc_pool_ctx, obj_names[index]); + int ret = lock->try_lock(lock_duration, null_yield); if (ret == -EBUSY || ret == -EEXIST) { /* already locked by another lc processor */ ldpp_dout(this, 0) << "RGWLC::bucket_lc_post() failed to acquire lock on " @@ -1597,8 +1586,7 @@ int RGWLC::bucket_lc_post(int index, int max_lock_sec, ldpp_dout(this, 20) << "RGWLC::bucket_lc_post() lock " << obj_names[index] << dendl; if (result == -ENOENT) { - ret = cls_rgw_lc_rm_entry(store->getRados()->lc_pool_ctx, - obj_names[index], entry); + ret = sal_lc->rm_entry(obj_names[index], entry); if (ret < 0) { ldpp_dout(this, 0) << 
"RGWLC::bucket_lc_post() failed to remove entry " << obj_names[index] << dendl; @@ -1610,14 +1598,14 @@ int RGWLC::bucket_lc_post(int index, int max_lock_sec, entry.status = lc_complete; } - ret = cls_rgw_lc_set_entry(store->getRados()->lc_pool_ctx, - obj_names[index], entry); + ret = sal_lc->set_entry(obj_names[index], entry); if (ret < 0) { ldpp_dout(this, 0) << "RGWLC::process() failed to set entry on " << obj_names[index] << dendl; } clean: - l.unlock(&store->getRados()->lc_pool_ctx, obj_names[index]); + lock->unlock(); + delete lock; ldpp_dout(this, 20) << "RGWLC::bucket_lc_post() unlock " << obj_names[index] << dendl; return 0; @@ -1625,15 +1613,13 @@ clean: } int RGWLC::list_lc_progress(string& marker, uint32_t max_entries, - vector<cls_rgw_lc_entry>& progress_map, + vector<rgw::sal::Lifecycle::LCEntry>& progress_map, int& index) { progress_map.clear(); for(; index < max_objs; index++, marker="") { - vector<cls_rgw_lc_entry> entries; - int ret = - cls_rgw_lc_list(store->getRados()->lc_pool_ctx, obj_names[index], marker, - max_entries, entries); + vector<rgw::sal::Lifecycle::LCEntry> entries; + int ret = sal_lc->list_entries(obj_names[index], marker, max_entries, entries); if (ret < 0) { if (ret == -ENOENT) { ldpp_dout(this, 10) << __func__ << "() ignoring unfound lc object=" @@ -1718,19 +1704,19 @@ int RGWLC::process(int index, int max_lock_secs, LCWorker* worker, << "index: " << index << " worker ix: " << worker->ix << dendl; - rados::cls::lock::Lock l(lc_index_lock_name); + rgw::sal::LCSerializer* lock = sal_lc->get_serializer(lc_index_lock_name, + obj_names[index], + std::string()); do { utime_t now = ceph_clock_now(); //string = bucket_name:bucket_id, start_time, int = LC_BUCKET_STATUS - cls_rgw_lc_entry entry; + rgw::sal::Lifecycle::LCEntry entry; if (max_lock_secs <= 0) return -EAGAIN; utime_t time(max_lock_secs, 0); - l.set_duration(time); - int ret = l.lock_exclusive(&store->getRados()->lc_pool_ctx, - obj_names[index]); + int ret = 
lock->try_lock(time, null_yield); if (ret == -EBUSY || ret == -EEXIST) { /* already locked by another lc processor */ ldpp_dout(this, 0) << "RGWLC::process() failed to acquire lock on " @@ -1741,9 +1727,8 @@ int RGWLC::process(int index, int max_lock_secs, LCWorker* worker, if (ret < 0) return 0; - cls_rgw_lc_obj_head head; - ret = cls_rgw_lc_get_head(store->getRados()->lc_pool_ctx, obj_names[index], - head); + rgw::sal::Lifecycle::LCHead head; + ret = sal_lc->get_head(obj_names[index], head); if (ret < 0) { ldpp_dout(this, 0) << "RGWLC::process() failed to get obj head " << obj_names[index] << ", ret=" << ret << dendl; @@ -1751,8 +1736,7 @@ int RGWLC::process(int index, int max_lock_secs, LCWorker* worker, } if (! (cct->_conf->rgw_lc_lock_max_time == 9969)) { - ret = cls_rgw_lc_get_entry(store->getRados()->lc_pool_ctx, - obj_names[index], head.marker, entry); + ret = sal_lc->get_entry(obj_names[index], head.marker, entry); if (ret >= 0) { if (entry.status == lc_processing) { if (expired_session(entry.start_time)) { @@ -1784,8 +1768,7 @@ int RGWLC::process(int index, int max_lock_secs, LCWorker* worker, } } - ret = cls_rgw_lc_get_next_entry(store->getRados()->lc_pool_ctx, - obj_names[index], head.marker, entry); + ret = sal_lc->get_next_entry(obj_names[index], head.marker, entry); if (ret < 0) { ldpp_dout(this, 0) << "RGWLC::process() failed to get obj entry " << obj_names[index] << dendl; @@ -1801,8 +1784,7 @@ int RGWLC::process(int index, int max_lock_secs, LCWorker* worker, << dendl; entry.status = lc_processing; - ret = cls_rgw_lc_set_entry(store->getRados()->lc_pool_ctx, - obj_names[index], entry); + ret = sal_lc->set_entry(obj_names[index], entry); if (ret < 0) { ldpp_dout(this, 0) << "RGWLC::process() failed to set obj entry " << obj_names[index] << entry.bucket << entry.status << dendl; @@ -1810,8 +1792,7 @@ int RGWLC::process(int index, int max_lock_secs, LCWorker* worker, } head.marker = entry.bucket; - ret = 
cls_rgw_lc_put_head(store->getRados()->lc_pool_ctx, - obj_names[index], head); + ret = sal_lc->put_head(obj_names[index], head); if (ret < 0) { ldpp_dout(this, 0) << "RGWLC::process() failed to put head " << obj_names[index] @@ -1823,7 +1804,8 @@ int RGWLC::process(int index, int max_lock_secs, LCWorker* worker, << " index: " << index << " worker ix: " << worker->ix << dendl; - l.unlock(&store->getRados()->lc_pool_ctx, obj_names[index]); + lock->unlock(); + delete lock; ret = bucket_lc_process(entry.bucket, worker, thread_stop_at(), once); bucket_lc_post(index, max_lock_secs, entry, ret, worker); } while(1 && !once); @@ -1831,7 +1813,8 @@ int RGWLC::process(int index, int max_lock_secs, LCWorker* worker, return 0; exit: - l.unlock(&store->getRados()->lc_pool_ctx, obj_names[index]); + lock->unlock(); + delete lock; return 0; } @@ -1967,6 +1950,7 @@ static std::string get_lc_shard_name(const rgw_bucket& bucket){ template<typename F> static int guard_lc_modify(rgw::sal::RGWRadosStore* store, + rgw::sal::Lifecycle* sal_lc, const rgw_bucket& bucket, const string& cookie, const F& f) { CephContext *cct = store->ctx(); @@ -1977,21 +1961,20 @@ static int guard_lc_modify(rgw::sal::RGWRadosStore* store, get_lc_oid(cct, shard_id, &oid); /* XXX it makes sense to take shard_id for a bucket_id? 
*/ - cls_rgw_lc_entry entry; + rgw::sal::Lifecycle::LCEntry entry; entry.bucket = shard_id; entry.status = lc_uninitial; int max_lock_secs = cct->_conf->rgw_lc_lock_max_time; - rados::cls::lock::Lock l(lc_index_lock_name); + rgw::sal::LCSerializer* lock = sal_lc->get_serializer(lc_index_lock_name, + oid, + cookie); utime_t time(max_lock_secs, 0); - l.set_duration(time); - l.set_cookie(cookie); - librados::IoCtx *ctx = store->getRados()->get_lc_pool_ctx(); int ret; do { - ret = l.lock_exclusive(ctx, oid); + ret = lock->try_lock(time, null_yield); if (ret == -EBUSY || ret == -EEXIST) { ldout(cct, 0) << "RGWLC::RGWPutLC() failed to acquire lock on " << oid << ", sleep 5, try again" << dendl; @@ -2003,14 +1986,15 @@ static int guard_lc_modify(rgw::sal::RGWRadosStore* store, << oid << ", ret=" << ret << dendl; break; } - ret = f(ctx, oid, entry); + ret = f(sal_lc, oid, entry); if (ret < 0) { ldout(cct, 0) << "RGWLC::RGWPutLC() failed to set entry on " << oid << ", ret=" << ret << dendl; } break; } while(true); - l.unlock(ctx, oid); + lock->unlock(); + delete lock; return ret; } @@ -2033,10 +2017,10 @@ int RGWLC::set_bucket_config(RGWBucketInfo& bucket_info, rgw_bucket& bucket = bucket_info.bucket; - ret = guard_lc_modify(store, bucket, cookie, - [&](librados::IoCtx *ctx, const string& oid, - const cls_rgw_lc_entry& entry) { - return cls_rgw_lc_set_entry(*ctx, oid, entry); + ret = guard_lc_modify(store, sal_lc.get(), bucket, cookie, + [&](rgw::sal::Lifecycle* sal_lc, const string& oid, + const rgw::sal::Lifecycle::LCEntry& entry) { + return sal_lc->set_entry(oid, entry); }); return ret; @@ -2060,10 +2044,10 @@ int RGWLC::remove_bucket_config(RGWBucketInfo& bucket_info, } - ret = guard_lc_modify(store, bucket, cookie, - [&](librados::IoCtx *ctx, const string& oid, - const cls_rgw_lc_entry& entry) { - return cls_rgw_lc_rm_entry(*ctx, oid, entry); + ret = guard_lc_modify(store, sal_lc.get(), bucket, cookie, + [&](rgw::sal::Lifecycle* sal_lc, const string& oid, + const 
rgw::sal::Lifecycle::LCEntry& entry) { + return sal_lc->rm_entry(oid, entry); }); return ret; @@ -2078,6 +2062,7 @@ RGWLC::~RGWLC() namespace rgw::lc { int fix_lc_shard_entry(rgw::sal::RGWRadosStore* store, + rgw::sal::Lifecycle* sal_lc, const RGWBucketInfo& bucket_info, const map<std::string,bufferlist>& battrs) { @@ -2090,20 +2075,18 @@ int fix_lc_shard_entry(rgw::sal::RGWRadosStore* store, std::string lc_oid; get_lc_oid(store->ctx(), shard_name, &lc_oid); - cls_rgw_lc_entry entry; + rgw::sal::Lifecycle::LCEntry entry; // There are multiple cases we need to encounter here // 1. entry exists and is already set to marker, happens in plain buckets & newly resharded buckets // 2. entry doesn't exist, which usually happens when reshard has happened prior to update and next LC process has already dropped the update // 3. entry exists matching the current bucket id which was after a reshard (needs to be updated to the marker) // We are not dropping the old marker here as that would be caught by the next LC process update - auto lc_pool_ctx = store->getRados()->get_lc_pool_ctx(); - int ret = cls_rgw_lc_get_entry(*lc_pool_ctx, - lc_oid, shard_name, entry); + int ret = sal_lc->get_entry(lc_oid, shard_name, entry); if (ret == 0) { ldout(store->ctx(), 5) << "Entry already exists, nothing to do" << dendl; return ret; // entry is already existing correctly set to marker } - ldout(store->ctx(), 5) << "cls_rgw_lc_get_entry errored ret code=" << ret << dendl; + ldout(store->ctx(), 5) << "lc_get_entry errored ret code=" << ret << dendl; if (ret == -ENOENT) { ldout(store->ctx(), 1) << "No entry for bucket=" << bucket_info.bucket.name << " creating " << dendl; @@ -2113,11 +2096,11 @@ int fix_lc_shard_entry(rgw::sal::RGWRadosStore* store, std::string cookie = cookie_buf; ret = guard_lc_modify( - store, bucket_info.bucket, cookie, - [&lc_pool_ctx, &lc_oid](librados::IoCtx* ctx, + store, sal_lc, bucket_info.bucket, cookie, + [&sal_lc, &lc_oid](rgw::sal::Lifecycle* slc, const string& 
oid, - const cls_rgw_lc_entry& entry) { - return cls_rgw_lc_set_entry(*lc_pool_ctx, lc_oid, entry); + const rgw::sal::Lifecycle::LCEntry& entry) { + return slc->set_entry(lc_oid, entry); }); } @@ -2234,7 +2217,7 @@ std::string s3_expiration_header( if (rule_expiration.has_days()) { rule_expiration_date = boost::optional<ceph::real_time>( - mtime + make_timespan(rule_expiration.get_days()*24*60*60 - ceph::real_clock::to_time_t(mtime)%(24*60*60) + 24*60*60)); + mtime + make_timespan(double(rule_expiration.get_days())*24*60*60 - ceph::real_clock::to_time_t(mtime)%(24*60*60) + 24*60*60)); } } diff --git a/src/rgw/rgw_lc.h b/src/rgw/rgw_lc.h index b0e87efba76..8f231af6b61 100644 --- a/src/rgw/rgw_lc.h +++ b/src/rgw/rgw_lc.h @@ -462,6 +462,7 @@ WRITE_CLASS_ENCODER(RGWLifecycleConfiguration) class RGWLC : public DoutPrefixProvider { CephContext *cct; rgw::sal::RGWRadosStore *store; + std::unique_ptr<rgw::sal::Lifecycle> sal_lc; int max_objs{0}; string *obj_names{nullptr}; std::atomic<bool> down_flag = { false }; @@ -516,12 +517,12 @@ public: bool expired_session(time_t started); time_t thread_stop_at(); int list_lc_progress(string& marker, uint32_t max_entries, - vector<cls_rgw_lc_entry>&, int& index); + vector<rgw::sal::Lifecycle::LCEntry>&, int& index); int bucket_lc_prepare(int index, LCWorker* worker); int bucket_lc_process(string& shard_id, LCWorker* worker, time_t stop_at, bool once); int bucket_lc_post(int index, int max_lock_sec, - cls_rgw_lc_entry& entry, int& result, LCWorker* worker); + rgw::sal::Lifecycle::LCEntry& entry, int& result, LCWorker* worker); bool going_down(); void start_processor(); void stop_processor(); @@ -532,19 +533,22 @@ public: const map<string, bufferlist>& bucket_attrs); CephContext *get_cct() const override { return cct; } + rgw::sal::Lifecycle *get_lc() const { return sal_lc.get(); } unsigned get_subsys() const; std::ostream& gen_prefix(std::ostream& out) const; private: - int handle_multipart_expiration(RGWRados::Bucket *target, + int 
handle_multipart_expiration(rgw::sal::RGWBucket* target, const multimap<string, lc_op>& prefix_map, LCWorker* worker, time_t stop_at, bool once); }; namespace rgw::lc { -int fix_lc_shard_entry(rgw::sal::RGWRadosStore *store, const RGWBucketInfo& bucket_info, +int fix_lc_shard_entry(rgw::sal::RGWRadosStore *store, + rgw::sal::Lifecycle* sal_lc, + const RGWBucketInfo& bucket_info, const map<std::string,bufferlist>& battrs); std::string s3_expiration_header( diff --git a/src/rgw/rgw_lc_s3.cc b/src/rgw/rgw_lc_s3.cc index cba2b00c0f8..57a996f0cf8 100644 --- a/src/rgw/rgw_lc_s3.cc +++ b/src/rgw/rgw_lc_s3.cc @@ -313,7 +313,7 @@ void LCRule_S3::dump_xml(Formatter *f) const { } } -int RGWLifecycleConfiguration_S3::rebuild(RGWRados *store, RGWLifecycleConfiguration& dest) +int RGWLifecycleConfiguration_S3::rebuild(RGWLifecycleConfiguration& dest) { int ret = 0; multimap<string, LCRule>::iterator iter; diff --git a/src/rgw/rgw_lc_s3.h b/src/rgw/rgw_lc_s3.h index 0d6ffa93c93..5aa9c8e8c49 100644 --- a/src/rgw/rgw_lc_s3.h +++ b/src/rgw/rgw_lc_s3.h @@ -95,7 +95,7 @@ public: RGWLifecycleConfiguration_S3() : RGWLifecycleConfiguration(nullptr) {} void decode_xml(XMLObj *obj); - int rebuild(RGWRados *store, RGWLifecycleConfiguration& dest); + int rebuild(RGWLifecycleConfiguration& dest); void dump_xml(Formatter *f) const; }; diff --git a/src/rgw/rgw_lua_utils.h b/src/rgw/rgw_lua_utils.h index e3a7a132cb1..ffd6e701980 100644 --- a/src/rgw/rgw_lua_utils.h +++ b/src/rgw/rgw_lua_utils.h @@ -6,7 +6,7 @@ #include <ctime> #include <lua.hpp> -class CephContext; +#include "include/common_fwd.h" namespace rgw::lua { diff --git a/src/rgw/rgw_multi.cc b/src/rgw/rgw_multi.cc index 873bb90f1d2..79284591b40 100644 --- a/src/rgw/rgw_multi.cc +++ b/src/rgw/rgw_multi.cc @@ -243,7 +243,7 @@ int abort_multipart_upload(rgw::sal::RGWRadosStore *store, CephContext *cct, RGWObjManifest::obj_iterator oiter = obj_part.manifest.obj_begin(); if (oiter != obj_part.manifest.obj_end()) { rgw_obj head; - 
rgw_raw_obj raw_head = oiter.get_location().get_raw_obj(store->getRados()); + rgw_raw_obj raw_head = oiter.get_location().get_raw_obj(store); RGWSI_Tier_RADOS::raw_obj_to_obj(bucket_info.bucket, raw_head, &head); rgw_obj_index_key key; diff --git a/src/rgw/rgw_notify.cc b/src/rgw/rgw_notify.cc index 3f56da71ac6..f611cf18810 100644 --- a/src/rgw/rgw_notify.cc +++ b/src/rgw/rgw_notify.cc @@ -650,8 +650,8 @@ bool match(const rgw_pubsub_topic_filter& filter, const req_state* s, const rgw: int publish_reserve(EventType event_type, reservation_t& res) { - RGWUserPubSub ps_user(res.store, res.s->user->get_id()); - RGWUserPubSub::Bucket ps_bucket(&ps_user, res.s->bucket->get_key()); + RGWPubSub ps(res.store, res.s->user->get_id().tenant); + RGWPubSub::Bucket ps_bucket(&ps, res.s->bucket->get_key()); rgw_pubsub_bucket_topics bucket_topics; auto rc = ps_bucket.get_topics(&bucket_topics); if (rc < 0) { diff --git a/src/rgw/rgw_obj_manifest.h b/src/rgw/rgw_obj_manifest.h index 0a6dfa67d6e..5423dea3564 100644 --- a/src/rgw/rgw_obj_manifest.h +++ b/src/rgw/rgw_obj_manifest.h @@ -17,6 +17,7 @@ #include "rgw_common.h" #include "rgw_compression_types.h" +#include "rgw_sal.h" class RGWSI_Zone; struct RGWZoneGroup; @@ -44,7 +45,7 @@ public: } rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const; - rgw_raw_obj get_raw_obj(RGWRados *store) const; + rgw_raw_obj get_raw_obj(rgw::sal::RGWStore* store) const; rgw_obj_select& operator=(const rgw_obj& rhs) { obj = rhs; @@ -548,7 +549,7 @@ public: int create_next(uint64_t ofs); rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); } - rgw_raw_obj get_cur_obj(RGWRados *store) const { return cur_obj.get_raw_obj(store); } + rgw_raw_obj get_cur_obj(rgw::sal::RGWStore* store) const { return cur_obj.get_raw_obj(store); } /* total max size of current stripe (including head obj) */ uint64_t cur_stripe_max_size() const { diff --git 
a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index b7765c9deae..d7b9f11180e 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -888,7 +888,7 @@ int RGWGetObj::verify_permission(optional_yield y) { s->object->set_atomic(s->obj_ctx); - if (get_data) { + if (prefetch_data()) { s->object->set_prefetch_data(s->obj_ctx); } @@ -2017,16 +2017,8 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl, optional_yield y) int RGWGetObj::get_data_cb(bufferlist& bl, off_t bl_ofs, off_t bl_len) { - /* garbage collection related handling */ - utime_t start_time = ceph_clock_now(); - if (start_time > gc_invalidate_time) { - int r = store->defer_gc(s->obj_ctx, s->bucket.get(), s->object.get(), s->yield); - if (r < 0) { - ldpp_dout(this, 0) << "WARNING: could not defer gc entry for obj" << dendl; - } - gc_invalidate_time = start_time; - gc_invalidate_time += (s->cct->_conf->rgw_gc_obj_min_wait / 2); - } + /* garbage collection related handling: + * defer_gc disabled for https://tracker.ceph.com/issues/47866 */ return send_response_data(bl, bl_ofs, bl_len); } @@ -2420,7 +2412,7 @@ void RGWGetUsage::execute(optional_yield y) RGWUsageIter usage_iter; - while (is_truncated) { + while (s->bucket && is_truncated) { op_ret = s->bucket->read_usage(start_epoch, end_epoch, max_entries, &is_truncated, usage_iter, usage); if (op_ret == -ENOENT) { @@ -3371,7 +3363,6 @@ void RGWDeleteBucket::execute(optional_yield y) int RGWPutObj::init_processing(optional_yield y) { copy_source = url_decode(s->info.env->get("HTTP_X_AMZ_COPY_SOURCE", "")); copy_source_range = s->info.env->get("HTTP_X_AMZ_COPY_SOURCE_RANGE"); - map<string, bufferlist> src_attrs; size_t pos; int ret; @@ -3413,15 +3404,20 @@ int RGWPutObj::init_processing(optional_yield y) { return ret; } } - ret = store->getRados()->get_bucket_info(store->svc(), - copy_source_tenant_name, - copy_source_bucket_name, - copy_source_bucket_info, - NULL, s->yield, &src_attrs); + std::unique_ptr<rgw::sal::RGWBucket> bucket; + ret = 
store->get_bucket(s->user.get(), copy_source_tenant_name, copy_source_bucket_name, + &bucket, s->yield); + if (ret < 0) { + ldpp_dout(this, 5) << __func__ << "(): get_bucket() returned ret=" << ret << dendl; + return ret; + } + + ret = bucket->get_bucket_info(s->yield); if (ret < 0) { ldpp_dout(this, 5) << __func__ << "(): get_bucket_info() returned ret=" << ret << dendl; return ret; } + copy_source_bucket_info = bucket->get_info(); /* handle x-amz-copy-source-range */ if (copy_source_range) { @@ -3790,12 +3786,7 @@ void RGWPutObj::execute(optional_yield y) /* Handle object versioning of Swift API. */ if (! multipart) { - op_ret = store->getRados()->swift_versioning_copy(obj_ctx, - s->bucket_owner.get_id(), - s->bucket.get(), - s->object.get(), - this, - s->yield); + op_ret = s->object->swift_versioning_copy(s->obj_ctx, this, s->yield); if (op_ret < 0) { return; } @@ -3837,7 +3828,7 @@ void RGWPutObj::execute(optional_yield y) ldpp_dout(this, 20) << "dest_placement for part=" << upload_info.dest_placement << dendl; processor.emplace<MultipartObjectProcessor>( &*aio, store, s->bucket.get(), pdest_placement, - s->owner.get_id(), obj_ctx, s->object->get_obj(), + s->owner.get_id(), obj_ctx, std::move(s->object->clone()), multipart_upload_id, multipart_part_num, multipart_part_str, this, s->yield); } else if(append) { @@ -3848,7 +3839,7 @@ void RGWPutObj::execute(optional_yield y) pdest_placement = &s->dest_placement; processor.emplace<AppendObjectProcessor>( &*aio, store, s->bucket.get(), pdest_placement, s->bucket_owner.get_id(), - obj_ctx, s->object->get_obj(), + obj_ctx, std::move(s->object->clone()), s->req_id, position, &cur_accounted_size, this, s->yield); } else { if (s->bucket->versioning_enabled()) { @@ -3862,8 +3853,8 @@ void RGWPutObj::execute(optional_yield y) pdest_placement = &s->dest_placement; processor.emplace<AtomicObjectProcessor>( &*aio, store, s->bucket.get(), pdest_placement, - s->bucket_owner.get_id(), obj_ctx, s->object->get_obj(), olh_epoch, - 
s->req_id, this, s->yield); + s->bucket_owner.get_id(), obj_ctx, std::move(s->object->clone()), + olh_epoch, s->req_id, this, s->yield); } op_ret = processor->prepare(s->yield); @@ -4201,7 +4192,7 @@ void RGWPostObj::execute(optional_yield y) &s->dest_placement, s->bucket_owner.get_id(), *static_cast<RGWObjectCtx*>(s->obj_ctx), - obj->get_obj(), 0, s->req_id, this, s->yield); + std::move(obj), 0, s->req_id, this, s->yield); op_ret = processor.prepare(s->yield); if (op_ret < 0) { return; @@ -4748,13 +4739,13 @@ void RGWDeleteObj::execute(optional_yield y) } if (check_obj_lock) { - /* check if obj exists, read orig attrs */ - if (op_ret == -ENOENT) { - /* object maybe delete_marker, skip check_obj_lock*/ - check_obj_lock = false; - } else { - return; - } + /* check if obj exists, read orig attrs */ + if (op_ret == -ENOENT) { + /* object maybe delete_marker, skip check_obj_lock*/ + check_obj_lock = false; + } else { + return; + } } } else { attrs = s->object->get_attrs(); @@ -4764,37 +4755,10 @@ void RGWDeleteObj::execute(optional_yield y) op_ret = 0; if (check_obj_lock) { - auto aiter = attrs.find(RGW_ATTR_OBJECT_RETENTION); - if (aiter != attrs.end()) { - RGWObjectRetention obj_retention; - try { - decode(obj_retention, aiter->second); - } catch (buffer::error& err) { - ldpp_dout(this, 0) << "ERROR: failed to decode RGWObjectRetention" << dendl; - op_ret = -EIO; - return; - } - if (ceph::real_clock::to_time_t(obj_retention.get_retain_until_date()) > ceph_clock_now()) { - if (obj_retention.get_mode().compare("GOVERNANCE") != 0 || !bypass_perm || !bypass_governance_mode) { - op_ret = -EACCES; - return; - } - } - } - aiter = attrs.find(RGW_ATTR_OBJECT_LEGAL_HOLD); - if (aiter != attrs.end()) { - RGWObjectLegalHold obj_legal_hold; - try { - decode(obj_legal_hold, aiter->second); - } catch (buffer::error& err) { - ldpp_dout(this, 0) << "ERROR: failed to decode RGWObjectLegalHold" << dendl; - op_ret = -EIO; - return; - } - if (obj_legal_hold.is_enabled()) { - op_ret = 
-EACCES; - return; - } + int object_lock_response = verify_object_lock(this, attrs, bypass_perm, bypass_governance_mode); + if (object_lock_response != 0) { + op_ret = object_lock_response; + return; } } @@ -4836,10 +4800,7 @@ void RGWDeleteObj::execute(optional_yield y) s->object->set_atomic(s->obj_ctx); bool ver_restored = false; - op_ret = store->getRados()->swift_versioning_restore(*obj_ctx, s->bucket_owner.get_id(), - s->bucket.get(), - s->object.get(), - ver_restored, this); + op_ret = s->object->swift_versioning_restore(s->obj_ctx, ver_restored, this); if (op_ret < 0) { return; } @@ -5157,15 +5118,14 @@ void RGWCopyObj::execute(optional_yield y) return; } - RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx); if ( ! version_id.empty()) { dest_object->set_instance(version_id); } else if (dest_bucket->versioning_enabled()) { dest_object->gen_rand_obj_instance_name(); } - src_object->set_atomic(&obj_ctx); - dest_object->set_atomic(&obj_ctx); + src_object->set_atomic(s->obj_ctx); + dest_object->set_atomic(s->obj_ctx); encode_delete_at_attr(delete_at, attrs); @@ -5189,16 +5149,12 @@ void RGWCopyObj::execute(optional_yield y) /* Handle object versioning of Swift API. In case of copying to remote this * should fail gently (op_ret == 0) as the dst_obj will not exist here. 
*/ - op_ret = store->getRados()->swift_versioning_copy(obj_ctx, - dest_bucket->get_info().owner, - dest_bucket.get(), - dest_object.get(), - this, - s->yield); + op_ret = dest_object->swift_versioning_copy(s->obj_ctx, this, s->yield); if (op_ret < 0) { return; } + RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx); op_ret = src_object->copy_object(obj_ctx, s->user.get(), &s->info, @@ -5557,7 +5513,7 @@ void RGWPutLC::execute(optional_yield y) return; } - op_ret = config.rebuild(store->getRados(), new_config); + op_ret = config.rebuild(new_config); if (op_ret < 0) return; @@ -5575,7 +5531,7 @@ void RGWPutLC::execute(optional_yield y) return; } - op_ret = store->getRados()->get_lc()->set_bucket_config(s->bucket->get_info(), s->bucket_attrs, &new_config); + op_ret = store->get_rgwlc()->set_bucket_config(s->bucket->get_info(), s->bucket_attrs, &new_config); if (op_ret < 0) { return; } @@ -5591,7 +5547,7 @@ void RGWDeleteLC::execute(optional_yield y) return; } - op_ret = store->getRados()->get_lc()->remove_bucket_config(s->bucket->get_info(), s->bucket_attrs); + op_ret = store->get_rgwlc()->remove_bucket_config(s->bucket->get_info(), s->bucket_attrs); if (op_ret < 0) { return; } @@ -5823,8 +5779,7 @@ void RGWInitMultipart::pre_exec() void RGWInitMultipart::execute(optional_yield y) { bufferlist aclbl; - map<string, bufferlist> attrs; - rgw_obj obj; + rgw::sal::RGWAttrs attrs; if (get_params(y) < 0) return; @@ -5857,6 +5812,7 @@ void RGWInitMultipart::execute(optional_yield y) do { char buf[33]; + std::unique_ptr<rgw::sal::RGWObject> obj; gen_rand_alphanumeric(s->cct, buf, sizeof(buf) - 1); upload_id = MULTIPART_UPLOAD_ID_PREFIX; /* v2 upload id */ upload_id.append(buf); @@ -5865,29 +5821,30 @@ void RGWInitMultipart::execute(optional_yield y) RGWMPObj mp(s->object->get_name(), upload_id); tmp_obj_name = mp.get_meta(); - obj.init_ns(s->bucket->get_key(), tmp_obj_name, mp_ns); + obj = s->bucket->get_object(rgw_obj_key(tmp_obj_name, string(), mp_ns)); // the 
meta object will be indexed with 0 size, we c - obj.set_in_extra_data(true); - obj.index_hash_source = s->object->get_name(); + obj->set_in_extra_data(true); + obj->set_hash_source(s->object->get_name()); - RGWRados::Object op_target(store->getRados(), s->bucket->get_info(), *static_cast<RGWObjectCtx *>(s->obj_ctx), obj); - op_target.set_versioning_disabled(true); /* no versioning for multipart meta */ + std::unique_ptr<rgw::sal::RGWObject::WriteOp> obj_op = obj->get_write_op(s->obj_ctx); - RGWRados::Object::Write obj_op(&op_target); - - obj_op.meta.owner = s->owner.get_id(); - obj_op.meta.category = RGWObjCategory::MultiMeta; - obj_op.meta.flags = PUT_OBJ_CREATE_EXCL; - obj_op.meta.mtime = &mtime; + obj_op->params.versioning_disabled = true; /* no versioning for multipart meta */ + obj_op->params.owner = s->owner; + obj_op->params.category = RGWObjCategory::MultiMeta; + obj_op->params.flags = PUT_OBJ_CREATE_EXCL; + obj_op->params.mtime = &mtime; + obj_op->params.attrs = &attrs; multipart_upload_info upload_info; upload_info.dest_placement = s->dest_placement; bufferlist bl; encode(upload_info, bl); - obj_op.meta.data = &bl; + obj_op->params.data = &bl; + + op_ret = obj_op->prepare(s->yield); - op_ret = obj_op.write_meta(bl.length(), 0, attrs, s->yield); + op_ret = obj_op->write_meta(bl.length(), 0, s->yield); } while (op_ret == -EEXIST); // send request to notification manager @@ -5944,14 +5901,14 @@ void RGWCompleteMultipart::execute(optional_yield y) string meta_oid; map<uint32_t, RGWUploadPartInfo> obj_parts; map<uint32_t, RGWUploadPartInfo>::iterator obj_iter; - map<string, bufferlist> attrs; + rgw::sal::RGWAttrs attrs; off_t ofs = 0; MD5 hash; char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE]; char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16]; bufferlist etag_bl; std::unique_ptr<rgw::sal::RGWObject> meta_obj; - rgw_obj target_obj; + std::unique_ptr<rgw::sal::RGWObject> target_obj; RGWMPObj mp; RGWObjManifest manifest; uint64_t olh_epoch = 0; @@ -6026,18 
+5983,12 @@ void RGWCompleteMultipart::execute(optional_yield y) /*take a cls lock on meta_obj to prevent racing completions (or retries) from deleting the parts*/ - rgw_pool meta_pool; - rgw_raw_obj raw_obj; int max_lock_secs_mp = s->cct->_conf.get_val<int64_t>("rgw_mp_lock_max_time"); utime_t dur(max_lock_secs_mp, 0); - store->getRados()->obj_to_raw((s->bucket->get_info()).placement_rule, meta_obj->get_obj(), &raw_obj); - store->getRados()->get_obj_data_pool((s->bucket->get_info()).placement_rule, - meta_obj->get_obj(), &meta_pool); - store->getRados()->open_pool_ctx(meta_pool, serializer.ioctx, true); - - op_ret = serializer.try_lock(raw_obj.oid, dur, y); + serializer = meta_obj->get_serializer("RGWCompleteMultipart"); + op_ret = serializer->try_lock(dur, y); if (op_ret < 0) { ldpp_dout(this, 0) << "failed to acquire lock" << dendl; op_ret = -ERR_INTERNAL_ERROR; @@ -6172,42 +6123,46 @@ void RGWCompleteMultipart::execute(optional_yield y) attrs[RGW_ATTR_COMPRESSION] = tmp; } - target_obj.init(s->bucket->get_key(), s->object->get_name()); + target_obj = s->bucket->get_object(rgw_obj_key(s->object->get_name())); if (versioned_object) { if (!version_id.empty()) { - target_obj.key.set_instance(version_id); + target_obj->set_instance(version_id); } else { - store->getRados()->gen_rand_obj_instance_name(&target_obj); - version_id = target_obj.key.get_instance(); + target_obj->gen_rand_obj_instance_name(); + version_id = target_obj->get_instance(); } } RGWObjectCtx& obj_ctx = *static_cast<RGWObjectCtx *>(s->obj_ctx); - obj_ctx.set_atomic(target_obj); + target_obj->set_atomic(&obj_ctx); + + std::unique_ptr<rgw::sal::RGWObject::WriteOp> obj_op = target_obj->get_write_op(&obj_ctx); - RGWRados::Object op_target(store->getRados(), s->bucket->get_info(), *static_cast<RGWObjectCtx *>(s->obj_ctx), target_obj); - RGWRados::Object::Write obj_op(&op_target); + obj_op->params.manifest = &manifest; + obj_op->params.remove_objs = &remove_objs; - obj_op.meta.manifest = &manifest; - 
obj_op.meta.remove_objs = &remove_objs; + obj_op->params.ptag = &s->req_id; /* use req_id as operation tag */ + obj_op->params.owner = s->owner; + obj_op->params.flags = PUT_OBJ_CREATE; + obj_op->params.modify_tail = true; + obj_op->params.completeMultipart = true; + obj_op->params.olh_epoch = olh_epoch; + obj_op->params.attrs = &attrs; + op_ret = obj_op->prepare(s->yield); + if (op_ret < 0) + return; - obj_op.meta.ptag = &s->req_id; /* use req_id as operation tag */ - obj_op.meta.owner = s->owner.get_id(); - obj_op.meta.flags = PUT_OBJ_CREATE; - obj_op.meta.modify_tail = true; - obj_op.meta.completeMultipart = true; - obj_op.meta.olh_epoch = olh_epoch; - op_ret = obj_op.write_meta(ofs, accounted_size, attrs, s->yield); + op_ret = obj_op->write_meta(ofs, accounted_size, s->yield); if (op_ret < 0) return; // remove the upload obj - int r = store->getRados()->delete_obj(*static_cast<RGWObjectCtx *>(s->obj_ctx), - s->bucket->get_info(), meta_obj->get_obj(), 0); + string version_id; + int r = meta_obj->delete_object(s->obj_ctx, ACLOwner(), ACLOwner(), ceph::real_time(), false, 0, version_id, null_yield); if (r >= 0) { /* serializer's exclusive lock is released */ - serializer.clear_locked(); + serializer->clear_locked(); } else { ldpp_dout(this, 0) << "WARNING: failed to remove object " << meta_obj << dendl; } @@ -6220,28 +6175,13 @@ void RGWCompleteMultipart::execute(optional_yield y) } } -int RGWCompleteMultipart::MPSerializer::try_lock( - const std::string& _oid, - utime_t dur, optional_yield y) -{ - oid = _oid; - op.assert_exists(); - lock.set_duration(dur); - lock.lock_exclusive(&op); - int ret = rgw_rados_operate(ioctx, oid, &op, y); - if (! 
ret) { - locked = true; - } - return ret; -} - void RGWCompleteMultipart::complete() { /* release exclusive lock iff not already */ - if (unlikely(serializer.locked)) { - int r = serializer.unlock(); + if (unlikely(serializer && serializer->locked)) { + int r = serializer->unlock(); if (r < 0) { - ldpp_dout(this, 0) << "WARNING: failed to unlock " << serializer.oid << dendl; + ldpp_dout(this, 0) << "WARNING: failed to unlock " << serializer->oid << dendl; } } send_response(); @@ -6411,10 +6351,31 @@ void RGWGetHealthCheck::execute(optional_yield y) int RGWDeleteMultiObj::verify_permission(optional_yield y) { + int op_ret = get_params(y); + if (op_ret) { + return op_ret; + } + if (s->iam_policy || ! s->iam_user_policies.empty()) { + if (s->bucket->get_info().obj_lock_enabled() && bypass_governance_mode) { + auto r = eval_user_policies(s->iam_user_policies, s->env, boost::none, + rgw::IAM::s3BypassGovernanceRetention, ARN(s->bucket->get_key())); + if (r == Effect::Deny) { + bypass_perm = false; + } else if (r == Effect::Pass && s->iam_policy) { + r = s->iam_policy->eval(s->env, *s->auth.identity, rgw::IAM::s3BypassGovernanceRetention, + ARN(s->bucket->get_key())); + if (r == Effect::Deny) { + bypass_perm = false; + } + } + } + + bool not_versioned = rgw::sal::RGWObject::empty(s->object.get()) || s->object->get_instance().empty(); + auto usr_policy_res = eval_user_policies(s->iam_user_policies, s->env, boost::none, - s->object->get_instance().empty() ? + not_versioned ? rgw::IAM::s3DeleteObject : rgw::IAM::s3DeleteObjectVersion, ARN(s->bucket->get_key())); @@ -6425,7 +6386,7 @@ int RGWDeleteMultiObj::verify_permission(optional_yield y) rgw::IAM::Effect r = Effect::Pass; if (s->iam_policy) { r = s->iam_policy->eval(s->env, *s->auth.identity, - s->object->get_instance().empty() ? + not_versioned ? 
rgw::IAM::s3DeleteObject : rgw::IAM::s3DeleteObjectVersion, ARN(s->bucket->get_key())); @@ -6458,11 +6419,6 @@ void RGWDeleteMultiObj::execute(optional_yield y) RGWObjectCtx *obj_ctx = static_cast<RGWObjectCtx *>(s->obj_ctx); char* buf; - op_ret = get_params(y); - if (op_ret < 0) { - goto error; - } - buf = data.c_str(); if (!buf) { op_ret = -EINVAL; @@ -6551,7 +6507,30 @@ void RGWDeleteMultiObj::execute(optional_yield y) continue; } } - + + // verify_object_lock + bool check_obj_lock = obj->have_instance() && bucket->get_info().obj_lock_enabled(); + if (check_obj_lock) { + int get_attrs_response = obj->get_obj_attrs(s->obj_ctx, s->yield); + if (get_attrs_response < 0) { + if (get_attrs_response == -ENOENT) { + // object maybe delete_marker, skip check_obj_lock + check_obj_lock = false; + } else { + // Something went wrong. + send_partial_response(*iter, false, "", get_attrs_response); + continue; + } + } + } + + if (check_obj_lock) { + int object_lock_response = verify_object_lock(this, obj->get_attrs(), bypass_perm, bypass_governance_mode); + if (object_lock_response != 0) { + send_partial_response(*iter, false, "", object_lock_response); + continue; + } + } // make reservation for notification if needed const auto versioned_object = s->bucket->versioning_enabled(); rgw::notify::reservation_t res(store, s, obj.get()); @@ -7052,7 +7031,7 @@ int RGWBulkUploadOp::handle_file(const std::string_view path, using namespace rgw::putobj; AtomicObjectProcessor processor(&*aio, store, bucket.get(), &s->dest_placement, bowner.get_id(), - obj_ctx, obj->get_obj(), 0, s->req_id, this, s->yield); + obj_ctx, std::move(obj), 0, s->req_id, this, s->yield); op_ret = processor.prepare(s->yield); if (op_ret < 0) { @@ -7500,6 +7479,10 @@ void RGWDefaultResponseOp::send_response() { void RGWPutBucketPolicy::send_response() { + if (!op_ret) { + /* A successful Put Bucket Policy should return a 204 on success */ + op_ret = STATUS_NO_CONTENT; + } if (op_ret) { set_req_state_err(s, op_ret); 
} @@ -7936,7 +7919,7 @@ void RGWGetObjLegalHold::execute(optional_yield y) void RGWGetClusterStat::execute(optional_yield y) { - op_ret = this->store->getRados()->get_rados_handle()->cluster_stat(stats_op); + op_ret = store->cluster_stat(stats_op); } diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index cbe85617241..be1d8028272 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -48,7 +48,6 @@ #include "rgw_torrent.h" #include "rgw_tag.h" #include "rgw_object_lock.h" -#include "cls/lock/cls_lock_client.h" #include "cls/rgw/cls_rgw_client.h" #include "rgw_public_access.h" @@ -1135,7 +1134,7 @@ protected: std::unique_ptr <RGWObjTags> obj_tags; const char *dlo_manifest; RGWSLOInfo *slo_info; - map<string, bufferlist> attrs; + rgw::sal::RGWAttrs attrs; ceph::real_time mtime; uint64_t olh_epoch; string version_id; @@ -1741,31 +1740,11 @@ protected: string etag; string version_id; bufferlist data; - - struct MPSerializer { - librados::IoCtx ioctx; - rados::cls::lock::Lock lock; - librados::ObjectWriteOperation op; - std::string oid; - bool locked; - - MPSerializer() : lock("RGWCompleteMultipart"), locked(false) - {} - - int try_lock(const std::string& oid, utime_t dur, optional_yield y); - - int unlock() { - return lock.unlock(&ioctx, oid); - } - - void clear_locked() { - locked = false; - } - } serializer; + rgw::sal::MPSerializer* serializer; public: - RGWCompleteMultipart() {} - ~RGWCompleteMultipart() override {} + RGWCompleteMultipart() : serializer(nullptr) {} + ~RGWCompleteMultipart() override { delete serializer; } int verify_permission(optional_yield y) override; void pre_exec() override; @@ -1928,11 +1907,16 @@ protected: bool quiet; bool status_dumped; bool acl_allowed = false; + bool bypass_perm; + bool bypass_governance_mode; + public: RGWDeleteMultiObj() { quiet = false; status_dumped = false; + bypass_perm = true; + bypass_governance_mode = false; } int verify_permission(optional_yield y) override; void pre_exec() override; @@ -2375,7 +2359,7 @@ 
public: class RGWGetClusterStat : public RGWOp { protected: - struct rados_cluster_stat_t stats_op; + RGWClusterStat stats_op; public: RGWGetClusterStat() {} diff --git a/src/rgw/rgw_orphan.cc b/src/rgw/rgw_orphan.cc index 20a2d85862d..4aae0779610 100644 --- a/src/rgw/rgw_orphan.cc +++ b/src/rgw/rgw_orphan.cc @@ -450,7 +450,7 @@ int RGWOrphanSearch::handle_stat_result(map<int, list<string> >& oids, RGWRados: RGWObjManifest::obj_iterator miter; for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) { - const rgw_raw_obj& loc = miter.get_location().get_raw_obj(store->getRados()); + const rgw_raw_obj& loc = miter.get_location().get_raw_obj(store); string s = loc.oid; obj_oids.insert(obj_fingerprint(s)); } @@ -1036,7 +1036,7 @@ int RGWRadosList::handle_stat_result(RGWRados::Object::Stat::Result& result, RGWObjManifest::obj_iterator miter; for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) { const rgw_raw_obj& loc = - miter.get_location().get_raw_obj(store->getRados()); + miter.get_location().get_raw_obj(store); string s = loc.oid; obj_oids.insert(s); } @@ -1525,7 +1525,7 @@ int RGWRadosList::do_incomplete_multipart( obj_it != manifest.obj_end(); ++obj_it) { const rgw_raw_obj& loc = - obj_it.get_location().get_raw_obj(store->getRados()); + obj_it.get_location().get_raw_obj(store); std::cout << loc.oid << std::endl; } } diff --git a/src/rgw/rgw_pubsub.cc b/src/rgw/rgw_pubsub.cc index d3a27807c3b..a4c83c40773 100644 --- a/src/rgw/rgw_pubsub.cc +++ b/src/rgw/rgw_pubsub.cc @@ -312,6 +312,26 @@ void rgw_pubsub_topic::dump_xml(Formatter *f) const encode_xml("OpaqueData", opaque_data, f); } +void encode_xml_key_value_entry(const std::string& key, const std::string& value, Formatter *f) { + f->open_object_section("entry"); + encode_xml("key", key, f); + encode_xml("value", value, f); + f->close_section(); // entry +} + +void rgw_pubsub_topic::dump_xml_as_attributes(Formatter *f) const +{ + f->open_array_section("Attributes"); + 
std::string str_user; + user.to_str(str_user); + encode_xml_key_value_entry("User", str_user, f); + encode_xml_key_value_entry("Name", name, f); + encode_xml_key_value_entry("EndPoint", dest.to_json_str(), f); + encode_xml_key_value_entry("TopicArn", arn, f); + encode_xml_key_value_entry("OpaqueData", opaque_data, f); + f->close_section(); // Attributes +} + void encode_json(const char *name, const rgw::notify::EventTypeList& l, Formatter *f) { f->open_array_section(name); @@ -341,7 +361,7 @@ void rgw_pubsub_bucket_topics::dump(Formatter *f) const } } -void rgw_pubsub_user_topics::dump(Formatter *f) const +void rgw_pubsub_topics::dump(Formatter *f) const { Formatter::ArraySection s(*f, "topics"); for (auto& t : topics) { @@ -349,7 +369,7 @@ void rgw_pubsub_user_topics::dump(Formatter *f) const } } -void rgw_pubsub_user_topics::dump_xml(Formatter *f) const +void rgw_pubsub_topics::dump_xml(Formatter *f) const { for (auto& t : topics) { encode_xml("member", t.second.topic, f); @@ -378,6 +398,23 @@ void rgw_pubsub_sub_dest::dump_xml(Formatter *f) const encode_xml("Persistent", persistent, f); } +std::string rgw_pubsub_sub_dest::to_json_str() const +{ + // first 2 members are omitted here since they + // dont apply to AWS compliant topics + JSONFormatter f; + f.open_object_section(""); + encode_json("EndpointAddress", push_endpoint, &f); + encode_json("EndpointArgs", push_endpoint_args, &f); + encode_json("EndpointTopic", arn_topic, &f); + encode_json("HasStoredSecret", stored_secret, &f); + encode_json("Persistent", persistent, &f); + f.close_section(); + std::stringstream ss; + f.flush(ss); + return ss.str(); +} + void rgw_pubsub_sub_config::dump(Formatter *f) const { encode_json("user", user, f); @@ -387,14 +424,14 @@ void rgw_pubsub_sub_config::dump(Formatter *f) const encode_json("s3_id", s3_id, f); } -RGWUserPubSub::RGWUserPubSub(rgw::sal::RGWRadosStore* _store, const rgw_user& _user) : +RGWPubSub::RGWPubSub(rgw::sal::RGWRadosStore* _store, const std::string& 
_tenant) : store(_store), - user(_user), + tenant(_tenant), obj_ctx(store->svc()->sysobj->init_obj_ctx()) { - get_user_meta_obj(&user_meta_obj); + get_meta_obj(&meta_obj); } -int RGWUserPubSub::remove(const rgw_raw_obj& obj, +int RGWPubSub::remove(const rgw_raw_obj& obj, RGWObjVersionTracker *objv_tracker, optional_yield y) { @@ -406,9 +443,9 @@ int RGWUserPubSub::remove(const rgw_raw_obj& obj, return 0; } -int RGWUserPubSub::read_user_topics(rgw_pubsub_user_topics *result, RGWObjVersionTracker *objv_tracker) +int RGWPubSub::read_topics(rgw_pubsub_topics *result, RGWObjVersionTracker *objv_tracker) { - int ret = read(user_meta_obj, result, objv_tracker); + int ret = read(meta_obj, result, objv_tracker); if (ret < 0) { ldout(store->ctx(), 10) << "WARNING: failed to read topics info: ret=" << ret << dendl; return ret; @@ -416,10 +453,10 @@ int RGWUserPubSub::read_user_topics(rgw_pubsub_user_topics *result, RGWObjVersio return 0; } -int RGWUserPubSub::write_user_topics(const rgw_pubsub_user_topics& topics, +int RGWPubSub::write_topics(const rgw_pubsub_topics& topics, RGWObjVersionTracker *objv_tracker, optional_yield y) { - int ret = write(user_meta_obj, topics, objv_tracker, y); + int ret = write(meta_obj, topics, objv_tracker, y); if (ret < 0 && ret != -ENOENT) { ldout(store->ctx(), 1) << "ERROR: failed to write topics info: ret=" << ret << dendl; return ret; @@ -427,12 +464,12 @@ int RGWUserPubSub::write_user_topics(const rgw_pubsub_user_topics& topics, return 0; } -int RGWUserPubSub::get_user_topics(rgw_pubsub_user_topics *result) +int RGWPubSub::get_topics(rgw_pubsub_topics *result) { - return read_user_topics(result, nullptr); + return read_topics(result, nullptr); } -int RGWUserPubSub::Bucket::read_topics(rgw_pubsub_bucket_topics *result, RGWObjVersionTracker *objv_tracker) +int RGWPubSub::Bucket::read_topics(rgw_pubsub_bucket_topics *result, RGWObjVersionTracker *objv_tracker) { int ret = ps->read(bucket_meta_obj, result, objv_tracker); if (ret < 0 && ret != 
-ENOENT) { @@ -442,7 +479,7 @@ int RGWUserPubSub::Bucket::read_topics(rgw_pubsub_bucket_topics *result, RGWObjV return 0; } -int RGWUserPubSub::Bucket::write_topics(const rgw_pubsub_bucket_topics& topics, +int RGWPubSub::Bucket::write_topics(const rgw_pubsub_bucket_topics& topics, RGWObjVersionTracker *objv_tracker, optional_yield y) { @@ -455,15 +492,15 @@ int RGWUserPubSub::Bucket::write_topics(const rgw_pubsub_bucket_topics& topics, return 0; } -int RGWUserPubSub::Bucket::get_topics(rgw_pubsub_bucket_topics *result) +int RGWPubSub::Bucket::get_topics(rgw_pubsub_bucket_topics *result) { return read_topics(result, nullptr); } -int RGWUserPubSub::get_topic(const string& name, rgw_pubsub_topic_subs *result) +int RGWPubSub::get_topic(const string& name, rgw_pubsub_topic_subs *result) { - rgw_pubsub_user_topics topics; - int ret = get_user_topics(&topics); + rgw_pubsub_topics topics; + int ret = get_topics(&topics); if (ret < 0) { ldout(store->ctx(), 1) << "ERROR: failed to read topics info: ret=" << ret << dendl; return ret; @@ -479,10 +516,10 @@ int RGWUserPubSub::get_topic(const string& name, rgw_pubsub_topic_subs *result) return 0; } -int RGWUserPubSub::get_topic(const string& name, rgw_pubsub_topic *result) +int RGWPubSub::get_topic(const string& name, rgw_pubsub_topic *result) { - rgw_pubsub_user_topics topics; - int ret = get_user_topics(&topics); + rgw_pubsub_topics topics; + int ret = get_topics(&topics); if (ret < 0) { ldout(store->ctx(), 1) << "ERROR: failed to read topics info: ret=" << ret << dendl; return ret; @@ -498,15 +535,15 @@ int RGWUserPubSub::get_topic(const string& name, rgw_pubsub_topic *result) return 0; } -int RGWUserPubSub::Bucket::create_notification(const string& topic_name, const rgw::notify::EventTypeList& events, optional_yield y) { +int RGWPubSub::Bucket::create_notification(const string& topic_name, const rgw::notify::EventTypeList& events, optional_yield y) { return create_notification(topic_name, events, std::nullopt, "", y); } -int 
RGWUserPubSub::Bucket::create_notification(const string& topic_name,const rgw::notify::EventTypeList& events, OptionalFilter s3_filter, const std::string& notif_name, optional_yield y) { - rgw_pubsub_topic_subs user_topic_info; +int RGWPubSub::Bucket::create_notification(const string& topic_name,const rgw::notify::EventTypeList& events, OptionalFilter s3_filter, const std::string& notif_name, optional_yield y) { + rgw_pubsub_topic_subs topic_info; rgw::sal::RGWRadosStore *store = ps->store; - int ret = ps->get_topic(topic_name, &user_topic_info); + int ret = ps->get_topic(topic_name, &topic_info); if (ret < 0) { ldout(store->ctx(), 1) << "ERROR: failed to read topic '" << topic_name << "' info: ret=" << ret << dendl; return ret; @@ -526,7 +563,7 @@ int RGWUserPubSub::Bucket::create_notification(const string& topic_name,const rg bucket.name << "'" << dendl; auto& topic_filter = bucket_topics.topics[topic_name]; - topic_filter.topic = user_topic_info.topic; + topic_filter.topic = topic_info.topic; topic_filter.events = events; topic_filter.s3_id = notif_name; if (s3_filter) { @@ -544,12 +581,12 @@ int RGWUserPubSub::Bucket::create_notification(const string& topic_name,const rg return 0; } -int RGWUserPubSub::Bucket::remove_notification(const string& topic_name, optional_yield y) +int RGWPubSub::Bucket::remove_notification(const string& topic_name, optional_yield y) { - rgw_pubsub_topic_subs user_topic_info; + rgw_pubsub_topic_subs topic_info; rgw::sal::RGWRadosStore *store = ps->store; - int ret = ps->get_topic(topic_name, &user_topic_info); + int ret = ps->get_topic(topic_name, &topic_info); if (ret < 0) { ldout(store->ctx(), 1) << "ERROR: failed to read topic info: ret=" << ret << dendl; return ret; @@ -575,15 +612,15 @@ int RGWUserPubSub::Bucket::remove_notification(const string& topic_name, optiona return 0; } -int RGWUserPubSub::create_topic(const string& name, optional_yield y) { +int RGWPubSub::create_topic(const string& name, optional_yield y) { return 
create_topic(name, rgw_pubsub_sub_dest(), "", "", y); } -int RGWUserPubSub::create_topic(const string& name, const rgw_pubsub_sub_dest& dest, const std::string& arn, const std::string& opaque_data, optional_yield y) { +int RGWPubSub::create_topic(const string& name, const rgw_pubsub_sub_dest& dest, const std::string& arn, const std::string& opaque_data, optional_yield y) { RGWObjVersionTracker objv_tracker; - rgw_pubsub_user_topics topics; + rgw_pubsub_topics topics; - int ret = read_user_topics(&topics, &objv_tracker); + int ret = read_topics(&topics, &objv_tracker); if (ret < 0 && ret != -ENOENT) { // its not an error if not topics exist, we create one ldout(store->ctx(), 1) << "ERROR: failed to read topics info: ret=" << ret << dendl; @@ -591,13 +628,13 @@ int RGWUserPubSub::create_topic(const string& name, const rgw_pubsub_sub_dest& d } rgw_pubsub_topic_subs& new_topic = topics.topics[name]; - new_topic.topic.user = user; + new_topic.topic.user = rgw_user("", tenant); new_topic.topic.name = name; new_topic.topic.dest = dest; new_topic.topic.arn = arn; new_topic.topic.opaque_data = opaque_data; - ret = write_user_topics(topics, &objv_tracker, y); + ret = write_topics(topics, &objv_tracker, y); if (ret < 0) { ldout(store->ctx(), 1) << "ERROR: failed to write topics info: ret=" << ret << dendl; return ret; @@ -606,12 +643,12 @@ int RGWUserPubSub::create_topic(const string& name, const rgw_pubsub_sub_dest& d return 0; } -int RGWUserPubSub::remove_topic(const string& name, optional_yield y) +int RGWPubSub::remove_topic(const string& name, optional_yield y) { RGWObjVersionTracker objv_tracker; - rgw_pubsub_user_topics topics; + rgw_pubsub_topics topics; - int ret = read_user_topics(&topics, &objv_tracker); + int ret = read_topics(&topics, &objv_tracker); if (ret < 0 && ret != -ENOENT) { ldout(store->ctx(), 1) << "ERROR: failed to read topics info: ret=" << ret << dendl; return ret; @@ -623,7 +660,7 @@ int RGWUserPubSub::remove_topic(const string& name, optional_yield 
y) topics.topics.erase(name); - ret = write_user_topics(topics, &objv_tracker, y); + ret = write_topics(topics, &objv_tracker, y); if (ret < 0) { ldout(store->ctx(), 1) << "ERROR: failed to remove topics info: ret=" << ret << dendl; return ret; @@ -632,7 +669,7 @@ int RGWUserPubSub::remove_topic(const string& name, optional_yield y) return 0; } -int RGWUserPubSub::Sub::read_sub(rgw_pubsub_sub_config *result, RGWObjVersionTracker *objv_tracker) +int RGWPubSub::Sub::read_sub(rgw_pubsub_sub_config *result, RGWObjVersionTracker *objv_tracker) { int ret = ps->read(sub_meta_obj, result, objv_tracker); if (ret < 0 && ret != -ENOENT) { @@ -642,7 +679,7 @@ int RGWUserPubSub::Sub::read_sub(rgw_pubsub_sub_config *result, RGWObjVersionTra return 0; } -int RGWUserPubSub::Sub::write_sub(const rgw_pubsub_sub_config& sub_conf, +int RGWPubSub::Sub::write_sub(const rgw_pubsub_sub_config& sub_conf, RGWObjVersionTracker *objv_tracker, optional_yield y) { @@ -655,7 +692,7 @@ int RGWUserPubSub::Sub::write_sub(const rgw_pubsub_sub_config& sub_conf, return 0; } -int RGWUserPubSub::Sub::remove_sub(RGWObjVersionTracker *objv_tracker, +int RGWPubSub::Sub::remove_sub(RGWObjVersionTracker *objv_tracker, optional_yield y) { int ret = ps->remove(sub_meta_obj, objv_tracker, y); @@ -667,18 +704,18 @@ int RGWUserPubSub::Sub::remove_sub(RGWObjVersionTracker *objv_tracker, return 0; } -int RGWUserPubSub::Sub::get_conf(rgw_pubsub_sub_config *result) +int RGWPubSub::Sub::get_conf(rgw_pubsub_sub_config *result) { return read_sub(result, nullptr); } -int RGWUserPubSub::Sub::subscribe(const string& topic, const rgw_pubsub_sub_dest& dest, optional_yield y, const std::string& s3_id) +int RGWPubSub::Sub::subscribe(const string& topic, const rgw_pubsub_sub_dest& dest, optional_yield y, const std::string& s3_id) { - RGWObjVersionTracker user_objv_tracker; - rgw_pubsub_user_topics topics; + RGWObjVersionTracker objv_tracker; + rgw_pubsub_topics topics; rgw::sal::RGWRadosStore *store = ps->store; - int ret = 
ps->read_user_topics(&topics, &user_objv_tracker); + int ret = ps->read_topics(&topics, &objv_tracker); if (ret < 0) { ldout(store->ctx(), 1) << "ERROR: failed to read topics info: ret=" << ret << dendl; return ret != -ENOENT ? ret : -EINVAL; @@ -694,7 +731,7 @@ int RGWUserPubSub::Sub::subscribe(const string& topic, const rgw_pubsub_sub_dest rgw_pubsub_sub_config sub_conf; - sub_conf.user = ps->user; + sub_conf.user = rgw_user("", ps->tenant); sub_conf.name = sub; sub_conf.topic = topic; sub_conf.dest = dest; @@ -702,7 +739,7 @@ int RGWUserPubSub::Sub::subscribe(const string& topic, const rgw_pubsub_sub_dest t.subs.insert(sub); - ret = ps->write_user_topics(topics, &user_objv_tracker, y); + ret = ps->write_topics(topics, &objv_tracker, y); if (ret < 0) { ldout(store->ctx(), 1) << "ERROR: failed to write topics info: ret=" << ret << dendl; return ret; @@ -716,7 +753,7 @@ int RGWUserPubSub::Sub::subscribe(const string& topic, const rgw_pubsub_sub_dest return 0; } -int RGWUserPubSub::Sub::unsubscribe(const string& _topic, optional_yield y) +int RGWPubSub::Sub::unsubscribe(const string& _topic, optional_yield y) { string topic = _topic; RGWObjVersionTracker sobjv_tracker; @@ -733,9 +770,9 @@ int RGWUserPubSub::Sub::unsubscribe(const string& _topic, optional_yield y) } RGWObjVersionTracker objv_tracker; - rgw_pubsub_user_topics topics; + rgw_pubsub_topics topics; - int ret = ps->read_user_topics(&topics, &objv_tracker); + int ret = ps->read_topics(&topics, &objv_tracker); if (ret < 0) { // not an error - could be that topic was already deleted ldout(store->ctx(), 10) << "WARNING: failed to read topics info: ret=" << ret << dendl; @@ -746,7 +783,7 @@ int RGWUserPubSub::Sub::unsubscribe(const string& _topic, optional_yield y) t.subs.erase(sub); - ret = ps->write_user_topics(topics, &objv_tracker, y); + ret = ps->write_topics(topics, &objv_tracker, y); if (ret < 0) { ldout(store->ctx(), 1) << "ERROR: failed to write topics info: ret=" << ret << dendl; return ret; @@ -763,7 
+800,7 @@ int RGWUserPubSub::Sub::unsubscribe(const string& _topic, optional_yield y) } template<typename EventType> -void RGWUserPubSub::SubWithEvents<EventType>::list_events_result::dump(Formatter *f) const +void RGWPubSub::SubWithEvents<EventType>::list_events_result::dump(Formatter *f) const { encode_json("next_marker", next_marker, f); encode_json("is_truncated", is_truncated, f); @@ -775,7 +812,7 @@ void RGWUserPubSub::SubWithEvents<EventType>::list_events_result::dump(Formatter } template<typename EventType> -int RGWUserPubSub::SubWithEvents<EventType>::list_events(const string& marker, int max_events) +int RGWPubSub::SubWithEvents<EventType>::list_events(const string& marker, int max_events) { RGWRados *store = ps->store->getRados(); rgw_pubsub_sub_config sub_conf; @@ -840,7 +877,7 @@ int RGWUserPubSub::SubWithEvents<EventType>::list_events(const string& marker, i } template<typename EventType> -int RGWUserPubSub::SubWithEvents<EventType>::remove_event(const string& event_id) +int RGWPubSub::SubWithEvents<EventType>::remove_event(const string& event_id) { rgw::sal::RGWRadosStore *store = ps->store; rgw_pubsub_sub_config sub_conf; @@ -878,25 +915,25 @@ int RGWUserPubSub::SubWithEvents<EventType>::remove_event(const string& event_id return 0; } -void RGWUserPubSub::get_user_meta_obj(rgw_raw_obj *obj) const { - *obj = rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, user_meta_oid()); +void RGWPubSub::get_meta_obj(rgw_raw_obj *obj) const { + *obj = rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, meta_oid()); } -void RGWUserPubSub::get_bucket_meta_obj(const rgw_bucket& bucket, rgw_raw_obj *obj) const { +void RGWPubSub::get_bucket_meta_obj(const rgw_bucket& bucket, rgw_raw_obj *obj) const { *obj = rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, bucket_meta_oid(bucket)); } -void RGWUserPubSub::get_sub_meta_obj(const string& name, rgw_raw_obj *obj) const { +void RGWPubSub::get_sub_meta_obj(const string& name, rgw_raw_obj *obj) 
const { *obj = rgw_raw_obj(store->svc()->zone->get_zone_params().log_pool, sub_meta_oid(name)); } template<typename EventType> -void RGWUserPubSub::SubWithEvents<EventType>::dump(Formatter* f) const { +void RGWPubSub::SubWithEvents<EventType>::dump(Formatter* f) const { list.dump(f); } // explicit instantiation for the only two possible types // no need to move implementation to header -template class RGWUserPubSub::SubWithEvents<rgw_pubsub_event>; -template class RGWUserPubSub::SubWithEvents<rgw_pubsub_s3_record>; +template class RGWPubSub::SubWithEvents<rgw_pubsub_event>; +template class RGWPubSub::SubWithEvents<rgw_pubsub_s3_record>; diff --git a/src/rgw/rgw_pubsub.h b/src/rgw/rgw_pubsub.h index 8c76a4df201..d5331197fe5 100644 --- a/src/rgw/rgw_pubsub.h +++ b/src/rgw/rgw_pubsub.h @@ -403,6 +403,7 @@ struct rgw_pubsub_sub_dest { void dump(Formatter *f) const; void dump_xml(Formatter *f) const; + std::string to_json_str() const; }; WRITE_CLASS_ENCODER(rgw_pubsub_sub_dest) @@ -471,11 +472,12 @@ struct rgw_pubsub_topic { } string to_str() const { - return user.to_str() + "/" + name; + return user.tenant + "/" + name; } void dump(Formatter *f) const; void dump_xml(Formatter *f) const; + void dump_xml_as_attributes(Formatter *f) const; bool operator<(const rgw_pubsub_topic& t) const { return to_str().compare(t.to_str()); @@ -564,7 +566,7 @@ struct rgw_pubsub_bucket_topics { }; WRITE_CLASS_ENCODER(rgw_pubsub_bucket_topics) -struct rgw_pubsub_user_topics { +struct rgw_pubsub_topics { std::map<std::string, rgw_pubsub_topic_subs> topics; void encode(bufferlist& bl) const { @@ -582,66 +584,66 @@ struct rgw_pubsub_user_topics { void dump(Formatter *f) const; void dump_xml(Formatter *f) const; }; -WRITE_CLASS_ENCODER(rgw_pubsub_user_topics) +WRITE_CLASS_ENCODER(rgw_pubsub_topics) -static std::string pubsub_user_oid_prefix = "pubsub.user."; +static std::string pubsub_oid_prefix = "pubsub."; -class RGWUserPubSub +class RGWPubSub { friend class Bucket; rgw::sal::RGWRadosStore 
*store; - rgw_user user; + const std::string tenant; RGWSysObjectCtx obj_ctx; - rgw_raw_obj user_meta_obj; + rgw_raw_obj meta_obj; - std::string user_meta_oid() const { - return pubsub_user_oid_prefix + user.to_str(); + std::string meta_oid() const { + return pubsub_oid_prefix + tenant; } std::string bucket_meta_oid(const rgw_bucket& bucket) const { - return pubsub_user_oid_prefix + user.to_str() + ".bucket." + bucket.name + "/" + bucket.bucket_id; + return pubsub_oid_prefix + tenant + ".bucket." + bucket.name + "/" + bucket.bucket_id; } std::string sub_meta_oid(const string& name) const { - return pubsub_user_oid_prefix + user.to_str() + ".sub." + name; + return pubsub_oid_prefix + tenant + ".sub." + name; } template <class T> - int read(const rgw_raw_obj& obj, T *data, RGWObjVersionTracker *objv_tracker); + int read(const rgw_raw_obj& obj, T* data, RGWObjVersionTracker* objv_tracker); template <class T> int write(const rgw_raw_obj& obj, const T& info, - RGWObjVersionTracker *obj_tracker, optional_yield y); + RGWObjVersionTracker* obj_tracker, optional_yield y); - int remove(const rgw_raw_obj& obj, RGWObjVersionTracker *objv_tracker, + int remove(const rgw_raw_obj& obj, RGWObjVersionTracker* objv_tracker, optional_yield y); - int read_user_topics(rgw_pubsub_user_topics *result, RGWObjVersionTracker *objv_tracker); - int write_user_topics(const rgw_pubsub_user_topics& topics, - RGWObjVersionTracker *objv_tracker, optional_yield y); + int read_topics(rgw_pubsub_topics *result, RGWObjVersionTracker* objv_tracker); + int write_topics(const rgw_pubsub_topics& topics, + RGWObjVersionTracker* objv_tracker, optional_yield y); public: - RGWUserPubSub(rgw::sal::RGWRadosStore *_store, const rgw_user& _user); + RGWPubSub(rgw::sal::RGWRadosStore *_store, const std::string& tenant); class Bucket { - friend class RGWUserPubSub; - RGWUserPubSub *ps; + friend class RGWPubSub; + RGWPubSub *ps; rgw_bucket bucket; rgw_raw_obj bucket_meta_obj; // read the list of topics associated 
with a bucket and populate into result // use version tacker to enforce atomicity between read/write // return 0 on success or if no topic was associated with the bucket, error code otherwise - int read_topics(rgw_pubsub_bucket_topics *result, RGWObjVersionTracker *objv_tracker); + int read_topics(rgw_pubsub_bucket_topics *result, RGWObjVersionTracker* objv_tracker); // set the list of topics associated with a bucket // use version tacker to enforce atomicity between read/write // return 0 on success, error code otherwise int write_topics(const rgw_pubsub_bucket_topics& topics, - RGWObjVersionTracker *objv_tracker, optional_yield y); + RGWObjVersionTracker* objv_tracker, optional_yield y); public: - Bucket(RGWUserPubSub *_ps, const rgw_bucket& _bucket) : ps(_ps), bucket(_bucket) { + Bucket(RGWPubSub *_ps, const rgw_bucket& _bucket) : ps(_ps), bucket(_bucket) { ps->get_bucket_meta_obj(bucket, &bucket_meta_obj); } @@ -665,18 +667,18 @@ public: // base class for subscription class Sub { - friend class RGWUserPubSub; + friend class RGWPubSub; protected: - RGWUserPubSub* const ps; + RGWPubSub* const ps; const std::string sub; rgw_raw_obj sub_meta_obj; - int read_sub(rgw_pubsub_sub_config *result, RGWObjVersionTracker *objv_tracker); + int read_sub(rgw_pubsub_sub_config *result, RGWObjVersionTracker* objv_tracker); int write_sub(const rgw_pubsub_sub_config& sub_conf, - RGWObjVersionTracker *objv_tracker, optional_yield y); - int remove_sub(RGWObjVersionTracker *objv_tracker, optional_yield y); + RGWObjVersionTracker* objv_tracker, optional_yield y); + int remove_sub(RGWObjVersionTracker* objv_tracker, optional_yield y); public: - Sub(RGWUserPubSub *_ps, const std::string& _sub) : ps(_ps), sub(_sub) { + Sub(RGWPubSub *_ps, const std::string& _sub) : ps(_ps), sub(_sub) { ps->get_sub_meta_obj(sub, &sub_meta_obj); } @@ -706,7 +708,7 @@ public: } list; public: - SubWithEvents(RGWUserPubSub *_ps, const string& _sub) : Sub(_ps, _sub) {} + SubWithEvents(RGWPubSub *_ps, const 
string& _sub) : Sub(_ps, _sub) {} virtual ~SubWithEvents() = default; @@ -738,14 +740,14 @@ public: return std::make_shared<SubWithEvents<rgw_pubsub_s3_record>>(this, sub); } - void get_user_meta_obj(rgw_raw_obj *obj) const; + void get_meta_obj(rgw_raw_obj *obj) const; void get_bucket_meta_obj(const rgw_bucket& bucket, rgw_raw_obj *obj) const; void get_sub_meta_obj(const string& name, rgw_raw_obj *obj) const; - // get all topics defined for the user and populate them into "result" + // get all topics (per tenant, if used)) and populate them into "result" // return 0 on success or if no topics exist, error code otherwise - int get_user_topics(rgw_pubsub_user_topics *result); + int get_topics(rgw_pubsub_topics *result); // get a topic with its subscriptions by its name and populate it into "result" // return -ENOENT if the topic does not exists // return 0 on success, error code otherwise @@ -770,7 +772,7 @@ public: template <class T> -int RGWUserPubSub::read(const rgw_raw_obj& obj, T *result, RGWObjVersionTracker *objv_tracker) +int RGWPubSub::read(const rgw_raw_obj& obj, T* result, RGWObjVersionTracker* objv_tracker) { bufferlist bl; int ret = rgw_get_system_obj(obj_ctx, @@ -793,8 +795,8 @@ int RGWUserPubSub::read(const rgw_raw_obj& obj, T *result, RGWObjVersionTracker } template <class T> -int RGWUserPubSub::write(const rgw_raw_obj& obj, const T& info, - RGWObjVersionTracker *objv_tracker, optional_yield y) +int RGWPubSub::write(const rgw_raw_obj& obj, const T& info, + RGWObjVersionTracker* objv_tracker, optional_yield y) { bufferlist bl; encode(info, bl); diff --git a/src/rgw/rgw_putobj_processor.cc b/src/rgw/rgw_putobj_processor.cc index f013aa2adf3..2906e13c1cc 100644 --- a/src/rgw/rgw_putobj_processor.cc +++ b/src/rgw/rgw_putobj_processor.cc @@ -125,9 +125,9 @@ RadosWriter::~RadosWriter() bool need_to_remove_head = false; std::optional<rgw_raw_obj> raw_head; - if (!head_obj.empty()) { + if (!rgw::sal::RGWObject::empty(head_obj.get())) { raw_head.emplace(); - 
store->getRados()->obj_to_raw(bucket->get_placement_rule(), head_obj, &*raw_head); + head_obj->get_raw_obj(&*raw_head); } /** @@ -149,15 +149,17 @@ RadosWriter::~RadosWriter() continue; } - int r = store->getRados()->delete_raw_obj(obj); + int r = store->delete_raw_obj(obj); if (r < 0 && r != -ENOENT) { ldpp_dout(dpp, 0) << "WARNING: failed to remove obj (" << obj << "), leaked" << dendl; } } if (need_to_remove_head) { + std::string version_id; ldpp_dout(dpp, 5) << "NOTE: we are going to process the head obj (" << *raw_head << ")" << dendl; - int r = store->getRados()->delete_obj(obj_ctx, bucket->get_info(), head_obj, 0, 0); + int r = head_obj->delete_object(&obj_ctx, ACLOwner(), bucket->get_acl_owner(), ceph::real_time(), + false, 0, version_id, null_yield); if (r < 0 && r != -ENOENT) { ldpp_dout(dpp, 0) << "WARNING: failed to remove obj (" << *raw_head << "), leaked" << dendl; } @@ -174,10 +176,10 @@ int ManifestObjectProcessor::next(uint64_t offset, uint64_t *pstripe_size) return r; } - rgw_raw_obj stripe_obj = manifest_gen.get_cur_obj(store->getRados()); + rgw_raw_obj stripe_obj = manifest_gen.get_cur_obj(store); uint64_t chunk_size = 0; - r = store->getRados()->get_max_chunk_size(stripe_obj.pool, &chunk_size); + r = store->get_raw_chunk_size(stripe_obj, &chunk_size); if (r < 0) { return r; } @@ -207,33 +209,21 @@ int AtomicObjectProcessor::prepare(optional_yield y) uint64_t head_max_size; uint64_t chunk_size = 0; uint64_t alignment; - rgw_pool head_pool; - if (!store->getRados()->get_obj_data_pool(bucket->get_placement_rule(), head_obj, &head_pool)) { - return -EIO; - } - - int r = store->getRados()->get_max_chunk_size(head_pool, &max_head_chunk_size, &alignment); + int r = head_obj->get_max_chunk_size(bucket->get_placement_rule(), + &max_head_chunk_size, &alignment); if (r < 0) { return r; } bool same_pool = true; - if (bucket->get_placement_rule() != tail_placement_rule) { - rgw_pool tail_pool; - if (!store->getRados()->get_obj_data_pool(tail_placement_rule, 
head_obj, &tail_pool)) { - return -EIO; - } - - if (tail_pool != head_pool) { + if (!head_obj->placement_rules_match(bucket->get_placement_rule(), tail_placement_rule)) { same_pool = false; - - r = store->getRados()->get_max_chunk_size(tail_pool, &chunk_size); + r = head_obj->get_max_chunk_size(tail_placement_rule, &chunk_size); if (r < 0) { return r; } - head_max_size = 0; } } @@ -246,19 +236,21 @@ int AtomicObjectProcessor::prepare(optional_yield y) uint64_t stripe_size; const uint64_t default_stripe_size = store->ctx()->_conf->rgw_obj_stripe_size; - store->getRados()->get_max_aligned_size(default_stripe_size, alignment, &stripe_size); + head_obj->get_max_aligned_size(default_stripe_size, alignment, &stripe_size); manifest.set_trivial_rule(head_max_size, stripe_size); + rgw_obj obj = head_obj->get_obj(); + r = manifest_gen.create_begin(store->ctx(), &manifest, bucket->get_placement_rule(), &tail_placement_rule, - head_obj.bucket, head_obj); + obj.bucket, obj); if (r < 0) { return r; } - rgw_raw_obj stripe_obj = manifest_gen.get_cur_obj(store->getRados()); + rgw_raw_obj stripe_obj = manifest_gen.get_cur_obj(store); r = writer.set_stripe_obj(stripe_obj); if (r < 0) { @@ -276,7 +268,7 @@ int AtomicObjectProcessor::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, - std::map<std::string, bufferlist>& attrs, + rgw::sal::RGWAttrs& attrs, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, @@ -294,40 +286,43 @@ int AtomicObjectProcessor::complete(size_t accounted_size, return r; } - obj_ctx.set_atomic(head_obj); + head_obj->set_atomic(&obj_ctx); - RGWRados::Object op_target(store->getRados(), bucket->get_info(), obj_ctx, head_obj); + std::unique_ptr<rgw::sal::RGWObject::WriteOp> obj_op = head_obj->get_write_op(&obj_ctx); /* some object types shouldn't be versioned, e.g., multipart parts */ - op_target.set_versioning_disabled(!bucket->versioning_enabled()); - - RGWRados::Object::Write 
obj_op(&op_target); - - obj_op.meta.data = &first_chunk; - obj_op.meta.manifest = &manifest; - obj_op.meta.ptag = &unique_tag; /* use req_id as operation tag */ - obj_op.meta.if_match = if_match; - obj_op.meta.if_nomatch = if_nomatch; - obj_op.meta.mtime = mtime; - obj_op.meta.set_mtime = set_mtime; - obj_op.meta.owner = owner; - obj_op.meta.flags = PUT_OBJ_CREATE; - obj_op.meta.olh_epoch = olh_epoch; - obj_op.meta.delete_at = delete_at; - obj_op.meta.user_data = user_data; - obj_op.meta.zones_trace = zones_trace; - obj_op.meta.modify_tail = true; - - r = obj_op.write_meta(actual_size, accounted_size, attrs, y); + obj_op->params.versioning_disabled = !bucket->versioning_enabled(); + obj_op->params.data = &first_chunk; + obj_op->params.manifest = &manifest; + obj_op->params.ptag = &unique_tag; /* use req_id as operation tag */ + obj_op->params.if_match = if_match; + obj_op->params.if_nomatch = if_nomatch; + obj_op->params.mtime = mtime; + obj_op->params.set_mtime = set_mtime; + obj_op->params.owner = ACLOwner(owner); + obj_op->params.flags = PUT_OBJ_CREATE; + obj_op->params.olh_epoch = olh_epoch; + obj_op->params.delete_at = delete_at; + obj_op->params.user_data = user_data; + obj_op->params.zones_trace = zones_trace; + obj_op->params.modify_tail = true; + obj_op->params.attrs = &attrs; + + r = obj_op->prepare(y); + if (r < 0) { + return r; + } + + r = obj_op->write_meta(actual_size, accounted_size, y); if (r < 0) { return r; } - if (!obj_op.meta.canceled) { + if (!obj_op->params.canceled) { // on success, clear the set of objects for deletion writer.clear_written(); } if (pcanceled) { - *pcanceled = obj_op.meta.canceled; + *pcanceled = obj_op->params.canceled; } return 0; } @@ -344,8 +339,8 @@ int MultipartObjectProcessor::process_first_chunk(bufferlist&& data, std::string oid_rand(32, 0); gen_rand_alphanumeric(store->ctx(), oid_rand.data(), oid_rand.size()); - mp.init(target_obj.key.name, upload_id, oid_rand); - manifest.set_prefix(target_obj.key.name + "." 
+ oid_rand); + mp.init(target_obj->get_name(), upload_id, oid_rand); + manifest.set_prefix(target_obj->get_name() + "." + oid_rand); r = prepare_head(); if (r < 0) { @@ -368,26 +363,27 @@ int MultipartObjectProcessor::prepare_head() uint64_t stripe_size; uint64_t alignment; - int r = store->getRados()->get_max_chunk_size(tail_placement_rule, target_obj, &chunk_size, &alignment); + int r = target_obj->get_max_chunk_size(tail_placement_rule, &chunk_size, &alignment); if (r < 0) { ldpp_dout(dpp, 0) << "ERROR: unexpected: get_max_chunk_size(): placement_rule=" << tail_placement_rule.to_str() << " obj=" << target_obj << " returned r=" << r << dendl; return r; } - store->getRados()->get_max_aligned_size(default_stripe_size, alignment, &stripe_size); + target_obj->get_max_aligned_size(default_stripe_size, alignment, &stripe_size); manifest.set_multipart_part_rule(stripe_size, part_num); r = manifest_gen.create_begin(store->ctx(), &manifest, - bucket->get_placement_rule(), - &tail_placement_rule, - target_obj.bucket, target_obj); + bucket->get_placement_rule(), + &tail_placement_rule, + target_obj->get_bucket()->get_key(), + target_obj->get_obj()); if (r < 0) { return r; } - rgw_raw_obj stripe_obj = manifest_gen.get_cur_obj(store->getRados()); - RGWSI_Tier_RADOS::raw_obj_to_obj(head_obj.bucket, stripe_obj, &head_obj); - head_obj.index_hash_source = target_obj.key.name; + rgw_raw_obj stripe_obj = manifest_gen.get_cur_obj(store); + head_obj->raw_obj_to_obj(stripe_obj); + head_obj->set_hash_source(target_obj->get_name()); r = writer.set_stripe_obj(stripe_obj); if (r < 0) { @@ -403,7 +399,7 @@ int MultipartObjectProcessor::prepare_head() int MultipartObjectProcessor::prepare(optional_yield y) { - manifest.set_prefix(target_obj.key.name + "." + upload_id); + manifest.set_prefix(target_obj->get_name() + "." 
+ upload_id); return prepare_head(); } @@ -430,18 +426,22 @@ int MultipartObjectProcessor::complete(size_t accounted_size, return r; } - RGWRados::Object op_target(store->getRados(), bucket->get_info(), obj_ctx, head_obj); - op_target.set_versioning_disabled(true); - RGWRados::Object::Write obj_op(&op_target); + std::unique_ptr<rgw::sal::RGWObject::WriteOp> obj_op = head_obj->get_write_op(&obj_ctx); - obj_op.meta.set_mtime = set_mtime; - obj_op.meta.mtime = mtime; - obj_op.meta.owner = owner; - obj_op.meta.delete_at = delete_at; - obj_op.meta.zones_trace = zones_trace; - obj_op.meta.modify_tail = true; + obj_op->params.versioning_disabled = true; + obj_op->params.set_mtime = set_mtime; + obj_op->params.mtime = mtime; + obj_op->params.owner = ACLOwner(owner); + obj_op->params.delete_at = delete_at; + obj_op->params.zones_trace = zones_trace; + obj_op->params.modify_tail = true; + obj_op->params.attrs = &attrs; + r = obj_op->prepare(y); + if (r < 0) { + return r; + } - r = obj_op.write_meta(actual_size, accounted_size, attrs, y); + r = obj_op->write_meta(actual_size, accounted_size, y); if (r < 0) return r; @@ -473,30 +473,21 @@ int MultipartObjectProcessor::complete(size_t accounted_size, encode(info, bl); - rgw_obj meta_obj; - meta_obj.init_ns(bucket->get_key(), mp.get_meta(), RGW_OBJ_NS_MULTIPART); - meta_obj.set_in_extra_data(true); - - rgw_raw_obj raw_meta_obj; + std::unique_ptr<rgw::sal::RGWObject> meta_obj = + bucket->get_object(rgw_obj_key(mp.get_meta(), std::string(), RGW_OBJ_NS_MULTIPART)); + meta_obj->set_in_extra_data(true); - store->getRados()->obj_to_raw(bucket->get_placement_rule(), meta_obj, &raw_meta_obj); - - auto obj_ctx = store->svc()->sysobj->init_obj_ctx(); - auto sysobj = obj_ctx.get_obj(raw_meta_obj); - - r = sysobj.omap() - .set_must_exist(true) - .set(p, bl, null_yield); + r = meta_obj->omap_set_val_by_key(p, bl, true, null_yield); if (r < 0) { return r == -ENOENT ? 
-ERR_NO_SUCH_UPLOAD : r; } - if (!obj_op.meta.canceled) { + if (!obj_op->params.canceled) { // on success, clear the set of objects for deletion writer.clear_written(); } if (pcanceled) { - *pcanceled = obj_op.meta.canceled; + *pcanceled = obj_op->params.canceled; } return 0; } @@ -514,7 +505,7 @@ int AppendObjectProcessor::process_first_chunk(bufferlist &&data, rgw::putobj::D int AppendObjectProcessor::prepare(optional_yield y) { RGWObjState *astate; - int r = store->getRados()->get_obj_state(&obj_ctx, bucket->get_info(), head_obj, &astate, y); + int r = head_obj->get_obj_state(&obj_ctx, *bucket, &astate, y); if (r < 0) { return r; } @@ -529,7 +520,7 @@ int AppendObjectProcessor::prepare(optional_yield y) //set the prefix char buf[33]; gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1); - string oid_prefix = head_obj.key.name; + string oid_prefix = head_obj->get_name(); oid_prefix.append("."); oid_prefix.append(buf); oid_prefix.append("_"); @@ -572,14 +563,16 @@ int AppendObjectProcessor::prepare(optional_yield y) } manifest.set_multipart_part_rule(store->ctx()->_conf->rgw_obj_stripe_size, cur_part_num); - r = manifest_gen.create_begin(store->ctx(), &manifest, bucket->get_placement_rule(), &tail_placement_rule, head_obj.bucket, head_obj); + rgw_obj obj = head_obj->get_obj(); + + r = manifest_gen.create_begin(store->ctx(), &manifest, bucket->get_placement_rule(), &tail_placement_rule, obj.bucket, obj); if (r < 0) { return r; } - rgw_raw_obj stripe_obj = manifest_gen.get_cur_obj(store->getRados()); + rgw_raw_obj stripe_obj = manifest_gen.get_cur_obj(store); uint64_t chunk_size = 0; - r = store->getRados()->get_max_chunk_size(stripe_obj.pool, &chunk_size); + r = store->get_raw_chunk_size(stripe_obj, &chunk_size); if (r < 0) { return r; } @@ -601,7 +594,7 @@ int AppendObjectProcessor::prepare(optional_yield y) } int AppendObjectProcessor::complete(size_t accounted_size, const string &etag, ceph::real_time *mtime, - ceph::real_time set_mtime, map <string, 
bufferlist> &attrs, + ceph::real_time set_mtime, rgw::sal::RGWAttrs& attrs, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const string *user_data, rgw_zone_set *zones_trace, bool *pcanceled, optional_yield y) @@ -614,27 +607,27 @@ int AppendObjectProcessor::complete(size_t accounted_size, const string &etag, c if (r < 0) { return r; } - obj_ctx.set_atomic(head_obj); - RGWRados::Object op_target(store->getRados(), bucket->get_info(), obj_ctx, head_obj); + head_obj->set_atomic(&obj_ctx); + std::unique_ptr<rgw::sal::RGWObject::WriteOp> obj_op = head_obj->get_write_op(&obj_ctx); //For Append obj, disable versioning - op_target.set_versioning_disabled(true); - RGWRados::Object::Write obj_op(&op_target); + obj_op->params.versioning_disabled = true; if (cur_manifest) { cur_manifest->append(manifest, store->svc()->zone); - obj_op.meta.manifest = cur_manifest; + obj_op->params.manifest = cur_manifest; } else { - obj_op.meta.manifest = &manifest; - } - obj_op.meta.ptag = &unique_tag; /* use req_id as operation tag */ - obj_op.meta.mtime = mtime; - obj_op.meta.set_mtime = set_mtime; - obj_op.meta.owner = owner; - obj_op.meta.flags = PUT_OBJ_CREATE; - obj_op.meta.delete_at = delete_at; - obj_op.meta.user_data = user_data; - obj_op.meta.zones_trace = zones_trace; - obj_op.meta.modify_tail = true; - obj_op.meta.appendable = true; + obj_op->params.manifest = &manifest; + } + obj_op->params.ptag = &unique_tag; /* use req_id as operation tag */ + obj_op->params.mtime = mtime; + obj_op->params.set_mtime = set_mtime; + obj_op->params.owner = ACLOwner(owner); + obj_op->params.flags = PUT_OBJ_CREATE; + obj_op->params.delete_at = delete_at; + obj_op->params.user_data = user_data; + obj_op->params.zones_trace = zones_trace; + obj_op->params.modify_tail = true; + obj_op->params.appendable = true; + obj_op->params.attrs = &attrs; //Add the append part number bufferlist cur_part_num_bl; using ceph::encode; @@ -658,16 +651,20 @@ int 
AppendObjectProcessor::complete(size_t accounted_size, const string &etag, c etag_bl.append(final_etag_str, strlen(final_etag_str) + 1); attrs[RGW_ATTR_ETAG] = etag_bl; } - r = obj_op.write_meta(actual_size + cur_size, accounted_size + *cur_accounted_size, attrs, y); + r = obj_op->prepare(y); + if (r < 0) { + return r; + } + r = obj_op->write_meta(actual_size + cur_size, accounted_size + *cur_accounted_size, y); if (r < 0) { return r; } - if (!obj_op.meta.canceled) { + if (!obj_op->params.canceled) { // on success, clear the set of objects for deletion writer.clear_written(); } if (pcanceled) { - *pcanceled = obj_op.meta.canceled; + *pcanceled = obj_op->params.canceled; } *cur_accounted_size += accounted_size; diff --git a/src/rgw/rgw_putobj_processor.h b/src/rgw/rgw_putobj_processor.h index 42edfa77ebf..2a7e89446a7 100644 --- a/src/rgw/rgw_putobj_processor.h +++ b/src/rgw/rgw_putobj_processor.h @@ -21,6 +21,7 @@ #include "services/svc_rados.h" #include "services/svc_tier_rados.h" #include "rgw_sal.h" +#include "rgw_obj_manifest.h" namespace rgw { @@ -82,7 +83,7 @@ class RadosWriter : public DataProcessor { rgw::sal::RGWRadosStore *const store; rgw::sal::RGWBucket* bucket; RGWObjectCtx& obj_ctx; - const rgw_obj head_obj; + std::unique_ptr<rgw::sal::RGWObject> head_obj; RGWSI_RADOS::Obj stripe_obj; // current stripe object RawObjSet written; // set of written objects for deletion const DoutPrefixProvider *dpp; @@ -91,11 +92,16 @@ class RadosWriter : public DataProcessor { public: RadosWriter(Aio *aio, rgw::sal::RGWRadosStore *store, rgw::sal::RGWBucket* bucket, - RGWObjectCtx& obj_ctx, const rgw_obj& head_obj, + RGWObjectCtx& obj_ctx, std::unique_ptr<rgw::sal::RGWObject> _head_obj, const DoutPrefixProvider *dpp, optional_yield y) : aio(aio), store(store), bucket(bucket), - obj_ctx(obj_ctx), head_obj(head_obj), dpp(dpp), y(y) + obj_ctx(obj_ctx), head_obj(std::move(_head_obj)), dpp(dpp), y(y) {} + RadosWriter(RadosWriter&& r) + : aio(r.aio), store(r.store), 
bucket(r.bucket), + obj_ctx(r.obj_ctx), head_obj(std::move(r.head_obj)), dpp(r.dpp), y(r.y) + {} + ~RadosWriter(); // change the current stripe object @@ -124,7 +130,7 @@ class ManifestObjectProcessor : public HeadObjectProcessor, rgw_placement_rule tail_placement_rule; rgw_user owner; RGWObjectCtx& obj_ctx; - rgw_obj head_obj; + std::unique_ptr<rgw::sal::RGWObject> head_obj; RadosWriter writer; RGWObjManifest manifest; @@ -141,13 +147,13 @@ class ManifestObjectProcessor : public HeadObjectProcessor, rgw::sal::RGWBucket* bucket, const rgw_placement_rule *ptail_placement_rule, const rgw_user& owner, RGWObjectCtx& obj_ctx, - rgw_obj& head_obj, + std::unique_ptr<rgw::sal::RGWObject> _head_obj, const DoutPrefixProvider* dpp, optional_yield y) : HeadObjectProcessor(0), store(store), bucket(bucket), owner(owner), - obj_ctx(obj_ctx), head_obj(head_obj), - writer(aio, store, bucket, obj_ctx, head_obj, dpp, y), + obj_ctx(obj_ctx), head_obj(std::move(_head_obj)), + writer(aio, store, bucket, obj_ctx, std::move(head_obj->clone()), dpp, y), chunk(&writer, 0), stripe(&chunk, this, 0), dpp(dpp) { if (ptail_placement_rule) { tail_placement_rule = *ptail_placement_rule; @@ -181,12 +187,13 @@ class AtomicObjectProcessor : public ManifestObjectProcessor { rgw::sal::RGWBucket* bucket, const rgw_placement_rule *ptail_placement_rule, const rgw_user& owner, - RGWObjectCtx& obj_ctx, rgw_obj head_obj, + RGWObjectCtx& obj_ctx, + std::unique_ptr<rgw::sal::RGWObject> _head_obj, std::optional<uint64_t> olh_epoch, const std::string& unique_tag, const DoutPrefixProvider *dpp, optional_yield y) : ManifestObjectProcessor(aio, store, bucket, ptail_placement_rule, - owner, obj_ctx, head_obj, dpp, y), + owner, obj_ctx, std::move(_head_obj), dpp, y), olh_epoch(olh_epoch), unique_tag(unique_tag) {} @@ -209,7 +216,7 @@ class AtomicObjectProcessor : public ManifestObjectProcessor { // part's head is written with an exclusive create to detect racing uploads of // the same part/upload id, which are 
restarted with a random oid prefix class MultipartObjectProcessor : public ManifestObjectProcessor { - const rgw_obj target_obj; // target multipart object + std::unique_ptr<rgw::sal::RGWObject> target_obj; // target multipart object const std::string upload_id; const int part_num; const std::string part_num_str; @@ -225,15 +232,15 @@ class MultipartObjectProcessor : public ManifestObjectProcessor { rgw::sal::RGWBucket* bucket, const rgw_placement_rule *ptail_placement_rule, const rgw_user& owner, RGWObjectCtx& obj_ctx, - rgw_obj head_obj, + std::unique_ptr<rgw::sal::RGWObject> _head_obj, const std::string& upload_id, uint64_t part_num, const std::string& part_num_str, const DoutPrefixProvider *dpp, optional_yield y) : ManifestObjectProcessor(aio, store, bucket, ptail_placement_rule, - owner, obj_ctx, head_obj, dpp, y), - target_obj(head_obj), upload_id(upload_id), + owner, obj_ctx, std::move(_head_obj), dpp, y), + target_obj(std::move(head_obj->clone())), upload_id(upload_id), part_num(part_num), part_num_str(part_num_str), - mp(head_obj.key.name, upload_id) + mp(head_obj->get_name(), upload_id) {} // prepare a multipart manifest @@ -268,12 +275,12 @@ class MultipartObjectProcessor : public ManifestObjectProcessor { rgw::sal::RGWBucket* bucket, const rgw_placement_rule *ptail_placement_rule, const rgw_user& owner, RGWObjectCtx& obj_ctx, - rgw_obj head_obj, + std::unique_ptr<rgw::sal::RGWObject> _head_obj, const std::string& unique_tag, uint64_t position, uint64_t *cur_accounted_size, const DoutPrefixProvider *dpp, optional_yield y) : ManifestObjectProcessor(aio, store, bucket, ptail_placement_rule, - owner, obj_ctx, head_obj, dpp, y), + owner, obj_ctx, std::move(_head_obj), dpp, y), position(position), cur_size(0), cur_accounted_size(cur_accounted_size), unique_tag(unique_tag), cur_manifest(nullptr) {} diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 2057026cf37..0bb75ebb822 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -149,11 
+149,11 @@ rgw_raw_obj rgw_obj_select::get_raw_obj(const RGWZoneGroup& zonegroup, const RGW return raw_obj; } -rgw_raw_obj rgw_obj_select::get_raw_obj(RGWRados *store) const +rgw_raw_obj rgw_obj_select::get_raw_obj(rgw::sal::RGWStore* store) const { if (!is_raw) { rgw_raw_obj r; - store->obj_to_raw(placement_rule, obj, &r); + store->get_raw_obj(placement_rule, obj, &r); return r; } return raw_obj; @@ -2575,7 +2575,7 @@ int RGWRados::fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key RGWObjManifest::obj_iterator miter; RGWObjManifest& manifest = *astate->manifest; for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) { - rgw_raw_obj raw_loc = miter.get_location().get_raw_obj(this); + rgw_raw_obj raw_loc = miter.get_location().get_raw_obj(store); rgw_obj loc; string oid; string locator; @@ -3772,7 +3772,7 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx, const char *if_nomatch, AttrsMod attrs_mod, bool copy_if_newer, - map<string, bufferlist>& attrs, + rgw::sal::RGWAttrs& attrs, RGWObjCategory category, std::optional<uint64_t> olh_epoch, real_time delete_at, @@ -3798,7 +3798,8 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx, rgw::BlockingAioThrottle aio(cct->_conf->rgw_put_obj_min_window_size); using namespace rgw::putobj; AtomicObjectProcessor processor(&aio, this->store, dest_bucket, nullptr, user_id, - obj_ctx, dest_obj->get_obj(), olh_epoch, tag, dpp, null_yield); + obj_ctx, std::move(dest_obj->clone()), olh_epoch, + tag, dpp, null_yield); RGWRESTConn *conn; auto& zone_conn_map = svc.zone->get_zone_conn_map(); auto& zonegroup_conn_map = svc.zone->get_zonegroup_conn_map(); @@ -4155,7 +4156,7 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, const char *if_nomatch, AttrsMod attrs_mod, bool copy_if_newer, - map<string, bufferlist>& attrs, + rgw::sal::RGWAttrs& attrs, RGWObjCategory category, uint64_t olh_epoch, real_time delete_at, @@ -4328,7 +4329,7 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, } rgw_rados_ref 
ref; - ret = get_raw_obj_ref(miter.get_location().get_raw_obj(this), &ref); + ret = get_raw_obj_ref(miter.get_location().get_raw_obj(store), &ref); if (ret < 0) { return ret; } @@ -4364,7 +4365,7 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, ObjectWriteOperation op; ref_tag = tag + '\0'; cls_refcount_get(op, ref_tag, true); - const rgw_raw_obj& loc = miter.get_location().get_raw_obj(this); + const rgw_raw_obj& loc = miter.get_location().get_raw_obj(store); auto& ioctx = ref.pool.ioctx(); ioctx.locator_set_key(loc.loc); @@ -4442,7 +4443,7 @@ int RGWRados::copy_obj_data(RGWObjectCtx& obj_ctx, rgw::sal::RGWObject* dest_obj, real_time *mtime, real_time set_mtime, - map<string, bufferlist>& attrs, + rgw::sal::RGWAttrs& attrs, uint64_t olh_epoch, real_time delete_at, string *petag, @@ -4458,7 +4459,8 @@ int RGWRados::copy_obj_data(RGWObjectCtx& obj_ctx, // it causes crashes in the ragweed tests AtomicObjectProcessor processor(&aio, this->store, bucket, &dest_placement, bucket->get_info().owner, obj_ctx, - dest_obj->get_obj(), olh_epoch, tag, dpp, null_yield); + std::move(dest_obj->clone()), olh_epoch, tag, + dpp, null_yield); int ret = processor.prepare(y); if (ret < 0) return ret; @@ -4524,7 +4526,7 @@ int RGWRados::transition_obj(RGWObjectCtx& obj_ctx, const DoutPrefixProvider *dpp, optional_yield y) { - map<string, bufferlist> attrs; + rgw::sal::RGWAttrs attrs; real_time read_mtime; uint64_t obj_size; @@ -4784,7 +4786,7 @@ void RGWRados::update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_ rgw_raw_obj raw_head; obj_to_raw(manifest.get_head_placement_rule(), head_obj, &raw_head); for (iter = manifest.obj_begin(); iter != manifest.obj_end(); ++iter) { - const rgw_raw_obj& mobj = iter.get_location().get_raw_obj(this); + const rgw_raw_obj& mobj = iter.get_location().get_raw_obj(store); if (mobj == raw_head) continue; cls_rgw_obj_key key(mobj.oid); @@ -5233,7 +5235,7 @@ int RGWRados::delete_obj_index(const rgw_obj& obj, ceph::real_time mtime) return 
index_op.complete_del(-1 /* pool */, 0, mtime, NULL); } -static void generate_fake_tag(RGWRados *store, map<string, bufferlist>& attrset, RGWObjManifest& manifest, bufferlist& manifest_bl, bufferlist& tag_bl) +static void generate_fake_tag(rgw::sal::RGWStore* store, map<string, bufferlist>& attrset, RGWObjManifest& manifest, bufferlist& manifest_bl, bufferlist& tag_bl) { string tag; @@ -5406,7 +5408,7 @@ int RGWRados::get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket s->manifest->has_explicit_objs()) { RGWObjManifest::obj_iterator mi; for (mi = s->manifest->obj_begin(); mi != s->manifest->obj_end(); ++mi) { - ldout(cct, 20) << "manifest: ofs=" << mi.get_ofs() << " loc=" << mi.get_location().get_raw_obj(this) << dendl; + ldout(cct, 20) << "manifest: ofs=" << mi.get_ofs() << " loc=" << mi.get_location().get_raw_obj(store) << dendl; } } @@ -5415,7 +5417,7 @@ int RGWRados::get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket * Uh oh, something's wrong, object with manifest should have tag. 
Let's * create one out of the manifest, would be unique */ - generate_fake_tag(this, s->attrset, *s->manifest, manifest_bl, s->obj_tag); + generate_fake_tag(store, s->attrset, *s->manifest, manifest_bl, s->obj_tag); s->fake_tag = true; } } @@ -6214,7 +6216,7 @@ int RGWRados::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl, optio RGWObjManifest::obj_iterator iter = astate->manifest->obj_find(ofs); uint64_t stripe_ofs = iter.get_stripe_ofs(); - read_obj = iter.get_location().get_raw_obj(store); + read_obj = iter.get_location().get_raw_obj(store->store); len = std::min(len, iter.get_stripe_size() - (ofs - stripe_ofs)); read_ofs = iter.location_ofs() + (ofs - stripe_ofs); reading_from_head = (read_obj == state.head_obj); @@ -6465,7 +6467,7 @@ int RGWRados::iterate_obj(RGWObjectCtx& obj_ctx, off_t next_stripe_ofs = stripe_ofs + iter.get_stripe_size(); while (ofs < next_stripe_ofs && ofs <= end) { - read_obj = iter.get_location().get_raw_obj(this); + read_obj = iter.get_location().get_raw_obj(store); uint64_t read_len = std::min(len, iter.get_stripe_size() - (ofs - stripe_ofs)); read_ofs = iter.location_ofs() + (ofs - stripe_ofs); @@ -8143,7 +8145,7 @@ int RGWRados::process_gc(bool expired_only) } int RGWRados::list_lc_progress(string& marker, uint32_t max_entries, - vector<cls_rgw_lc_entry>& progress_map, + vector<rgw::sal::Lifecycle::LCEntry>& progress_map, int& index) { return lc->list_lc_progress(marker, max_entries, progress_map, index); @@ -8901,7 +8903,7 @@ int RGWRados::check_disk_state(librados::IoCtx io_ctx, RGWObjManifest::obj_iterator miter; RGWObjManifest& manifest = *astate->manifest; for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) { - const rgw_raw_obj& raw_loc = miter.get_location().get_raw_obj(this); + const rgw_raw_obj& raw_loc = miter.get_location().get_raw_obj(store); rgw_obj loc; RGWSI_Tier_RADOS::raw_obj_to_obj(manifest.get_obj().bucket, raw_loc, &loc); diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 
1fb788fe8c4..53fdb9b5562 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -370,7 +370,11 @@ public: class RGWGetDirHeader_CB; class RGWGetUserHeader_CB; -namespace rgw { namespace sal { class RGWRadosStore; } } +namespace rgw { namespace sal { + class RGWRadosStore; + class MPRadosSerializer; + class LCRadosSerializer; +} } class RGWAsyncRadosProcessor; @@ -396,7 +400,6 @@ class RGWRados friend class RGWGC; friend class RGWMetaNotifier; friend class RGWDataNotifier; - friend class RGWLC; friend class RGWObjectExpirer; friend class RGWMetaSyncProcessorThread; friend class RGWDataSyncProcessorThread; @@ -404,7 +407,8 @@ class RGWRados friend class RGWBucketReshard; friend class RGWBucketReshardLock; friend class BucketIndexLockGuard; - friend class RGWCompleteMultipart; + friend class rgw::sal::MPRadosSerializer; + friend class rgw::sal::LCRadosSerializer; friend class rgw::sal::RGWRadosStore; /** Open the pool used as root for this gateway */ @@ -1121,7 +1125,7 @@ public: const char *if_nomatch, AttrsMod attrs_mod, bool copy_if_newer, - map<string, bufferlist>& attrs, + rgw::sal::RGWAttrs& attrs, RGWObjCategory category, std::optional<uint64_t> olh_epoch, ceph::real_time delete_at, @@ -1446,7 +1450,7 @@ public: int process_lc(); int list_lc_progress(string& marker, uint32_t max_entries, - vector<cls_rgw_lc_entry>& progress_map, int& index); + vector<rgw::sal::Lifecycle::LCEntry>& progress_map, int& index); int bucket_check_index(RGWBucketInfo& bucket_info, map<RGWObjCategory, RGWStorageStats> *existing_stats, diff --git a/src/rgw/rgw_reshard.h b/src/rgw/rgw_reshard.h index 4bfbf5f810a..a5c190f5914 100644 --- a/src/rgw/rgw_reshard.h +++ b/src/rgw/rgw_reshard.h @@ -253,13 +253,9 @@ class RGWReshardWait { ceph::condition_variable cond; struct Waiter : boost::intrusive::list_base_hook<> { -#if BOOST_VERSION < 107000 - using Timer = boost::asio::basic_waitable_timer<Clock>; -#else using Executor = boost::asio::io_context::executor_type; using Timer = 
boost::asio::basic_waitable_timer<Clock, boost::asio::wait_traits<Clock>, Executor>; -#endif Timer timer; explicit Waiter(boost::asio::io_context& ioc) : timer(ioc) {} }; diff --git a/src/rgw/rgw_rest_pubsub.cc b/src/rgw/rgw_rest_pubsub.cc index d6de68c2cf1..17a75be0674 100644 --- a/src/rgw/rgw_rest_pubsub.cc +++ b/src/rgw/rgw_rest_pubsub.cc @@ -21,6 +21,7 @@ #define dout_context g_ceph_context #define dout_subsys ceph_subsys_rgw +static const char* AWS_SNS_NS("https://sns.amazonaws.com/doc/2010-03-31/"); // command (AWS compliant): // POST @@ -86,14 +87,14 @@ public: } const auto f = s->formatter; - f->open_object_section_in_ns("CreateTopicResponse", "https://sns.amazonaws.com/doc/2010-03-31/"); + f->open_object_section_in_ns("CreateTopicResponse", AWS_SNS_NS); f->open_object_section("CreateTopicResult"); encode_xml("TopicArn", topic_arn, f); - f->close_section(); + f->close_section(); // CreateTopicResult f->open_object_section("ResponseMetadata"); encode_xml("RequestId", s->req_id, f); - f->close_section(); - f->close_section(); + f->close_section(); // ResponseMetadata + f->close_section(); // CreateTopicResponse rgw_flush_formatter_and_reset(s, f); } }; @@ -115,14 +116,14 @@ public: } const auto f = s->formatter; - f->open_object_section_in_ns("ListTopicsResponse", "https://sns.amazonaws.com/doc/2010-03-31/"); + f->open_object_section_in_ns("ListTopicsResponse", AWS_SNS_NS); f->open_object_section("ListTopicsResult"); encode_xml("Topics", result, f); - f->close_section(); + f->close_section(); // ListTopicsResult f->open_object_section("ResponseMetadata"); encode_xml("RequestId", s->req_id, f); - f->close_section(); - f->close_section(); + f->close_section(); // ResponseMetadat + f->close_section(); // ListTopicsResponse rgw_flush_formatter_and_reset(s, f); } }; @@ -170,6 +171,47 @@ public: // command (AWS compliant): // POST +// Action=GetTopicAttributes&TopicArn=<topic-arn> +class RGWPSGetTopicAttributes_ObjStore_AWS : public RGWPSGetTopicOp { +public: + int 
get_params() override { + const auto topic_arn = rgw::ARN::parse((s->info.args.get("TopicArn"))); + + if (!topic_arn || topic_arn->resource.empty()) { + ldout(s->cct, 1) << "GetTopicAttribute Action 'TopicArn' argument is missing or invalid" << dendl; + return -EINVAL; + } + + topic_name = topic_arn->resource; + return 0; + } + + void send_response() override { + if (op_ret) { + set_req_state_err(s, op_ret); + } + dump_errno(s); + end_header(s, this, "application/xml"); + + if (op_ret < 0) { + return; + } + + const auto f = s->formatter; + f->open_object_section_in_ns("GetTopicAttributesResponse", AWS_SNS_NS); + f->open_object_section("GetTopicAttributesResult"); + result.topic.dump_xml_as_attributes(f); + f->close_section(); // GetTopicAttributesResult + f->open_object_section("ResponseMetadata"); + encode_xml("RequestId", s->req_id, f); + f->close_section(); // ResponseMetadata + f->close_section(); // GetTopicAttributesResponse + rgw_flush_formatter_and_reset(s, f); + } +}; + +// command (AWS compliant): +// POST // Action=DeleteTopic&TopicArn=<topic-arn> class RGWPSDeleteTopic_ObjStore_AWS : public RGWPSDeleteTopicOp { public: @@ -210,11 +252,11 @@ public: } const auto f = s->formatter; - f->open_object_section_in_ns("DeleteTopicResponse", "https://sns.amazonaws.com/doc/2010-03-31/"); + f->open_object_section_in_ns("DeleteTopicResponse", AWS_SNS_NS); f->open_object_section("ResponseMetadata"); encode_xml("RequestId", s->req_id, f); - f->close_section(); - f->close_section(); + f->close_section(); // ResponseMetadata + f->close_section(); // DeleteTopicResponse rgw_flush_formatter_and_reset(s, f); } }; @@ -344,16 +386,14 @@ RGWOp* RGWHandler_REST_PSTopic_AWS::op_post() { return new RGWPSListTopics_ObjStore_AWS(); if (action.compare("GetTopic") == 0) return new RGWPSGetTopic_ObjStore_AWS(); + if (action.compare("GetTopicAttributes") == 0) + return new RGWPSGetTopicAttributes_ObjStore_AWS(); } return nullptr; } int RGWHandler_REST_PSTopic_AWS::authorize(const 
DoutPrefixProvider* dpp, optional_yield y) { - /*if (s->info.args.exists("Action") && s->info.args.get("Action").find("Topic") != std::string::npos) { - // TODO: some topic specific authorization - return 0; - }*/ return RGW_Auth_S3::authorize(dpp, store, auth_registry, s, y); } @@ -452,8 +492,8 @@ void RGWPSCreateNotif_ObjStore_S3::execute(optional_yield y) { return; } - ups.emplace(store, s->owner.get_id()); - auto b = ups->get_bucket(bucket_info.bucket); + ps.emplace(store, s->owner.get_id().tenant); + auto b = ps->get_bucket(bucket_info.bucket); ceph_assert(b); std::string data_bucket_prefix = ""; std::string data_oid_prefix = ""; @@ -499,7 +539,7 @@ void RGWPSCreateNotif_ObjStore_S3::execute(optional_yield y) { // get topic information. destination information is stored in the topic rgw_pubsub_topic topic_info; - op_ret = ups->get_topic(topic_name, &topic_info); + op_ret = ps->get_topic(topic_name, &topic_info); if (op_ret < 0) { ldout(s->cct, 1) << "failed to get topic '" << topic_name << "', ret=" << op_ret << dendl; return; @@ -514,7 +554,7 @@ void RGWPSCreateNotif_ObjStore_S3::execute(optional_yield y) { // generate the internal topic. 
destination is stored here for the "push-only" case // when no subscription exists // ARN is cached to make the "GET" method faster - op_ret = ups->create_topic(unique_topic_name, topic_info.dest, topic_info.arn, topic_info.opaque_data, y); + op_ret = ps->create_topic(unique_topic_name, topic_info.dest, topic_info.arn, topic_info.opaque_data, y); if (op_ret < 0) { ldout(s->cct, 1) << "failed to auto-generate unique topic '" << unique_topic_name << "', ret=" << op_ret << dendl; @@ -528,7 +568,7 @@ void RGWPSCreateNotif_ObjStore_S3::execute(optional_yield y) { ldout(s->cct, 1) << "failed to auto-generate notification for unique topic '" << unique_topic_name << "', ret=" << op_ret << dendl; // rollback generated topic (ignore return value) - ups->remove_topic(unique_topic_name, y); + ps->remove_topic(unique_topic_name, y); return; } ldout(s->cct, 20) << "successfully auto-generated notification for unique topic '" << unique_topic_name << "'" << dendl; @@ -538,14 +578,14 @@ void RGWPSCreateNotif_ObjStore_S3::execute(optional_yield y) { rgw_pubsub_sub_dest dest = topic_info.dest; dest.bucket_name = data_bucket_prefix + s->owner.get_id().to_str() + "-" + unique_topic_name; dest.oid_prefix = data_oid_prefix + notif_name + "/"; - auto sub = ups->get_sub(notif_name); + auto sub = ps->get_sub(notif_name); op_ret = sub->subscribe(unique_topic_name, dest, y, notif_name); if (op_ret < 0) { ldout(s->cct, 1) << "failed to auto-generate subscription '" << notif_name << "', ret=" << op_ret << dendl; // rollback generated notification (ignore return value) b->remove_notification(unique_topic_name, y); // rollback generated topic (ignore return value) - ups->remove_topic(unique_topic_name, y); + ps->remove_topic(unique_topic_name, y); return; } ldout(s->cct, 20) << "successfully auto-generated subscription '" << notif_name << "'" << dendl; @@ -573,12 +613,12 @@ private: return 0; } - void remove_notification_by_topic(const std::string& topic_name, const RGWUserPubSub::BucketRef& b, 
optional_yield y) { + void remove_notification_by_topic(const std::string& topic_name, const RGWPubSub::BucketRef& b, optional_yield y) { op_ret = b->remove_notification(topic_name, y); if (op_ret < 0) { ldout(s->cct, 1) << "failed to remove notification of topic '" << topic_name << "', ret=" << op_ret << dendl; } - op_ret = ups->remove_topic(topic_name, y); + op_ret = ps->remove_topic(topic_name, y); if (op_ret < 0) { ldout(s->cct, 1) << "failed to remove auto-generated topic '" << topic_name << "', ret=" << op_ret << dendl; } @@ -595,8 +635,8 @@ void RGWPSDeleteNotif_ObjStore_S3::execute(optional_yield y) { return; } - ups.emplace(store, s->owner.get_id()); - auto b = ups->get_bucket(bucket_info.bucket); + ps.emplace(store, s->owner.get_id().tenant); + auto b = ps->get_bucket(bucket_info.bucket); ceph_assert(b); // get all topics on a bucket @@ -613,7 +653,7 @@ void RGWPSDeleteNotif_ObjStore_S3::execute(optional_yield y) { if (unique_topic) { // remove the auto generated subscription according to notification name (if exist) const auto unique_topic_name = unique_topic->get().topic.name; - auto sub = ups->get_sub(notif_name); + auto sub = ps->get_sub(notif_name); op_ret = sub->unsubscribe(unique_topic_name, y); if (op_ret < 0 && op_ret != -ENOENT) { ldout(s->cct, 1) << "failed to remove auto-generated subscription '" << notif_name << "', ret=" << op_ret << dendl; @@ -631,9 +671,9 @@ void RGWPSDeleteNotif_ObjStore_S3::execute(optional_yield y) { for (const auto& topic : bucket_topics.topics) { // remove the auto generated subscription of the topic (if exist) rgw_pubsub_topic_subs topic_subs; - op_ret = ups->get_topic(topic.first, &topic_subs); + op_ret = ps->get_topic(topic.first, &topic_subs); for (const auto& topic_sub_name : topic_subs.subs) { - auto sub = ups->get_sub(topic_sub_name); + auto sub = ps->get_sub(topic_sub_name); rgw_pubsub_sub_config sub_conf; op_ret = sub->get_conf(&sub_conf); if (op_ret < 0) { @@ -694,8 +734,8 @@ public: }; void 
RGWPSListNotifs_ObjStore_S3::execute(optional_yield y) { - ups.emplace(store, s->owner.get_id()); - auto b = ups->get_bucket(bucket_info.bucket); + ps.emplace(store, s->owner.get_id().tenant); + auto b = ps->get_bucket(bucket_info.bucket); ceph_assert(b); // get all topics on a bucket diff --git a/src/rgw/rgw_rest_pubsub.h b/src/rgw/rgw_rest_pubsub.h index 7e31642b3f4..3b1a1bc9670 100644 --- a/src/rgw/rgw_rest_pubsub.h +++ b/src/rgw/rgw_rest_pubsub.h @@ -27,7 +27,6 @@ class RGWHandler_REST_PSTopic_AWS : public RGWHandler_REST { const rgw::auth::StrategyRegistry& auth_registry; const std::string& post_body; void rgw_topic_parse_input(); - //static int init_from_header(struct req_state *s, int default_formatter, bool configurable_format); protected: RGWOp* op_post() override; public: diff --git a/src/rgw/rgw_rest_pubsub_common.cc b/src/rgw/rgw_rest_pubsub_common.cc index 3b7d71828db..1d1311d2b92 100644 --- a/src/rgw/rgw_rest_pubsub_common.cc +++ b/src/rgw/rgw_rest_pubsub_common.cc @@ -40,7 +40,7 @@ bool topic_has_endpoint_secret(const rgw_pubsub_topic_subs& topic) { return topic.topic.dest.stored_secret; } -bool topics_has_endpoint_secret(const rgw_pubsub_user_topics& topics) { +bool topics_has_endpoint_secret(const rgw_pubsub_topics& topics) { for (const auto& topic : topics.topics) { if (topic_has_endpoint_secret(topic.second)) return true; } @@ -53,8 +53,8 @@ void RGWPSCreateTopicOp::execute(optional_yield y) { return; } - ups.emplace(store, s->owner.get_id()); - op_ret = ups->create_topic(topic_name, dest, topic_arn, opaque_data, y); + ps.emplace(store, s->owner.get_id().tenant); + op_ret = ps->create_topic(topic_name, dest, topic_arn, opaque_data, y); if (op_ret < 0) { ldout(s->cct, 1) << "failed to create topic '" << topic_name << "', ret=" << op_ret << dendl; return; @@ -63,8 +63,8 @@ void RGWPSCreateTopicOp::execute(optional_yield y) { } void RGWPSListTopicsOp::execute(optional_yield y) { - ups.emplace(store, s->owner.get_id()); - op_ret = 
ups->get_user_topics(&result); + ps.emplace(store, s->owner.get_id().tenant); + op_ret = ps->get_topics(&result); // if there are no topics it is not considered an error op_ret = op_ret == -ENOENT ? 0 : op_ret; if (op_ret < 0) { @@ -84,8 +84,8 @@ void RGWPSGetTopicOp::execute(optional_yield y) { if (op_ret < 0) { return; } - ups.emplace(store, s->owner.get_id()); - op_ret = ups->get_topic(topic_name, &result); + ps.emplace(store, s->owner.get_id().tenant); + op_ret = ps->get_topic(topic_name, &result); if (topic_has_endpoint_secret(result) && !rgw_transport_is_secure(s->cct, *(s->info.env))) { ldout(s->cct, 1) << "topic '" << topic_name << "' contain secret and cannot be sent over insecure transport" << dendl; op_ret = -EPERM; @@ -103,8 +103,8 @@ void RGWPSDeleteTopicOp::execute(optional_yield y) { if (op_ret < 0) { return; } - ups.emplace(store, s->owner.get_id()); - op_ret = ups->remove_topic(topic_name, y); + ps.emplace(store, s->owner.get_id().tenant); + op_ret = ps->remove_topic(topic_name, y); if (op_ret < 0) { ldout(s->cct, 1) << "failed to remove topic '" << topic_name << ", ret=" << op_ret << dendl; return; @@ -117,8 +117,8 @@ void RGWPSCreateSubOp::execute(optional_yield y) { if (op_ret < 0) { return; } - ups.emplace(store, s->owner.get_id()); - auto sub = ups->get_sub(sub_name); + ps.emplace(store, s->owner.get_id().tenant); + auto sub = ps->get_sub(sub_name); op_ret = sub->subscribe(topic_name, dest, y); if (op_ret < 0) { ldout(s->cct, 1) << "failed to create subscription '" << sub_name << "', ret=" << op_ret << dendl; @@ -132,8 +132,8 @@ void RGWPSGetSubOp::execute(optional_yield y) { if (op_ret < 0) { return; } - ups.emplace(store, s->owner.get_id()); - auto sub = ups->get_sub(sub_name); + ps.emplace(store, s->owner.get_id().tenant); + auto sub = ps->get_sub(sub_name); op_ret = sub->get_conf(&result); if (subscription_has_endpoint_secret(result) && !rgw_transport_is_secure(s->cct, *(s->info.env))) { ldout(s->cct, 1) << "subscription '" << sub_name << 
"' contain secret and cannot be sent over insecure transport" << dendl; @@ -152,8 +152,8 @@ void RGWPSDeleteSubOp::execute(optional_yield y) { if (op_ret < 0) { return; } - ups.emplace(store, s->owner.get_id()); - auto sub = ups->get_sub(sub_name); + ps.emplace(store, s->owner.get_id().tenant); + auto sub = ps->get_sub(sub_name); op_ret = sub->unsubscribe(topic_name, y); if (op_ret < 0) { ldout(s->cct, 1) << "failed to remove subscription '" << sub_name << "', ret=" << op_ret << dendl; @@ -167,8 +167,8 @@ void RGWPSAckSubEventOp::execute(optional_yield y) { if (op_ret < 0) { return; } - ups.emplace(store, s->owner.get_id()); - auto sub = ups->get_sub_with_events(sub_name); + ps.emplace(store, s->owner.get_id().tenant); + auto sub = ps->get_sub_with_events(sub_name); op_ret = sub->remove_event(event_id); if (op_ret < 0) { ldout(s->cct, 1) << "failed to ack event on subscription '" << sub_name << "', ret=" << op_ret << dendl; @@ -182,8 +182,8 @@ void RGWPSPullSubEventsOp::execute(optional_yield y) { if (op_ret < 0) { return; } - ups.emplace(store, s->owner.get_id()); - sub = ups->get_sub_with_events(sub_name); + ps.emplace(store, s->owner.get_id().tenant); + sub = ps->get_sub_with_events(sub_name); if (!sub) { op_ret = -ENOENT; ldout(s->cct, 1) << "failed to get subscription '" << sub_name << "' for events, ret=" << op_ret << dendl; diff --git a/src/rgw/rgw_rest_pubsub_common.h b/src/rgw/rgw_rest_pubsub_common.h index 0abb27c5ccf..f42a40e2fd1 100644 --- a/src/rgw/rgw_rest_pubsub_common.h +++ b/src/rgw/rgw_rest_pubsub_common.h @@ -14,7 +14,7 @@ bool validate_and_update_endpoint_secret(rgw_pubsub_sub_dest& dest, CephContext // create a topic class RGWPSCreateTopicOp : public RGWDefaultResponseOp { protected: - std::optional<RGWUserPubSub> ups; + std::optional<RGWPubSub> ps; std::string topic_name; rgw_pubsub_sub_dest dest; std::string topic_arn; @@ -39,8 +39,8 @@ public: // list all topics class RGWPSListTopicsOp : public RGWOp { protected: - 
std::optional<RGWUserPubSub> ups; - rgw_pubsub_user_topics result; + std::optional<RGWPubSub> ps; + rgw_pubsub_topics result; public: int verify_permission(optional_yield) override { @@ -60,7 +60,7 @@ public: class RGWPSGetTopicOp : public RGWOp { protected: std::string topic_name; - std::optional<RGWUserPubSub> ups; + std::optional<RGWPubSub> ps; rgw_pubsub_topic_subs result; virtual int get_params() = 0; @@ -83,7 +83,7 @@ public: class RGWPSDeleteTopicOp : public RGWDefaultResponseOp { protected: string topic_name; - std::optional<RGWUserPubSub> ups; + std::optional<RGWPubSub> ps; virtual int get_params() = 0; @@ -106,7 +106,7 @@ class RGWPSCreateSubOp : public RGWDefaultResponseOp { protected: std::string sub_name; std::string topic_name; - std::optional<RGWUserPubSub> ups; + std::optional<RGWPubSub> ps; rgw_pubsub_sub_dest dest; virtual int get_params() = 0; @@ -129,7 +129,7 @@ public: class RGWPSGetSubOp : public RGWOp { protected: std::string sub_name; - std::optional<RGWUserPubSub> ups; + std::optional<RGWPubSub> ps; rgw_pubsub_sub_config result; virtual int get_params() = 0; @@ -153,7 +153,7 @@ class RGWPSDeleteSubOp : public RGWDefaultResponseOp { protected: std::string sub_name; std::string topic_name; - std::optional<RGWUserPubSub> ups; + std::optional<RGWPubSub> ps; virtual int get_params() = 0; @@ -176,7 +176,7 @@ class RGWPSAckSubEventOp : public RGWDefaultResponseOp { protected: std::string sub_name; std::string event_id; - std::optional<RGWUserPubSub> ups; + std::optional<RGWPubSub> ps; virtual int get_params() = 0; @@ -204,8 +204,8 @@ protected: int max_entries{0}; std::string sub_name; std::string marker; - std::optional<RGWUserPubSub> ups; - RGWUserPubSub::SubRef sub; + std::optional<RGWPubSub> ps; + RGWPubSub::SubRef sub; virtual int get_params() = 0; @@ -228,7 +228,7 @@ public: // notification creation class RGWPSCreateNotifOp : public RGWDefaultResponseOp { protected: - std::optional<RGWUserPubSub> ups; + std::optional<RGWPubSub> ps; string 
bucket_name; RGWBucketInfo bucket_info; @@ -248,7 +248,7 @@ public: // delete a notification class RGWPSDeleteNotifOp : public RGWDefaultResponseOp { protected: - std::optional<RGWUserPubSub> ups; + std::optional<RGWPubSub> ps; std::string bucket_name; RGWBucketInfo bucket_info; @@ -270,7 +270,7 @@ class RGWPSListNotifsOp : public RGWOp { protected: std::string bucket_name; RGWBucketInfo bucket_info; - std::optional<RGWUserPubSub> ups; + std::optional<RGWPubSub> ps; virtual int get_params() = 0; diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 3a6e52b507c..5c1a2d94121 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -1411,6 +1411,14 @@ void RGWGetUsage_ObjStore_S3::send_response() formatter->open_object_section("Stats"); } + // send info about quota config + auto user_info = s->user->get_info(); + encode_json("QuotaMaxBytes", user_info.user_quota.max_size, formatter); + encode_json("QuotaMaxBuckets", user_info.max_buckets, formatter); + encode_json("QuotaMaxObjCount", user_info.user_quota.max_objects, formatter); + encode_json("QuotaMaxBytesPerBucket", user_info.bucket_quota.max_objects, formatter); + encode_json("QuotaMaxObjCountPerBucket", user_info.bucket_quota.max_size, formatter); + // send info about user's capacity utilization encode_json("TotalBytes", stats.size, formatter); encode_json("TotalBytesRounded", stats.size_rounded, formatter); encode_json("TotalEntries", stats.num_objects, formatter); @@ -2141,6 +2149,15 @@ static void dump_bucket_metadata(struct req_state *s, rgw::sal::RGWBucket* bucke { dump_header(s, "X-RGW-Object-Count", static_cast<long long>(bucket->get_count())); dump_header(s, "X-RGW-Bytes-Used", static_cast<long long>(bucket->get_size())); + // only bucket's owner is allowed to get the quota settings of the account + if (bucket->is_owner(s->user.get())) { + auto user_info = s->user->get_info(); + dump_header(s, "X-RGW-Quota-User-Size", static_cast<long long>(user_info.user_quota.max_size)); + 
dump_header(s, "X-RGW-Quota-User-Objects", static_cast<long long>(user_info.user_quota.max_objects)); + dump_header(s, "X-RGW-Quota-Max-Buckets", static_cast<long long>(user_info.max_buckets)); + dump_header(s, "X-RGW-Quota-Bucket-Size", static_cast<long long>(user_info.bucket_quota.max_size)); + dump_header(s, "X-RGW-Quota-Bucket-Objects", static_cast<long long>(user_info.bucket_quota.max_objects)); + } } void RGWStatBucket_ObjStore_S3::send_response() @@ -3870,6 +3887,12 @@ int RGWDeleteMultiObj_ObjStore_S3::get_params(optional_yield y) return ret; } + const char *bypass_gov_header = s->info.env->get("HTTP_X_AMZ_BYPASS_GOVERNANCE_RETENTION"); + if (bypass_gov_header) { + std::string bypass_gov_decoded = url_decode(bypass_gov_header); + bypass_governance_mode = boost::algorithm::iequals(bypass_gov_decoded, "true"); + } + return do_aws4_auth_completion(); } @@ -3961,7 +3984,7 @@ void RGWGetObjLayout_ObjStore_S3::send_response() f.open_array_section("data_location"); for (auto miter = manifest->obj_begin(); miter != manifest->obj_end(); ++miter) { f.open_object_section("obj"); - rgw_raw_obj raw_loc = miter.get_location().get_raw_obj(store->getRados()); + rgw_raw_obj raw_loc = miter.get_location().get_raw_obj(store); uint64_t ofs = miter.get_ofs(); uint64_t left = manifest->get_obj_size() - ofs; ::encode_json("ofs", miter.get_ofs(), &f); diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc index 48e824e90a4..0a8d6cdf993 100644 --- a/src/rgw/rgw_rest_swift.cc +++ b/src/rgw/rgw_rest_swift.cc @@ -176,7 +176,7 @@ void RGWListBuckets_ObjStore_SWIFT::send_response_begin(bool has_buckets) global_stats, policies_stats, attrs, - user_quota, + s->user->get_info().user_quota, static_cast<RGWAccessControlPolicy_SWIFTAcct&>(*s->user_acl)); dump_errno(s); dump_header(s, "Accept-Ranges", "bytes"); @@ -282,7 +282,7 @@ void RGWListBuckets_ObjStore_SWIFT::send_response_end() global_stats, policies_stats, attrs, - user_quota, + s->user->get_info().user_quota, 
static_cast<RGWAccessControlPolicy_SWIFTAcct&>(*s->user_acl)); dump_errno(s); end_header(s, nullptr, nullptr, s->formatter->get_len(), true); @@ -556,7 +556,7 @@ void RGWStatAccount_ObjStore_SWIFT::send_response() global_stats, policies_stats, attrs, - user_quota, + s->user->get_info().user_quota, static_cast<RGWAccessControlPolicy_SWIFTAcct&>(*s->user_acl)); } diff --git a/src/rgw/rgw_sal.h b/src/rgw/rgw_sal.h index f21ca91036f..c0ea9df9c9f 100644 --- a/src/rgw/rgw_sal.h +++ b/src/rgw/rgw_sal.h @@ -16,11 +16,13 @@ #pragma once #include "rgw_user.h" -#include "rgw_obj_manifest.h" class RGWGetDataCB; struct RGWObjState; class RGWAccessListFilter; +class RGWLC; +class RGWObjManifest; +struct RGWZoneGroup; struct RGWUsageIter { string read_iter; @@ -29,6 +31,22 @@ struct RGWUsageIter { RGWUsageIter() : index(0) {} }; +/** + * @struct RGWClusterStat + * Cluster-wide usage information + */ +struct RGWClusterStat { + /// total device size + uint64_t kb; + /// total used + uint64_t kb_used; + /// total available/free + uint64_t kb_avail; + /// number of objects + uint64_t num_objects; +}; + + namespace rgw { namespace sal { #define RGW_SAL_VERSION 1 @@ -37,6 +55,8 @@ class RGWUser; class RGWBucket; class RGWObject; class RGWBucketList; +struct MPSerializer; +class Lifecycle; enum AttrsMod { ATTRSMOD_NONE = 0, @@ -55,7 +75,7 @@ class RGWStore : public DoutPrefixProvider { virtual std::unique_ptr<RGWObject> get_object(const rgw_obj_key& k) = 0; virtual int get_bucket(RGWUser* u, const rgw_bucket& b, std::unique_ptr<RGWBucket>* bucket, optional_yield y) = 0; virtual int get_bucket(RGWUser* u, const RGWBucketInfo& i, std::unique_ptr<RGWBucket>* bucket) = 0; - virtual int get_bucket(RGWUser* u, const std::string& tenant, const std::string&name, std::unique_ptr<RGWBucket>* bucket, optional_yield y) = 0; + virtual int get_bucket(RGWUser* u, const std::string& tenant, const std::string& name, std::unique_ptr<RGWBucket>* bucket, optional_yield y) = 0; virtual int 
create_bucket(RGWUser& u, const rgw_bucket& b, const std::string& zonegroup_id, rgw_placement_rule& placement_rule, @@ -80,6 +100,12 @@ class RGWStore : public DoutPrefixProvider { optional_yield y) = 0; virtual const RGWZoneGroup& get_zonegroup() = 0; virtual int get_zonegroup(const string& id, RGWZoneGroup& zonegroup) = 0; + virtual int cluster_stat(RGWClusterStat& stats) = 0; + virtual std::unique_ptr<Lifecycle> get_lifecycle(void) = 0; + virtual RGWLC* get_rgwlc(void) = 0; + virtual int delete_raw_obj(const rgw_raw_obj& obj) = 0; + virtual void get_raw_obj(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj* raw_obj) = 0; + virtual int get_raw_chunk_size(const rgw_raw_obj& obj, uint64_t* chunk_size) = 0; virtual void finalize(void)=0; @@ -157,12 +183,12 @@ class RGWBucket { RGWAccessListFilter *filter{nullptr}; bool list_versions{false}; bool allow_unordered{false}; - int shard_id{0}; + int shard_id{-1}; }; struct ListResults { vector<rgw_bucket_dir_entry> objs; map<std::string, bool> common_prefixes; - bool is_truncated; + bool is_truncated{false}; rgw_obj_key next_marker; }; @@ -205,6 +231,8 @@ class RGWBucket { virtual int chown(RGWUser* new_user, RGWUser* old_user, optional_yield y) = 0; virtual int put_instance_info(bool exclusive, ceph::real_time mtime) = 0; virtual bool is_owner(RGWUser* user) = 0; + virtual RGWUser* get_owner(void) { return owner; }; + virtual ACLOwner get_acl_owner(void) { return ACLOwner(info.owner); }; virtual int check_empty(optional_yield y) = 0; virtual int check_quota(RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size, optional_yield y, bool check_size_only = false) = 0; virtual int set_instance_attrs(RGWAttrs& attrs, optional_yield y) = 0; @@ -338,6 +366,39 @@ class RGWObject { virtual int get_attr(const char *name, bufferlist& dest, optional_yield y) = 0; }; + struct WriteOp { + struct Params { + bool versioning_disabled{false}; + ceph::real_time* mtime{nullptr}; + RGWAttrs* 
rmattrs{nullptr}; + const bufferlist* data{nullptr}; + RGWObjManifest* manifest{nullptr}; + const string* ptag{nullptr}; + list<rgw_obj_index_key>* remove_objs{nullptr}; + ceph::real_time set_mtime; + ACLOwner owner; + RGWObjCategory category{RGWObjCategory::Main}; + int flags{0}; + const char* if_match{nullptr}; + const char* if_nomatch{nullptr}; + std::optional<uint64_t> olh_epoch; + ceph::real_time delete_at; + bool canceled{false}; + const string* user_data{nullptr}; + rgw_zone_set* zones_trace{nullptr}; + bool modify_tail{false}; + bool completeMultipart{false}; + bool appendable{false}; + RGWAttrs* attrs{nullptr}; + } params; + + virtual ~WriteOp() = default; + + virtual int prepare(optional_yield y) = 0; + virtual int write_meta(uint64_t size, uint64_t accounted_size, optional_yield y) = 0; + //virtual int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive) = 0; + }; + RGWObject() : key(), bucket(nullptr), @@ -368,7 +429,7 @@ class RGWObject { virtual int delete_object(RGWObjectCtx* obj_ctx, ACLOwner obj_owner, ACLOwner bucket_owner, ceph::real_time unmod_since, bool high_precision_time, uint64_t epoch, - std::string& version_id,optional_yield y) = 0; + std::string& version_id, optional_yield y) = 0; virtual int copy_object(RGWObjectCtx& obj_ctx, RGWUser* user, req_info *info, const rgw_zone_id& source_zone, rgw::sal::RGWObject* dest_object, rgw::sal::RGWBucket* dest_bucket, @@ -399,6 +460,22 @@ class RGWObject { virtual int delete_obj_attrs(RGWObjectCtx *rctx, const char *attr_name, optional_yield y) = 0; virtual int copy_obj_data(RGWObjectCtx& rctx, RGWBucket* dest_bucket, RGWObject* dest_obj, uint16_t olh_epoch, std::string* petag, const DoutPrefixProvider *dpp, optional_yield y) = 0; virtual bool is_expired() = 0; + virtual void gen_rand_obj_instance_name() = 0; + virtual void raw_obj_to_obj(const rgw_raw_obj& raw_obj) = 0; + virtual void get_raw_obj(rgw_raw_obj* raw_obj) = 0; + virtual MPSerializer* get_serializer(const std::string& 
lock_name) = 0; + virtual int transition(RGWObjectCtx& rctx, + RGWBucket* bucket, + const rgw_placement_rule& placement_rule, + const real_time& mtime, + uint64_t olh_epoch, + const DoutPrefixProvider *dpp, + optional_yield y) = 0; + virtual int get_max_chunk_size(rgw_placement_rule placement_rule, + uint64_t* max_chunk_size, + uint64_t* alignment = nullptr) = 0; + virtual void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size) = 0; + virtual bool placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) = 0; RGWAttrs& get_attrs(void) { return attrs; } ceph::real_time get_mtime(void) const { return mtime; } @@ -412,28 +489,39 @@ class RGWObject { bool get_in_extra_data(void) { return in_extra_data; } void set_in_extra_data(bool i) { in_extra_data = i; } int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end); + void set_obj_size(uint64_t s) { obj_size = s; } + virtual void set_name(const std::string& n) { key = n; } + virtual void set_key(const rgw_obj_key& k) { key = k; } + virtual rgw_obj get_obj(void) const { + rgw_obj obj(bucket->get_key(), key); + obj.set_in_extra_data(in_extra_data); + obj.index_hash_source = index_hash_source; + return obj; + } + + /* Swift versioning */ + virtual int swift_versioning_restore(RGWObjectCtx* obj_ctx, + bool& restored, /* out */ + const DoutPrefixProvider *dpp) = 0; + virtual int swift_versioning_copy(RGWObjectCtx* obj_ctx, + const DoutPrefixProvider *dpp, + optional_yield y) = 0; /* OPs */ - virtual std::unique_ptr<ReadOp> get_read_op(RGWObjectCtx *) = 0; + virtual std::unique_ptr<ReadOp> get_read_op(RGWObjectCtx*) = 0; + virtual std::unique_ptr<WriteOp> get_write_op(RGWObjectCtx*) = 0; /* OMAP */ virtual int omap_get_vals_by_keys(const std::string& oid, const std::set<std::string>& keys, RGWAttrs *vals) = 0; + virtual int omap_set_val_by_key(const std::string& key, bufferlist& val, + bool must_exist, optional_yield y) = 0; static bool empty(RGWObject* o) { return (!o || 
o->empty()); } virtual std::unique_ptr<RGWObject> clone() = 0; /* dang - Not sure if we want this, but it simplifies things a lot */ - void set_obj_size(uint64_t s) { obj_size = s; } - virtual void set_name(const std::string& n) { key = n; } - virtual void set_key(const rgw_obj_key& k) { key = k; } - virtual rgw_obj get_obj(void) const { - rgw_obj obj(bucket->get_key(), key); - obj.set_in_extra_data(in_extra_data); - return obj; - } - virtual void gen_rand_obj_instance_name() = 0; /* dang - This is temporary, until the API is completed */ rgw_obj_key& get_key() { return key; } @@ -442,6 +530,8 @@ class RGWObject { bool have_instance(void) { return key.have_instance(); } friend inline ostream& operator<<(ostream& out, const RGWObject& o) { + if (o.bucket) + out << o.bucket << ":"; out << o.key; return out; } @@ -449,7 +539,7 @@ class RGWObject { if (!o) out << "<NULL>"; else - out << o->key; + out << *o; return out; } friend inline ostream& operator<<(ostream& out, const std::unique_ptr<RGWObject>& p) { @@ -458,5 +548,63 @@ class RGWObject { } }; +struct Serializer { + Serializer() = default; + virtual ~Serializer() = default; + + virtual int try_lock(utime_t dur, optional_yield y) = 0; + virtual int unlock() = 0; +}; + +struct MPSerializer : Serializer { + bool locked; + std::string oid; + MPSerializer() : locked(false) {} + virtual ~MPSerializer() = default; + + void clear_locked() { + locked = false; + } +}; + +struct LCSerializer : Serializer { + LCSerializer() {} + virtual ~LCSerializer() = default; +}; + +class Lifecycle { +public: + struct LCHead { + time_t start_date{0}; + std::string marker; + + LCHead() = default; + LCHead(time_t _date, std::string& _marker) : start_date(_date), marker(_marker) {} + }; + + struct LCEntry { + std::string bucket; + uint64_t start_time{0}; + uint32_t status{0}; + + LCEntry() = default; + LCEntry(std::string& _bucket, uint64_t _time, uint32_t _status) : bucket(_bucket), start_time(_time), status(_status) {} + }; + + 
Lifecycle() = default; + virtual ~Lifecycle() = default; + + virtual int get_entry(const string& oid, const std::string& marker, LCEntry& entry) = 0; + virtual int get_next_entry(const string& oid, std::string& marker, LCEntry& entry) = 0; + virtual int set_entry(const string& oid, const LCEntry& entry) = 0; + virtual int list_entries(const string& oid, const string& marker, + uint32_t max_entries, vector<LCEntry>& entries) = 0; + virtual int rm_entry(const string& oid, const LCEntry& entry) = 0; + virtual int get_head(const string& oid, LCHead& head) = 0; + virtual int put_head(const string& oid, const LCHead& head) = 0; + + virtual LCSerializer* get_serializer(const std::string& lock_name, const std::string& oid, const std::string& cookie) = 0; +}; + } } // namespace rgw::sal diff --git a/src/rgw/rgw_sal_rados.cc b/src/rgw/rgw_sal_rados.cc index 6ae2f3edd44..043a9837515 100644 --- a/src/rgw/rgw_sal_rados.cc +++ b/src/rgw/rgw_sal_rados.cc @@ -28,11 +28,12 @@ #include "rgw_multi.h" #include "rgw_acl_s3.h" -/* Stuff for RGWRadosStore. 
Move to separate file when store split out */ #include "rgw_zone.h" #include "rgw_rest_conn.h" #include "services/svc_sys_obj.h" #include "services/svc_zone.h" +#include "services/svc_tier_rados.h" +#include "cls/rgw/cls_rgw_client.h" #define dout_subsys ceph_subsys_rgw @@ -530,6 +531,18 @@ void RGWRadosObject::gen_rand_obj_instance_name() store->getRados()->gen_rand_obj_instance_name(&key); } +void RGWRadosObject::raw_obj_to_obj(const rgw_raw_obj& raw_obj) +{ + rgw_obj tobj = get_obj(); + RGWSI_Tier_RADOS::raw_obj_to_obj(get_bucket()->get_key(), raw_obj, &tobj); + set_key(tobj.key); +} + +void RGWRadosObject::get_raw_obj(rgw_raw_obj* raw_obj) +{ + store->getRados()->obj_to_raw((bucket->get_info()).placement_rule, get_obj(), raw_obj); +} + int RGWRadosObject::omap_get_vals_by_keys(const std::string& oid, const std::set<std::string>& keys, RGWAttrs *vals) @@ -548,6 +561,67 @@ int RGWRadosObject::omap_get_vals_by_keys(const std::string& oid, return cur_ioctx.omap_get_vals_by_keys(oid, keys, vals); } +int RGWRadosObject::omap_set_val_by_key(const std::string& key, bufferlist& val, + bool must_exist, optional_yield y) +{ + rgw_raw_obj raw_meta_obj; + rgw_obj obj = get_obj(); + + store->getRados()->obj_to_raw(bucket->get_placement_rule(), obj, &raw_meta_obj); + + auto obj_ctx = store->svc()->sysobj->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(raw_meta_obj); + + return sysobj.omap().set_must_exist(must_exist).set(key, val, y); +} + +MPSerializer* RGWRadosObject::get_serializer(const std::string& lock_name) +{ + return new MPRadosSerializer(store, this, lock_name); +} + +int RGWRadosObject::transition(RGWObjectCtx& rctx, + RGWBucket* bucket, + const rgw_placement_rule& placement_rule, + const real_time& mtime, + uint64_t olh_epoch, + const DoutPrefixProvider *dpp, + optional_yield y) +{ + return store->getRados()->transition_obj(rctx, bucket, *this, placement_rule, mtime, olh_epoch, dpp, y); +} + +int RGWRadosObject::get_max_chunk_size(rgw_placement_rule 
placement_rule, uint64_t *max_chunk_size, uint64_t *alignment) +{ + return store->getRados()->get_max_chunk_size(placement_rule, get_obj(), max_chunk_size, alignment); +} + +void RGWRadosObject::get_max_aligned_size(uint64_t size, uint64_t alignment, + uint64_t *max_size) +{ + store->getRados()->get_max_aligned_size(size, alignment, max_size); +} + +bool RGWRadosObject::placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) +{ + rgw_obj obj; + rgw_pool p1, p2; + + obj = get_obj(); + + if (r1 == r2) + return true; + + if (!store->getRados()->get_obj_data_pool(r1, obj, &p1)) { + return false; + } + if (!store->getRados()->get_obj_data_pool(r2, obj, &p2)) { + return false; + } + + return p1 == p2; +} + std::unique_ptr<RGWObject::ReadOp> RGWRadosObject::get_read_op(RGWObjectCtx *ctx) { return std::unique_ptr<RGWObject::ReadOp>(new RGWRadosObject::RadosReadOp(this, ctx)); @@ -694,6 +768,80 @@ int RGWRadosObject::RadosReadOp::iterate(int64_t ofs, int64_t end, RGWGetDataCB return parent_op.iterate(ofs, end, cb, y); } +std::unique_ptr<RGWObject::WriteOp> RGWRadosObject::get_write_op(RGWObjectCtx* ctx) +{ + return std::unique_ptr<RGWObject::WriteOp>(new RGWRadosObject::RadosWriteOp(this, ctx)); +} + +RGWRadosObject::RadosWriteOp::RadosWriteOp(RGWRadosObject* _source, RGWObjectCtx* _rctx) : + source(_source), + rctx(_rctx), + op_target(_source->store->getRados(), + _source->get_bucket()->get_info(), + *static_cast<RGWObjectCtx *>(rctx), + _source->get_obj()), + parent_op(&op_target) +{ } + +int RGWRadosObject::RadosWriteOp::prepare(optional_yield y) +{ + op_target.set_versioning_disabled(params.versioning_disabled); + parent_op.meta.mtime = params.mtime; + parent_op.meta.rmattrs = params.rmattrs; + parent_op.meta.data = params.data; + parent_op.meta.manifest = params.manifest; + parent_op.meta.ptag = params.ptag; + parent_op.meta.remove_objs = params.remove_objs; + parent_op.meta.set_mtime = params.set_mtime; + parent_op.meta.owner = params.owner.get_id(); + 
parent_op.meta.category = params.category; + parent_op.meta.flags = params.flags; + parent_op.meta.if_match = params.if_match; + parent_op.meta.if_nomatch = params.if_nomatch; + parent_op.meta.olh_epoch = params.olh_epoch; + parent_op.meta.delete_at = params.delete_at; + parent_op.meta.canceled = params.canceled; + parent_op.meta.user_data = params.user_data; + parent_op.meta.zones_trace = params.zones_trace; + parent_op.meta.modify_tail = params.modify_tail; + parent_op.meta.completeMultipart = params.completeMultipart; + parent_op.meta.appendable = params.appendable; + + return 0; +} + +int RGWRadosObject::RadosWriteOp::write_meta(uint64_t size, uint64_t accounted_size, optional_yield y) +{ + int ret = parent_op.write_meta(size, accounted_size, *params.attrs, y); + params.canceled = parent_op.meta.canceled; + + return ret; +} + +int RGWRadosObject::swift_versioning_restore(RGWObjectCtx* obj_ctx, + bool& restored, + const DoutPrefixProvider *dpp) +{ + return store->getRados()->swift_versioning_restore(*obj_ctx, + bucket->get_owner()->get_id(), + bucket, + this, + restored, + dpp); +} + +int RGWRadosObject::swift_versioning_copy(RGWObjectCtx* obj_ctx, + const DoutPrefixProvider *dpp, + optional_yield y) +{ + return store->getRados()->swift_versioning_copy(*obj_ctx, + bucket->get_info().owner, + bucket, + this, + dpp, + y); +} + int RGWRadosStore::get_bucket(RGWUser* u, const rgw_bucket& b, std::unique_ptr<RGWBucket>* bucket, optional_yield y) { int ret; @@ -828,6 +976,23 @@ int RGWRadosStore::get_zonegroup(const string& id, RGWZoneGroup& zonegroup) return rados->svc.zone->get_zonegroup(id, zonegroup); } +int RGWRadosStore::cluster_stat(RGWClusterStat& stats) +{ + rados_cluster_stat_t rados_stats; + int ret; + + ret = rados->get_rados_handle()->cluster_stat(rados_stats); + if (ret < 0) + return ret; + + stats.kb = rados_stats.kb; + stats.kb_used = rados_stats.kb_used; + stats.kb_avail = rados_stats.kb_avail; + stats.num_objects = rados_stats.num_objects; + + return 
ret; +} + int RGWRadosStore::create_bucket(RGWUser& u, const rgw_bucket& b, const string& zonegroup_id, rgw_placement_rule& placement_rule, @@ -944,6 +1109,155 @@ int RGWRadosStore::create_bucket(RGWUser& u, const rgw_bucket& b, return ret; } +std::unique_ptr<Lifecycle> RGWRadosStore::get_lifecycle(void) +{ + return std::unique_ptr<Lifecycle>(new RadosLifecycle(this)); +} + +int RGWRadosStore::delete_raw_obj(const rgw_raw_obj& obj) +{ + return rados->delete_raw_obj(obj); +} + +void RGWRadosStore::get_raw_obj(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj* raw_obj) +{ + rados->obj_to_raw(placement_rule, obj, raw_obj); +} + +int RGWRadosStore::get_raw_chunk_size(const rgw_raw_obj& obj, uint64_t* chunk_size) +{ + return rados->get_max_chunk_size(obj.pool, chunk_size); +} + +MPRadosSerializer::MPRadosSerializer(RGWRadosStore* store, RGWRadosObject* obj, const std::string& lock_name) : + lock(lock_name) +{ + rgw_pool meta_pool; + rgw_raw_obj raw_obj; + + obj->get_raw_obj(&raw_obj); + oid = raw_obj.oid; + store->getRados()->get_obj_data_pool(obj->get_bucket()->get_placement_rule(), + obj->get_obj(), &meta_pool); + store->getRados()->open_pool_ctx(meta_pool, ioctx, true); +} + +int MPRadosSerializer::try_lock(utime_t dur, optional_yield y) +{ + op.assert_exists(); + lock.set_duration(dur); + lock.lock_exclusive(&op); + int ret = rgw_rados_operate(ioctx, oid, &op, y); + if (! 
ret) { + locked = true; + } + return ret; +} + +LCRadosSerializer::LCRadosSerializer(RGWRadosStore* store, const std::string& _oid, const std::string& lock_name, const std::string& cookie) : + lock(lock_name), oid(_oid) +{ + ioctx = &store->getRados()->lc_pool_ctx; + lock.set_cookie(cookie); +} + +int LCRadosSerializer::try_lock(utime_t dur, optional_yield y) +{ + lock.set_duration(dur); + return lock.lock_exclusive(ioctx, oid); +} + +int RadosLifecycle::get_entry(const string& oid, const std::string& marker, + LCEntry& entry) +{ + cls_rgw_lc_entry cls_entry; + int ret = cls_rgw_lc_get_entry(*store->getRados()->get_lc_pool_ctx(), oid, marker, cls_entry); + + entry.bucket = cls_entry.bucket; + entry.start_time = cls_entry.start_time; + entry.status = cls_entry.status; + + return ret; +} + +int RadosLifecycle::get_next_entry(const string& oid, std::string& marker, + LCEntry& entry) +{ + cls_rgw_lc_entry cls_entry; + int ret = cls_rgw_lc_get_next_entry(*store->getRados()->get_lc_pool_ctx(), oid, marker, + cls_entry); + + entry.bucket = cls_entry.bucket; + entry.start_time = cls_entry.start_time; + entry.status = cls_entry.status; + + return ret; +} + +int RadosLifecycle::set_entry(const string& oid, const LCEntry& entry) +{ + cls_rgw_lc_entry cls_entry; + + cls_entry.bucket = entry.bucket; + cls_entry.start_time = entry.start_time; + cls_entry.status = entry.status; + + return cls_rgw_lc_set_entry(*store->getRados()->get_lc_pool_ctx(), oid, cls_entry); +} + +int RadosLifecycle::list_entries(const string& oid, const string& marker, + uint32_t max_entries, vector<LCEntry>& entries) +{ + vector<cls_rgw_lc_entry> cls_entries; + int ret = cls_rgw_lc_list(*store->getRados()->get_lc_pool_ctx(), oid, marker, max_entries, cls_entries); + + if (ret < 0) + return ret; + + for (auto& entry : cls_entries) { + entries.push_back(LCEntry(entry.bucket, entry.start_time, entry.status)); + } + + return ret; +} + +int RadosLifecycle::rm_entry(const string& oid, const LCEntry& entry) +{ + 
cls_rgw_lc_entry cls_entry; + + cls_entry.bucket = entry.bucket; + cls_entry.start_time = entry.start_time; + cls_entry.status = entry.status; + + return cls_rgw_lc_rm_entry(*store->getRados()->get_lc_pool_ctx(), oid, cls_entry); +} + +int RadosLifecycle::get_head(const string& oid, LCHead& head) +{ + cls_rgw_lc_obj_head cls_head; + int ret = cls_rgw_lc_get_head(*store->getRados()->get_lc_pool_ctx(), oid, cls_head); + + head.marker = cls_head.marker; + head.start_date = cls_head.start_date; + + return ret; +} + +int RadosLifecycle::put_head(const string& oid, const LCHead& head) +{ + cls_rgw_lc_obj_head cls_head; + + cls_head.marker = head.marker; + cls_head.start_date = head.start_date; + + return cls_rgw_lc_put_head(*store->getRados()->get_lc_pool_ctx(), oid, cls_head); +} + +LCSerializer* RadosLifecycle::get_serializer(const std::string& lock_name, const std::string& oid, const std::string& cookie) +{ + return new LCRadosSerializer(store, oid, lock_name, cookie); +} + } // namespace rgw::sal rgw::sal::RGWRadosStore *RGWStoreManager::init_storage_provider(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread, bool run_reshard_thread, bool use_cache) diff --git a/src/rgw/rgw_sal_rados.h b/src/rgw/rgw_sal_rados.h index bb0acd167bd..bc522ac733b 100644 --- a/src/rgw/rgw_sal_rados.h +++ b/src/rgw/rgw_sal_rados.h @@ -17,6 +17,7 @@ #include "rgw_sal.h" #include "rgw_rados.h" +#include "cls/lock/cls_lock_client.h" namespace rgw { namespace sal { @@ -67,6 +68,21 @@ class RGWRadosObject : public RGWObject { virtual int get_attr(const char *name, bufferlist& dest, optional_yield y) override; }; + struct RadosWriteOp : public WriteOp { + private: + RGWRadosObject* source; + RGWObjectCtx* rctx; + RGWRados::Object op_target; + RGWRados::Object::Write parent_op; + + public: + RadosWriteOp(RGWRadosObject* _source, RGWObjectCtx* _rctx); + + virtual int prepare(optional_yield y) override; + virtual int write_meta(uint64_t size, uint64_t 
accounted_size, optional_yield y) override; + //virtual int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive) override; + }; + RGWRadosObject() = default; RGWRadosObject(RGWRadosStore *_st, const rgw_obj_key& _k) @@ -115,17 +131,43 @@ class RGWRadosObject : public RGWObject { virtual int copy_obj_data(RGWObjectCtx& rctx, RGWBucket* dest_bucket, RGWObject* dest_obj, uint16_t olh_epoch, std::string* petag, const DoutPrefixProvider *dpp, optional_yield y) override; virtual bool is_expired() override; virtual void gen_rand_obj_instance_name() override; + virtual void raw_obj_to_obj(const rgw_raw_obj& raw_obj) override; + virtual void get_raw_obj(rgw_raw_obj* raw_obj) override; virtual std::unique_ptr<RGWObject> clone() { return std::unique_ptr<RGWObject>(new RGWRadosObject(*this)); } + virtual MPSerializer* get_serializer(const std::string& lock_name) override; + virtual int transition(RGWObjectCtx& rctx, + RGWBucket* bucket, + const rgw_placement_rule& placement_rule, + const real_time& mtime, + uint64_t olh_epoch, + const DoutPrefixProvider *dpp, + optional_yield y) override; + virtual int get_max_chunk_size(rgw_placement_rule placement_rule, + uint64_t *max_chunk_size, + uint64_t *alignment = nullptr) override; + virtual void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size) override; + virtual bool placement_rules_match(rgw_placement_rule& r1, rgw_placement_rule& r2) override; + + /* Swift versioning */ + virtual int swift_versioning_restore(RGWObjectCtx* obj_ctx, + bool& restored, + const DoutPrefixProvider *dpp) override; + virtual int swift_versioning_copy(RGWObjectCtx* obj_ctx, + const DoutPrefixProvider *dpp, + optional_yield y) override; /* OPs */ virtual std::unique_ptr<ReadOp> get_read_op(RGWObjectCtx *) override; + virtual std::unique_ptr<WriteOp> get_write_op(RGWObjectCtx *) override; /* OMAP */ virtual int omap_get_vals_by_keys(const std::string& oid, const std::set<std::string>& keys, RGWAttrs *vals) 
override; + virtual int omap_set_val_by_key(const std::string& key, bufferlist& val, + bool must_exist, optional_yield y) override; private: int read_attrs(RGWRados::Object::Read &read_op, optional_yield y, rgw_obj *target_obj = nullptr); @@ -211,7 +253,7 @@ class RGWRadosBucket : public RGWBucket { bool *is_truncated, RGWUsageIter& usage_iter, map<rgw_user_bucket, rgw_usage_log_entry>& usage) override; virtual std::unique_ptr<RGWBucket> clone() { - return std::unique_ptr<RGWBucket>(new RGWRadosBucket(*this)); + return std::make_unique<RGWRadosBucket>(*this); } friend class RGWRadosStore; @@ -259,6 +301,12 @@ class RGWRadosStore : public RGWStore { optional_yield y) override; virtual const RGWZoneGroup& get_zonegroup() override; virtual int get_zonegroup(const string& id, RGWZoneGroup& zonegroup) override; + virtual int cluster_stat(RGWClusterStat& stats) override; + virtual std::unique_ptr<Lifecycle> get_lifecycle(void) override; + virtual RGWLC* get_rgwlc(void) { return rados->get_lc(); } + virtual int delete_raw_obj(const rgw_raw_obj& obj) override; + virtual void get_raw_obj(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj* raw_obj) override; + virtual int get_raw_chunk_size(const rgw_raw_obj& obj, uint64_t* chunk_size) override; void setRados(RGWRados * st) { rados = st; } RGWRados *getRados(void) { return rados; } @@ -285,6 +333,51 @@ class RGWRadosStore : public RGWStore { }; +class MPRadosSerializer : public MPSerializer { + librados::IoCtx ioctx; + rados::cls::lock::Lock lock; + librados::ObjectWriteOperation op; + +public: + MPRadosSerializer(RGWRadosStore* store, RGWRadosObject* obj, const std::string& lock_name); + + virtual int try_lock(utime_t dur, optional_yield y) override; + int unlock() { + return lock.unlock(&ioctx, oid); + } +}; + +class LCRadosSerializer : public LCSerializer { + librados::IoCtx* ioctx; + rados::cls::lock::Lock lock; + const std::string oid; + +public: + LCRadosSerializer(RGWRadosStore* store, const 
std::string& oid, const std::string& lock_name, const std::string& cookie); + + virtual int try_lock(utime_t dur, optional_yield y) override; + int unlock() { + return lock.unlock(ioctx, oid); + } +}; + +class RadosLifecycle : public Lifecycle { + RGWRadosStore* store; + +public: + RadosLifecycle(RGWRadosStore* _st) : store(_st) {} + + virtual int get_entry(const string& oid, const std::string& marker, LCEntry& entry) override; + virtual int get_next_entry(const string& oid, std::string& marker, LCEntry& entry) override; + virtual int set_entry(const string& oid, const LCEntry& entry) override; + virtual int list_entries(const string& oid, const string& marker, + uint32_t max_entries, vector<LCEntry>& entries) override; + virtual int rm_entry(const string& oid, const LCEntry& entry) override; + virtual int get_head(const string& oid, LCHead& head) override; + virtual int put_head(const string& oid, const LCHead& head) override; + virtual LCSerializer* get_serializer(const std::string& lock_name, const std::string& oid, const std::string& cookie) override; +}; + } } // namespace rgw::sal class RGWStoreManager { diff --git a/src/rgw/rgw_sync_module_pubsub.cc b/src/rgw/rgw_sync_module_pubsub.cc index 75e08300386..c1610747c39 100644 --- a/src/rgw/rgw_sync_module_pubsub.cc +++ b/src/rgw/rgw_sync_module_pubsub.cc @@ -780,9 +780,9 @@ class PSManager } else { using ReadInfoCR = RGWSimpleRadosReadCR<rgw_pubsub_sub_config>; yield { - RGWUserPubSub ups(sync_env->store, owner); + RGWPubSub ps(sync_env->store, owner.tenant); rgw_raw_obj obj; - ups.get_sub_meta_obj(sub_name, &obj); + ps.get_sub_meta_obj(sub_name, &obj); bool empty_on_enoent = false; call(new ReadInfoCR(sync_env->async_rados, sync_env->store->svc()->sysobj, obj, @@ -942,12 +942,12 @@ class RGWPSFindBucketTopicsCR : public RGWCoroutine { rgw_obj_key key; rgw::notify::EventType event_type; - RGWUserPubSub ups; + RGWPubSub ps; rgw_raw_obj bucket_obj; rgw_raw_obj user_obj; rgw_pubsub_bucket_topics bucket_topics; - 
rgw_pubsub_user_topics user_topics; + rgw_pubsub_topics user_topics; TopicsRef *topics; public: RGWPSFindBucketTopicsCR(RGWDataSyncCtx *_sc, @@ -963,14 +963,14 @@ public: bucket(_bucket), key(_key), event_type(_event_type), - ups(sync_env->store, owner), + ps(sync_env->store, owner.tenant), topics(_topics) { *topics = std::make_shared<vector<PSTopicConfigRef> >(); } int operate() override { reenter(this) { - ups.get_bucket_meta_obj(bucket, &bucket_obj); - ups.get_user_meta_obj(&user_obj); + ps.get_bucket_meta_obj(bucket, &bucket_obj); + ps.get_meta_obj(&user_obj); using ReadInfoCR = RGWSimpleRadosReadCR<rgw_pubsub_bucket_topics>; yield { @@ -986,7 +986,7 @@ public: ldout(sync_env->cct, 20) << "RGWPSFindBucketTopicsCR(): found " << bucket_topics.topics.size() << " topics for bucket " << bucket << dendl; if (!bucket_topics.topics.empty()) { - using ReadUserTopicsInfoCR = RGWSimpleRadosReadCR<rgw_pubsub_user_topics>; + using ReadUserTopicsInfoCR = RGWSimpleRadosReadCR<rgw_pubsub_topics>; yield { bool empty_on_enoent = true; call(new ReadUserTopicsInfoCR(sync_env->async_rados, sync_env->store->svc()->sysobj, diff --git a/src/rgw/rgw_sync_module_pubsub_rest.cc b/src/rgw/rgw_sync_module_pubsub_rest.cc index 2326e958a0a..c7feff50022 100644 --- a/src/rgw/rgw_sync_module_pubsub_rest.cc +++ b/src/rgw/rgw_sync_module_pubsub_rest.cc @@ -246,7 +246,7 @@ public: sub_name = s->object->get_name(); marker = s->info.args.get("marker"); const int ret = s->info.args.get_int("max-entries", &max_entries, - RGWUserPubSub::Sub::DEFAULT_MAX_EVENTS); + RGWPubSub::Sub::DEFAULT_MAX_EVENTS); if (ret < 0) { ldout(s->cct, 1) << "failed to parse 'max-entries' param" << dendl; return -EINVAL; @@ -374,9 +374,9 @@ public: void RGWPSCreateNotif_ObjStore::execute(optional_yield y) { - ups.emplace(store, s->owner.get_id()); + ps.emplace(store, s->owner.get_id().tenant); - auto b = ups->get_bucket(bucket_info.bucket); + auto b = ps->get_bucket(bucket_info.bucket); op_ret = 
b->create_notification(topic_name, events, y); if (op_ret < 0) { ldout(s->cct, 1) << "failed to create notification for topic '" << topic_name << "', ret=" << op_ret << dendl; @@ -411,8 +411,8 @@ void RGWPSDeleteNotif_ObjStore::execute(optional_yield y) { return; } - ups.emplace(store, s->owner.get_id()); - auto b = ups->get_bucket(bucket_info.bucket); + ps.emplace(store, s->owner.get_id().tenant); + auto b = ps->get_bucket(bucket_info.bucket); op_ret = b->remove_notification(topic_name, y); if (op_ret < 0) { ldout(s->cct, 1) << "failed to remove notification from topic '" << topic_name << "', ret=" << op_ret << dendl; @@ -450,8 +450,8 @@ public: void RGWPSListNotifs_ObjStore::execute(optional_yield y) { - ups.emplace(store, s->owner.get_id()); - auto b = ups->get_bucket(bucket_info.bucket); + ps.emplace(store, s->owner.get_id().tenant); + auto b = ps->get_bucket(bucket_info.bucket); op_ret = b->get_topics(&result); if (op_ret < 0) { ldout(s->cct, 1) << "failed to get topics, ret=" << op_ret << dendl; diff --git a/src/rgw/rgw_tools.cc b/src/rgw/rgw_tools.cc index 09074929b33..89a322b0675 100644 --- a/src/rgw/rgw_tools.cc +++ b/src/rgw/rgw_tools.cc @@ -486,7 +486,7 @@ int RGWDataAccess::Object::put(bufferlist& data, using namespace rgw::putobj; AtomicObjectProcessor processor(&aio, store, b.get(), nullptr, - owner.get_id(), obj_ctx, obj->get_obj(), olh_epoch, + owner.get_id(), obj_ctx, std::move(obj), olh_epoch, req_id, dpp, y); int ret = processor.prepare(y); diff --git a/src/sample.ceph.conf b/src/sample.ceph.conf index a8f8b9e04af..13394d31f21 100644 --- a/src/sample.ceph.conf +++ b/src/sample.ceph.conf @@ -31,7 +31,7 @@ # ; Example: /var/run/ceph/$cluster-$name.asok [global] -### http://docs.ceph.com/docs/master/rados/configuration/general-config-ref/ +### http://docs.ceph.com/en/latest/rados/configuration/general-config-ref/ ;fsid = {UUID} # use `uuidgen` to generate your own UUID ;public network = 192.168.0.0/24 @@ -51,8 +51,8 @@ ;max open files = 131072 -### 
http://docs.ceph.com/docs/master/rados/operations/ -### http://docs.ceph.com/docs/master/rados/configuration/auth-config-ref/ +### http://docs.ceph.com/en/latest/rados/operations/ +### http://docs.ceph.com/en/latest/rados/configuration/auth-config-ref/ # If enabled, the Ceph Storage Cluster daemons (i.e., ceph-mon, ceph-osd, # and ceph-mds) must authenticate with each other. @@ -90,7 +90,7 @@ ;keyring = /etc/ceph/$cluster.$name.keyring -### http://docs.ceph.com/docs/master/rados/configuration/pool-pg-config-ref/ +### http://docs.ceph.com/en/latest/rados/configuration/pool-pg-config-ref/ ## Replication level, number of data copies. @@ -140,7 +140,7 @@ ;osd crush chooseleaf type = 1 -### http://docs.ceph.com/docs/master/rados/troubleshooting/log-and-debug/ +### http://docs.ceph.com/en/latest/rados/troubleshooting/log-and-debug/ # The location of the logging file for your cluster. # Type: String @@ -155,7 +155,7 @@ ;log to syslog = true -### http://docs.ceph.com/docs/master/rados/configuration/ms-ref/ +### http://docs.ceph.com/en/latest/rados/configuration/ms-ref/ # Enable if you want your daemons to bind to IPv6 address instead of # IPv4 ones. (Not required if you specify a daemon or cluster IP.) @@ -168,8 +168,8 @@ ## You need at least one. You need at least three if you want to ## tolerate any node failures. Always create an odd number. [mon] -### http://docs.ceph.com/docs/master/rados/configuration/mon-config-ref/ -### http://docs.ceph.com/docs/master/rados/configuration/mon-osd-interaction/ +### http://docs.ceph.com/en/latest/rados/configuration/mon-config-ref/ +### http://docs.ceph.com/en/latest/rados/configuration/mon-osd-interaction/ # The IDs of initial monitors in a cluster during startup. 
# If specified, Ceph requires an odd number of monitors to form an @@ -217,7 +217,7 @@ # (Default: 900) ;mon osd report timeout = 300 -### http://docs.ceph.com/docs/master/rados/troubleshooting/log-and-debug/ +### http://docs.ceph.com/en/latest/rados/troubleshooting/log-and-debug/ # logging, for debugging monitor crashes, in order of # their likelihood of being helpful :) @@ -246,7 +246,7 @@ # experimental support for running multiple metadata servers. Do not run # multiple metadata servers in production. [mds] -### http://docs.ceph.com/docs/master/cephfs/mds-config-ref/ +### http://docs.ceph.com/en/latest/cephfs/mds-config-ref/ # where the mds keeps it's secret encryption keys ;keyring = /var/lib/ceph/mds/$name/keyring @@ -277,7 +277,7 @@ # You need at least one. Two or more if you want data to be replicated. # Define as many as you like. [osd] -### http://docs.ceph.com/docs/master/rados/configuration/osd-config-ref/ +### http://docs.ceph.com/en/latest/rados/configuration/osd-config-ref/ # The path to the OSDs data. # You must create the directory when deploying Ceph. @@ -337,7 +337,7 @@ # (Default: false) ;osd check for log corruption = true -### http://docs.ceph.com/docs/master/rados/configuration/journal-ref/ +### http://docs.ceph.com/en/latest/rados/configuration/journal-ref/ # The size of the journal in megabytes. If this is 0, # and the journal is a block device, the entire block device is used. @@ -363,7 +363,7 @@ ;debug filestore = 20 ;debug journal = 20 -### http://docs.ceph.com/docs/master/rados/configuration/filestore-config-ref/ +### http://docs.ceph.com/en/latest/rados/configuration/filestore-config-ref/ # The maximum interval in seconds for synchronizing the filestore. 
# Type: Double (optional) @@ -391,7 +391,7 @@ ## Filestore and OSD settings can be tweak to achieve better performance -### http://docs.ceph.com/docs/master/rados/configuration/filestore-config-ref/#misc +### http://docs.ceph.com/en/latest/rados/configuration/filestore-config-ref/#misc # Min number of files in a subdir before merging into parent NOTE: A negative value means to disable subdir merging # Type: Integer @@ -436,7 +436,7 @@ ## client settings [client] -### http://docs.ceph.com/docs/master/rbd/rbd-config-ref/ +### http://docs.ceph.com/en/latest/rbd/rbd-config-ref/ # Enable caching for RADOS Block Device (RBD). # Type: Boolean @@ -492,7 +492,7 @@ ## radosgw client settings [client.radosgw.gateway] -### http://docs.ceph.com/docs/master/radosgw/config-ref/ +### http://docs.ceph.com/en/latest/radosgw/config-ref/ # Sets the location of the data files for Ceph Object Gateway. # You must create the directory when deploying Ceph. diff --git a/src/seastar b/src/seastar -Subproject d2c0df06d1fd92734d093cea04a13f820c0dbd0 +Subproject afafbaa8d43627fffa00541ac26a31904d0268a diff --git a/src/test/ObjectMap/test_object_map.cc b/src/test/ObjectMap/test_object_map.cc index ad2c56e5a03..db19c17e8cd 100644 --- a/src/test/ObjectMap/test_object_map.cc +++ b/src/test/ObjectMap/test_object_map.cc @@ -1,4 +1,5 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +#include <iterator> #include <map> #include <set> #include <boost/scoped_ptr.hpp> @@ -20,14 +21,10 @@ using namespace std; template <typename T> typename T::iterator rand_choose(T &cont) { - if (cont.size() == 0) { - return cont.end(); + if (std::empty(cont)) { + return std::end(cont); } - int index = rand() % cont.size(); - typename T::iterator retval = cont.begin(); - - for (; index > 0; --index) ++retval; - return retval; + return std::next(std::begin(cont), rand() % cont.size()); } string num_str(unsigned i) { diff --git a/src/test/admin_socket_output.cc b/src/test/admin_socket_output.cc index 
76e6e567726..b178e3841cd 100644 --- a/src/test/admin_socket_output.cc +++ b/src/test/admin_socket_output.cc @@ -14,7 +14,6 @@ #include <iostream> #include <regex> // For regex, regex_search -#include <experimental/filesystem> // For extension #include "common/admin_socket_client.h" // For AdminSocketClient #include "common/ceph_json.h" // For JSONParser, JSONObjIter diff --git a/src/test/admin_socket_output.h b/src/test/admin_socket_output.h index e8e65d69ed4..b09f05167c4 100644 --- a/src/test/admin_socket_output.h +++ b/src/test/admin_socket_output.h @@ -19,9 +19,13 @@ #include <map> #include <set> #include <vector> -#include <experimental/filesystem> // For path - +#if __has_include(<filesystem>) // For extension +#include <filesystem> +namespace fs = std::filesystem; +#else +#include <experimental/filesystem> namespace fs = std::experimental::filesystem; +#endif using socket_results = std::map<std::string, std::string>; using test_functions = diff --git a/src/test/cls_cas/test_cls_cas.cc b/src/test/cls_cas/test_cls_cas.cc index 01db8a32be6..e730f0d128a 100644 --- a/src/test/cls_cas/test_cls_cas.cc +++ b/src/test/cls_cas/test_cls_cas.cc @@ -327,7 +327,7 @@ TEST(chunk_refs_t, size) bufferlist bl2; encode(a, bl2); if (!bl.contents_equal(bl2)) { - Formatter *f = Formatter::create("json-pretty"); + std::unique_ptr<Formatter> f(Formatter::create("json-pretty")); cout << "original:\n"; f->dump_object("refs", r); f->flush(cout); diff --git a/src/test/common/test_cdc.cc b/src/test/common/test_cdc.cc index 692c2ecb8fd..36047655ef7 100644 --- a/src/test/common/test_cdc.cc +++ b/src/test/common/test_cdc.cc @@ -11,28 +11,6 @@ #include "common/CDC.h" #include "gtest/gtest.h" -void generate_buffer(int size, bufferlist *outbl, int seed = 0) -{ - std::mt19937_64 engine, engine2; - engine.seed(seed); - engine2.seed(seed); - - // assemble from randomly-sized segments! 
- outbl->clear(); - auto left = size; - while (left) { - size_t l = std::min<size_t>((engine2() & 0xffff0) + 16, left); - left -= l; - bufferptr p(l); - p.set_length(l); - char *b = p.c_str(); - for (size_t i = 0; i < l / sizeof(uint64_t); ++i) { - ((ceph_le64 *)b)[i] = init_le64(engine()); - } - outbl->append(p); - } -} - class CDCTest : public ::testing::Test, public ::testing::WithParamInterface<const char*> { public: diff --git a/src/test/common/test_static_ptr.cc b/src/test/common/test_static_ptr.cc index 4bfc77bb278..f1c07c81b45 100644 --- a/src/test/common/test_static_ptr.cc +++ b/src/test/common/test_static_ptr.cc @@ -108,7 +108,7 @@ TEST(StaticPtr, CreateEmplace) { EXPECT_EQ(p->func(), 9); } -TEST(StaticPtr, CopyMove) { +TEST(StaticPtr, Move) { // Won't compile. Good. // static_ptr<base, sizeof(base)> p1(std::in_place_type_t<grandchild>{}, 3); @@ -116,11 +116,6 @@ TEST(StaticPtr, CopyMove) { static_ptr<base, sizeof(grandchild)> p2(std::in_place_type_t<grandchild>{}, 3); - // This also does not compile. Good. 
- // p1 = p2; - p2 = p1; - EXPECT_EQ(p1->func(), 0); - p2 = std::move(p1); EXPECT_EQ(p1->func(), 0); } @@ -129,9 +124,6 @@ TEST(StaticPtr, ImplicitUpcast) { static_ptr<base, sizeof(grandchild)> p1; static_ptr<sibling2, sizeof(grandchild)> p2(std::in_place_type_t<grandchild>{}, 3); - p1 = p2; - EXPECT_EQ(p1->func(), 9); - p1 = std::move(p2); EXPECT_EQ(p1->func(), 9); @@ -145,10 +137,6 @@ TEST(StaticPtr, StaticCast) { static_ptr<base, sizeof(grandchild)> p1(std::in_place_type_t<grandchild>{}, 3); static_ptr<sibling2, sizeof(grandchild)> p2; - p2 = ceph::static_pointer_cast<sibling2, sizeof(grandchild)>(p1); - EXPECT_EQ(p2->func(), 9); - EXPECT_EQ(p2->call(10), 30); - p2 = ceph::static_pointer_cast<sibling2, sizeof(grandchild)>(std::move(p1)); EXPECT_EQ(p2->func(), 9); EXPECT_EQ(p2->call(10), 30); @@ -158,24 +146,12 @@ TEST(StaticPtr, DynamicCast) { static constexpr auto sz = sizeof(great_grandchild); { static_ptr<base, sz> p1(std::in_place_type_t<grandchild>{}, 3); - auto p2 = ceph::dynamic_pointer_cast<great_grandchild, sz>(p1); - EXPECT_FALSE(p2); - } - { - static_ptr<base, sz> p1(std::in_place_type_t<grandchild>{}, 3); auto p2 = ceph::dynamic_pointer_cast<great_grandchild, sz>(std::move(p1)); EXPECT_FALSE(p2); } { static_ptr<base, sz> p1(std::in_place_type_t<grandchild>{}, 3); - auto p2 = ceph::dynamic_pointer_cast<grandchild, sz>(p1); - EXPECT_TRUE(p2); - EXPECT_EQ(p2->func(), 9); - EXPECT_EQ(p2->call(10), 30); - } - { - static_ptr<base, sz> p1(std::in_place_type_t<grandchild>{}, 3); auto p2 = ceph::dynamic_pointer_cast<grandchild, sz>(std::move(p1)); EXPECT_TRUE(p2); EXPECT_EQ(p2->func(), 9); @@ -197,17 +173,6 @@ TEST(StaticPtr, ConstCast) { static constexpr auto sz = sizeof(constable); { auto p1 = make_static<const constable>(); - static_assert(std::is_const<decltype(p1)::element_type>{}, - "Things are not as const as they ought to be."); - EXPECT_EQ(p1->foo(), 5); - auto p2 = ceph::const_pointer_cast<constable, sz>(p1); - 
static_assert(!std::is_const<decltype(p2)::element_type>{}, - "Things are more const than they ought to be."); - EXPECT_TRUE(p2); - EXPECT_EQ(p2->foo(), 2); - } - { - auto p1 = make_static<const constable>(); EXPECT_EQ(p1->foo(), 5); auto p2 = ceph::const_pointer_cast<constable, sz>(std::move(p1)); static_assert(!std::is_const<decltype(p2)::element_type>{}, @@ -221,17 +186,6 @@ TEST(StaticPtr, ReinterpretCast) { static constexpr auto sz = sizeof(grandchild); { auto p1 = make_static<grandchild>(3); - auto p2 = ceph::reinterpret_pointer_cast<constable, sz>(p1); - static_assert(std::is_same<decltype(p2)::element_type, constable>{}, - "Reinterpret is screwy."); - auto p3 = ceph::reinterpret_pointer_cast<grandchild, sz>(p2); - static_assert(std::is_same<decltype(p3)::element_type, grandchild>{}, - "Reinterpret is screwy."); - EXPECT_EQ(p3->func(), 9); - EXPECT_EQ(p3->call(10), 30); - } - { - auto p1 = make_static<grandchild>(3); auto p2 = ceph::reinterpret_pointer_cast<constable, sz>(std::move(p1)); static_assert(std::is_same<decltype(p2)::element_type, constable>{}, "Reinterpret is screwy."); @@ -255,6 +209,5 @@ struct exceptional { TEST(StaticPtr, Exceptional) { static_ptr<exceptional> p1(std::in_place_type_t<exceptional>{}); - EXPECT_ANY_THROW(static_ptr<exceptional> p2(p1)); EXPECT_ANY_THROW(static_ptr<exceptional> p2(std::move(p1))); } diff --git a/src/test/common/test_util.cc b/src/test/common/test_util.cc index cf589bafc84..6249d387656 100644 --- a/src/test/common/test_util.cc +++ b/src/test/common/test_util.cc @@ -16,12 +16,18 @@ #include "include/util.h" #include "gtest/gtest.h" +#if __has_include(<filesystem>) +#include <filesystem> +namespace fs = std::filesystem; +#else #include <experimental/filesystem> +namespace fs = std::experimental::filesystem; +#endif #if defined(__linux__) TEST(util, collect_sys_info) { - if (!std::experimental::filesystem::exists("/etc/os-release")) { + if (!fs::exists("/etc/os-release")) { GTEST_SKIP() << "skipping as 
'/etc/os-release' does not exist"; } diff --git a/src/test/confutils.cc b/src/test/confutils.cc index 831e913351f..88e5bf450ac 100644 --- a/src/test/confutils.cc +++ b/src/test/confutils.cc @@ -231,17 +231,6 @@ const char illegal_conf4[] = "\ keyring = osd_keyring ; osd's keyring\n\ "; -#if BOOST_VERSION < 107200 -// Boost::spirit > 1.72 asserts on chars that are not < 0x7f -// unicode config file -const char unicode_config_1[] = "\ -[global]\n\ - log file = \x66\xd1\x86\xd1\x9d\xd3\xad\xd3\xae \n\ - pid file = foo-bar\n\ -[osd0]\n\ -"; -#endif - const char override_config_1[] = "\ [global]\n\ log file = global_log\n\ @@ -365,15 +354,6 @@ TEST(ConfUtils, ReadFiles2) { ASSERT_EQ(val, "/quite/a/long/path/for/a/log/file"); ASSERT_EQ(cf1.read("global", "pid file", val), 0); ASSERT_EQ(val, "spork"); - -#if BOOST_VERSION < 107200 - std::string unicode_config_1f(next_tempfile(unicode_config_1)); - ConfFile cf2; - ASSERT_EQ(cf2.parse_file(unicode_config_1f.c_str(), &err), 0); - ASSERT_EQ(err.tellp(), 0U); - ASSERT_EQ(cf2.read("global", "log file", val), 0); - ASSERT_EQ(val, "\x66\xd1\x86\xd1\x9d\xd3\xad\xd3\xae"); -#endif } TEST(ConfUtils, IllegalFiles) { diff --git a/src/test/crimson/CMakeLists.txt b/src/test/crimson/CMakeLists.txt index 86f76dee64e..3dedce103f6 100644 --- a/src/test/crimson/CMakeLists.txt +++ b/src/test/crimson/CMakeLists.txt @@ -1,3 +1,12 @@ +# the crimson's backfill doesn't need nor use seastar +add_executable(unittest-crimson-backfill + test_backfill.cc + ${PROJECT_SOURCE_DIR}/src/auth/Crypto.cc + ${PROJECT_SOURCE_DIR}/src/crimson/osd/backfill_state.cc + ${PROJECT_SOURCE_DIR}/src/osd/recovery_types.cc) +add_ceph_unittest(unittest-crimson-backfill) +target_link_libraries(unittest-crimson-backfill crimson GTest::Main) + add_executable(unittest-seastar-buffer test_buffer.cc) add_ceph_test(unittest-seastar-buffer diff --git a/src/test/crimson/seastore/onode_tree/CMakeLists.txt b/src/test/crimson/seastore/onode_tree/CMakeLists.txt index 
4d6f414d9d0..5947d8a68f0 100644 --- a/src/test/crimson/seastore/onode_tree/CMakeLists.txt +++ b/src/test/crimson/seastore/onode_tree/CMakeLists.txt @@ -6,3 +6,10 @@ target_link_libraries(test-seastore-onode-tree-node GTest::Main crimson-os crimson-common) + +add_executable(unittest-staged-fltree + test_staged_fltree.cc + ../../gtest_seastar.cc) +add_ceph_unittest(unittest-staged-fltree) +target_link_libraries(unittest-staged-fltree + crimson-seastore) diff --git a/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc b/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc new file mode 100644 index 00000000000..e29481c23e3 --- /dev/null +++ b/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc @@ -0,0 +1,1197 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 smarttab + +#include <array> +#include <cstring> +#include <memory> +#include <set> +#include <sstream> +#include <vector> + +#include "crimson/common/log.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_layout.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/tree.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h" + +#include "test/crimson/gtest_seastar.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" + +using namespace crimson::os::seastore::onode; + +namespace { + constexpr bool IS_DUMMY_SYNC = false; + + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } + + ghobject_t make_ghobj( + shard_t shard, pool_t pool, crush_hash_t crush, + std::string ns, std::string oid, snap_t snap, gen_t gen) { + return ghobject_t{shard_id_t{shard}, pool, crush, ns, oid, snap, gen}; + } + + // return a key_view_t and its underlying memory buffer. + // the buffer needs to be freed manually. 
+ std::pair<key_view_t, void*> build_key_view(const ghobject_t& hobj) { + key_hobj_t key_hobj(hobj); + size_t key_size = sizeof(shard_pool_crush_t) + sizeof(snap_gen_t) + + ns_oid_view_t::estimate_size<KeyT::HOBJ>(key_hobj); + void* p_mem = std::malloc(key_size); + + key_view_t key_view; + char* p_fill = (char*)p_mem + key_size; + + auto spc = shard_pool_crush_t::from_key<KeyT::HOBJ>(key_hobj); + p_fill -= sizeof(shard_pool_crush_t); + std::memcpy(p_fill, &spc, sizeof(shard_pool_crush_t)); + key_view.set(*reinterpret_cast<const shard_pool_crush_t*>(p_fill)); + + auto p_ns_oid = p_fill; + ns_oid_view_t::test_append<KeyT::HOBJ>(key_hobj, p_fill); + ns_oid_view_t ns_oid_view(p_ns_oid); + key_view.set(ns_oid_view); + + auto sg = snap_gen_t::from_key<KeyT::HOBJ>(key_hobj); + p_fill -= sizeof(snap_gen_t); + ceph_assert(p_fill == (char*)p_mem); + std::memcpy(p_fill, &sg, sizeof(snap_gen_t)); + key_view.set(*reinterpret_cast<const snap_gen_t*>(p_fill)); + + return {key_view, p_mem}; + } +} + +struct a_basic_test_t : public seastar_test_suite_t {}; + +TEST_F(a_basic_test_t, 1_basic_sizes) +{ + logger().info("\n" + "Bytes of struct:\n" + " node_header_t: {}\n" + " shard_pool_t: {}\n" + " shard_pool_crush_t: {}\n" + " crush_t: {}\n" + " snap_gen_t: {}\n" + " slot_0_t: {}\n" + " slot_1_t: {}\n" + " slot_3_t: {}\n" + " node_fields_0_t: {}\n" + " node_fields_1_t: {}\n" + " node_fields_2_t: {}\n" + " internal_fields_3_t: {}\n" + " leaf_fields_3_t: {}\n" + " internal_sub_item_t: {}", + sizeof(node_header_t), sizeof(shard_pool_t), + sizeof(shard_pool_crush_t), sizeof(crush_t), sizeof(snap_gen_t), + sizeof(slot_0_t), sizeof(slot_1_t), sizeof(slot_3_t), + sizeof(node_fields_0_t), sizeof(node_fields_1_t), sizeof(node_fields_2_t), + sizeof(internal_fields_3_t), sizeof(leaf_fields_3_t), sizeof(internal_sub_item_t) + ); + + auto hobj = make_ghobj(0, 0, 0, "n", "o", 0, 0); + key_hobj_t key(hobj); + auto [key_view, p_mem] = build_key_view(hobj); + onode_t value = {2}; +#define 
_STAGE_T(NodeType) node_to_stage_t<typename NodeType::node_stage_t> +#define NXT_T(StageType) staged<typename StageType::next_param_t> + laddr_packed_t i_value{0}; + logger().info("\n" + "Bytes of a key-value insertion (full-string):\n" + " s-p-c, 'n'-'o', s-g => onode_t(2): typically internal 41B, leaf 35B\n" + " InternalNode0: {} {} {}\n" + " InternalNode1: {} {} {}\n" + " InternalNode2: {} {}\n" + " InternalNode3: {}\n" + " LeafNode0: {} {} {}\n" + " LeafNode1: {} {} {}\n" + " LeafNode2: {} {}\n" + " LeafNode3: {}", + _STAGE_T(InternalNode0)::template insert_size<KeyT::VIEW>(key_view, i_value), + NXT_T(_STAGE_T(InternalNode0))::template insert_size<KeyT::VIEW>(key_view, i_value), + NXT_T(NXT_T(_STAGE_T(InternalNode0)))::template insert_size<KeyT::VIEW>(key_view, i_value), + _STAGE_T(InternalNode1)::template insert_size<KeyT::VIEW>(key_view, i_value), + NXT_T(_STAGE_T(InternalNode1))::template insert_size<KeyT::VIEW>(key_view, i_value), + NXT_T(NXT_T(_STAGE_T(InternalNode1)))::template insert_size<KeyT::VIEW>(key_view, i_value), + _STAGE_T(InternalNode2)::template insert_size<KeyT::VIEW>(key_view, i_value), + NXT_T(_STAGE_T(InternalNode2))::template insert_size<KeyT::VIEW>(key_view, i_value), + _STAGE_T(InternalNode3)::template insert_size<KeyT::VIEW>(key_view, i_value), + _STAGE_T(LeafNode0)::template insert_size<KeyT::HOBJ>(key, value), + NXT_T(_STAGE_T(LeafNode0))::template insert_size<KeyT::HOBJ>(key, value), + NXT_T(NXT_T(_STAGE_T(LeafNode0)))::template insert_size<KeyT::HOBJ>(key, value), + _STAGE_T(LeafNode1)::template insert_size<KeyT::HOBJ>(key, value), + NXT_T(_STAGE_T(LeafNode1))::template insert_size<KeyT::HOBJ>(key, value), + NXT_T(NXT_T(_STAGE_T(LeafNode1)))::template insert_size<KeyT::HOBJ>(key, value), + _STAGE_T(LeafNode2)::template insert_size<KeyT::HOBJ>(key, value), + NXT_T(_STAGE_T(LeafNode2))::template insert_size<KeyT::HOBJ>(key, value), + _STAGE_T(LeafNode3)::template insert_size<KeyT::HOBJ>(key, value) + ); + std::free(p_mem); +} + 
+TEST_F(a_basic_test_t, 2_node_sizes) +{ + run_async([this] { + auto nm = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); + auto t = make_transaction(); + context_t c{*nm, *t}; + std::array<std::pair<NodeImplURef, NodeExtentMutable>, 16> nodes = { + InternalNode0::allocate(c, false, 1u).unsafe_get0().make_pair(), + InternalNode1::allocate(c, false, 1u).unsafe_get0().make_pair(), + InternalNode2::allocate(c, false, 1u).unsafe_get0().make_pair(), + InternalNode3::allocate(c, false, 1u).unsafe_get0().make_pair(), + InternalNode0::allocate(c, true, 1u).unsafe_get0().make_pair(), + InternalNode1::allocate(c, true, 1u).unsafe_get0().make_pair(), + InternalNode2::allocate(c, true, 1u).unsafe_get0().make_pair(), + InternalNode3::allocate(c, true, 1u).unsafe_get0().make_pair(), + LeafNode0::allocate(c, false, 0u).unsafe_get0().make_pair(), + LeafNode1::allocate(c, false, 0u).unsafe_get0().make_pair(), + LeafNode2::allocate(c, false, 0u).unsafe_get0().make_pair(), + LeafNode3::allocate(c, false, 0u).unsafe_get0().make_pair(), + LeafNode0::allocate(c, true, 0u).unsafe_get0().make_pair(), + LeafNode1::allocate(c, true, 0u).unsafe_get0().make_pair(), + LeafNode2::allocate(c, true, 0u).unsafe_get0().make_pair(), + LeafNode3::allocate(c, true, 0u).unsafe_get0().make_pair() + }; + std::ostringstream oss; + oss << "\nallocated nodes:"; + for (auto iter = nodes.begin(); iter != nodes.end(); ++iter) { + oss << "\n "; + auto& ref_node = iter->first; + ref_node->dump_brief(oss); + } + logger().info("{}", oss.str()); + }); +} + +struct b_dummy_tree_test_t : public seastar_test_suite_t { + NodeExtentManagerURef moved_nm; + TransactionRef ref_t; + Transaction& t; + context_t c; + Btree tree; + + b_dummy_tree_test_t() + : moved_nm{NodeExtentManager::create_dummy(IS_DUMMY_SYNC)}, + ref_t{make_transaction()}, + t{*ref_t}, + c{*moved_nm, t}, + tree{std::move(moved_nm)} {} + + seastar::future<> set_up_fut() override final { + return tree.mkfs(t).handle_error( + 
crimson::ct_error::all_same_way([] { + ASSERT_FALSE("Unable to mkfs"); + }) + ); + } +}; + +TEST_F(b_dummy_tree_test_t, 3_random_insert_leaf_node) +{ + run_async([this] { + logger().info("\n---------------------------------------------" + "\nrandomized leaf node insert:\n"); + auto key_s = make_ghobj(0, 0, 0, "ns", "oid", 0, 0); + auto key_e = make_ghobj( + std::numeric_limits<shard_t>::max(), 0, 0, "ns", "oid", 0, 0); + ASSERT_TRUE(tree.find(t, key_s).unsafe_get0().is_end()); + ASSERT_TRUE(tree.begin(t).unsafe_get0().is_end()); + ASSERT_TRUE(tree.last(t).unsafe_get0().is_end()); + + std::vector<std::tuple<ghobject_t, + const onode_t*, + Btree::Cursor>> insert_history; + auto f_validate_insert_new = [this, &insert_history] ( + const ghobject_t& key, const onode_t& value) { + auto [cursor, success] = tree.insert(t, key, value).unsafe_get0(); + ceph_assert(success); + insert_history.emplace_back(key, &value, cursor); + Onodes::validate_cursor(cursor, key, value); + auto cursor_ = tree.lower_bound(t, key).unsafe_get0(); + ceph_assert(cursor_.get_ghobj() == key); + ceph_assert(cursor_.value() == cursor.value()); + return cursor.value(); + }; + auto onodes = Onodes(15); + + // insert key1, onode1 at STAGE_LEFT + auto key1 = make_ghobj(3, 3, 3, "ns3", "oid3", 3, 3); + auto& onode1 = onodes.pick(); + auto p_value1 = f_validate_insert_new(key1, onode1); + + // validate lookup + { + auto cursor1_s = tree.lower_bound(t, key_s).unsafe_get0(); + ASSERT_EQ(cursor1_s.get_ghobj(), key1); + ASSERT_EQ(cursor1_s.value(), p_value1); + auto cursor1_e = tree.lower_bound(t, key_e).unsafe_get0(); + ASSERT_TRUE(cursor1_e.is_end()); + } + + // insert the same key1 with a different onode + { + auto& onode1_dup = onodes.pick(); + auto [cursor1_dup, ret1_dup] = tree.insert( + t, key1, onode1_dup).unsafe_get0(); + ASSERT_FALSE(ret1_dup); + Onodes::validate_cursor(cursor1_dup, key1, onode1); + } + + // insert key2, onode2 to key1's left at STAGE_LEFT + // insert node front at STAGE_LEFT + auto 
key2 = make_ghobj(2, 2, 2, "ns3", "oid3", 3, 3); + auto& onode2 = onodes.pick(); + f_validate_insert_new(key2, onode2); + + // insert key3, onode3 to key1's right at STAGE_LEFT + // insert node last at STAGE_LEFT + auto key3 = make_ghobj(4, 4, 4, "ns3", "oid3", 3, 3); + auto& onode3 = onodes.pick(); + f_validate_insert_new(key3, onode3); + + // insert key4, onode4 to key1's left at STAGE_STRING (collision) + auto key4 = make_ghobj(3, 3, 3, "ns2", "oid2", 3, 3); + auto& onode4 = onodes.pick(); + f_validate_insert_new(key4, onode4); + + // insert key5, onode5 to key1's right at STAGE_STRING (collision) + auto key5 = make_ghobj(3, 3, 3, "ns4", "oid4", 3, 3); + auto& onode5 = onodes.pick(); + f_validate_insert_new(key5, onode5); + + // insert key6, onode6 to key1's left at STAGE_RIGHT + auto key6 = make_ghobj(3, 3, 3, "ns3", "oid3", 2, 2); + auto& onode6 = onodes.pick(); + f_validate_insert_new(key6, onode6); + + // insert key7, onode7 to key1's right at STAGE_RIGHT + auto key7 = make_ghobj(3, 3, 3, "ns3", "oid3", 4, 4); + auto& onode7 = onodes.pick(); + f_validate_insert_new(key7, onode7); + + // insert node front at STAGE_RIGHT + auto key8 = make_ghobj(2, 2, 2, "ns3", "oid3", 2, 2); + auto& onode8 = onodes.pick(); + f_validate_insert_new(key8, onode8); + + // insert node front at STAGE_STRING (collision) + auto key9 = make_ghobj(2, 2, 2, "ns2", "oid2", 3, 3); + auto& onode9 = onodes.pick(); + f_validate_insert_new(key9, onode9); + + // insert node last at STAGE_RIGHT + auto key10 = make_ghobj(4, 4, 4, "ns3", "oid3", 4, 4); + auto& onode10 = onodes.pick(); + f_validate_insert_new(key10, onode10); + + // insert node last at STAGE_STRING (collision) + auto key11 = make_ghobj(4, 4, 4, "ns4", "oid4", 3, 3); + auto& onode11 = onodes.pick(); + f_validate_insert_new(key11, onode11); + + // insert key, value randomly until a perfect 3-ary tree is formed + std::vector<std::pair<ghobject_t, const onode_t*>> kvs{ + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2), &onodes.pick()}, + 
{make_ghobj(2, 2, 2, "ns2", "oid2", 4, 4), &onodes.pick()}, + {make_ghobj(2, 2, 2, "ns3", "oid3", 4, 4), &onodes.pick()}, + {make_ghobj(2, 2, 2, "ns4", "oid4", 2, 2), &onodes.pick()}, + {make_ghobj(2, 2, 2, "ns4", "oid4", 3, 3), &onodes.pick()}, + {make_ghobj(2, 2, 2, "ns4", "oid4", 4, 4), &onodes.pick()}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2), &onodes.pick()}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 4, 4), &onodes.pick()}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2), &onodes.pick()}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 4, 4), &onodes.pick()}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2), &onodes.pick()}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 3, 3), &onodes.pick()}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 4, 4), &onodes.pick()}, + {make_ghobj(4, 4, 4, "ns3", "oid3", 2, 2), &onodes.pick()}, + {make_ghobj(4, 4, 4, "ns4", "oid4", 2, 2), &onodes.pick()}, + {make_ghobj(4, 4, 4, "ns4", "oid4", 4, 4), &onodes.pick()}}; + auto [smallest_key, smallest_value] = kvs[0]; + auto [largest_key, largest_value] = kvs[kvs.size() - 1]; + std::random_shuffle(kvs.begin(), kvs.end()); + std::for_each(kvs.begin(), kvs.end(), [&f_validate_insert_new] (auto& kv) { + f_validate_insert_new(kv.first, *kv.second); + }); + ASSERT_EQ(tree.height(t).unsafe_get0(), 1); + ASSERT_FALSE(tree.test_is_clean()); + + for (auto& [k, v, c] : insert_history) { + // validate values in tree keep intact + auto cursor = tree.lower_bound(t, k).unsafe_get0(); + Onodes::validate_cursor(cursor, k, *v); + // validate values in cursors keep intact + Onodes::validate_cursor(c, k, *v); + } + Onodes::validate_cursor( + tree.lower_bound(t, key_s).unsafe_get0(), smallest_key, *smallest_value); + Onodes::validate_cursor( + tree.begin(t).unsafe_get0(), smallest_key, *smallest_value); + Onodes::validate_cursor( + tree.last(t).unsafe_get0(), largest_key, *largest_value); + + std::ostringstream oss; + tree.dump(t, oss); + logger().info("\n{}\n", oss.str()); + + insert_history.clear(); + }); +} + +static std::set<ghobject_t> 
build_key_set( + std::pair<unsigned, unsigned> range_2, + std::pair<unsigned, unsigned> range_1, + std::pair<unsigned, unsigned> range_0, + std::string padding = "", + bool is_internal = false) { + ceph_assert(range_1.second <= 10); + std::set<ghobject_t> ret; + ghobject_t key; + for (unsigned i = range_2.first; i < range_2.second; ++i) { + for (unsigned j = range_1.first; j < range_1.second; ++j) { + for (unsigned k = range_0.first; k < range_0.second; ++k) { + std::ostringstream os_ns; + os_ns << "ns" << j; + std::ostringstream os_oid; + os_oid << "oid" << j << padding; + key = make_ghobj(i, i, i, os_ns.str(), os_oid.str(), k, k); + ret.insert(key); + } + } + } + if (is_internal) { + ret.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9)); + } + return ret; +} + +class TestTree { + public: + TestTree() + : moved_nm{NodeExtentManager::create_dummy(IS_DUMMY_SYNC)}, + ref_t{make_transaction()}, + t{*ref_t}, + c{*moved_nm, t}, + tree{std::move(moved_nm)}, + onodes{0} {} + + seastar::future<> build_tree( + std::pair<unsigned, unsigned> range_2, + std::pair<unsigned, unsigned> range_1, + std::pair<unsigned, unsigned> range_0, + size_t onode_size) { + return seastar::async([this, range_2, range_1, range_0, onode_size] { + tree.mkfs(t).unsafe_get0(); + //logger().info("\n---------------------------------------------" + // "\nbefore leaf node split:\n"); + auto keys = build_key_set(range_2, range_1, range_0); + for (auto& key : keys) { + auto& value = onodes.create(onode_size); + insert_tree(key, value).get0(); + } + ASSERT_EQ(tree.height(t).unsafe_get0(), 1); + ASSERT_FALSE(tree.test_is_clean()); + //std::ostringstream oss; + //tree.dump(t, oss); + //logger().info("\n{}\n", oss.str()); + }); + } + + seastar::future<> build_tree( + const std::vector<ghobject_t>& keys, const std::vector<const onode_t*>& values) { + return seastar::async([this, keys, values] { + tree.mkfs(t).unsafe_get0(); + //logger().info("\n---------------------------------------------" + // 
"\nbefore leaf node split:\n"); + ASSERT_EQ(keys.size(), values.size()); + auto key_iter = keys.begin(); + auto value_iter = values.begin(); + while (key_iter != keys.end()) { + insert_tree(*key_iter, **value_iter).get0(); + ++key_iter; + ++value_iter; + } + ASSERT_EQ(tree.height(t).unsafe_get0(), 1); + ASSERT_FALSE(tree.test_is_clean()); + //std::ostringstream oss; + //tree.dump(t, oss); + //logger().info("\n{}\n", oss.str()); + }); + } + + seastar::future<> split(const ghobject_t& key, const onode_t& value, + const split_expectation_t& expected) { + return seastar::async([this, key, &value, expected] { + Btree tree_clone(NodeExtentManager::create_dummy(IS_DUMMY_SYNC)); + auto ref_t_clone = make_transaction(); + Transaction& t_clone = *ref_t_clone; + tree_clone.test_clone_from(t_clone, t, tree).unsafe_get0(); + + logger().info("insert {}:", key_hobj_t(key)); + auto [cursor, success] = tree_clone.insert(t_clone, key, value).unsafe_get0(); + ASSERT_TRUE(success); + Onodes::validate_cursor(cursor, key, value); + + std::ostringstream oss; + tree_clone.dump(t_clone, oss); + logger().info("dump new root:\n{}", oss.str()); + EXPECT_EQ(tree_clone.height(t_clone).unsafe_get0(), 2); + + for (auto& [k, v, c] : insert_history) { + auto result = tree_clone.lower_bound(t_clone, k).unsafe_get0(); + Onodes::validate_cursor(result, k, *v); + } + auto result = tree_clone.lower_bound(t_clone, key).unsafe_get0(); + Onodes::validate_cursor(result, key, value); + EXPECT_TRUE(last_split.match(expected)); + }); + } + + const onode_t& create_onode(size_t size) { + return onodes.create(size); + } + + private: + seastar::future<> insert_tree(const ghobject_t& key, const onode_t& value) { + return seastar::async([this, &key, &value] { + auto [cursor, success] = tree.insert(t, key, value).unsafe_get0(); + ASSERT_TRUE(success); + Onodes::validate_cursor(cursor, key, value); + insert_history.emplace_back(key, &value, cursor); + }); + } + + NodeExtentManagerURef moved_nm; + TransactionRef ref_t; 
+ Transaction& t; + context_t c; + Btree tree; + Onodes onodes; + std::vector<std::tuple< + ghobject_t, const onode_t*, Btree::Cursor>> insert_history; +}; + +struct c_dummy_test_t : public seastar_test_suite_t {}; + +TEST_F(c_dummy_test_t, 4_split_leaf_node) +{ + run_async([this] { + { + TestTree test; + test.build_tree({2, 5}, {2, 5}, {2, 5}, 120).get0(); + + auto& onode = test.create_onode(1144); + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to left front at stage 2, 1, 0\n"); + test.split(make_ghobj(1, 1, 1, "ns3", "oid3", 3, 3), onode, + {2u, 2u, true, InsertType::BEGIN}).get0(); + test.split(make_ghobj(2, 2, 2, "ns1", "oid1", 3, 3), onode, + {2u, 1u, true, InsertType::BEGIN}).get0(); + test.split(make_ghobj(2, 2, 2, "ns2", "oid2", 1, 1), onode, + {2u, 0u, true, InsertType::BEGIN}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to left back at stage 0, 1, 2, 1, 0\n"); + test.split(make_ghobj(2, 2, 2, "ns4", "oid4", 5, 5), onode, + {2u, 0u, true, InsertType::LAST}).get0(); + test.split(make_ghobj(2, 2, 2, "ns5", "oid5", 3, 3), onode, + {2u, 1u, true, InsertType::LAST}).get0(); + test.split(make_ghobj(2, 3, 3, "ns3", "oid3", 3, 3), onode, + {2u, 2u, true, InsertType::LAST}).get0(); + test.split(make_ghobj(3, 3, 3, "ns1", "oid1", 3, 3), onode, + {2u, 1u, true, InsertType::LAST}).get0(); + test.split(make_ghobj(3, 3, 3, "ns2", "oid2", 1, 1), onode, + {2u, 0u, true, InsertType::LAST}).get0(); + + auto& onode0 = test.create_onode(1416); + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to right front at stage 0, 1, 2, 1, 0\n"); + test.split(make_ghobj(3, 3, 3, "ns4", "oid4", 5, 5), onode0, + {2u, 0u, false, InsertType::BEGIN}).get0(); + test.split(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), onode0, + {2u, 1u, false, InsertType::BEGIN}).get0(); + test.split(make_ghobj(3, 4, 4, "ns3", "oid3", 3, 3), onode0, + {2u, 2u, 
false, InsertType::BEGIN}).get0(); + test.split(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), onode0, + {2u, 1u, false, InsertType::BEGIN}).get0(); + test.split(make_ghobj(4, 4, 4, "ns2", "oid2", 1, 1), onode0, + {2u, 0u, false, InsertType::BEGIN}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to right back at stage 0, 1, 2\n"); + test.split(make_ghobj(4, 4, 4, "ns4", "oid4", 5, 5), onode0, + {2u, 0u, false, InsertType::LAST}).get0(); + test.split(make_ghobj(4, 4, 4, "ns5", "oid5", 3, 3), onode0, + {2u, 1u, false, InsertType::LAST}).get0(); + test.split(make_ghobj(5, 5, 5, "ns3", "oid3", 3, 3), onode0, + {2u, 2u, false, InsertType::LAST}).get0(); + + auto& onode1 = test.create_onode(316); + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to left middle at stage 0, 1, 2, 1, 0\n"); + test.split(make_ghobj(2, 2, 2, "ns4", "oid4", 5, 5), onode1, + {1u, 0u, true, InsertType::MID}).get0(); + test.split(make_ghobj(2, 2, 2, "ns5", "oid5", 3, 3), onode1, + {1u, 1u, true, InsertType::MID}).get0(); + test.split(make_ghobj(2, 2, 3, "ns3", "oid3", 3, 3), onode1, + {1u, 2u, true, InsertType::MID}).get0(); + test.split(make_ghobj(3, 3, 3, "ns1", "oid1", 3, 3), onode1, + {1u, 1u, true, InsertType::MID}).get0(); + test.split(make_ghobj(3, 3, 3, "ns2", "oid2", 1, 1), onode1, + {1u, 0u, true, InsertType::MID}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to left back at stage 0, 1, 0\n"); + test.split(make_ghobj(3, 3, 3, "ns2", "oid2", 5, 5), onode1, + {1u, 0u, true, InsertType::LAST}).get0(); + test.split(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), onode1, + {1u, 1u, true, InsertType::LAST}).get0(); + test.split(make_ghobj(3, 3, 3, "ns3", "oid3", 1, 1), onode1, + {1u, 0u, true, InsertType::LAST}).get0(); + + auto& onode2 = test.create_onode(452); + logger().info("\n---------------------------------------------" + "\nsplit at 
stage 1; insert to right front at stage 0, 1, 0\n"); + test.split(make_ghobj(3, 3, 3, "ns3", "oid3", 5, 5), onode2, + {1u, 0u, false, InsertType::BEGIN}).get0(); + test.split(make_ghobj(3, 3, 3, "ns3", "oid4", 3, 3), onode2, + {1u, 1u, false, InsertType::BEGIN}).get0(); + test.split(make_ghobj(3, 3, 3, "ns4", "oid4", 1, 1), onode2, + {1u, 0u, false, InsertType::BEGIN}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to right middle at stage 0, 1, 2, 1, 0\n"); + test.split(make_ghobj(3, 3, 3, "ns4", "oid4", 5, 5), onode2, + {1u, 0u, false, InsertType::MID}).get0(); + test.split(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), onode2, + {1u, 1u, false, InsertType::MID}).get0(); + test.split(make_ghobj(3, 3, 4, "ns3", "oid3", 3, 3), onode2, + {1u, 2u, false, InsertType::MID}).get0(); + test.split(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), onode2, + {1u, 1u, false, InsertType::MID}).get0(); + test.split(make_ghobj(4, 4, 4, "ns2", "oid2", 1, 1), onode2, + {1u, 0u, false, InsertType::MID}).get0(); + + auto& onode3 = test.create_onode(834); + logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to right middle at stage 0, 1, 2, 1, 0\n"); + test.split(make_ghobj(3, 3, 3, "ns4", "oid4", 5, 5), onode3, + {0u, 0u, false, InsertType::MID}).get0(); + test.split(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), onode3, + {0u, 1u, false, InsertType::MID}).get0(); + test.split(make_ghobj(3, 3, 4, "ns3", "oid3", 3, 3), onode3, + {0u, 2u, false, InsertType::MID}).get0(); + test.split(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), onode3, + {0u, 1u, false, InsertType::MID}).get0(); + test.split(make_ghobj(4, 4, 4, "ns2", "oid2", 1, 1), onode3, + {0u, 0u, false, InsertType::MID}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to right front at stage 0\n"); + test.split(make_ghobj(3, 3, 3, "ns4", "oid4", 2, 3), onode3, + {0u, 0u, false, 
InsertType::BEGIN}).get0(); + + auto& onode4 = test.create_onode(572); + logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to left back at stage 0\n"); + test.split(make_ghobj(3, 3, 3, "ns2", "oid2", 3, 4), onode4, + {0u, 0u, true, InsertType::LAST}).get0(); + } + + { + TestTree test; + test.build_tree({2, 4}, {2, 4}, {2, 4}, 232).get0(); + auto& onode = test.create_onode(1996); + logger().info("\n---------------------------------------------" + "\nsplit at [0, 0, 0]; insert to left front at stage 2, 1, 0\n"); + test.split(make_ghobj(1, 1, 1, "ns3", "oid3", 3, 3), onode, + {2u, 2u, true, InsertType::BEGIN}).get0(); + EXPECT_TRUE(last_split.match_split_pos({0, {0, {0}}})); + test.split(make_ghobj(2, 2, 2, "ns1", "oid1", 3, 3), onode, + {2u, 1u, true, InsertType::BEGIN}).get0(); + EXPECT_TRUE(last_split.match_split_pos({0, {0, {0}}})); + test.split(make_ghobj(2, 2, 2, "ns2", "oid2", 1, 1), onode, + {2u, 0u, true, InsertType::BEGIN}).get0(); + EXPECT_TRUE(last_split.match_split_pos({0, {0, {0}}})); + } + + { + TestTree test; + std::vector<ghobject_t> keys = { + make_ghobj(2, 2, 2, "ns3", "oid3", 3, 3), + make_ghobj(3, 3, 3, "ns3", "oid3", 3, 3)}; + std::vector<const onode_t*> values = { + &test.create_onode(1360), + &test.create_onode(1632)}; + test.build_tree(keys, values).get0(); + auto& onode = test.create_onode(1640); + logger().info("\n---------------------------------------------" + "\nsplit at [END, END, END]; insert to right at stage 0, 1, 2\n"); + test.split(make_ghobj(3, 3, 3, "ns3", "oid3", 4, 4), onode, + {0u, 0u, false, InsertType::BEGIN}).get0(); + EXPECT_TRUE(last_split.match_split_pos({1, {0, {1}}})); + test.split(make_ghobj(3, 3, 3, "ns4", "oid4", 3, 3), onode, + {1u, 1u, false, InsertType::BEGIN}).get0(); + EXPECT_TRUE(last_split.match_split_pos({1, {1, {0}}})); + test.split(make_ghobj(4, 4, 4, "ns3", "oid3", 3, 3), onode, + {2u, 2u, false, InsertType::BEGIN}).get0(); + 
EXPECT_TRUE(last_split.match_split_pos({2, {0, {0}}})); + } + }); +} + +namespace crimson::os::seastore::onode { + +class DummyChildPool { + class DummyChildImpl final : public NodeImpl { + public: + using URef = std::unique_ptr<DummyChildImpl>; + DummyChildImpl(const std::set<ghobject_t>& keys, bool is_level_tail, laddr_t laddr) + : keys{keys}, _is_level_tail{is_level_tail}, _laddr{laddr} { + std::tie(key_view, p_mem_key_view) = build_key_view(*keys.crbegin()); + } + ~DummyChildImpl() override { + std::free(p_mem_key_view); + } + + const std::set<ghobject_t>& get_keys() const { return keys; } + + void reset(const std::set<ghobject_t>& _keys, bool level_tail) { + keys = _keys; + _is_level_tail = level_tail; + std::free(p_mem_key_view); + std::tie(key_view, p_mem_key_view) = build_key_view(*keys.crbegin()); + } + + public: + laddr_t laddr() const override { return _laddr; } + bool is_level_tail() const override { return _is_level_tail; } + + protected: + field_type_t field_type() const override { return field_type_t::N0; } + level_t level() const override { return 0u; } + key_view_t get_largest_key_view() const override { return key_view; } + void prepare_mutate(context_t) override { + ceph_abort("impossible path"); } + bool is_empty() const override { + ceph_abort("impossible path"); } + node_offset_t free_size() const override { + ceph_abort("impossible path"); } + key_view_t get_key_view(const search_position_t&) const override { + ceph_abort("impossible path"); } + void next_position(search_position_t&) const override { + ceph_abort("impossible path"); } + node_stats_t get_stats() const override { + ceph_abort("impossible path"); } + std::ostream& dump(std::ostream&) const override { + ceph_abort("impossible path"); } + std::ostream& dump_brief(std::ostream&) const override { + ceph_abort("impossible path"); } + void validate_layout() const override { + ceph_abort("impossible path"); } + void test_copy_to(NodeExtentMutable&) const override { + 
ceph_abort("impossible path"); } + void test_set_tail(NodeExtentMutable&) override { + ceph_abort("impossible path"); } + + private: + std::set<ghobject_t> keys; + bool _is_level_tail; + laddr_t _laddr; + + key_view_t key_view; + void* p_mem_key_view; + }; + + class DummyChild final : public Node { + public: + ~DummyChild() override = default; + + node_future<> populate_split( + context_t c, std::set<Ref<DummyChild>>& splitable_nodes) { + ceph_assert(can_split()); + ceph_assert(splitable_nodes.find(this) != splitable_nodes.end()); + + size_t index; + const auto& keys = impl->get_keys(); + if (keys.size() == 2) { + index = 1; + } else { + index = rd() % (keys.size() - 2) + 1; + } + auto iter = keys.begin(); + std::advance(iter, index); + + std::set<ghobject_t> left_keys(keys.begin(), iter); + std::set<ghobject_t> right_keys(iter, keys.end()); + bool right_is_tail = impl->is_level_tail(); + impl->reset(left_keys, false); + auto right_child = DummyChild::create_new(right_keys, right_is_tail, pool); + if (!can_split()) { + splitable_nodes.erase(this); + } + if (right_child->can_split()) { + splitable_nodes.insert(right_child); + } + return insert_parent(c, right_child); + } + + node_future<> insert_and_split( + context_t c, const ghobject_t& insert_key, + std::set<Ref<DummyChild>>& splitable_nodes) { + const auto& keys = impl->get_keys(); + ceph_assert(keys.size() == 1); + auto& key = *keys.begin(); + ceph_assert(insert_key < key); + + std::set<ghobject_t> new_keys; + new_keys.insert(insert_key); + new_keys.insert(key); + impl->reset(new_keys, impl->is_level_tail()); + + splitable_nodes.clear(); + splitable_nodes.insert(this); + auto fut = populate_split(c, splitable_nodes); + ceph_assert(splitable_nodes.size() == 0); + return fut; + } + + bool match_pos(const search_position_t& pos) const { + ceph_assert(!is_root()); + return pos == parent_info().position; + } + + static Ref<DummyChild> create( + const std::set<ghobject_t>& keys, bool is_level_tail, + laddr_t addr, 
DummyChildPool& pool) { + auto ref_impl = std::make_unique<DummyChildImpl>(keys, is_level_tail, addr); + return new DummyChild(ref_impl.get(), std::move(ref_impl), pool); + } + + static Ref<DummyChild> create_new( + const std::set<ghobject_t>& keys, bool is_level_tail, DummyChildPool& pool) { + static laddr_t seed = 0; + return create(keys, is_level_tail, seed++, pool); + } + + static node_future<Ref<DummyChild>> create_initial( + context_t c, const std::set<ghobject_t>& keys, + DummyChildPool& pool, RootNodeTracker& root_tracker) { + auto initial = create_new(keys, true, pool); + return c.nm.get_super(c.t, root_tracker + ).safe_then([c, &pool, initial](auto super) { + initial->make_root_new(c, std::move(super)); + return initial->upgrade_root(c).safe_then([initial] { + return initial; + }); + }); + } + + protected: + node_future<> test_clone_non_root( + context_t, Ref<InternalNode> new_parent) const override { + ceph_assert(!is_root()); + auto p_pool_clone = pool.pool_clone_in_progress; + ceph_assert(p_pool_clone != nullptr); + auto clone = create( + impl->get_keys(), impl->is_level_tail(), impl->laddr(), *p_pool_clone); + clone->as_child(parent_info().position, new_parent); + return node_ertr::now(); + } + node_future<Ref<tree_cursor_t>> lookup_smallest(context_t) override { + ceph_abort("impossible path"); } + node_future<Ref<tree_cursor_t>> lookup_largest(context_t) override { + ceph_abort("impossible path"); } + node_future<> test_clone_root(context_t, RootNodeTracker&) const override { + ceph_abort("impossible path"); } + node_future<search_result_t> lower_bound_tracked( + context_t, const key_hobj_t&, MatchHistory&) override { + ceph_abort("impossible path"); } + node_future<> do_get_tree_stats(context_t, tree_stats_t&) override { + ceph_abort("impossible path"); } + + private: + DummyChild(DummyChildImpl* impl, DummyChildImpl::URef&& ref, DummyChildPool& pool) + : Node(std::move(ref)), impl{impl}, pool{pool} { + pool.track_node(this); + } + + bool 
can_split() const { return impl->get_keys().size() > 1; } + + DummyChildImpl* impl; + DummyChildPool& pool; + mutable std::random_device rd; + }; + + public: + using node_ertr = Node::node_ertr; + template <class ValueT=void> + using node_future = Node::node_future<ValueT>; + + DummyChildPool() = default; + ~DummyChildPool() { reset(); } + + node_future<> build_tree(const std::set<ghobject_t>& keys) { + reset(); + + // create tree + auto ref_nm = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); + p_nm = ref_nm.get(); + p_btree.emplace(std::move(ref_nm)); + return DummyChild::create_initial(get_context(), keys, *this, *p_btree->root_tracker + ).safe_then([this](auto initial_child) { + // split + splitable_nodes.insert(initial_child); + return crimson::do_until([this] { + if (splitable_nodes.empty()) { + return node_ertr::make_ready_future<bool>(true); + } + auto index = rd() % splitable_nodes.size(); + auto iter = splitable_nodes.begin(); + std::advance(iter, index); + Ref<DummyChild> child = *iter; + return child->populate_split(get_context(), splitable_nodes + ).safe_then([] { + return node_ertr::make_ready_future<bool>(false); + }); + }); + }).safe_then([this] { + //std::ostringstream oss; + //p_btree->dump(t(), oss); + //logger().info("\n{}\n", oss.str()); + return p_btree->height(t()); + }).safe_then([](auto height) { + ceph_assert(height == 2); + }); + } + + seastar::future<> test_split(ghobject_t key, search_position_t pos, + const split_expectation_t& expected) { + return seastar::async([this, key, pos, expected] { + logger().info("insert {} at {}:", key_hobj_t(key), pos); + DummyChildPool pool_clone; + pool_clone_in_progress = &pool_clone; + auto ref_nm = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); + pool_clone.p_nm = ref_nm.get(); + pool_clone.p_btree.emplace(std::move(ref_nm)); + pool_clone.p_btree->test_clone_from( + pool_clone.t(), t(), *p_btree).unsafe_get0(); + pool_clone_in_progress = nullptr; + auto node_to_split = 
pool_clone.get_node_by_pos(pos); + node_to_split->insert_and_split( + pool_clone.get_context(), key, pool_clone.splitable_nodes).unsafe_get0(); + std::ostringstream oss; + pool_clone.p_btree->dump(pool_clone.t(), oss); + logger().info("dump new root:\n{}", oss.str()); + EXPECT_EQ(pool_clone.p_btree->height(pool_clone.t()).unsafe_get0(), 3); + EXPECT_TRUE(last_split.match(expected)); + }); + } + + private: + void reset() { + ceph_assert(pool_clone_in_progress == nullptr); + if (tracked_children.size()) { + ceph_assert(!p_btree->test_is_clean()); + tracked_children.clear(); + ceph_assert(p_btree->test_is_clean()); + p_nm = nullptr; + p_btree.reset(); + } else { + ceph_assert(!p_btree.has_value()); + } + splitable_nodes.clear(); + } + + void track_node(Ref<DummyChild> node) { + ceph_assert(tracked_children.find(node) == tracked_children.end()); + tracked_children.insert(node); + } + + Ref<DummyChild> get_node_by_pos(const search_position_t& pos) const { + auto iter = std::find_if( + tracked_children.begin(), tracked_children.end(), [&pos](auto& child) { + return child->match_pos(pos); + }); + ceph_assert(iter != tracked_children.end()); + return *iter; + } + + context_t get_context() { + ceph_assert(p_nm != nullptr); + return {*p_nm, t()}; + } + + Transaction& t() const { return *ref_t; } + + std::set<Ref<DummyChild>> tracked_children; + std::optional<Btree> p_btree; + NodeExtentManager* p_nm = nullptr; + TransactionRef ref_t = make_transaction(); + + std::random_device rd; + std::set<Ref<DummyChild>> splitable_nodes; + + DummyChildPool* pool_clone_in_progress = nullptr; +}; + +} + +TEST_F(c_dummy_test_t, 5_split_internal_node) +{ + run_async([this] { + DummyChildPool pool; + { + logger().info("\n---------------------------------------------" + "\nbefore internal node insert:\n"); + auto padding = std::string(250, '_'); + auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true); + keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 2, 2)); + 
// First key set, continued: drop the remaining entries of the
// (2, 2, 2, "ns2", "oid2") and (5, 5, 5, "ns4", "oid4") groups that were
// built with `padding` ...
      keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 3, 3));
      keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 4, 4));
      keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 2, 2));
      keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 3, 3));
      keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 4, 4));
      // ... and re-add both groups with a different oid length: 257 padding
      // characters for the first group, 248 for the last one.
      auto padding_s = std::string(257, '_');
      keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 2, 2));
      keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 3, 3));
      keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 4, 4));
      auto padding_e = std::string(248, '_');
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 2, 2));
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 3, 3));
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 4, 4));
      pool.build_tree(keys).unsafe_get0();

      // Each test_split() call below passes: the key to insert, the position
      // of the child node to split, and the expected split outcome.
      // NOTE(review): the expectation tuple presumably reads
      // {split stage, insert stage, insert-to-left, insert type}, matching
      // the scenario text logged before each group -- confirm against
      // split_expectation_t.
      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 2; insert to right front at stage 0, 1, 2, 1, 0\n");
      pool.test_split(make_ghobj(3, 3, 3, "ns4", "oid4" + padding, 5, 5), {2, {0, {0}}},
                      {2u, 0u, false, InsertType::BEGIN}).get();
      pool.test_split(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), {2, {0, {0}}},
                      {2u, 1u, false, InsertType::BEGIN}).get();
      pool.test_split(make_ghobj(3, 4, 4, "ns3", "oid3", 3, 3), {2, {0, {0}}},
                      {2u, 2u, false, InsertType::BEGIN}).get();
      pool.test_split(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), {2, {0, {0}}},
                      {2u, 1u, false, InsertType::BEGIN}).get();
      pool.test_split(make_ghobj(4, 4, 4, "ns2", "oid2" + padding, 1, 1), {2, {0, {0}}},
                      {2u, 0u, false, InsertType::BEGIN}).get();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 2; insert to right middle at stage 0, 1, 2, 1, 0\n");
      pool.test_split(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5), {3, {0, {0}}},
                      {2u, 0u, false, InsertType::MID}).get();
      pool.test_split(make_ghobj(4, 4, 4, "ns5", "oid5", 3, 3), {3, {0, {0}}},
                      {2u, 1u, false, InsertType::MID}).get();
      pool.test_split(make_ghobj(4, 4, 5, "ns3", "oid3", 3, 3), {3, {0, {0}}},
                      {2u, 2u, false, InsertType::MID}).get();
      pool.test_split(make_ghobj(5, 5, 5, "ns1", "oid1", 3, 3), {3, {0, {0}}},
                      {2u, 1u, false, InsertType::MID}).get();
      pool.test_split(make_ghobj(5, 5, 5, "ns2", "oid2" + padding, 1, 1), {3, {0, {0}}},
                      {2u, 0u, false, InsertType::MID}).get();

      // These inserts target the end position of the node rather than an
      // explicit child position.
      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 2; insert to right back at stage 0, 1, 2\n");
      pool.test_split(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 5, 5), search_position_t::end(),
                      {2u, 0u, false, InsertType::LAST}).get();
      pool.test_split(make_ghobj(5, 5, 5, "ns5", "oid5", 3, 3), search_position_t::end(),
                      {2u, 1u, false, InsertType::LAST}).get();
      pool.test_split(make_ghobj(6, 6, 6, "ns3", "oid3", 3, 3), search_position_t::end(),
                      {2u, 2u, false, InsertType::LAST}).get();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 0; insert to left front at stage 2, 1, 0\n");
      pool.test_split(make_ghobj(1, 1, 1, "ns3", "oid3", 3, 3), {0, {0, {0}}},
                      {0u, 2u, true, InsertType::BEGIN}).get();
      pool.test_split(make_ghobj(2, 2, 2, "ns1", "oid1", 3, 3), {0, {0, {0}}},
                      {0u, 1u, true, InsertType::BEGIN}).get();
      pool.test_split(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 1, 1), {0, {0, {0}}},
                      {0u, 0u, true, InsertType::BEGIN}).get();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 0; insert to left middle at stage 0, 1, 2, 1, 0\n");
      pool.test_split(make_ghobj(2, 2, 2, "ns4", "oid4" + padding, 5, 5), {1, {0, {0}}},
                      {0u, 0u, true, InsertType::MID}).get();
      pool.test_split(make_ghobj(2, 2, 2, "ns5", "oid5", 3, 3), {1, {0, {0}}},
                      {0u, 1u, true, InsertType::MID}).get();
      pool.test_split(make_ghobj(2, 2, 3, "ns3", "oid3" + std::string(80, '_'), 3, 3), {1, {0, {0}}},
                      {0u, 2u, true, InsertType::MID}).get();
      pool.test_split(make_ghobj(3, 3, 3, "ns1", "oid1", 3, 3), {1, {0, {0}}},
                      {0u, 1u, true, InsertType::MID}).get();
      pool.test_split(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 1, 1), {1, {0, {0}}},
                      {0u, 0u, true, InsertType::MID}).get();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 0; insert to left back at stage 0\n");
      pool.test_split(make_ghobj(3, 3, 3, "ns4", "oid4" + padding, 3, 4), {1, {2, {2}}},
                      {0u, 0u, true, InsertType::LAST}).get();
    }

    // Key set (1): 244-character padding plus three extra keys in the
    // (5, 5, 5, "ns4", "oid4") group; the pool is rebuilt from scratch.
    {
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (1):\n");
      auto padding = std::string(244, '_');
      auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true);
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 5, 5));
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 6, 6));
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 7, 7));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 2; insert to left back at stage 0, 1, 2, 1\n");
      pool.test_split(make_ghobj(3, 3, 3, "ns4", "oid4" + padding, 5, 5), {2, {0, {0}}},
                      {2u, 0u, true, InsertType::LAST}).get();
      pool.test_split(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), {2, {0, {0}}},
                      {2u, 1u, true, InsertType::LAST}).get();
      pool.test_split(make_ghobj(3, 4, 4, "n", "o", 3, 3), {2, {0, {0}}},
                      {2u, 2u, true, InsertType::LAST}).get();
      pool.test_split(make_ghobj(4, 4, 4, "n", "o", 3, 3), {2, {0, {0}}},
                      {2u, 1u, true, InsertType::LAST}).get();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 2; insert to left middle at stage 2\n");
      pool.test_split(make_ghobj(2, 3, 3, "n", "o", 3, 3), {1, {0, {0}}},
                      {2u, 2u, true, InsertType::MID}).get();
    }

    // Key set (2): 243-character padding plus one short key at (4, 4, 4) and
    // two extra keys in the (5, 5, 5, "ns4", "oid4") group.
    {
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (2):\n");
      auto padding = std::string(243, '_');
      auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true);
      keys.insert(make_ghobj(4, 4, 4, "n", "o", 3, 3));
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 5, 5));
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 6, 6));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 2; insert to left back at stage (0, 1, 2, 1,) 0\n");
      pool.test_split(make_ghobj(4, 4, 4, "n", "o", 2, 2), {2, {0, {0}}},
                      {2u, 0u, true, InsertType::LAST}).get();
    }

    // Key set (3): 420-character padding with the (4, 4, 4, "ns4", "oid4")
    // group removed.
    {
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (3):\n");
      auto padding = std::string(420, '_');
      auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding, true);
      keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 2, 2));
      keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 3, 3));
      keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 4, 4));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 1; insert to right front at stage 0, 1, 0\n");
      pool.test_split(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 5, 5), {1, {1, {0}}},
                      {1u, 0u, false, InsertType::BEGIN}).get();
      pool.test_split(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), {1, {1, {0}}},
                      {1u, 1u, false, InsertType::BEGIN}).get();
      pool.test_split(make_ghobj(3, 3, 3, "ns3", "oid3" + padding, 1, 1), {1, {1, {0}}},
                      {1u, 0u, false, InsertType::BEGIN}).get();
    }

    // Key set (4): 361-character padding, with the (2, 2, 2, "ns2", "oid2")
    // group replaced by a variant using 387 padding characters.
    {
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (4):\n");
      auto padding = std::string(361, '_');
      auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding, true);
      keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 2, 2));
      keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 3, 3));
      keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 4, 4));
      auto padding_s = std::string(387, '_');
      keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 2, 2));
      keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 3, 3));
      keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 4, 4));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 1; insert to left back at stage 0, 1\n");
      pool.test_split(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 5, 5), {1, {1, {0}}},
                      {1u, 0u, true, InsertType::LAST}).get();
      pool.test_split(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), {1, {1, {0}}},
                      {1u, 1u, true, InsertType::LAST}).get();
    }

    // Key set (5): 412-character padding; unlike the sets above,
    // build_key_set() is called without the trailing `true` argument, and a
    // "~last" key is inserted explicitly.
    {
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (5):\n");
      auto padding = std::string(412, '_');
      auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding);
      keys.insert(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3));
      keys.insert(make_ghobj(4, 4, 4, "ns3", "oid3" + padding, 5, 5));
      keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9));
      keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 2, 2));
      keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 3, 3));
      keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 4, 4));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 1; insert to left back at stage (0, 1,) 0\n");
      pool.test_split(make_ghobj(3, 3, 3, "ns2", "oid3", 2, 2), {1, {1, {0}}},
                      {1u, 0u, true, InsertType::LAST}).get();
    }

    // Key set (6): 328-character padding, one extra key with a shorter
    // (271-character) oid padding and an explicit "~last" key.
    {
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (6):\n");
      auto padding = std::string(328, '_');
      auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding);
      keys.insert(make_ghobj(5, 5, 5, "ns3", "oid3" + std::string(271, '_'), 3, 3));
      keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 0; insert to right front at stage 0\n");
pool.test_split(make_ghobj(3, 3, 3, "ns3", "oid3" + padding, 2, 3), {1, {1, {1}}},
                      {0u, 0u, false, InsertType::BEGIN}).get();
    }

    // Impossible to split at {0, 0, 0}
    // Impossible to split at [END, END, END]
  });
}

// Fixture that bootstraps a TreeBuilder<true> on top of a seastore-backed
// NodeExtentManager before each test and tears both down afterwards. The
// dummy node extent manager alternative is compiled out with `#if 0`.
struct d_seastore_tree_test_t :
    public seastar_test_suite_t, TMTestState {
  KVPool kvs;
  std::unique_ptr<TreeBuilder<true>> tree;

  // NOTE(review): the meaning of the individual KVPool arguments is not
  // visible here -- confirm against KVPool's constructor.
  d_seastore_tree_test_t()
    : kvs{{8, 11, 64, 256, 301, 320},
          {8, 16, 128, 512, 576, 640},
          {0, 32}, {0, 10}, {0, 4}} {}

  // Set up the transaction manager state, then create and bootstrap the tree.
  seastar::future<> set_up_fut() override final {
    return tm_setup().then([this] {
      tree = std::make_unique<TreeBuilder<true>>(kvs,
#if 0
        NodeExtentManager::create_dummy(IS_DUMMY_SYNC)
#else
        NodeExtentManager::create_seastore(*tm)
#endif
      );
      return tree->bootstrap();
    }).handle_error(
      crimson::ct_error::all_same_way([] {
        // a string literal is a non-null pointer, so this assertion always
        // fails; the literal doubles as the failure message
        ASSERT_FALSE("Unable to initiate tree");
      })
    );
  }

  // Release the tree before tearing down the transaction manager state.
  seastar::future<> tear_down_fut() override final {
    tree.reset();
    return tm_teardown();
  }
};

// Runs the TreeBuilder workload and fails the test on any error it reports.
TEST_F(d_seastore_tree_test_t, 6_random_insert_leaf_node)
{
  run([this] {
    return tree->run().handle_error(
      crimson::ct_error::all_same_way([] {
        // always fails, see set_up_fut()
        ASSERT_FALSE("Test failed");
      })
    );
  });
}
diff --git a/src/test/crimson/test_alien_echo.cc b/src/test/crimson/test_alien_echo.cc index b5710adcbea..4434b522be2 100644 --- a/src/test/crimson/test_alien_echo.cc +++ b/src/test/crimson/test_alien_echo.cc @@ -41,16 +41,18 @@ struct Server { crimson::common::Throttle byte_throttler; crimson::net::MessengerRef msgr; crimson::auth::DummyAuthClientServer dummy_auth; - struct ServerDispatcher : crimson::net::Dispatcher { + struct ServerDispatcher final : crimson::net::Dispatcher { unsigned count = 0; seastar::condition_variable on_reply; - seastar::future<> ms_dispatch(crimson::net::Connection* c, - MessageRef m) override { + std::optional<seastar::future<>> ms_dispatch(crimson::net::ConnectionRef c, + MessageRef m) final + { std::cout << "server got ping " << *m << std::endl; // reply with a pong 
return c->send(make_message<MPing>()).then([this] { ++count; on_reply.signal(); + return seastar::now(); }); } } dispatcher; @@ -67,11 +69,12 @@ struct Client { crimson::common::Throttle byte_throttler; crimson::net::MessengerRef msgr; crimson::auth::DummyAuthClientServer dummy_auth; - struct ClientDispatcher : crimson::net::Dispatcher { + struct ClientDispatcher final : crimson::net::Dispatcher { unsigned count = 0; seastar::condition_variable on_reply; - seastar::future<> ms_dispatch(crimson::net::Connection* c, - MessageRef m) override { + std::optional<seastar::future<>> ms_dispatch(crimson::net::ConnectionRef c, + MessageRef m) final + { std::cout << "client got pong " << *m << std::endl; ++count; on_reply.signal(); @@ -180,11 +183,11 @@ seastar_echo(const entity_addr_t addr, echo_role role, unsigned count) server.msgr->set_auth_client(&server.dummy_auth); server.msgr->set_auth_server(&server.dummy_auth); return server.msgr->bind(entity_addrvec_t{addr} - ).then([&server] { - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(server.dispatcher); - return server.msgr->start(chained_dispatchers); - }).then([&dispatcher=server.dispatcher, count] { + ).safe_then([&server] { + return server.msgr->start({&server.dispatcher}); + }, crimson::net::Messenger::bind_ertr::all_same_way([](auto& e) { + ceph_abort_msg("bind failed"); + })).then([&dispatcher=server.dispatcher, count] { return dispatcher.on_reply.wait([&dispatcher, count] { return dispatcher.count >= count; }); @@ -205,9 +208,7 @@ seastar_echo(const entity_addr_t addr, echo_role role, unsigned count) client.msgr->set_require_authorizer(false); client.msgr->set_auth_client(&client.dummy_auth); client.msgr->set_auth_server(&client.dummy_auth); - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(client.dispatcher); - return client.msgr->start(chained_dispatchers).then( + return 
client.msgr->start({&client.dispatcher}).then( [addr, &client, &disp=client.dispatcher, count] { auto conn = client.msgr->connect(addr, entity_name_t::TYPE_OSD); return seastar::do_until( diff --git a/src/test/crimson/test_backfill.cc b/src/test/crimson/test_backfill.cc new file mode 100644 index 00000000000..8f3bc0d9b1d --- /dev/null +++ b/src/test/crimson/test_backfill.cc @@ -0,0 +1,500 @@ +#include <algorithm> +#include <cstdlib> +#include <deque> +#include <functional> +#include <initializer_list> +#include <iostream> +#include <iterator> +#include <limits> +#include <map> +#include <set> +#include <string> + +#include <boost/statechart/event_base.hpp> +#include <gmock/gmock.h> +#include <gtest/gtest.h> + +#include "common/hobject.h" +#include "crimson/osd/backfill_state.h" +#include "osd/recovery_types.h" + + +// The sole purpose is to convert from the string representation. +// An alternative approach could use boost::range in FakeStore's +// constructor. +struct improved_hobject_t : hobject_t { + improved_hobject_t(const char parsable_name[]) { + this->parse(parsable_name); + } + improved_hobject_t(const hobject_t& obj) + : hobject_t(obj) { + } + bool operator==(const improved_hobject_t& rhs) const { + return static_cast<const hobject_t&>(*this) == \ + static_cast<const hobject_t&>(rhs); + } +}; + + +struct FakeStore { + using objs_t = std::map<improved_hobject_t, eversion_t>; + + objs_t objs; + + void push(const hobject_t& obj, eversion_t version) { + objs[obj] = version; + } + + void drop(const hobject_t& obj, const eversion_t version) { + auto it = objs.find(obj); + ceph_assert(it != std::end(objs)); + ceph_assert(it->second == version); + objs.erase(it); + } + + template <class Func> + hobject_t list(const hobject_t& start, Func&& per_entry) const { + auto it = objs.lower_bound(start); + for (auto max = std::numeric_limits<std::uint64_t>::max(); + it != std::end(objs) && max > 0; + ++it, --max) { + per_entry(*it); + } + return it != std::end(objs) ? 
static_cast<const hobject_t&>(it->first) + : hobject_t::get_max(); + } + + bool operator==(const FakeStore& rhs) const { + return std::size(objs) == std::size(rhs.objs) && \ + std::equal(std::begin(objs), std::end(objs), std::begin(rhs.objs)); + } + bool operator!=(const FakeStore& rhs) const { + return !(*this == rhs); + } +}; + + +struct FakeReplica { + FakeStore store; + hobject_t last_backfill; + + FakeReplica(FakeStore&& store) + : store(std::move(store)) { + } +}; + +struct FakePrimary { + FakeStore store; + eversion_t last_update; + eversion_t projected_last_update; + eversion_t log_tail; + + FakePrimary(FakeStore&& store) + : store(std::move(store)) { + } +}; + +class BackfillFixture : public crimson::osd::BackfillState::BackfillListener { + friend class BackfillFixtureBuilder; + + FakePrimary backfill_source; + std::map<pg_shard_t, FakeReplica> backfill_targets; + std::map<pg_shard_t, + std::vector<std::pair<hobject_t, eversion_t>>> enqueued_drops; + std::deque< + boost::intrusive_ptr< + const boost::statechart::event_base>> events_to_dispatch; + crimson::osd::BackfillState backfill_state; + + BackfillFixture(FakePrimary&& backfill_source, + std::map<pg_shard_t, FakeReplica>&& backfill_targets); + + template <class EventT> + void schedule_event(const EventT& event) { + events_to_dispatch.emplace_back(event.intrusive_from_this()); + } + + // BackfillListener { + void request_replica_scan( + const pg_shard_t& target, + const hobject_t& begin, + const hobject_t& end) override; + + void request_primary_scan( + const hobject_t& begin) override; + + void enqueue_push( + const hobject_t& obj, + const eversion_t& v) override; + + void enqueue_drop( + const pg_shard_t& target, + const hobject_t& obj, + const eversion_t& v) override; + + void maybe_flush() override; + + void update_peers_last_backfill( + const hobject_t& new_last_backfill) override; + + bool budget_available() const override; + +public: + MOCK_METHOD(void, backfilled, (), (override)); + // } + + 
void next_round(std::size_t how_many=1) { + ceph_assert(events_to_dispatch.size() >= how_many); + while (how_many-- > 0) { + backfill_state.process_event(std::move(events_to_dispatch.front())); + events_to_dispatch.pop_front(); + } + } + + void next_till_done() { + while (!events_to_dispatch.empty()) { + next_round(); + } + } + + bool all_stores_look_like(const FakeStore& reference) const { + const bool all_replica_match = std::all_of( + std::begin(backfill_targets), std::end(backfill_targets), + [&reference] (const auto kv) { + return kv.second.store == reference; + }); + return backfill_source.store == reference && all_replica_match; + } + + struct PeeringFacade; + struct PGFacade; +}; + +struct BackfillFixture::PeeringFacade + : public crimson::osd::BackfillState::PeeringFacade { + FakePrimary& backfill_source; + std::map<pg_shard_t, FakeReplica>& backfill_targets; + // sorry, this is duplicative but that's the interface + std::set<pg_shard_t> backfill_targets_as_set; + + PeeringFacade(FakePrimary& backfill_source, + std::map<pg_shard_t, FakeReplica>& backfill_targets) + : backfill_source(backfill_source), + backfill_targets(backfill_targets) { + std::transform( + std::begin(backfill_targets), std::end(backfill_targets), + std::inserter(backfill_targets_as_set, std::end(backfill_targets_as_set)), + [](auto pair) { + return pair.first; + }); + } + + hobject_t earliest_backfill() const override { + hobject_t e = hobject_t::get_max(); + for (const auto& kv : backfill_targets) { + e = std::min(kv.second.last_backfill, e); + } + return e; + } + const std::set<pg_shard_t>& get_backfill_targets() const override { + return backfill_targets_as_set; + } + const hobject_t& get_peer_last_backfill(pg_shard_t peer) const override { + return backfill_targets.at(peer).last_backfill; + } + const eversion_t& get_last_update() const override { + return backfill_source.last_update; + } + const eversion_t& get_log_tail() const override { + return backfill_source.log_tail; + } + + 
void scan_log_after(eversion_t, scan_log_func_t) const override { + /* NOP */ + } + + bool is_backfill_target(pg_shard_t peer) const override { + return backfill_targets.count(peer) == 1; + } + void update_complete_backfill_object_stats(const hobject_t &hoid, + const pg_stat_t &stats) override { + } + bool is_backfilling() const override { + return true; + } +}; + +struct BackfillFixture::PGFacade : public crimson::osd::BackfillState::PGFacade { + FakePrimary& backfill_source; + + PGFacade(FakePrimary& backfill_source) + : backfill_source(backfill_source) { + } + + const eversion_t& get_projected_last_update() const override { + return backfill_source.projected_last_update; + } +}; + +BackfillFixture::BackfillFixture( + FakePrimary&& backfill_source, + std::map<pg_shard_t, FakeReplica>&& backfill_targets) + : backfill_source(std::move(backfill_source)), + backfill_targets(std::move(backfill_targets)), + backfill_state(*this, + std::make_unique<PeeringFacade>(this->backfill_source, + this->backfill_targets), + std::make_unique<PGFacade>(this->backfill_source)) +{ + backfill_state.process_event(crimson::osd::BackfillState::Triggered{}.intrusive_from_this()); +} + +void BackfillFixture::request_replica_scan( + const pg_shard_t& target, + const hobject_t& begin, + const hobject_t& end) +{ + BackfillInterval bi; + bi.end = backfill_targets.at(target).store.list(begin, [&bi](auto kv) { + bi.objects.insert(std::move(kv)); + }); + bi.begin = begin; + bi.version = backfill_source.last_update; + + schedule_event(crimson::osd::BackfillState::ReplicaScanned{ target, std::move(bi) }); +} + +void BackfillFixture::request_primary_scan( + const hobject_t& begin) +{ + BackfillInterval bi; + bi.end = backfill_source.store.list(begin, [&bi](auto kv) { + bi.objects.insert(std::move(kv)); + }); + bi.begin = begin; + bi.version = backfill_source.last_update; + + schedule_event(crimson::osd::BackfillState::PrimaryScanned{ std::move(bi) }); +} + +void BackfillFixture::enqueue_push( + 
const hobject_t& obj, + const eversion_t& v) +{ + for (auto& [ _, bt ] : backfill_targets) { + bt.store.push(obj, v); + } + schedule_event(crimson::osd::BackfillState::ObjectPushed{ obj }); +} + +void BackfillFixture::enqueue_drop( + const pg_shard_t& target, + const hobject_t& obj, + const eversion_t& v) +{ + enqueued_drops[target].emplace_back(obj, v); +} + +void BackfillFixture::maybe_flush() +{ + for (const auto& [target, versioned_objs] : enqueued_drops) { + for (const auto& [obj, v] : versioned_objs) { + backfill_targets.at(target).store.drop(obj, v); + } + } + enqueued_drops.clear(); +} + +void BackfillFixture::update_peers_last_backfill( + const hobject_t& new_last_backfill) +{ +} + +bool BackfillFixture::budget_available() const +{ + return true; +} + +struct BackfillFixtureBuilder { + FakeStore backfill_source; + std::map<pg_shard_t, FakeReplica> backfill_targets; + + static BackfillFixtureBuilder add_source(FakeStore::objs_t objs) { + BackfillFixtureBuilder bfb; + bfb.backfill_source = FakeStore{ std::move(objs) }; + return bfb; + } + + BackfillFixtureBuilder&& add_target(FakeStore::objs_t objs) && { + const auto new_osd_num = std::size(backfill_targets); + const auto [ _, inserted ] = backfill_targets.emplace( + new_osd_num, FakeReplica{ FakeStore{std::move(objs)} }); + ceph_assert(inserted); + return std::move(*this); + } + + BackfillFixture get_result() && { + return BackfillFixture{ std::move(backfill_source), + std::move(backfill_targets) }; + } +}; + +// The straightest case: single primary, single replica. All have the same +// content in their object stores, so the entire backfill boils into just +// `request_primary_scan()` and `request_replica_scan()`. 
+TEST(backfill, same_primary_same_replica) +{ + const auto reference_store = FakeStore{ { + { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} }, + { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} }, + { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} }, + }}; + auto cluster_fixture = BackfillFixtureBuilder::add_source( + reference_store.objs + ).add_target( + reference_store.objs + ).get_result(); + + cluster_fixture.next_round(); + EXPECT_CALL(cluster_fixture, backfilled); + cluster_fixture.next_round(); + EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store)); +} + +TEST(backfill, one_empty_replica) +{ + const auto reference_store = FakeStore{ { + { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} }, + { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} }, + { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} }, + }}; + auto cluster_fixture = BackfillFixtureBuilder::add_source( + reference_store.objs + ).add_target( + { /* nothing */ } + ).get_result(); + + cluster_fixture.next_round(); + cluster_fixture.next_round(); + cluster_fixture.next_round(2); + EXPECT_CALL(cluster_fixture, backfilled); + cluster_fixture.next_round(); + EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store)); +} + +TEST(backfill, two_empty_replicas) +{ + const auto reference_store = FakeStore{ { + { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} }, + { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} }, + { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} }, + }}; + auto cluster_fixture = BackfillFixtureBuilder::add_source( + reference_store.objs + ).add_target( + { /* nothing 1 */ } + ).add_target( + { /* nothing 2 */ } + ).get_result(); + + EXPECT_CALL(cluster_fixture, backfilled); + cluster_fixture.next_till_done(); + + 
EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store)); +} + +namespace StoreRandomizer { + // FIXME: copied & pasted from test/test_snap_mapper.cc. We need to + // find a way to avoid code duplication in test. A static library? + std::string random_string(std::size_t size) { + std::string name; + for (size_t j = 0; j < size; ++j) { + name.push_back('a' + (std::rand() % 26)); + } + return name; + } + + hobject_t random_hobject() { + uint32_t mask{0}; + uint32_t bits{0}; + return hobject_t( + random_string(1+(std::rand() % 16)), + random_string(1+(std::rand() % 16)), + snapid_t(std::rand() % 1000), + (std::rand() & ((~0)<<bits)) | (mask & ~((~0)<<bits)), + 0, random_string(std::rand() % 16)); + } + + eversion_t random_eversion() { + return eversion_t{ std::rand() % 512U, std::rand() % 256UL }; + } + + FakeStore create() { + FakeStore store; + for (std::size_t i = std::rand() % 2048; i > 0; --i) { + store.push(random_hobject(), random_eversion()); + } + return store; + } + + template <class... Args> + void execute_random(Args&&... args) { + std::array<std::function<void()>, sizeof...(Args)> funcs = { + std::forward<Args>(args)... + }; + return std::move(funcs[std::rand() % std::size(funcs)])(); + } + + FakeStore mutate(const FakeStore& source_store) { + FakeStore mutated_store; + source_store.list(hobject_t{}, [&] (const auto& kv) { + const auto& [ oid, version ] = kv; + execute_random( + [] { /* just drop the entry */ }, + [&] { mutated_store.push(oid, version); }, + [&] { mutated_store.push(oid, random_eversion()); }, + [&] { mutated_store.push(random_hobject(), version); }, + [&] { + for (auto how_many = std::rand() % 8; how_many > 0; --how_many) { + mutated_store.push(random_hobject(), random_eversion()); + } + } + ); + }); + return mutated_store; + } +} + +// The name might suggest randomness is involved here. Well, that's true +// but till we know the seed the test still is repeatable. 
+TEST(backfill, one_pseudorandomized_replica) +{ + const auto reference_store = StoreRandomizer::create(); + auto cluster_fixture = BackfillFixtureBuilder::add_source( + reference_store.objs + ).add_target( + StoreRandomizer::mutate(reference_store).objs + ).get_result(); + + EXPECT_CALL(cluster_fixture, backfilled); + cluster_fixture.next_till_done(); + + EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store)); +} + +TEST(backfill, two_pseudorandomized_replicas) +{ + const auto reference_store = StoreRandomizer::create(); + auto cluster_fixture = BackfillFixtureBuilder::add_source( + reference_store.objs + ).add_target( + StoreRandomizer::mutate(reference_store).objs + ).add_target( + StoreRandomizer::mutate(reference_store).objs + ).get_result(); + + EXPECT_CALL(cluster_fixture, backfilled); + cluster_fixture.next_till_done(); + + EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store)); +} diff --git a/src/test/crimson/test_messenger.cc b/src/test/crimson/test_messenger.cc index 429509c4352..9e7adbe3bff 100644 --- a/src/test/crimson/test_messenger.cc +++ b/src/test/crimson/test_messenger.cc @@ -38,6 +38,15 @@ static std::random_device rd; static std::default_random_engine rng{rd()}; static bool verbose = false; +static entity_addr_t get_server_addr() { + static int port = 9030; + ++port; + entity_addr_t saddr; + saddr.parse("127.0.0.1", nullptr); + saddr.set_port(port); + return saddr; +} + static seastar::future<> test_echo(unsigned rounds, double keepalive_ratio, bool v2) @@ -48,13 +57,14 @@ static seastar::future<> test_echo(unsigned rounds, crimson::net::MessengerRef msgr; crimson::auth::DummyAuthClientServer dummy_auth; - seastar::future<> ms_dispatch(crimson::net::Connection* c, - MessageRef m) override { + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef c, MessageRef m) override { if (verbose) { logger().info("server got {}", *m); } // reply with a pong - return c->send(make_message<MPing>()); + std::ignore 
= c->send(make_message<MPing>()); + return {seastar::now()}; } seastar::future<> init(const entity_name_t& name, @@ -66,15 +76,18 @@ static seastar::future<> test_echo(unsigned rounds, msgr->set_require_authorizer(false); msgr->set_auth_client(&dummy_auth); msgr->set_auth_server(&dummy_auth); - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(*this); - return msgr->bind(entity_addrvec_t{addr}).then([this, chained_dispatchers]() mutable { - return msgr->start(chained_dispatchers); - }); + return msgr->bind(entity_addrvec_t{addr}).safe_then([this] { + return msgr->start({this}); + }, crimson::net::Messenger::bind_ertr::all_same_way( + [addr] (const std::error_code& e) { + logger().error("test_echo(): " + "there is another instance running at {}", addr); + ceph_abort(); + })); } seastar::future<> shutdown() { ceph_assert(msgr); - msgr->remove_dispatcher(*this); + msgr->stop(); return msgr->shutdown(); } }; @@ -91,15 +104,15 @@ static seastar::future<> test_echo(unsigned rounds, unsigned rounds; std::bernoulli_distribution keepalive_dist; crimson::net::MessengerRef msgr; - std::map<crimson::net::Connection*, seastar::promise<>> pending_conns; - std::map<crimson::net::Connection*, PingSessionRef> sessions; + std::map<crimson::net::ConnectionRef, seastar::promise<>> pending_conns; + std::map<crimson::net::ConnectionRef, PingSessionRef> sessions; crimson::auth::DummyAuthClientServer dummy_auth; Client(unsigned rounds, double keepalive_ratio) : rounds(rounds), keepalive_dist(std::bernoulli_distribution{keepalive_ratio}) {} - PingSessionRef find_session(crimson::net::Connection* c) { + PingSessionRef find_session(crimson::net::ConnectionRef c) { auto found = sessions.find(c); if (found == sessions.end()) { ceph_assert(false); @@ -109,13 +122,13 @@ static seastar::future<> test_echo(unsigned rounds, void ms_handle_connect(crimson::net::ConnectionRef conn) override { auto session = seastar::make_shared<PingSession>(); 
- auto [i, added] = sessions.emplace(conn.get(), session); + auto [i, added] = sessions.emplace(conn, session); std::ignore = i; ceph_assert(added); session->connected_time = mono_clock::now(); } - seastar::future<> ms_dispatch(crimson::net::Connection* c, - MessageRef m) override { + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef c, MessageRef m) override { auto session = find_session(c); ++(session->count); if (verbose) { @@ -129,7 +142,7 @@ static seastar::future<> test_echo(unsigned rounds, ceph_assert(found != pending_conns.end()); found->second.set_value(); } - return seastar::now(); + return {seastar::now()}; } seastar::future<> init(const entity_name_t& name, @@ -139,14 +152,12 @@ static seastar::future<> test_echo(unsigned rounds, msgr->set_default_policy(crimson::net::SocketPolicy::lossy_client(0)); msgr->set_auth_client(&dummy_auth); msgr->set_auth_server(&dummy_auth); - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(*this); - return msgr->start(chained_dispatchers); + return msgr->start({this}); } seastar::future<> shutdown() { ceph_assert(msgr); - msgr->remove_dispatcher(*this); + msgr->stop(); return msgr->shutdown(); } @@ -154,9 +165,9 @@ static seastar::future<> test_echo(unsigned rounds, mono_time start_time = mono_clock::now(); auto conn = msgr->connect(peer_addr, entity_name_t::TYPE_OSD); return seastar::futurize_invoke([this, conn] { - return do_dispatch_pingpong(conn.get()); - }).finally([this, conn, start_time] { - auto session = find_session(conn.get()); + return do_dispatch_pingpong(conn); + }).then([this, conn, start_time] { + auto session = find_session(conn); std::chrono::duration<double> dur_handshake = session->connected_time - start_time; std::chrono::duration<double> dur_pingpong = session->finish_time - session->connected_time; logger().info("{}: handshake {}, pingpong {}", @@ -165,7 +176,7 @@ static seastar::future<> test_echo(unsigned rounds, } 
private: - seastar::future<> do_dispatch_pingpong(crimson::net::Connection* conn) { + seastar::future<> do_dispatch_pingpong(crimson::net::ConnectionRef conn) { auto [i, added] = pending_conns.emplace(conn, seastar::promise<>()); std::ignore = i; ceph_assert(added); @@ -215,10 +226,8 @@ static seastar::future<> test_echo(unsigned rounds, auto client1 = seastar::make_shared<test_state::Client>(rounds, keepalive_ratio); auto client2 = seastar::make_shared<test_state::Client>(rounds, keepalive_ratio); // start servers and clients - entity_addr_t addr1; - addr1.parse("127.0.0.1:9010", nullptr); - entity_addr_t addr2; - addr2.parse("127.0.0.1:9011", nullptr); + auto addr1 = get_server_addr(); + auto addr2 = get_server_addr(); if (v2) { addr1.set_type(entity_addr_t::TYPE_MSGR2); addr2.set_type(entity_addr_t::TYPE_MSGR2); @@ -240,19 +249,19 @@ static seastar::future<> test_echo(unsigned rounds, // shutdown }).then_unpack([] { return seastar::now(); - }).finally([client1] { + }).then([client1] { logger().info("client1 shutdown..."); return client1->shutdown(); - }).finally([client2] { + }).then([client2] { logger().info("client2 shutdown..."); return client2->shutdown(); - }).finally([server1] { + }).then([server1] { logger().info("server1 shutdown..."); return server1->shutdown(); - }).finally([server2] { + }).then([server2] { logger().info("server2 shutdown..."); return server2->shutdown(); - }).finally([server1, server2, client1, client2] { + }).then([server1, server2, client1, client2] { logger().info("test_echo() done!\n"); }); } @@ -268,20 +277,20 @@ static seastar::future<> test_concurrent_dispatch(bool v2) seastar::promise<> on_done; // satisfied when first dispatch unblocks crimson::auth::DummyAuthClientServer dummy_auth; - seastar::future<> ms_dispatch(crimson::net::Connection* c, - MessageRef m) override { + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef, MessageRef m) override { switch (++count) { case 1: // block on the first 
request until we reenter with the second - return on_second.get_future().then([this] { - on_done.set_value(); - }); + std::ignore = on_second.get_future().then([this] { on_done.set_value(); }); + break; case 2: on_second.set_value(); - return seastar::now(); + break; default: throw std::runtime_error("unexpected count"); } + return {seastar::now()}; } seastar::future<> wait() { return on_done.get_future(); } @@ -294,11 +303,14 @@ static seastar::future<> test_concurrent_dispatch(bool v2) msgr->set_default_policy(crimson::net::SocketPolicy::stateless_server(0)); msgr->set_auth_client(&dummy_auth); msgr->set_auth_server(&dummy_auth); - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(*this); - return msgr->bind(entity_addrvec_t{addr}).then([this, chained_dispatchers]() mutable { - return msgr->start(chained_dispatchers); - }); + return msgr->bind(entity_addrvec_t{addr}).safe_then([this] { + return msgr->start({this}); + }, crimson::net::Messenger::bind_ertr::all_same_way( + [addr] (const std::error_code& e) { + logger().error("test_concurrent_dispatch(): " + "there is another instance running at {}", addr); + ceph_abort(); + })); } }; @@ -307,6 +319,11 @@ static seastar::future<> test_concurrent_dispatch(bool v2) crimson::net::MessengerRef msgr; crimson::auth::DummyAuthClientServer dummy_auth; + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef, MessageRef m) override { + return {seastar::now()}; + } + seastar::future<> init(const entity_name_t& name, const std::string& lname, const uint64_t nonce) { @@ -314,9 +331,7 @@ static seastar::future<> test_concurrent_dispatch(bool v2) msgr->set_default_policy(crimson::net::SocketPolicy::lossy_client(0)); msgr->set_auth_client(&dummy_auth); msgr->set_auth_server(&dummy_auth); - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(*this); - return msgr->start(chained_dispatchers); + return 
msgr->start({this}); } }; }; @@ -324,8 +339,7 @@ static seastar::future<> test_concurrent_dispatch(bool v2) logger().info("test_concurrent_dispatch(v2={}):", v2); auto server = seastar::make_shared<test_state::Server>(); auto client = seastar::make_shared<test_state::Client>(); - entity_addr_t addr; - addr.parse("127.0.0.1:9010", nullptr); + auto addr = get_server_addr(); if (v2) { addr.set_type(entity_addr_t::TYPE_MSGR2); } else { @@ -344,15 +358,15 @@ static seastar::future<> test_concurrent_dispatch(bool v2) }); }).then([server] { return server->wait(); - }).finally([client] { + }).then([client] { logger().info("client shutdown..."); - client->msgr->remove_dispatcher(*client); + client->msgr->stop(); return client->msgr->shutdown(); - }).finally([server] { + }).then([server] { logger().info("server shutdown..."); - server->msgr->remove_dispatcher(*server); + server->msgr->stop(); return server->msgr->shutdown(); - }).finally([server, client] { + }).then([server, client] { logger().info("test_concurrent_dispatch() done!\n"); }); } @@ -364,9 +378,10 @@ seastar::future<> test_preemptive_shutdown(bool v2) { crimson::net::MessengerRef msgr; crimson::auth::DummyAuthClientServer dummy_auth; - seastar::future<> ms_dispatch(crimson::net::Connection* c, - MessageRef m) override { - return c->send(make_message<MPing>()); + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef c, MessageRef m) override { + std::ignore = c->send(make_message<MPing>()); + return {seastar::now()}; } public: @@ -378,17 +393,20 @@ seastar::future<> test_preemptive_shutdown(bool v2) { msgr->set_default_policy(crimson::net::SocketPolicy::stateless_server(0)); msgr->set_auth_client(&dummy_auth); msgr->set_auth_server(&dummy_auth); - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(*this); - return msgr->bind(entity_addrvec_t{addr}).then([this, chained_dispatchers]() mutable { - return msgr->start(chained_dispatchers); - 
}); + return msgr->bind(entity_addrvec_t{addr}).safe_then([this] { + return msgr->start({this}); + }, crimson::net::Messenger::bind_ertr::all_same_way( + [addr] (const std::error_code& e) { + logger().error("test_preemptive_shutdown(): " + "there is another instance running at {}", addr); + ceph_abort(); + })); } entity_addr_t get_addr() const { return msgr->get_myaddr(); } seastar::future<> shutdown() { - msgr->remove_dispatcher(*this); + msgr->stop(); return msgr->shutdown(); } }; @@ -401,9 +419,9 @@ seastar::future<> test_preemptive_shutdown(bool v2) { bool stop_send = false; seastar::promise<> stopped_send_promise; - seastar::future<> ms_dispatch(crimson::net::Connection* c, - MessageRef m) override { - return seastar::now(); + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef, MessageRef m) override { + return {seastar::now()}; } public: @@ -414,9 +432,7 @@ seastar::future<> test_preemptive_shutdown(bool v2) { msgr->set_default_policy(crimson::net::SocketPolicy::lossy_client(0)); msgr->set_auth_client(&dummy_auth); msgr->set_auth_server(&dummy_auth); - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(*this); - return msgr->start(chained_dispatchers); + return msgr->start({this}); } void send_pings(const entity_addr_t& addr) { auto conn = msgr->connect(addr, entity_name_t::TYPE_OSD); @@ -433,7 +449,7 @@ seastar::future<> test_preemptive_shutdown(bool v2) { }); } seastar::future<> shutdown() { - msgr->remove_dispatcher(*this); + msgr->stop(); return msgr->shutdown().then([this] { stop_send = true; return stopped_send_promise.get_future(); @@ -445,8 +461,7 @@ seastar::future<> test_preemptive_shutdown(bool v2) { logger().info("test_preemptive_shutdown(v2={}):", v2); auto server = seastar::make_shared<test_state::Server>(); auto client = seastar::make_shared<test_state::Client>(); - entity_addr_t addr; - addr.parse("127.0.0.1:9010", nullptr); + auto addr = get_server_addr(); if 
(v2) { addr.set_type(entity_addr_t::TYPE_MSGR2); } else { @@ -462,10 +477,10 @@ seastar::future<> test_preemptive_shutdown(bool v2) { }).then([client] { logger().info("client shutdown..."); return client->shutdown(); - }).finally([server] { + }).then([server] { logger().info("server shutdown..."); return server->shutdown(); - }).finally([server, client] { + }).then([server, client] { logger().info("test_preemptive_shutdown() done!\n"); }); } @@ -798,14 +813,14 @@ class FailoverSuite : public Dispatcher { unsigned pending_peer_receive = 0; unsigned pending_receive = 0; - seastar::future<> ms_dispatch(Connection* c, MessageRef m) override { - auto result = interceptor.find_result(c->shared_from_this()); + std::optional<seastar::future<>> ms_dispatch(ConnectionRef c, MessageRef m) override { + auto result = interceptor.find_result(c); if (result == nullptr) { logger().error("Untracked ms dispatched connection: {}", *c); ceph_abort(); } - if (tracked_conn != c->shared_from_this()) { + if (tracked_conn != c) { logger().error("[{}] {} got op, but doesn't match tracked_conn [{}] {}", result->index, *c, tracked_index, *tracked_conn); ceph_abort(); @@ -820,7 +835,7 @@ class FailoverSuite : public Dispatcher { } logger().info("[Test] got op, left {} ops -- [{}] {}", pending_receive, result->index, *c); - return seastar::now(); + return {seastar::now()}; } void ms_handle_accept(ConnectionRef conn) override { @@ -911,11 +926,13 @@ class FailoverSuite : public Dispatcher { test_msgr->set_auth_client(&dummy_auth); test_msgr->set_auth_server(&dummy_auth); test_msgr->interceptor = &interceptor; - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(*this); - return test_msgr->bind(entity_addrvec_t{addr}).then([this, chained_dispatchers]() mutable { - return test_msgr->start(chained_dispatchers); - }); + return test_msgr->bind(entity_addrvec_t{addr}).safe_then([this] { + return test_msgr->start({this}); + }, 
Messenger::bind_ertr::all_same_way([addr] (const std::error_code& e) { + logger().error("FailoverSuite: " + "there is another instance running at {}", addr); + ceph_abort(); + })); } seastar::future<> send_op(bool expect_reply=true) { @@ -1035,7 +1052,7 @@ class FailoverSuite : public Dispatcher { } seastar::future<> shutdown() { - test_msgr->remove_dispatcher(*this); + test_msgr->stop(); return test_msgr->shutdown(); } @@ -1192,29 +1209,30 @@ class FailoverTest : public Dispatcher { std::unique_ptr<FailoverSuite> test_suite; - seastar::future<> ms_dispatch(Connection* c, MessageRef m) override { + std::optional<seastar::future<>> ms_dispatch(ConnectionRef c, MessageRef m) override { switch (m->get_type()) { case CEPH_MSG_PING: ceph_assert(recv_pong); recv_pong->set_value(); recv_pong = std::nullopt; - return seastar::now(); + break; case MSG_COMMAND_REPLY: ceph_assert(recv_cmdreply); recv_cmdreply->set_value(); recv_cmdreply = std::nullopt; - return seastar::now(); + break; case MSG_COMMAND: { auto m_cmd = boost::static_pointer_cast<MCommand>(m); ceph_assert(static_cast<cmd_t>(m_cmd->cmd[0][0]) == cmd_t::suite_recv_op); ceph_assert(test_suite); test_suite->notify_peer_reply(); - return seastar::now(); + break; } default: logger().error("{} got unexpected msg from cmd server: {}", *c, *m); ceph_abort(); } + return {seastar::now()}; } private: @@ -1258,9 +1276,7 @@ class FailoverTest : public Dispatcher { cmd_msgr->set_default_policy(SocketPolicy::lossy_client(0)); cmd_msgr->set_auth_client(&dummy_auth); cmd_msgr->set_auth_server(&dummy_auth); - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(*this); - return cmd_msgr->start(chained_dispatchers).then([this, cmd_peer_addr] { + return cmd_msgr->start({this}).then([this, cmd_peer_addr] { logger().info("CmdCli connect to CmdSrv({}) ...", cmd_peer_addr); cmd_conn = cmd_msgr->connect(cmd_peer_addr, entity_name_t::TYPE_OSD); return pingpong(); @@ -1282,8 +1298,8 @@ 
class FailoverTest : public Dispatcher { m->cmd.emplace_back(1, static_cast<char>(cmd_t::shutdown)); return cmd_conn->send(m).then([] { return seastar::sleep(200ms); - }).finally([this] { - cmd_msgr->remove_dispatcher(*this); + }).then([this] { + cmd_msgr->stop(); return cmd_msgr->shutdown(); }); } @@ -1329,9 +1345,9 @@ class FailoverTest : public Dispatcher { logger().info("\n[FAIL: {}]", eptr); test_suite->dump_results(); throw; - }).finally([this] { + }).then([this] { return stop_peer(); - }).finally([this] { + }).then([this] { return test_suite->shutdown().then([this] { test_suite.reset(); }); @@ -1391,11 +1407,12 @@ class FailoverSuitePeer : public Dispatcher { ConnectionRef tracked_conn; unsigned pending_send = 0; - seastar::future<> ms_dispatch(Connection* c, MessageRef m) override { + std::optional<seastar::future<>> ms_dispatch(ConnectionRef c, MessageRef m) override { logger().info("[TestPeer] got op from Test"); ceph_assert(m->get_type() == CEPH_MSG_OSD_OP); - ceph_assert(tracked_conn == c->shared_from_this()); - return op_callback(); + ceph_assert(tracked_conn == c); + std::ignore = op_callback(); + return {seastar::now()}; } void ms_handle_accept(ConnectionRef conn) override { @@ -1418,11 +1435,13 @@ class FailoverSuitePeer : public Dispatcher { peer_msgr->set_default_policy(policy); peer_msgr->set_auth_client(&dummy_auth); peer_msgr->set_auth_server(&dummy_auth); - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - return peer_msgr->bind(entity_addrvec_t{addr}).then([this, chained_dispatchers]() mutable { - chained_dispatchers->push_back(*this); - return peer_msgr->start(chained_dispatchers); - }); + return peer_msgr->bind(entity_addrvec_t{addr}).safe_then([this] { + return peer_msgr->start({this}); + }, Messenger::bind_ertr::all_same_way([addr] (const std::error_code& e) { + logger().error("FailoverSuitePeer: " + "there is another instance running at {}", addr); + ceph_abort(); + })); } seastar::future<> send_op() { @@ 
-1453,7 +1472,7 @@ class FailoverSuitePeer : public Dispatcher { : peer_msgr(peer_msgr), op_callback(op_callback) { } seastar::future<> shutdown() { - peer_msgr->remove_dispatcher(*this); + peer_msgr->stop(); return peer_msgr->shutdown(); } @@ -1518,29 +1537,32 @@ class FailoverTestPeer : public Dispatcher { const entity_addr_t test_peer_addr; std::unique_ptr<FailoverSuitePeer> test_suite; - seastar::future<> ms_dispatch(Connection* c, MessageRef m) override { - ceph_assert(cmd_conn == c->shared_from_this()); + std::optional<seastar::future<>> ms_dispatch(ConnectionRef c, MessageRef m) override { + ceph_assert(cmd_conn == c); switch (m->get_type()) { case CEPH_MSG_PING: - return c->send(make_message<MPing>()); + std::ignore = c->send(make_message<MPing>()); + break; case MSG_COMMAND: { auto m_cmd = boost::static_pointer_cast<MCommand>(m); auto cmd = static_cast<cmd_t>(m_cmd->cmd[0][0]); if (cmd == cmd_t::shutdown) { logger().info("CmdSrv shutdown..."); // forwarded to FailoverTestPeer::wait() - cmd_msgr->remove_dispatcher(*this); - (void) cmd_msgr->shutdown(); - return seastar::now(); + cmd_msgr->stop(); + std::ignore = cmd_msgr->shutdown(); + } else { + std::ignore = handle_cmd(cmd, m_cmd).then([c] { + return c->send(make_message<MCommandReply>()); + }); } - return handle_cmd(cmd, m_cmd).then([c] { - return c->send(make_message<MCommandReply>()); - }); + break; } default: logger().error("{} got unexpected msg from cmd client: {}", *c, m); ceph_abort(); } + return {seastar::now()}; } void ms_handle_accept(ConnectionRef conn) override { @@ -1597,21 +1619,13 @@ class FailoverTestPeer : public Dispatcher { cmd_msgr->set_default_policy(SocketPolicy::stateless_server(0)); cmd_msgr->set_auth_client(&dummy_auth); cmd_msgr->set_auth_server(&dummy_auth); - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(*this); - return cmd_msgr->bind(entity_addrvec_t{cmd_peer_addr}).then( - [this, chained_dispatchers]() mutable { - 
return cmd_msgr->start(chained_dispatchers); - }).handle_exception_type([cmd_peer_addr](const std::system_error& e) { - if (e.code() == std::errc::address_in_use) { - logger().error("FailoverTestPeer::init({}) " - "likely there is another instance of " - "unittest_seastar_messenger running", cmd_peer_addr); - } else { - logger().error("FailoverTestPeer::init({}): {}", cmd_peer_addr, e.what()); - } - abort(); - }); + return cmd_msgr->bind(entity_addrvec_t{cmd_peer_addr}).safe_then([this] { + return cmd_msgr->start({this}); + }, Messenger::bind_ertr::all_same_way([cmd_peer_addr] (const std::error_code& e) { + logger().error("FailoverTestPeer: " + "there is another instance running at {}", cmd_peer_addr); + ceph_abort(); + })); } public: @@ -3453,7 +3467,7 @@ test_v2_protocol(entity_addr_t test_addr, return FailoverTestPeer::create(test_peer_addr ).then([test_addr, test_peer_addr] (auto peer) { return test_v2_protocol(test_addr, test_peer_addr, false - ).finally([peer = std::move(peer)] () mutable { + ).then([peer = std::move(peer)] () mutable { return peer->wait().then([peer = std::move(peer)] {}); }); }).handle_exception([] (auto eptr) { @@ -3549,7 +3563,7 @@ test_v2_protocol(entity_addr_t test_addr, return test_v2_lossless_peer_connector(*test); }).then([test] { return test_v2_lossless_peer_acceptor(*test); - }).finally([test] { + }).then([test] { return test->shutdown().then([test] {}); }); }).handle_exception([] (auto eptr) { diff --git a/src/test/crimson/test_monc.cc b/src/test/crimson/test_monc.cc index 45335bd5030..f590ce73a14 100644 --- a/src/test/crimson/test_monc.cc +++ b/src/test/crimson/test_monc.cc @@ -24,7 +24,6 @@ DummyAuthHandler dummy_handler; static seastar::future<> test_monc() { - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); return crimson::common::sharded_conf().start(EntityName{}, string_view{"ceph"}).then([] { std::vector<const char*> args; std::string cluster; @@ -39,7 +38,7 @@ static seastar::future<> test_monc() 
return conf.parse_config_files(conf_file_list); }).then([] { return crimson::common::sharded_perf_coll().start(); - }).then([chained_dispatchers]() mutable { + }).then([]() mutable { auto msgr = crimson::net::Messenger::create(entity_name_t::OSD(0), "monc", 0); auto& conf = crimson::common::local_conf(); if (conf->ms_crc_data) { @@ -50,9 +49,8 @@ static seastar::future<> test_monc() } msgr->set_require_authorizer(false); return seastar::do_with(MonClient{*msgr, dummy_handler}, - [msgr, chained_dispatchers](auto& monc) mutable { - chained_dispatchers->push_back(monc); - return msgr->start(chained_dispatchers).then([&monc] { + [msgr](auto& monc) mutable { + return msgr->start({&monc}).then([&monc] { return seastar::with_timeout( seastar::lowres_clock::now() + std::chrono::seconds{10}, monc.start()); diff --git a/src/test/crimson/test_socket.cc b/src/test/crimson/test_socket.cc index bccfb36526a..bfdeeea2a78 100644 --- a/src/test/crimson/test_socket.cc +++ b/src/test/crimson/test_socket.cc @@ -26,15 +26,19 @@ using crimson::net::stop_t; using SocketFRef = seastar::foreign_ptr<SocketRef>; static seastar::logger logger{"crimsontest"}; -static entity_addr_t server_addr = [] { +static entity_addr_t get_server_addr() { + static int port = 9020; + ++port; + ceph_assert(port < 9030 && "socket and messenger test ports should not overlap"); entity_addr_t saddr; - saddr.parse("127.0.0.1:9020", nullptr); + saddr.parse("127.0.0.1", nullptr); + saddr.set_port(port); return saddr; -} (); +} -future<SocketRef> socket_connect() { - logger.debug("socket_connect()..."); - return Socket::connect(server_addr).then([] (auto socket) { +future<SocketRef> socket_connect(const entity_addr_t& saddr) { + logger.debug("socket_connect() to {} ...", saddr); + return Socket::connect(saddr).then([] (auto socket) { logger.debug("socket_connect() connected"); return socket; }); @@ -42,8 +46,10 @@ future<SocketRef> socket_connect() { future<> test_refused() { logger.info("test_refused()..."); - return 
socket_connect().discard_result().then([] { - ceph_abort_msg("connection is not refused"); + auto saddr = get_server_addr(); + return socket_connect(saddr).discard_result().then([saddr] { + logger.error("test_refused(): connection to {} is not refused", saddr); + ceph_abort(); }).handle_exception_type([] (const std::system_error& e) { if (e.code() != std::errc::connection_refused) { logger.error("test_refused() got unexpeted error {}", e); @@ -60,26 +66,35 @@ future<> test_refused() { future<> test_bind_same() { logger.info("test_bind_same()..."); return FixedCPUServerSocket::create().then([] (auto pss1) { - return pss1->listen(server_addr).then([] { + auto saddr = get_server_addr(); + return pss1->listen(saddr).safe_then([saddr] { // try to bind the same address - return FixedCPUServerSocket::create().then([] (auto pss2) { - return pss2->listen(server_addr).then([] { - ceph_abort("Should raise address_in_use!"); - }).handle_exception_type([] (const std::system_error& e) { - assert(e.code() == std::errc::address_in_use); - // successful! - }).finally([pss2] { - return pss2->destroy(); - }).handle_exception_type([] (const std::system_error& e) { - if (e.code() != std::errc::address_in_use) { - logger.error("test_bind_same() got unexpeted error {}", e); - ceph_abort(); - } else { + return FixedCPUServerSocket::create().then([saddr] (auto pss2) { + return pss2->listen(saddr).safe_then([] { + logger.error("test_bind_same() should raise address_in_use"); + ceph_abort(); + }, FixedCPUServerSocket::listen_ertr::all_same_way( + [] (const std::error_code& e) { + if (e == std::errc::address_in_use) { + // successful! 
logger.info("test_bind_same() ok\n"); + } else { + logger.error("test_bind_same() got unexpected error {}", e); + ceph_abort(); } + // Note: need to return a explicit ready future, or there will be a + // runtime error: member access within null pointer of type 'struct promise_base' + return seastar::now(); + })).then([pss2] { + return pss2->destroy(); }); }); - }).finally([pss1] { + }, FixedCPUServerSocket::listen_ertr::all_same_way( + [saddr] (const std::error_code& e) { + logger.error("test_bind_same(): there is another instance running at {}", + saddr); + ceph_abort(); + })).then([pss1] { return pss1->destroy(); }).handle_exception([] (auto eptr) { logger.error("test_bind_same() got unexpeted exception {}", eptr); @@ -91,20 +106,26 @@ future<> test_bind_same() { future<> test_accept() { logger.info("test_accept()"); return FixedCPUServerSocket::create().then([] (auto pss) { - return pss->listen(server_addr).then([pss] { + auto saddr = get_server_addr(); + return pss->listen(saddr).safe_then([pss] { return pss->accept([] (auto socket, auto paddr) { // simple accept return seastar::sleep(100ms).then([socket = std::move(socket)] () mutable { return socket->close().finally([cleanup = std::move(socket)] {}); }); }); - }).then([] { + }, FixedCPUServerSocket::listen_ertr::all_same_way( + [saddr] (const std::error_code& e) { + logger.error("test_accept(): there is another instance running at {}", + saddr); + ceph_abort(); + })).then([saddr] { return seastar::when_all( - socket_connect().then([] (auto socket) { + socket_connect(saddr).then([] (auto socket) { return socket->close().finally([cleanup = std::move(socket)] {}); }), - socket_connect().then([] (auto socket) { + socket_connect(saddr).then([] (auto socket) { return socket->close().finally([cleanup = std::move(socket)] {}); }), - socket_connect().then([] (auto socket) { + socket_connect(saddr).then([] (auto socket) { return socket->close().finally([cleanup = std::move(socket)] {}); }) ).discard_result(); 
}).then([] { @@ -112,7 +133,7 @@ future<> test_accept() { return seastar::sleep(50ms); }).then([] { logger.info("test_accept() ok\n"); - }).finally([pss] { + }).then([pss] { return pss->destroy(); }).handle_exception([] (auto eptr) { logger.error("test_accept() got unexpeted exception {}", eptr); @@ -134,15 +155,22 @@ class SocketFactory { assert(seastar::this_shard_id() == 0u); auto owner = std::make_unique<SocketFactory>(); auto psf = owner.get(); - return seastar::smp::submit_to(1u, [psf] { - return FixedCPUServerSocket::create().then([psf] (auto pss) { + auto saddr = get_server_addr(); + return seastar::smp::submit_to(1u, [psf, saddr] { + return FixedCPUServerSocket::create().then([psf, saddr] (auto pss) { psf->pss = pss; - return pss->listen(server_addr); + return pss->listen(saddr + ).safe_then([]{}, FixedCPUServerSocket::listen_ertr::all_same_way( + [saddr] (const std::error_code& e) { + logger.error("dispatch_sockets(): there is another instance running at {}", + saddr); + ceph_abort(); + })); }); - }).then([psf] { + }).then([psf, saddr] { return seastar::when_all_succeed( - seastar::smp::submit_to(0u, [psf] { - return socket_connect().then([psf] (auto socket) { + seastar::smp::submit_to(0u, [psf, saddr] { + return socket_connect(saddr).then([psf] (auto socket) { psf->client_socket = std::move(socket); }); }), @@ -159,7 +187,7 @@ class SocketFactory { return seastar::now(); }).then([psf] { return psf->server_connected.get_future(); - }).finally([psf] { + }).then([psf] { if (psf->pss) { return seastar::smp::submit_to(1u, [psf] { return psf->pss->destroy(); @@ -173,7 +201,7 @@ class SocketFactory { return seastar::when_all_succeed( seastar::smp::submit_to(0u, [socket = psf->client_socket.get(), cb_client = std::move(cb_client)] { - return cb_client(socket).finally([socket] { + return cb_client(socket).then([socket] { logger.debug("closing client socket..."); return socket->close(); }).handle_exception([] (auto eptr) { @@ -184,7 +212,7 @@ class SocketFactory { 
}), seastar::smp::submit_to(1u, [socket = psf->server_socket.get(), cb_server = std::move(cb_server)] { - return cb_server(socket).finally([socket] { + return cb_server(socket).then([socket] { logger.debug("closing server socket..."); return socket->close(); }).handle_exception([] (auto eptr) { diff --git a/src/test/immutable_object_cache/test_object_store.cc b/src/test/immutable_object_cache/test_object_store.cc index 12e6e5aaa4a..e100ed44456 100644 --- a/src/test/immutable_object_cache/test_object_store.cc +++ b/src/test/immutable_object_cache/test_object_store.cc @@ -4,7 +4,13 @@ #include <iostream> #include <unistd.h> +#if __has_include(<filesystem>) +#include <filesystem> +namespace fs = std::filesystem; +#else #include <experimental/filesystem> +namespace fs = std::experimental::filesystem; +#endif #include "gtest/gtest.h" #include "include/Context.h" @@ -18,7 +24,6 @@ #include "tools/immutable_object_cache/ObjectCacheStore.h" -namespace efs = std::experimental::filesystem; using namespace ceph::immutable_obj_cache; std::string test_cache_path("/tmp/test_ceph_immutable_shared_cache"); @@ -87,7 +92,7 @@ TEST_F(TestObjectStore, test_1) { std::string cache_path(test_cache_path); - efs::remove_all(test_cache_path); + fs::remove_all(test_cache_path); init_object_cache_store(m_temp_pool_name, m_temp_volume_name, 1000, true); diff --git a/src/test/lazy-omap-stats/lazy_omap_stats_test.h b/src/test/lazy-omap-stats/lazy_omap_stats_test.h index 5399012eae7..28e194441e1 100644 --- a/src/test/lazy-omap-stats/lazy_omap_stats_test.h +++ b/src/test/lazy-omap-stats/lazy_omap_stats_test.h @@ -22,8 +22,8 @@ #include "include/rados/librados.hpp" struct index_t { - uint byte_index = 0; - uint key_index = 0; + unsigned byte_index = 0; + unsigned key_index = 0; }; class LazyOmapStatsTest @@ -33,13 +33,13 @@ class LazyOmapStatsTest std::map<std::string, librados::bufferlist> payload; struct lazy_omap_test_t { - uint payload_size = 0; - uint replica_count = 3; - uint keys = 2000; - 
uint how_many = 50; + unsigned payload_size = 0; + unsigned replica_count = 3; + unsigned keys = 2000; + unsigned how_many = 50; std::string pool_name = "lazy_omap_test_pool"; - uint total_bytes = 0; - uint total_keys = 0; + unsigned total_bytes = 0; + unsigned total_keys = 0; } conf; LazyOmapStatsTest(LazyOmapStatsTest&) = delete; @@ -49,13 +49,13 @@ class LazyOmapStatsTest void write_omap(const std::string& object_name); const std::string get_name() const; void create_payload(); - void write_many(const uint how_many); + void write_many(const unsigned how_many); void scrub() const; const int find_matches(std::string& output, std::regex& reg) const; void check_one(); const int find_index(std::string& haystack, std::regex& needle, std::string label) const; - const uint tally_column(const uint omap_bytes_index, + const unsigned tally_column(const unsigned omap_bytes_index, const std::string& table, bool header) const; void check_column(const int index, const std::string& table, const std::string& type, bool header = true) const; diff --git a/src/test/libcephfs/test.cc b/src/test/libcephfs/test.cc index dc4e627eeee..ef43e108705 100644 --- a/src/test/libcephfs/test.cc +++ b/src/test/libcephfs/test.cc @@ -33,6 +33,7 @@ #include <sys/xattr.h> #endif +#include <fmt/format.h> #include <map> #include <vector> #include <thread> @@ -80,6 +81,30 @@ TEST(LibCephFS, OpenEmptyComponent) { ceph_shutdown(cmount); } +TEST(LibCephFS, OpenReadTruncate) { + struct ceph_mount_info *cmount; + ASSERT_EQ(0, ceph_create(&cmount, NULL)); + ASSERT_EQ(0, ceph_conf_read_file(cmount, NULL)); + ASSERT_EQ(0, ceph_conf_parse_env(cmount, NULL)); + ASSERT_EQ(0, ceph_mount(cmount, "/")); + + auto path = fmt::format("test_open_rdt_{}", getpid()); + int fd = ceph_open(cmount, path.c_str(), O_WRONLY|O_CREAT, 0666); + ASSERT_LE(0, fd); + + auto data = std::string("hello world"); + ASSERT_EQ(ceph_write(cmount, fd, data.c_str(), data.size(), 0), (int)data.size()); + ASSERT_EQ(0, ceph_close(cmount, fd)); + + 
fd = ceph_open(cmount, path.c_str(), O_RDONLY, 0); + ASSERT_LE(0, fd); + ASSERT_EQ(ceph_ftruncate(cmount, fd, 0), -EBADF); + ASSERT_EQ(ceph_ftruncate(cmount, fd, 1), -EBADF); + ASSERT_EQ(0, ceph_close(cmount, fd)); + + ceph_shutdown(cmount); +} + TEST(LibCephFS, OpenReadWrite) { struct ceph_mount_info *cmount; ASSERT_EQ(0, ceph_create(&cmount, NULL)); diff --git a/src/test/librados/tier_cxx.cc b/src/test/librados/tier_cxx.cc index a65e1da205c..5dd09f1d2c5 100644 --- a/src/test/librados/tier_cxx.cc +++ b/src/test/librados/tier_cxx.cc @@ -4647,6 +4647,735 @@ TEST_F(LibRadosTwoPoolsPP, ManifestEvict) { } +#include <common/CDC.h> +TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) { + // skip test if not yet octopus + if (_get_required_osd_release(cluster) < "octopus") { + GTEST_SKIP() << "cluster is not yet octopus, skipping test"; + } + + bufferlist inbl; + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_tier", pool_name), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024), + inbl, NULL, NULL)); + + // wait for maps to settle + cluster.wait_for_latest_osdmap(); + + // create object + bufferlist gbl; + { + generate_buffer(1024*8, &gbl); + ObjectWriteOperation op; + op.write_full(gbl); + ASSERT_EQ(0, cache_ioctx.operate("foo-chunk", &op)); + } + { + bufferlist bl; + bl.append("DDse chunk"); + ObjectWriteOperation op; + op.write_full(bl); + ASSERT_EQ(0, ioctx.operate("bar-chunk", &op)); + } + + // set-chunk to set manifest object + { + ObjectReadOperation op; + op.set_chunk(0, 2, ioctx, "bar-chunk", 0, + CEPH_OSD_OP_FLAG_WITH_REFERENCE); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, 
cache_ioctx.aio_operate("foo-chunk", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + // flush + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo-chunk", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + std::unique_ptr<CDC> cdc = CDC::create("fastcdc", cbits(1024)-1); + vector<pair<uint64_t, uint64_t>> chunks; + bufferlist chunk; + cdc->calc_chunks(gbl, &chunks); + chunk.substr_of(gbl, chunks[1].first, chunks[1].second); + string tgt_oid; + { + unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0}; + char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0}; + SHA1 sha1_gen; + int size = chunk.length(); + sha1_gen.Update((const unsigned char *)chunk.c_str(), size); + sha1_gen.Final(fingerprint); + buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str); + tgt_oid = string(p_str); + } + + // read and verify the chunked object + { + bufferlist test_bl; + ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0)); + ASSERT_EQ(test_bl[1], chunk[1]); + } + + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 512), + inbl, NULL, NULL)); + cluster.wait_for_latest_osdmap(); + + // make a dirty chunks + { + bufferlist bl; + bl.append("hi"); + ASSERT_EQ(0, cache_ioctx.write("foo-chunk", bl, bl.length(), 0)); + } + + // flush + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo-chunk", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + 
cdc = CDC::create("fastcdc", cbits(512)-1); + chunks.clear(); + cdc->calc_chunks(gbl, &chunks); + bufferlist chunk_512; + chunk_512.substr_of(gbl, chunks[3].first, chunks[3].second); + { + unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0}; + char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0}; + SHA1 sha1_gen; + int size = chunk_512.length(); + sha1_gen.Update((const unsigned char *)chunk_512.c_str(), size); + sha1_gen.Final(fingerprint); + buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str); + tgt_oid = string(p_str); + } + + // read and verify the chunked object + { + bufferlist test_bl; + ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0)); + ASSERT_EQ(test_bl[1], chunk_512[1]); + } + + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 16384), + inbl, NULL, NULL)); + cluster.wait_for_latest_osdmap(); + + // make a dirty chunks + { + bufferlist bl; + bl.append("hi"); + ASSERT_EQ(0, cache_ioctx.write("foo-chunk", bl, bl.length(), 0)); + gbl.begin(0).copy_in(bl.length(), bl); + } + // flush + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo-chunk", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + cdc = CDC::create("fastcdc", cbits(16384)-1); + chunks.clear(); + cdc->calc_chunks(gbl, &chunks); + bufferlist chunk_16384; + chunk_16384.substr_of(gbl, chunks[0].first, chunks[0].second); + { + unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0}; + char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0}; + SHA1 sha1_gen; + int size = chunk_16384.length(); + sha1_gen.Update((const unsigned char *)chunk_16384.c_str(), size); + sha1_gen.Final(fingerprint); + buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str); + tgt_oid = string(p_str); + } + // read and verify 
the chunked object + { + bufferlist test_bl; + ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0)); + ASSERT_EQ(test_bl[0], chunk_16384[0]); + } + + // less than object size + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024), + inbl, NULL, NULL)); + cluster.wait_for_latest_osdmap(); + + // make a dirty chunks + // a chunk_info is deleted by write, which converts the manifest object to non-manifest object + { + bufferlist bl; + bl.append("hi"); + ASSERT_EQ(0, cache_ioctx.write("foo-chunk", bl, bl.length(), 0)); + } + + // reset set-chunk + { + bufferlist bl; + bl.append("DDse chunk"); + ObjectWriteOperation op; + op.write_full(bl); + ASSERT_EQ(0, ioctx.operate("bar-chunk", &op)); + } + // set-chunk to set manifest object + { + ObjectReadOperation op; + op.set_chunk(0, 2, ioctx, "bar-chunk", 0, + CEPH_OSD_OP_FLAG_WITH_REFERENCE); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate("foo-chunk", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + // flush + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo-chunk", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + cdc = CDC::create("fastcdc", cbits(1024)-1); + chunks.clear(); + cdc->calc_chunks(gbl, &chunks); + bufferlist small_chunk; + small_chunk.substr_of(gbl, chunks[1].first, chunks[1].second); + { + unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0}; + char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0}; + SHA1 sha1_gen; + int size = small_chunk.length(); + sha1_gen.Update((const unsigned char *)small_chunk.c_str(), size); + 
sha1_gen.Final(fingerprint); + buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str); + tgt_oid = string(p_str); + } + // read and verify the chunked object + { + bufferlist test_bl; + ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0)); + ASSERT_EQ(test_bl[0], small_chunk[0]); + } + +} + +TEST_F(LibRadosTwoPoolsPP, ManifestFlushSnap) { + // skip test if not yet octopus + if (_get_required_osd_release(cluster) < "octopus") { + cout << "cluster is not yet octopus, skipping test" << std::endl; + return; + } + + bufferlist inbl; + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_tier", pool_name), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024), + inbl, NULL, NULL)); + + // wait for maps to settle + cluster.wait_for_latest_osdmap(); + + // create object + bufferlist gbl; + { + //bufferlist bl; + //bl.append("there hi"); + generate_buffer(1024*8, &gbl); + ObjectWriteOperation op; + op.write_full(gbl); + ASSERT_EQ(0, cache_ioctx.operate("foo", &op)); + } + { + bufferlist bl; + bl.append("there hi"); + ObjectWriteOperation op; + op.write_full(bl); + ASSERT_EQ(0, ioctx.operate("bar", &op)); + } + + // set-chunk (dedup) + manifest_set_chunk(cluster, ioctx, cache_ioctx, 2, 2, "bar", "foo"); + + // create a snapshot, clone + vector<uint64_t> my_snaps(1); + ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0])); + ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0], + my_snaps)); + + // make a dirty chunks + { + bufferlist bl; + bl.append("Thbbe"); + ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0)); + } + + // and another + my_snaps.resize(2); + my_snaps[1] = my_snaps[0]; + ASSERT_EQ(0, 
cache_ioctx.selfmanaged_snap_create(&my_snaps[0])); + ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0], + my_snaps)); + + // make a dirty chunks + { + bufferlist bl; + bl.append("Thcce"); + ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0)); + } + + // flush on head (should fail) + cache_ioctx.snap_set_read(librados::SNAP_HEAD); + // flush + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(-EBUSY, completion->get_return_value()); + completion->release(); + } + + // flush on recent snap (should fail) + cache_ioctx.snap_set_read(my_snaps[0]); + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(-EBUSY, completion->get_return_value()); + completion->release(); + } + + // flush on oldest snap + cache_ioctx.snap_set_read(my_snaps[1]); + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + // flush on oldest snap + cache_ioctx.snap_set_read(my_snaps[0]); + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + // flush on 
oldest snap + cache_ioctx.snap_set_read(librados::SNAP_HEAD); + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + // check chunk's refcount + std::unique_ptr<CDC> cdc = CDC::create("fastcdc", cbits(1024)-1); + vector<pair<uint64_t, uint64_t>> chunks; + bufferlist chunk; + cdc->calc_chunks(gbl, &chunks); + chunk.substr_of(gbl, chunks[1].first, chunks[1].second); + string tgt_oid; + { + unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0}; + char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0}; + SHA1 sha1_gen; + int size = chunk.length(); + sha1_gen.Update((const unsigned char *)chunk.c_str(), size); + sha1_gen.Final(fingerprint); + buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str); + tgt_oid = string(p_str); + } + // read and verify the chunked object + { + bufferlist test_bl; + ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0)); + ASSERT_EQ(test_bl[1], chunk[1]); + } + + cache_ioctx.snap_set_read(librados::SNAP_HEAD); + { + bufferlist bl; + ASSERT_EQ(4, cache_ioctx.read("foo", bl, 4, 0)); + ASSERT_EQ('c', bl[2]); + } + + cache_ioctx.snap_set_read(my_snaps[0]); + { + bufferlist bl; + ASSERT_EQ(4, cache_ioctx.read("foo", bl, 4, 0)); + ASSERT_EQ('b', bl[2]); + } +} + +TEST_F(LibRadosTwoPoolsPP, ManifestFlushDupCount) { + // skip test if not yet octopus + if (_get_required_osd_release(cluster) < "octopus") { + cout << "cluster is not yet octopus, skipping test" << std::endl; + return; + } + + bufferlist inbl; + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_tier", pool_name), + inbl, NULL, NULL)); + ASSERT_EQ(0, 
cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024), + inbl, NULL, NULL)); + + // create object + bufferlist gbl; + { + //bufferlist bl; + generate_buffer(1024*8, &gbl); + ObjectWriteOperation op; + op.write_full(gbl); + ASSERT_EQ(0, cache_ioctx.operate("foo", &op)); + } + { + bufferlist bl; + bl.append("there hiHI"); + ObjectWriteOperation op; + op.write_full(bl); + ASSERT_EQ(0, ioctx.operate("bar", &op)); + } + + // wait for maps to settle + cluster.wait_for_latest_osdmap(); + + // set-chunk to set manifest object + { + ObjectReadOperation op; + op.set_chunk(0, 2, ioctx, "bar", 0, + CEPH_OSD_OP_FLAG_WITH_REFERENCE); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + // create a snapshot, clone + vector<uint64_t> my_snaps(1); + ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0])); + ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0], + my_snaps)); + + // make a dirty chunks + { + bufferlist bl; + bl.append("Thbbe hi"); + ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0)); + } + + // and another + my_snaps.resize(2); + my_snaps[1] = my_snaps[0]; + ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0])); + ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0], + my_snaps)); + + // make a dirty chunks + { + bufferlist bl; + bl.append("Thcce hi"); + ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0)); + } + + //flush on oldest snap + cache_ioctx.snap_set_read(my_snaps[1]); + // flush + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + 
ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + // flush on oldest snap + cache_ioctx.snap_set_read(my_snaps[0]); + // flush + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + cache_ioctx.snap_set_read(librados::SNAP_HEAD); + // flush + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + std::unique_ptr<CDC> cdc = CDC::create("fastcdc", cbits(1024)-1); + vector<pair<uint64_t, uint64_t>> chunks; + bufferlist chunk; + cdc->calc_chunks(gbl, &chunks); + chunk.substr_of(gbl, chunks[1].first, chunks[1].second); + string tgt_oid; + // check chunk's refcount + { + unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0}; + char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0}; + bufferlist t; + SHA1 sha1_gen; + int size = chunk.length(); + sha1_gen.Update((const unsigned char *)chunk.c_str(), size); + sha1_gen.Final(fingerprint); + buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str); + tgt_oid = string(p_str); + ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t); + chunk_refs_t refs; + try { + auto iter = t.cbegin(); + decode(refs, iter); + } catch (buffer::error& err) { + ASSERT_TRUE(0); + } + ASSERT_EQ(1u, refs.count()); + } + + bufferlist chunk2; + chunk2.substr_of(gbl, chunks[0].first, chunks[0].second); + // 
check chunk's refcount + { + unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0}; + char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0}; + bufferlist t; + SHA1 sha1_gen; + int size = chunk2.length(); + sha1_gen.Update((const unsigned char *)chunk2.c_str(), size); + sha1_gen.Final(fingerprint); + buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str); + tgt_oid = string(p_str); + ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t); + chunk_refs_t refs; + try { + auto iter = t.cbegin(); + decode(refs, iter); + } catch (buffer::error& err) { + ASSERT_TRUE(0); + } + ASSERT_EQ(1u, refs.count()); + } + + // make a dirty chunks + { + bufferlist bl; + bl.append("ThDDe hi"); + ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0)); + } + + // flush + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + bufferlist tmp; + tmp.append("Thcce hi"); + gbl.begin(0).copy_in(tmp.length(), tmp); + bufferlist chunk3; + cdc->calc_chunks(gbl, &chunks); + chunk3.substr_of(gbl, chunks[0].first, chunks[0].second); + // check chunk's refcount + { + unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0}; + char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0}; + bufferlist t; + SHA1 sha1_gen; + int size = chunk2.length(); + sha1_gen.Update((const unsigned char *)chunk2.c_str(), size); + sha1_gen.Final(fingerprint); + buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str); + tgt_oid = string(p_str); + ASSERT_EQ(-ENOENT, ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t)); + } +} + +TEST_F(LibRadosTwoPoolsPP, TierFlushDuringFlush) { + // skip test if not yet octopus + if (_get_required_osd_release(cluster) < "octopus") { + cout << "cluster is not yet octopus, skipping test" << 
std::endl; + return; + } + + bufferlist inbl; + + // create a new pool + std::string temp_pool_name = get_temp_pool_name() + "-test-flush"; + ASSERT_EQ(0, cluster.pool_create(temp_pool_name.c_str())); + + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_tier", temp_pool_name), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024), + inbl, NULL, NULL)); + + // create object + bufferlist gbl; + { + //bufferlist bl; + generate_buffer(1024*8, &gbl); + ObjectWriteOperation op; + op.write_full(gbl); + ASSERT_EQ(0, cache_ioctx.operate("foo", &op)); + } + { + bufferlist bl; + bl.append("there hiHI"); + ObjectWriteOperation op; + op.write_full(bl); + ASSERT_EQ(0, ioctx.operate("bar", &op)); + } + + // set-chunk to set manifest object + { + ObjectReadOperation op; + op.set_chunk(0, 2, ioctx, "bar", 0, + CEPH_OSD_OP_FLAG_WITH_REFERENCE); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(0, completion->get_return_value()); + completion->release(); + } + + // delete temp pool, so flushing chunk will fail + ASSERT_EQ(0, s_cluster.pool_delete(temp_pool_name.c_str())); + + // flush to check if proper error is returned + { + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, cache_ioctx.aio_operate( + "foo", completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(-ENOENT, completion->get_return_value()); + completion->release(); + } + +} + 
class LibRadosTwoPoolsECPP : public RadosTestECPP { public: @@ -7466,6 +8195,15 @@ TEST_F(LibRadosTwoPoolsECPP, ManifestPromoteRead) { cluster.wait_for_latest_osdmap(); } +TEST_F(LibRadosTwoPoolsECPP, TrySetDedupTier) { + // note: require >= mimic + + bufferlist inbl; + ASSERT_EQ(-EOPNOTSUPP, cluster.mon_command( + set_pool_str(pool_name, "dedup_tier", cache_pool_name), + inbl, NULL, NULL)); +} + TEST_F(LibRadosTwoPoolsPP, PropagateBaseTierError) { // write object to base tier bufferlist omap_bl; diff --git a/src/test/librbd/CMakeLists.txt b/src/test/librbd/CMakeLists.txt index 2c77d38b35d..51bf7d321c3 100644 --- a/src/test/librbd/CMakeLists.txt +++ b/src/test/librbd/CMakeLists.txt @@ -90,7 +90,12 @@ set(unittest_librbd_srcs managed_lock/test_mock_ReacquireRequest.cc managed_lock/test_mock_ReleaseRequest.cc migration/test_mock_FileStream.cc + migration/test_mock_HttpClient.cc + migration/test_mock_HttpStream.cc migration/test_mock_RawFormat.cc + migration/test_mock_RawSnapshot.cc + migration/test_mock_S3Stream.cc + migration/test_mock_Utils.cc mirror/snapshot/test_mock_CreateNonPrimaryRequest.cc mirror/snapshot/test_mock_CreatePrimaryRequest.cc mirror/snapshot/test_mock_ImageMeta.cc @@ -129,6 +134,12 @@ if(WITH_RBD_RWL) cache/pwl/test_WriteLogMap.cc) endif(WITH_RBD_RWL) +if(LINUX AND HAVE_LIBCRYPTSETUP) + list(APPEND unittest_librbd_srcs + crypto/luks/test_mock_FormatRequest.cc + crypto/luks/test_mock_LoadRequest.cc) +endif() + add_executable(unittest_librbd ${unittest_librbd_srcs} $<TARGET_OBJECTS:common_texttable_obj>) @@ -154,8 +165,14 @@ target_link_libraries(unittest_librbd osdc ceph-common global + OpenSSL::SSL ${UNITTEST_LIBS}) +if(WITH_RBD_RWL OR WITH_RBD_SSD_CACHE) + target_link_libraries(unittest_librbd + librbd_plugin_pwl_cache) +endif() + add_executable(ceph_test_librbd test_main.cc $<TARGET_OBJECTS:common_texttable_obj>) diff --git a/src/test/librbd/cache/pwl/test_mock_ReplicatedWriteLog.cc b/src/test/librbd/cache/pwl/test_mock_ReplicatedWriteLog.cc 
index d1622a2ce34..5160b189d63 100644 --- a/src/test/librbd/cache/pwl/test_mock_ReplicatedWriteLog.cc +++ b/src/test/librbd/cache/pwl/test_mock_ReplicatedWriteLog.cc @@ -7,10 +7,10 @@ #include "test/librbd/test_support.h" #include "test/librbd/mock/MockImageCtx.h" #include "include/rbd/librbd.hpp" -#include "librbd/cache/pwl/AbstractWriteLog.h" #include "librbd/cache/pwl/ImageCacheState.h" #include "librbd/cache/pwl/Types.h" #include "librbd/cache/ImageWriteback.h" +#include "librbd/plugin/Api.h" namespace librbd { namespace { @@ -37,11 +37,13 @@ inline ImageCtx *get_image_ctx(MockImageCtx *image_ctx) { #include "librbd/cache/pwl/AbstractWriteLog.cc" #include "librbd/cache/pwl/ReplicatedWriteLog.cc" +template class librbd::cache::pwl::ReplicatedWriteLog<librbd::MockImageCtx>; // template definitions #include "librbd/cache/ImageWriteback.cc" #include "librbd/cache/pwl/ImageCacheState.cc" #include "librbd/cache/pwl/Request.cc" +#include "librbd/plugin/Api.cc" namespace librbd { namespace cache { @@ -52,12 +54,18 @@ using ::testing::DoDefault; using ::testing::InSequence; using ::testing::Invoke; +typedef io::Extent Extent; +typedef io::Extents Extents; + struct TestMockCacheReplicatedWriteLog : public TestMockFixture { typedef librbd::cache::pwl::ReplicatedWriteLog<librbd::MockImageCtx> MockReplicatedWriteLog; typedef librbd::cache::pwl::ImageCacheState<librbd::MockImageCtx> MockImageCacheStateRWL; + typedef librbd::cache::ImageWriteback<librbd::MockImageCtx> MockImageWriteback; + typedef librbd::plugin::Api<librbd::MockImageCtx> MockApi; - MockImageCacheStateRWL *get_cache_state(MockImageCtx& mock_image_ctx) { - MockImageCacheStateRWL *rwl_state = new MockImageCacheStateRWL(&mock_image_ctx); + MockImageCacheStateRWL *get_cache_state( + MockImageCtx& mock_image_ctx, MockApi& mock_api) { + MockImageCacheStateRWL *rwl_state = new MockImageCacheStateRWL(&mock_image_ctx, mock_api); return rwl_state; } @@ -112,7 +120,8 @@ TEST_F(TestMockCacheReplicatedWriteLog, 
init_state_write) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockImageCacheStateRWL image_cache_state(&mock_image_ctx); + MockApi mock_api; + MockImageCacheStateRWL image_cache_state(&mock_image_ctx, mock_api); validate_cache_state(ictx, image_cache_state, false, true, true, "", "", 0); @@ -152,7 +161,8 @@ TEST_F(TestMockCacheReplicatedWriteLog, init_state_json_write) { \"pwl_path\": \"/tmp\", \ \"pwl_size\": \"1024\" }"; get_jf(strf, &f); - MockImageCacheStateRWL image_cache_state(&mock_image_ctx, f); + MockApi mock_api; + MockImageCacheStateRWL image_cache_state(&mock_image_ctx, f, mock_api); validate_cache_state(ictx, image_cache_state, true, false, false, "testhost", "/tmp", 1024); @@ -169,7 +179,11 @@ TEST_F(TestMockCacheReplicatedWriteLog, init_shutdown) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); MockContextRWL finish_ctx1; expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); @@ -189,7 +203,11 @@ TEST_F(TestMockCacheReplicatedWriteLog, write) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); MockContextRWL finish_ctx1; expect_op_work_queue(mock_image_ctx); @@ -218,7 +236,12 @@ TEST_F(TestMockCacheReplicatedWriteLog, flush) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, 
get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); + expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); @@ -254,7 +277,12 @@ TEST_F(TestMockCacheReplicatedWriteLog, flush_source_shutdown) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); + expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); @@ -288,7 +316,12 @@ TEST_F(TestMockCacheReplicatedWriteLog, flush_source_internal) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); + expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); @@ -322,7 +355,11 @@ TEST_F(TestMockCacheReplicatedWriteLog, flush_source_user) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); @@ -357,7 +394,11 @@ TEST_F(TestMockCacheReplicatedWriteLog, read_hit_rwl_cache) { ASSERT_EQ(0, 
open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); @@ -396,7 +437,11 @@ TEST_F(TestMockCacheReplicatedWriteLog, read_hit_part_rwl_cache) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); @@ -439,7 +484,11 @@ TEST_F(TestMockCacheReplicatedWriteLog, read_miss_rwl_cache) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); @@ -477,7 +526,11 @@ TEST_F(TestMockCacheReplicatedWriteLog, discard) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); 
@@ -521,7 +574,11 @@ TEST_F(TestMockCacheReplicatedWriteLog, writesame) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); @@ -559,7 +616,11 @@ TEST_F(TestMockCacheReplicatedWriteLog, invalidate) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); @@ -595,7 +656,11 @@ TEST_F(TestMockCacheReplicatedWriteLog, compare_and_write_compare_matched) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); @@ -645,7 +710,11 @@ TEST_F(TestMockCacheReplicatedWriteLog, compare_and_write_compare_failed) { ASSERT_EQ(0, open_image(m_image_name, &ictx)); MockImageCtx mock_image_ctx(*ictx); - MockReplicatedWriteLog rwl(mock_image_ctx, get_cache_state(mock_image_ctx)); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockReplicatedWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, 
mock_api), + mock_image_writeback, mock_api); expect_op_work_queue(mock_image_ctx); expect_metadata_set(mock_image_ctx); diff --git a/src/test/librbd/crypto/luks/test_mock_FormatRequest.cc b/src/test/librbd/crypto/luks/test_mock_FormatRequest.cc new file mode 100644 index 00000000000..36f99fef6d3 --- /dev/null +++ b/src/test/librbd/crypto/luks/test_mock_FormatRequest.cc @@ -0,0 +1,181 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/librbd/test_mock_fixture.h" +#include "test/librbd/test_support.h" +#include "test/librbd/mock/MockImageCtx.h" + +namespace librbd { +namespace util { + +inline ImageCtx *get_image_ctx(MockImageCtx *image_ctx) { + return image_ctx->image_ctx; +} + +} // namespace util +} // namespace librbd + +#include "librbd/crypto/luks/FormatRequest.cc" + +namespace librbd { +namespace crypto { +namespace luks { + +using ::testing::_; +using ::testing::Invoke; +using ::testing::Return; + +struct TestMockCryptoLuksFormatRequest : public TestMockFixture { + typedef FormatRequest<librbd::MockImageCtx> MockFormatRequest; + + const size_t OBJECT_SIZE = 4 * 1024 * 1024; + const char* passphrase_cstr = "password"; + std::string passphrase = passphrase_cstr; + + MockImageCtx* mock_image_ctx; + C_SaferCond finished_cond; + Context *on_finish = &finished_cond; + io::AioCompletion* aio_comp; + ceph::bufferlist header_bl; + + void SetUp() override { + TestMockFixture::SetUp(); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + mock_image_ctx = new MockImageCtx(*ictx); + } + + void TearDown() override { + delete mock_image_ctx; + TestMockFixture::TearDown(); + } + + void expect_get_object_size() { + EXPECT_CALL(*mock_image_ctx, get_object_size()).WillOnce(Return( + OBJECT_SIZE)); + } + + void expect_crypto_layer_exists_check(bool exists = false) { + EXPECT_CALL(*mock_image_ctx->io_object_dispatcher, exists( + 
io::OBJECT_DISPATCH_LAYER_CRYPTO)).WillOnce(Return(exists)); + } + + void expect_image_write() { + EXPECT_CALL(*mock_image_ctx->io_image_dispatcher, send(_)) + .WillOnce(Invoke([this](io::ImageDispatchSpec* spec) { + auto* write = boost::get<io::ImageDispatchSpec::Write>( + &spec->request); + ASSERT_TRUE(write != nullptr); + + ASSERT_EQ(1, spec->image_extents.size()); + ASSERT_EQ(0, spec->image_extents[0].first); + ASSERT_GT(spec->image_extents[0].second, 0); + + spec->dispatch_result = io::DISPATCH_RESULT_COMPLETE; + aio_comp = spec->aio_comp; + header_bl = write->bl; + })); + } + + void complete_aio(int r) { + if (r < 0) { + aio_comp->fail(r); + } else { + aio_comp->set_request_count(1); + aio_comp->add_request(); + aio_comp->complete_request(r); + } + } + + void verify_header(size_t expected_key_length, + uint64_t expected_sector_size) { + Header header(mock_image_ctx->cct); + + ASSERT_EQ(0, header.init()); + ASSERT_EQ(0, header.write(header_bl)); + ASSERT_EQ(0, header.load()); + + ASSERT_EQ(expected_sector_size, header.get_sector_size()); + ASSERT_EQ(0, header.get_data_offset() % OBJECT_SIZE); + + char volume_key[64]; + size_t volume_key_size = sizeof(volume_key); + ASSERT_EQ(0, header.read_volume_key( + passphrase_cstr, strlen(passphrase_cstr), + reinterpret_cast<char*>(volume_key), &volume_key_size)); + } +}; + +TEST_F(TestMockCryptoLuksFormatRequest, LUKS1) { + auto mock_format_request = MockFormatRequest::create( + mock_image_ctx, DiskEncryptionFormat::DISK_ENCRYPTION_FORMAT_LUKS1, + CipherAlgorithm::CIPHER_ALGORITHM_AES128, std::move(passphrase), + on_finish, true); + expect_crypto_layer_exists_check(); + expect_get_object_size(); + expect_image_write(); + mock_format_request->send(); + ASSERT_EQ(ETIMEDOUT, finished_cond.wait_for(0)); + complete_aio(0); + ASSERT_EQ(0, finished_cond.wait()); + ASSERT_NO_FATAL_FAILURE(verify_header(32, 512)); +} + +TEST_F(TestMockCryptoLuksFormatRequest, AES128) { + auto mock_format_request = MockFormatRequest::create( + 
mock_image_ctx, DiskEncryptionFormat::DISK_ENCRYPTION_FORMAT_LUKS2, + CipherAlgorithm::CIPHER_ALGORITHM_AES128, std::move(passphrase), + on_finish, true); + expect_crypto_layer_exists_check(); + expect_get_object_size(); + expect_image_write(); + mock_format_request->send(); + ASSERT_EQ(ETIMEDOUT, finished_cond.wait_for(0)); + complete_aio(0); + ASSERT_EQ(0, finished_cond.wait()); + ASSERT_NO_FATAL_FAILURE(verify_header(32, 4096)); +} + +TEST_F(TestMockCryptoLuksFormatRequest, AES256) { + auto mock_format_request = MockFormatRequest::create( + mock_image_ctx, DiskEncryptionFormat::DISK_ENCRYPTION_FORMAT_LUKS2, + CipherAlgorithm::CIPHER_ALGORITHM_AES256, std::move(passphrase), + on_finish, true); + expect_crypto_layer_exists_check(); + expect_get_object_size(); + expect_image_write(); + mock_format_request->send(); + ASSERT_EQ(ETIMEDOUT, finished_cond.wait_for(0)); + complete_aio(0); + ASSERT_EQ(0, finished_cond.wait()); + ASSERT_NO_FATAL_FAILURE(verify_header(62, 4096)); +} + +TEST_F(TestMockCryptoLuksFormatRequest, CryptoAlreadyLoaded) { + auto mock_format_request = MockFormatRequest::create( + mock_image_ctx, DiskEncryptionFormat::DISK_ENCRYPTION_FORMAT_LUKS2, + CipherAlgorithm::CIPHER_ALGORITHM_AES256, std::move(passphrase), + on_finish, true); + expect_crypto_layer_exists_check(true); + mock_format_request->send(); + ASSERT_EQ(-EEXIST, finished_cond.wait()); +} + +TEST_F(TestMockCryptoLuksFormatRequest, WriteFail) { + auto mock_format_request = MockFormatRequest::create( + mock_image_ctx, DiskEncryptionFormat::DISK_ENCRYPTION_FORMAT_LUKS2, + CipherAlgorithm::CIPHER_ALGORITHM_AES256, std::move(passphrase), + on_finish, true); + expect_crypto_layer_exists_check(); + expect_get_object_size(); + expect_image_write(); + mock_format_request->send(); + ASSERT_EQ(ETIMEDOUT, finished_cond.wait_for(0)); + complete_aio(-123); + ASSERT_EQ(-123, finished_cond.wait()); +} + +} // namespace luks +} // namespace crypto +} // namespace librbd diff --git 
a/src/test/librbd/crypto/luks/test_mock_LoadRequest.cc b/src/test/librbd/crypto/luks/test_mock_LoadRequest.cc new file mode 100644 index 00000000000..5511fbe8f64 --- /dev/null +++ b/src/test/librbd/crypto/luks/test_mock_LoadRequest.cc @@ -0,0 +1,220 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/librbd/test_mock_fixture.h" +#include "test/librbd/test_support.h" +#include "test/librbd/mock/MockImageCtx.h" + +namespace librbd { +namespace util { + +inline ImageCtx *get_image_ctx(MockImageCtx *image_ctx) { + return image_ctx->image_ctx; +} + +} // namespace util +} // namespace librbd + +#include "librbd/crypto/luks/LoadRequest.cc" + +namespace librbd { +namespace crypto { +namespace luks { + +using ::testing::_; +using ::testing::Invoke; +using ::testing::Return; + +struct TestMockCryptoLuksLoadRequest : public TestMockFixture { + typedef LoadRequest<librbd::MockImageCtx> MockLoadRequest; + + const size_t OBJECT_SIZE = 4 * 1024 * 1024; + const char* passphrase_cstr = "password"; + std::string passphrase = passphrase_cstr; + + MockImageCtx* mock_image_ctx; + ceph::ref_t<CryptoInterface> crypto; + MockLoadRequest* mock_load_request; + C_SaferCond finished_cond; + Context *on_finish = &finished_cond; + Context* image_read_request; + ceph::bufferlist header_bl; + uint64_t data_offset; + + void SetUp() override { + TestMockFixture::SetUp(); + + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + mock_image_ctx = new MockImageCtx(*ictx); + crypto = nullptr; + mock_load_request = MockLoadRequest::create( + mock_image_ctx, std::move(passphrase), &crypto, on_finish); + } + + void TearDown() override { + delete mock_image_ctx; + if (crypto != nullptr) { + crypto = nullptr; + } + TestMockFixture::TearDown(); + } + + // returns data offset in bytes + void generate_header(const char* type, const char* alg, size_t key_size, + const char* cipher_mode, uint32_t sector_size) { + Header 
header(mock_image_ctx->cct); + + ASSERT_EQ(0, header.init()); + ASSERT_EQ(0, header.format(type, alg, key_size, cipher_mode, sector_size, + OBJECT_SIZE, true)); + ASSERT_EQ(0, header.add_keyslot(passphrase_cstr, strlen(passphrase_cstr))); + ASSERT_LE(0, header.read(&header_bl)); + + data_offset = header.get_data_offset(); + } + + void expect_crypto_layer_exists_check(bool exists = false) { + EXPECT_CALL(*mock_image_ctx->io_object_dispatcher, exists( + io::OBJECT_DISPATCH_LAYER_CRYPTO)).WillOnce(Return(exists)); + } + + void expect_image_read(uint64_t offset, uint64_t length) { + EXPECT_CALL(*mock_image_ctx->io_image_dispatcher, send(_)) + .WillOnce(Invoke([this, offset, + length](io::ImageDispatchSpec* spec) { + auto* read = boost::get<io::ImageDispatchSpec::Read>( + &spec->request); + ASSERT_TRUE(read != nullptr); + + ASSERT_EQ(1, spec->image_extents.size()); + ASSERT_EQ(offset, spec->image_extents[0].first); + ASSERT_EQ(length, spec->image_extents[0].second); + + spec->dispatch_result = io::DISPATCH_RESULT_COMPLETE; + auto aio_comp = spec->aio_comp; + aio_comp->set_request_count(1); + aio_comp->read_result = std::move(read->read_result); + aio_comp->read_result.set_image_extents(spec->image_extents); + auto ctx = new io::ReadResult::C_ImageReadRequest( + aio_comp, 0, spec->image_extents); + if (header_bl.length() < offset + length) { + header_bl.append_zero(offset + length - header_bl.length()); + } + ctx->bl.substr_of(header_bl, offset, length); + image_read_request = ctx; + })); + } +}; + +TEST_F(TestMockCryptoLuksLoadRequest, AES128) { + generate_header(CRYPT_LUKS2, "aes", 32, "xts-plain64", 4096); + expect_crypto_layer_exists_check(); + expect_image_read(0, DEFAULT_INITIAL_READ_SIZE); + mock_load_request->send(); + image_read_request->complete(DEFAULT_INITIAL_READ_SIZE); + ASSERT_EQ(0, finished_cond.wait()); + ASSERT_NE(crypto, nullptr); +} + +TEST_F(TestMockCryptoLuksLoadRequest, AES256) { + generate_header(CRYPT_LUKS2, "aes", 64, "xts-plain64", 4096); + 
expect_crypto_layer_exists_check(); + expect_image_read(0, DEFAULT_INITIAL_READ_SIZE); + mock_load_request->send(); + image_read_request->complete(DEFAULT_INITIAL_READ_SIZE); + ASSERT_EQ(0, finished_cond.wait()); + ASSERT_NE(crypto, nullptr); +} + +TEST_F(TestMockCryptoLuksLoadRequest, LUKS1) { + generate_header(CRYPT_LUKS1, "aes", 32, "xts-plain64", 512); + expect_crypto_layer_exists_check(); + expect_image_read(0, DEFAULT_INITIAL_READ_SIZE); + mock_load_request->send(); + image_read_request->complete(DEFAULT_INITIAL_READ_SIZE); + ASSERT_EQ(0, finished_cond.wait()); + ASSERT_NE(crypto, nullptr); +} + +TEST_F(TestMockCryptoLuksLoadRequest, UnsupportedAlgorithm) { + generate_header(CRYPT_LUKS2, "twofish", 32, "xts-plain64", 4096); + expect_crypto_layer_exists_check(); + expect_image_read(0, DEFAULT_INITIAL_READ_SIZE); + mock_load_request->send(); + image_read_request->complete(DEFAULT_INITIAL_READ_SIZE); + ASSERT_EQ(-ENOTSUP, finished_cond.wait()); + ASSERT_EQ(crypto, nullptr); +} + +TEST_F(TestMockCryptoLuksLoadRequest, UnsupportedCipherMode) { + generate_header(CRYPT_LUKS2, "aes", 32, "cbc-essiv:sha256", 4096); + expect_crypto_layer_exists_check(); + expect_image_read(0, DEFAULT_INITIAL_READ_SIZE); + mock_load_request->send(); + image_read_request->complete(DEFAULT_INITIAL_READ_SIZE); + ASSERT_EQ(-ENOTSUP, finished_cond.wait()); + ASSERT_EQ(crypto, nullptr); +} + +TEST_F(TestMockCryptoLuksLoadRequest, HeaderBiggerThanInitialRead) { + generate_header(CRYPT_LUKS2, "aes", 64, "xts-plain64", 4096); + mock_load_request->set_initial_read_size(4096); + expect_crypto_layer_exists_check(); + expect_image_read(0, 4096); + mock_load_request->send(); + + expect_image_read(4096, MAXIMUM_HEADER_SIZE - 4096); + image_read_request->complete(4096); // complete initial read + + image_read_request->complete(MAXIMUM_HEADER_SIZE - 4096); + ASSERT_EQ(0, finished_cond.wait()); + ASSERT_NE(crypto, nullptr); +} + +TEST_F(TestMockCryptoLuksLoadRequest, KeyslotsBiggerThanInitialRead) { + 
generate_header(CRYPT_LUKS2, "aes", 64, "xts-plain64", 4096); + mock_load_request->set_initial_read_size(16384); + expect_crypto_layer_exists_check(); + expect_image_read(0, 16384); + mock_load_request->send(); + + expect_image_read(16384, data_offset - 16384); + image_read_request->complete(16384); // complete initial read + + image_read_request->complete(data_offset - 16384); + ASSERT_EQ(0, finished_cond.wait()); + ASSERT_NE(crypto, nullptr); +} + +TEST_F(TestMockCryptoLuksLoadRequest, WrongPassphrase) { + delete mock_load_request; + mock_load_request = MockLoadRequest::create( + mock_image_ctx, "wrong", &crypto, on_finish); + + generate_header(CRYPT_LUKS2, "aes", 64, "xts-plain64", 4096); + expect_crypto_layer_exists_check(); + expect_image_read(0, DEFAULT_INITIAL_READ_SIZE); + mock_load_request->send(); + + // crypt_volume_key_get will fail, we will retry reading more + expect_image_read(DEFAULT_INITIAL_READ_SIZE, + data_offset - DEFAULT_INITIAL_READ_SIZE); + image_read_request->complete(DEFAULT_INITIAL_READ_SIZE); + + image_read_request->complete(data_offset - DEFAULT_INITIAL_READ_SIZE); + ASSERT_EQ(-EPERM, finished_cond.wait()); + ASSERT_EQ(crypto, nullptr); +} + +TEST_F(TestMockCryptoLuksLoadRequest, CryptoAlreadyLoaded) { + generate_header(CRYPT_LUKS2, "aes", 32, "xts-plain64", 4096); + expect_crypto_layer_exists_check(true); + mock_load_request->send(); + ASSERT_EQ(-EEXIST, finished_cond.wait()); + ASSERT_EQ(crypto, nullptr); +} + +} // namespace luks +} // namespace crypto +} // namespace librbd diff --git a/src/test/librbd/crypto/openssl/test_DataCryptor.cc b/src/test/librbd/crypto/openssl/test_DataCryptor.cc index 16c144e4a3b..69e2b450778 100644 --- a/src/test/librbd/crypto/openssl/test_DataCryptor.cc +++ b/src/test/librbd/crypto/openssl/test_DataCryptor.cc @@ -14,7 +14,7 @@ const unsigned char TEST_IV[16] = {2}; const unsigned char TEST_IV_2[16] = {3}; const unsigned char TEST_DATA[4096] = {4}; -struct TestDataCryptor : public TestFixture { +struct 
TestCryptoOpensslDataCryptor : public TestFixture { DataCryptor *cryptor; void SetUp() override { @@ -30,28 +30,28 @@ struct TestDataCryptor : public TestFixture { } }; -TEST_F(TestDataCryptor, InvalidCipherName) { +TEST_F(TestCryptoOpensslDataCryptor, InvalidCipherName) { EXPECT_EQ(-EINVAL, cryptor->init(nullptr, TEST_KEY, sizeof(TEST_KEY))); EXPECT_EQ(-EINVAL, cryptor->init("", TEST_KEY, sizeof(TEST_KEY))); EXPECT_EQ(-EINVAL, cryptor->init("Invalid", TEST_KEY, sizeof(TEST_KEY))); } -TEST_F(TestDataCryptor, InvalidKey) { +TEST_F(TestCryptoOpensslDataCryptor, InvalidKey) { EXPECT_EQ(-EINVAL, cryptor->init(TEST_CIPHER_NAME, nullptr, 0)); EXPECT_EQ(-EINVAL, cryptor->init(TEST_CIPHER_NAME, nullptr, sizeof(TEST_KEY))); EXPECT_EQ(-EINVAL, cryptor->init(TEST_CIPHER_NAME, TEST_KEY, 1)); } -TEST_F(TestDataCryptor, GetContextInvalidMode) { +TEST_F(TestCryptoOpensslDataCryptor, GetContextInvalidMode) { EXPECT_EQ(nullptr, cryptor->get_context(static_cast<CipherMode>(-1))); } -TEST_F(TestDataCryptor, ReturnNullContext) { +TEST_F(TestCryptoOpensslDataCryptor, ReturnNullContext) { cryptor->return_context(nullptr, static_cast<CipherMode>(-1)); } -TEST_F(TestDataCryptor, ReturnContextInvalidMode) { +TEST_F(TestCryptoOpensslDataCryptor, ReturnContextInvalidMode) { auto ctx = cryptor->get_context(CipherMode::CIPHER_MODE_ENC); ASSERT_NE(ctx, nullptr); cryptor->return_context(ctx, CipherMode::CIPHER_MODE_DEC); @@ -60,7 +60,7 @@ TEST_F(TestDataCryptor, ReturnContextInvalidMode) { cryptor->return_context(ctx, static_cast<CipherMode>(-1)); } -TEST_F(TestDataCryptor, EncryptDecrypt) { +TEST_F(TestCryptoOpensslDataCryptor, EncryptDecrypt) { auto ctx = cryptor->get_context(CipherMode::CIPHER_MODE_ENC); ASSERT_NE(ctx, nullptr); cryptor->init_context(ctx, TEST_IV, sizeof(TEST_IV)); @@ -78,7 +78,7 @@ TEST_F(TestDataCryptor, EncryptDecrypt) { cryptor->return_context(ctx, CipherMode::CIPHER_MODE_DEC); } -TEST_F(TestDataCryptor, ReuseContext) { +TEST_F(TestCryptoOpensslDataCryptor, ReuseContext) 
{ auto ctx = cryptor->get_context(CipherMode::CIPHER_MODE_ENC); ASSERT_NE(ctx, nullptr); @@ -105,7 +105,7 @@ TEST_F(TestDataCryptor, ReuseContext) { cryptor->return_context(ctx2, CipherMode::CIPHER_MODE_ENC); } -TEST_F(TestDataCryptor, InvalidIVLength) { +TEST_F(TestCryptoOpensslDataCryptor, InvalidIVLength) { auto ctx = cryptor->get_context(CipherMode::CIPHER_MODE_ENC); ASSERT_NE(ctx, nullptr); diff --git a/src/test/librbd/crypto/test_mock_BlockCrypto.cc b/src/test/librbd/crypto/test_mock_BlockCrypto.cc index 72510165850..2089970f7ae 100644 --- a/src/test/librbd/crypto/test_mock_BlockCrypto.cc +++ b/src/test/librbd/crypto/test_mock_BlockCrypto.cc @@ -21,12 +21,13 @@ MATCHER_P(CompareArrayToString, s, "") { return (memcmp(arg, s.c_str(), s.length()) == 0); } -struct TestMockBlockCrypto : public TestFixture { +struct TestMockCryptoBlockCrypto : public TestFixture { MockDataCryptor cryptor; - BlockCrypto<MockCryptoContext>* bc; + ceph::ref_t<BlockCrypto<MockCryptoContext>> bc; int cryptor_block_size = 2; int cryptor_iv_size = 16; int block_size = 4; + int data_offset = 0; ExpectationSet* expectation_set; void SetUp() override { @@ -35,7 +36,7 @@ struct TestMockBlockCrypto : public TestFixture { cryptor.block_size = cryptor_block_size; bc = new BlockCrypto<MockCryptoContext>( reinterpret_cast<CephContext*>(m_ioctx.cct()), &cryptor, - block_size); + block_size, data_offset); expectation_set = new ExpectationSet(); } @@ -72,7 +73,7 @@ struct TestMockBlockCrypto : public TestFixture { } }; -TEST_F(TestMockBlockCrypto, Encrypt) { +TEST_F(TestMockCryptoBlockCrypto, Encrypt) { uint32_t image_offset = 0x1234 * block_size; ceph::bufferlist data1; @@ -103,19 +104,19 @@ TEST_F(TestMockBlockCrypto, Encrypt) { ASSERT_TRUE(data.is_aligned(block_size)); } -TEST_F(TestMockBlockCrypto, UnalignedImageOffset) { +TEST_F(TestMockCryptoBlockCrypto, UnalignedImageOffset) { ceph::bufferlist data; data.append("1234"); ASSERT_EQ(-EINVAL, bc->encrypt(&data, 2)); } -TEST_F(TestMockBlockCrypto, 
UnalignedDataLength) { +TEST_F(TestMockCryptoBlockCrypto, UnalignedDataLength) { ceph::bufferlist data; data.append("123"); ASSERT_EQ(-EINVAL, bc->encrypt(&data, 0)); } -TEST_F(TestMockBlockCrypto, GetContextError) { +TEST_F(TestMockCryptoBlockCrypto, GetContextError) { ceph::bufferlist data; data.append("1234"); EXPECT_CALL(cryptor, get_context(CipherMode::CIPHER_MODE_ENC)).WillOnce( @@ -123,7 +124,7 @@ TEST_F(TestMockBlockCrypto, GetContextError) { ASSERT_EQ(-EIO, bc->encrypt(&data, 0)); } -TEST_F(TestMockBlockCrypto, InitContextError) { +TEST_F(TestMockCryptoBlockCrypto, InitContextError) { ceph::bufferlist data; data.append("1234"); expect_get_context(CipherMode::CIPHER_MODE_ENC); @@ -131,7 +132,7 @@ TEST_F(TestMockBlockCrypto, InitContextError) { ASSERT_EQ(-123, bc->encrypt(&data, 0)); } -TEST_F(TestMockBlockCrypto, UpdateContextError) { +TEST_F(TestMockCryptoBlockCrypto, UpdateContextError) { ceph::bufferlist data; data.append("1234"); expect_get_context(CipherMode::CIPHER_MODE_ENC); diff --git a/src/test/librbd/crypto/test_mock_CryptoContextPool.cc b/src/test/librbd/crypto/test_mock_CryptoContextPool.cc index 1d0402dd7b0..6eb7877eb66 100644 --- a/src/test/librbd/crypto/test_mock_CryptoContextPool.cc +++ b/src/test/librbd/crypto/test_mock_CryptoContextPool.cc @@ -14,7 +14,7 @@ using ::testing::Return; namespace librbd { namespace crypto { -struct TestMockCryptoContextPool : public ::testing::Test { +struct TestMockCryptoCryptoContextPool : public ::testing::Test { MockDataCryptor cryptor; void expect_get_context(CipherMode mode) { @@ -28,7 +28,7 @@ struct TestMockCryptoContextPool : public ::testing::Test { } }; -TEST_F(TestMockCryptoContextPool, Test) { +TEST_F(TestMockCryptoCryptoContextPool, Test) { CryptoContextPool<MockCryptoContext> pool(&cryptor, 1); expect_get_context(CipherMode::CIPHER_MODE_ENC); diff --git a/src/test/librbd/exclusive_lock/test_mock_PostAcquireRequest.cc b/src/test/librbd/exclusive_lock/test_mock_PostAcquireRequest.cc index 
7335dfe6ae1..943b8cc2dfa 100644 --- a/src/test/librbd/exclusive_lock/test_mock_PostAcquireRequest.cc +++ b/src/test/librbd/exclusive_lock/test_mock_PostAcquireRequest.cc @@ -11,7 +11,6 @@ #include "test/librbd/mock/MockObjectMap.h" #include "test/librados_test_stub/MockTestMemIoCtxImpl.h" #include "test/librados_test_stub/MockTestMemRadosClient.h" -#include "librbd/cache/pwl/InitRequest.h" #include "librbd/exclusive_lock/PostAcquireRequest.h" #include "librbd/image/RefreshRequest.h" @@ -60,32 +59,6 @@ struct RefreshRequest<librbd::MockTestImageCtx> { RefreshRequest<librbd::MockTestImageCtx> *RefreshRequest<librbd::MockTestImageCtx>::s_instance = nullptr; } // namespace image - -namespace cache { -namespace pwl { - -template<> -struct InitRequest<librbd::MockTestImageCtx> { - static InitRequest *s_instance; - Context *on_finish = nullptr; - - static InitRequest *create(librbd::MockTestImageCtx &image_ctx, - Context *on_finish) { - ceph_assert(s_instance != nullptr); - s_instance->on_finish = on_finish; - return s_instance; - } - - InitRequest() { - s_instance = this; - } - MOCK_METHOD0(send, void()); -}; - -InitRequest<librbd::MockTestImageCtx> *InitRequest<librbd::MockTestImageCtx>::s_instance = nullptr; - -} // namespace pwl -} // namespace cache } // namespace librbd // template definitions @@ -117,7 +90,6 @@ class TestMockExclusiveLockPostAcquireRequest : public TestMockFixture { public: typedef PostAcquireRequest<MockTestImageCtx> MockPostAcquireRequest; typedef librbd::image::RefreshRequest<MockTestImageCtx> MockRefreshRequest; - typedef librbd::cache::pwl::InitRequest<MockTestImageCtx> MockInitRequest; void expect_test_features(MockTestImageCtx &mock_image_ctx, uint64_t features, bool enabled) { @@ -202,10 +174,14 @@ public: EXPECT_CALL(*mock_image_ctx.state, handle_prepare_lock_complete()); } - void expect_init_image_cache(MockTestImageCtx &mock_image_ctx, - MockInitRequest &mock_init_request, int r) { - EXPECT_CALL(mock_init_request, send()) - 
.WillOnce(FinishRequest(&mock_init_request, r, &mock_image_ctx)); + void expect_acquired_exclusive_lock(MockTestImageCtx &mock_image_ctx, int r) { + EXPECT_CALL(*mock_image_ctx.plugin_registry, acquired_exclusive_lock(_)) + .WillOnce(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue)); + } + + void expect_prerelease_exclusive_lock(MockTestImageCtx &mock_image_ctx, int r) { + EXPECT_CALL(*mock_image_ctx.plugin_registry, prerelease_exclusive_lock(_)) + .WillOnce(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue)); } }; @@ -239,8 +215,8 @@ TEST_F(TestMockExclusiveLockPostAcquireRequest, Success) { expect_get_journal_policy(mock_image_ctx, mock_journal_policy); expect_allocate_journal_tag(mock_image_ctx, mock_journal_policy, 0); - MockInitRequest mock_init_request; - expect_init_image_cache(mock_image_ctx, mock_init_request, 0); + expect_acquired_exclusive_lock(mock_image_ctx, 0); + C_SaferCond acquire_ctx; C_SaferCond ctx; MockPostAcquireRequest *req = MockPostAcquireRequest::create(mock_image_ctx, @@ -271,8 +247,7 @@ TEST_F(TestMockExclusiveLockPostAcquireRequest, SuccessRefresh) { mock_image_ctx.image_lock, false); expect_handle_prepare_lock_complete(mock_image_ctx); - MockInitRequest mock_init_request; - expect_init_image_cache(mock_image_ctx, mock_init_request, 0); + expect_acquired_exclusive_lock(mock_image_ctx, 0); C_SaferCond acquire_ctx; C_SaferCond ctx; @@ -305,8 +280,7 @@ TEST_F(TestMockExclusiveLockPostAcquireRequest, SuccessJournalDisabled) { mock_image_ctx.image_lock, false); expect_handle_prepare_lock_complete(mock_image_ctx); - MockInitRequest mock_init_request; - expect_init_image_cache(mock_image_ctx, mock_init_request, 0); + expect_acquired_exclusive_lock(mock_image_ctx, 0); C_SaferCond acquire_ctx; C_SaferCond ctx; @@ -344,8 +318,7 @@ TEST_F(TestMockExclusiveLockPostAcquireRequest, SuccessObjectMapDisabled) { expect_get_journal_policy(mock_image_ctx, mock_journal_policy); expect_allocate_journal_tag(mock_image_ctx, 
mock_journal_policy, 0); - MockInitRequest mock_init_request; - expect_init_image_cache(mock_image_ctx, mock_init_request, 0); + expect_acquired_exclusive_lock(mock_image_ctx, 0); C_SaferCond acquire_ctx; C_SaferCond ctx; @@ -401,8 +374,7 @@ TEST_F(TestMockExclusiveLockPostAcquireRequest, RefreshLockDisabled) { mock_image_ctx.image_lock, false); expect_handle_prepare_lock_complete(mock_image_ctx); - MockInitRequest mock_init_request; - expect_init_image_cache(mock_image_ctx, mock_init_request, 0); + expect_acquired_exclusive_lock(mock_image_ctx, 0); C_SaferCond acquire_ctx; C_SaferCond ctx; @@ -521,8 +493,8 @@ TEST_F(TestMockExclusiveLockPostAcquireRequest, InitImageCacheError) { expect_get_journal_policy(mock_image_ctx, mock_journal_policy); expect_allocate_journal_tag(mock_image_ctx, mock_journal_policy, 0); - MockInitRequest mock_init_request; - expect_init_image_cache(mock_image_ctx, mock_init_request, -ENOENT); + expect_acquired_exclusive_lock(mock_image_ctx, -ENOENT); + expect_prerelease_exclusive_lock(mock_image_ctx, 0); expect_close_journal(mock_image_ctx, mock_journal); expect_close_object_map(mock_image_ctx, mock_object_map); @@ -593,8 +565,7 @@ TEST_F(TestMockExclusiveLockPostAcquireRequest, OpenObjectMapTooBig) { expect_get_journal_policy(mock_image_ctx, mock_journal_policy); expect_allocate_journal_tag(mock_image_ctx, mock_journal_policy, 0); - MockInitRequest mock_init_request; - expect_init_image_cache(mock_image_ctx, mock_init_request, 0); + expect_acquired_exclusive_lock(mock_image_ctx, 0); C_SaferCond acquire_ctx; C_SaferCond ctx; diff --git a/src/test/librbd/exclusive_lock/test_mock_PreReleaseRequest.cc b/src/test/librbd/exclusive_lock/test_mock_PreReleaseRequest.cc index a3f2e14e10e..ceb45ca2e68 100644 --- a/src/test/librbd/exclusive_lock/test_mock_PreReleaseRequest.cc +++ b/src/test/librbd/exclusive_lock/test_mock_PreReleaseRequest.cc @@ -135,11 +135,9 @@ public: .WillOnce(CompleteContext(0, mock_image_ctx.image_ctx->op_work_queue)); } - void 
expect_close_image_cache(MockTestImageCtx &mock_image_ctx, int r) { - EXPECT_CALL(*mock_image_ctx.io_image_dispatcher, - shut_down_dispatch(io::IMAGE_DISPATCH_LAYER_WRITEBACK_CACHE, _)) - .WillOnce(WithArg<1>( - CompleteContext(0, mock_image_ctx.image_ctx->op_work_queue))); + void expect_prerelease_exclusive_lock(MockTestImageCtx &mock_image_ctx, int r) { + EXPECT_CALL(*mock_image_ctx.plugin_registry, prerelease_exclusive_lock(_)) + .WillOnce(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue)); } void expect_invalidate_cache(MockTestImageCtx &mock_image_ctx, @@ -204,7 +202,7 @@ TEST_F(TestMockExclusiveLockPreReleaseRequest, Success) { expect_prepare_lock(mock_image_ctx); - expect_close_image_cache(mock_image_ctx, 0); + expect_prerelease_exclusive_lock(mock_image_ctx, 0); expect_invalidate_cache(mock_image_ctx, 0); @@ -245,7 +243,7 @@ TEST_F(TestMockExclusiveLockPreReleaseRequest, SuccessJournalDisabled) { expect_cancel_op_requests(mock_image_ctx, 0); expect_prepare_lock(mock_image_ctx); - expect_close_image_cache(mock_image_ctx, 0); + expect_prerelease_exclusive_lock(mock_image_ctx, 0); expect_invalidate_cache(mock_image_ctx, 0); @@ -281,7 +279,7 @@ TEST_F(TestMockExclusiveLockPreReleaseRequest, SuccessObjectMapDisabled) { InSequence seq; expect_cancel_op_requests(mock_image_ctx, 0); - expect_close_image_cache(mock_image_ctx, 0); + expect_prerelease_exclusive_lock(mock_image_ctx, 0); expect_invalidate_cache(mock_image_ctx, 0); @@ -313,7 +311,7 @@ TEST_F(TestMockExclusiveLockPreReleaseRequest, Blocklisted) { -EBLOCKLISTED); expect_prepare_lock(mock_image_ctx); - expect_close_image_cache(mock_image_ctx, 0); + expect_prerelease_exclusive_lock(mock_image_ctx, 0); expect_invalidate_cache(mock_image_ctx, -EBLOCKLISTED); @@ -356,7 +354,7 @@ TEST_F(TestMockExclusiveLockPreReleaseRequest, Disabled) { expect_prepare_lock(mock_image_ctx); - expect_close_image_cache(mock_image_ctx, 0); + expect_prerelease_exclusive_lock(mock_image_ctx, 0); 
expect_invalidate_cache(mock_image_ctx, 0); diff --git a/src/test/librbd/io/test_mock_CopyupRequest.cc b/src/test/librbd/io/test_mock_CopyupRequest.cc index 677128d190c..5963b8c7d87 100644 --- a/src/test/librbd/io/test_mock_CopyupRequest.cc +++ b/src/test/librbd/io/test_mock_CopyupRequest.cc @@ -219,7 +219,7 @@ struct TestMockIoCopyupRequest : public TestMockFixture { aio_comp->read_result = std::move(req->read_result); aio_comp->read_result.set_image_extents(image_extents); aio_comp->set_request_count(1); - auto ctx = new ReadResult::C_ImageReadRequest(aio_comp, + auto ctx = new ReadResult::C_ImageReadRequest(aio_comp, 0, image_extents); ctx->bl.append(data); mock_image_ctx.image_ctx->op_work_queue->queue(ctx, r); diff --git a/src/test/librbd/migration/test_mock_HttpClient.cc b/src/test/librbd/migration/test_mock_HttpClient.cc new file mode 100644 index 00000000000..57718edf127 --- /dev/null +++ b/src/test/librbd/migration/test_mock_HttpClient.cc @@ -0,0 +1,870 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/librbd/test_mock_fixture.h" +#include "test/librbd/test_support.h" +#include "include/rbd_types.h" +#include "common/ceph_mutex.h" +#include "librbd/migration/HttpClient.h" +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include <boost/asio/ip/tcp.hpp> +#include <boost/beast/core.hpp> +#include <boost/beast/http.hpp> + +namespace librbd { +namespace { + +struct MockTestImageCtx : public MockImageCtx { + MockTestImageCtx(ImageCtx &image_ctx) : MockImageCtx(image_ctx) { + } +}; + +} // anonymous namespace + +namespace util { + +inline ImageCtx *get_image_ctx(MockTestImageCtx *image_ctx) { + return image_ctx->image_ctx; +} + +} // namespace util +} // namespace librbd + +#include "librbd/migration/HttpClient.cc" + +using EmptyHttpRequest = boost::beast::http::request< + boost::beast::http::empty_body>; +using HttpResponse = boost::beast::http::response< + 
boost::beast::http::string_body>; + +namespace boost { +namespace beast { +namespace http { + +template <typename Body> +bool operator==(const boost::beast::http::request<Body>& lhs, + const boost::beast::http::request<Body>& rhs) { + return (lhs.method() == rhs.method() && + lhs.target() == rhs.target()); +} + +template <typename Body> +bool operator==(const boost::beast::http::response<Body>& lhs, + const boost::beast::http::response<Body>& rhs) { + return (lhs.result() == rhs.result() && + lhs.body() == rhs.body()); +} + +} // namespace http +} // namespace beast +} // namespace boost + +namespace librbd { +namespace migration { + +using ::testing::Invoke; + +class TestMockMigrationHttpClient : public TestMockFixture { +public: + typedef HttpClient<MockTestImageCtx> MockHttpClient; + + void SetUp() override { + TestMockFixture::SetUp(); + + ASSERT_EQ(0, open_image(m_image_name, &m_image_ctx)); + + create_acceptor(false); + } + + void TearDown() override { + m_acceptor.reset(); + + TestMockFixture::TearDown(); + } + + void create_acceptor(bool reuse) { + m_acceptor.emplace(*m_image_ctx->asio_engine, + boost::asio::ip::tcp::endpoint( + boost::asio::ip::tcp::v4(), m_server_port), reuse); + m_server_port = m_acceptor->local_endpoint().port(); + } + + std::string get_local_url(UrlScheme url_scheme) { + std::stringstream sstream; + switch (url_scheme) { + case URL_SCHEME_HTTP: + sstream << "http://127.0.0.1"; + break; + case URL_SCHEME_HTTPS: + sstream << "https://localhost"; + break; + default: + ceph_assert(false); + break; + } + + sstream << ":" << m_server_port << "/target"; + return sstream.str(); + } + + void client_accept(boost::asio::ip::tcp::socket* socket, bool close, + Context* on_connect) { + m_acceptor->async_accept( + boost::asio::make_strand(m_image_ctx->asio_engine->get_executor()), + [socket, close, on_connect] + (auto ec, boost::asio::ip::tcp::socket in_socket) { + if (close) { + in_socket.shutdown(boost::asio::ip::tcp::socket::shutdown_both); + } 
else { + ASSERT_FALSE(ec) << "Unexpected error: " << ec; + *socket = std::move(in_socket); + } + on_connect->complete(0); + }); + } + + template <typename Body> + void client_read_request(boost::asio::ip::tcp::socket& socket, + boost::beast::http::request<Body>& expected_req) { + boost::beast::http::request<Body> req; + boost::beast::error_code ec; + boost::beast::http::read(socket, m_buffer, req, ec); + ASSERT_FALSE(ec) << "Unexpected errror: " << ec; + + expected_req.target("/target"); + ASSERT_EQ(expected_req, req); + } + + void client_write_response(boost::asio::ip::tcp::socket& socket, + HttpResponse& expected_res) { + expected_res.set(boost::beast::http::field::server, + BOOST_BEAST_VERSION_STRING); + expected_res.set(boost::beast::http::field::content_type, "text/plain"); + expected_res.content_length(expected_res.body().size()); + expected_res.prepare_payload(); + + boost::beast::error_code ec; + boost::beast::http::write(socket, expected_res, ec); + ASSERT_FALSE(ec) << "Unexpected errror: " << ec; + } + + template <typename Stream> + void client_ssl_handshake(Stream& stream, bool ignore_failure, + Context* on_handshake) { + stream.async_handshake( + boost::asio::ssl::stream_base::server, + [ignore_failure, on_handshake](auto ec) { + ASSERT_FALSE(!ignore_failure && ec) << "Unexpected error: " << ec; + on_handshake->complete(-ec.value()); + }); + } + + template <typename Stream> + void client_ssl_shutdown(Stream& stream, Context* on_shutdown) { + stream.async_shutdown( + [on_shutdown](auto ec) { + ASSERT_FALSE(ec) << "Unexpected error: " << ec; + on_shutdown->complete(-ec.value()); + }); + } + + void load_server_certificate(boost::asio::ssl::context& ctx) { + ctx.set_options( + boost::asio::ssl::context::default_workarounds | + boost::asio::ssl::context::no_sslv2 | + boost::asio::ssl::context::single_dh_use); + ctx.use_certificate_chain( + boost::asio::buffer(CERT.data(), CERT.size())); + ctx.use_private_key( + boost::asio::buffer(KEY.data(), KEY.size()), + 
boost::asio::ssl::context::file_format::pem); + ctx.use_tmp_dh( + boost::asio::buffer(DH.data(), DH.size())); + } + + // dummy self-signed cert for localhost + const std::string CERT = + "-----BEGIN CERTIFICATE-----\n" + "MIIDXzCCAkegAwIBAgIUYH6rAaq66LC6yJ3XK1WEMIfmY4cwDQYJKoZIhvcNAQEL\n" + "BQAwPzELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAlZBMQ8wDQYDVQQHDAZNY0xlYW4x\n" + "EjAQBgNVBAMMCWxvY2FsaG9zdDAeFw0yMDExMDIyMTM5NTVaFw00ODAzMjAyMTM5\n" + "NTVaMD8xCzAJBgNVBAYTAlVTMQswCQYDVQQIDAJWQTEPMA0GA1UEBwwGTWNMZWFu\n" + "MRIwEAYDVQQDDAlsb2NhbGhvc3QwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK\n" + "AoIBAQCeRkyxjP0eNHxzj4/R+Bg/31p7kEjB5d/LYtrzBIYNe+3DN8gdReixEpR5\n" + "lgTLDsl8gfk2HRz4cnAiseqYL6GKtw/cFadzLyXTbW4iavmTWiGYw/8RJlKunbhA\n" + "hDjM6H99ysLf0NS6t14eK+bEJIW1PiTYRR1U5I4kSIjpCX7+nJVuwMEZ2XBpN3og\n" + "nHhv2hZYTdzEkQEyZHz4V/ApfD7rlja5ecd/vJfPJeA8nudnGCh3Uo6f8I9TObAj\n" + "8hJdfRiRBvnA4NnkrMrxW9UtVjScnw9Xia11FM/IGJIgMpLQ5dqBw930p6FxMYtn\n" + "tRD1AF9sT+YjoCaHv0hXZvBEUEF3AgMBAAGjUzBRMB0GA1UdDgQWBBTQoIiX3+p/\n" + "P4Xz2vwERz6pbjPGhzAfBgNVHSMEGDAWgBTQoIiX3+p/P4Xz2vwERz6pbjPGhzAP\n" + "BgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQCVKoYAw+D1qqWRDSh3\n" + "2KlKMnT6sySo7XmReGArj8FTKiZUprByj5CfAtaiDSdPOpcg3EazWbdasZbMmSQm\n" + "+jpe5WoKnxL9b12lwwUYHrLl6RlrDHVkIVlXLNbJFY5TpfjvZfHpwVAygF3fnbgW\n" + "PPuODUNAS5NDwST+t29jBZ/wwU0pyW0CS4K5d3XMGHBc13j2V/FyvmsZ5xfA4U9H\n" + "oEnmZ/Qm+FFK/nR40rTAZ37cuv4ysKFtwvatNgTfHGJwaBUkKFdDbcyxt9abCi6x\n" + "/K+ScoJtdIeVcfx8Fnc5PNtSpy8bHI3Zy4IEyw4kOqwwI1h37iBafZ2WdQkTxlAx\n" + "JIDj\n" + "-----END CERTIFICATE-----\n"; + const std::string KEY = + "-----BEGIN PRIVATE KEY-----\n" + "MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCeRkyxjP0eNHxz\n" + "j4/R+Bg/31p7kEjB5d/LYtrzBIYNe+3DN8gdReixEpR5lgTLDsl8gfk2HRz4cnAi\n" + "seqYL6GKtw/cFadzLyXTbW4iavmTWiGYw/8RJlKunbhAhDjM6H99ysLf0NS6t14e\n" + "K+bEJIW1PiTYRR1U5I4kSIjpCX7+nJVuwMEZ2XBpN3ognHhv2hZYTdzEkQEyZHz4\n" + "V/ApfD7rlja5ecd/vJfPJeA8nudnGCh3Uo6f8I9TObAj8hJdfRiRBvnA4NnkrMrx\n" + 
"W9UtVjScnw9Xia11FM/IGJIgMpLQ5dqBw930p6FxMYtntRD1AF9sT+YjoCaHv0hX\n" + "ZvBEUEF3AgMBAAECggEACCaYpoAbPOX5Dr5y6p47KXboIvrgNFQRPVke62rtOF6M\n" + "dQQ3YwKJpCzPxp8qKgbd63KKEfZX2peSHMdKzIGPcSRSRcQ7tlvUN9on1M/rgGIg\n" + "3swhI5H0qhdnOLNWdX73qdO6S2pmuiLdTvJ11N4IoLfNj/GnPAr1Ivs1ScL6bkQv\n" + "UybaNQ/g2lB0tO7vUeVe2W/AqsIb1eQlf2g+SH7xRj2bGQkr4cWTylqfiVoL/Xic\n" + "QVTCks3BWaZhYIhTFgvqVhXZpp52O9J+bxsWJItKQrrCBemxwp82xKbiW/KoI9L1\n" + "wSnKvxx7Q3RUN5EvXeOpTRR8QIpBoxP3TTeoj+EOMQKBgQDQb/VfLDlLgfYJpgRC\n" + "hKCLW90un9op3nA2n9Dmm9TTLYOmUyiv5ub8QDINEw/YK/NE2JsTSUk2msizqTLL\n" + "Z82BFbz9kPlDbJ5MgxG5zXeLvOLurAFmZk/z5JJO+65PKjf0QVLncSAJvMCeNFuC\n" + "2yZrEzbrItrjQsN6AedWdx6TTwKBgQDCZAsSI3lQgOh2q1GSxjuIzRAc7JnSGBvD\n" + "nG8+SkfKAy7BWe638772Dgx8KYO7TLI4zlm8c9Tr/nkZsGWmM5S2DMI69PWOQWNa\n" + "R6QzOFFwNg2JETH7ow+x8+9Q9d3WsPzROz3r5uDXgEk0glthaymVoPILFOiYpz3r\n" + "heUbd6mFWQKBgQCCJBVJGhyf54IOHij0u0heGrpr/QTDNY5MnNZa1hs4y2cydyOl\n" + "SH8aKp7ViPxQlYhriO6ySQS8YkJD4rXDSImIOmFo1Ja9oVjpHsD3iLFGf2YVbTHm\n" + "lKUA+8raI8x+wzZyfELeHMTLL534aWpltp0zJ6kXgQi38pyIVh3x36gogwKBgQCt\n" + "nba5k49VVFzLKEXqBjzD+QqMGtFjcH7TnZNJmgQ2K9OFgzIPf5atomyKNHXgQibn\n" + "T32cMAQaZqR4SjDvWSBX3FtZVtE+Ja57woKn8IPj6ZL7Oa1fpwpskIbM01s31cln\n" + "gjbSy9lC/+PiDw9YmeKBLkcfmKQJO021Xlf6yUxRuQKBgBWPODUO8oKjkuJXNI/w\n" + "El9hNWkd+/SZDfnt93dlVyXTtTF0M5M95tlOgqvLtWKSyB/BOnoZYWqR8luMl15d\n" + "bf75j5mB0lHMWtyQgvZSkFqe9Or7Zy7hfTShDlZ/w+OXK7PGesaE1F14irShXSji\n" + "yn5DZYAZ5pU52xreJeYvDngO\n" + "-----END PRIVATE KEY-----\n"; + const std::string DH = + "-----BEGIN DH PARAMETERS-----\n" + "MIIBCAKCAQEA4+DA1j0gDWS71okwHpnvA65NmmR4mf+B3H39g163zY5S+cnWS2LI\n" + "dvqnUDpw13naWtQ+Nu7I4rk1XoPaxOPSTu1MTbtYOxxU9M1ceBu4kQjDeHwasPVM\n" + "zyEs1XXX3tsbPUxAuayX+AgW6QQAQUEjKDnv3FzVnQTFjwI49LqjnrSjbgQcoMaH\n" + "EdGGUc6t1/We2vtsJZx0/dbaMkzFYO8dAbEYHL4sPKQb2mLpCPJZC3vwzpFkHFCd\n" + "QSnLW2qRhy+66Mf8shdr6uvpoMcnKMOAvjKdXl9PBeJM9eJPz2lC4tnTiM3DqNzK\n" + "Hn8+Pu3KkSIFL/5uBVu1fZSq+lFIEI23wwIBAg==\n" + "-----END DH PARAMETERS-----\n"; + + 
librbd::ImageCtx *m_image_ctx; + + std::optional<boost::asio::ip::tcp::acceptor> m_acceptor; + boost::beast::flat_buffer m_buffer; + uint64_t m_server_port = 0; +}; + +TEST_F(TestMockMigrationHttpClient, OpenCloseHttp) { + boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx; + client_accept(&socket, false, &on_connect_ctx); + + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTP)); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx.wait()); + ASSERT_EQ(0, ctx1.wait()); + + C_SaferCond ctx2; + http_client.close(&ctx2); + ASSERT_EQ(0, ctx2.wait()); +} + +TEST_F(TestMockMigrationHttpClient, OpenCloseHttps) { + boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx; + client_accept(&socket, false, &on_connect_ctx); + + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTPS)); + http_client.set_ignore_self_signed_cert(true); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx.wait()); + + boost::asio::ssl::context ssl_context{boost::asio::ssl::context::tlsv12}; + load_server_certificate(ssl_context); + boost::beast::ssl_stream<boost::beast::tcp_stream> ssl_stream{ + std::move(socket), ssl_context}; + + C_SaferCond on_ssl_handshake_ctx; + client_ssl_handshake(ssl_stream, false, &on_ssl_handshake_ctx); + ASSERT_EQ(0, on_ssl_handshake_ctx.wait()); + + ASSERT_EQ(0, ctx1.wait()); + + C_SaferCond ctx2; + http_client.close(&ctx2); + + C_SaferCond on_ssl_shutdown_ctx; + client_ssl_shutdown(ssl_stream, &on_ssl_shutdown_ctx); + ASSERT_EQ(0, on_ssl_shutdown_ctx.wait()); + + ASSERT_EQ(0, ctx2.wait()); +} + +TEST_F(TestMockMigrationHttpClient, OpenHttpsHandshakeFail) { + boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx; + client_accept(&socket, 
false, &on_connect_ctx); + + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTPS)); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx.wait()); + + boost::asio::ssl::context ssl_context{boost::asio::ssl::context::tlsv12}; + load_server_certificate(ssl_context); + boost::beast::ssl_stream<boost::beast::tcp_stream> ssl_stream{ + std::move(socket), ssl_context}; + + C_SaferCond on_ssl_handshake_ctx; + client_ssl_handshake(ssl_stream, true, &on_ssl_handshake_ctx); + ASSERT_NE(0, on_ssl_handshake_ctx.wait()); + ASSERT_NE(0, ctx1.wait()); +} + +TEST_F(TestMockMigrationHttpClient, OpenInvalidUrl) { + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, "ftp://nope/"); + + C_SaferCond ctx; + http_client.open(&ctx); + ASSERT_EQ(-EINVAL, ctx.wait()); +} + +TEST_F(TestMockMigrationHttpClient, OpenResolveFail) { + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, "http://invalid.ceph.com"); + + C_SaferCond ctx; + http_client.open(&ctx); + ASSERT_EQ(-ENOENT, ctx.wait()); +} + +TEST_F(TestMockMigrationHttpClient, OpenConnectFail) { + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + "http://localhost:2/"); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(-ECONNREFUSED, ctx1.wait()); +} + +TEST_F(TestMockMigrationHttpClient, IssueHead) { + boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx; + client_accept(&socket, false, &on_connect_ctx); + + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTP)); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx.wait()); + ASSERT_EQ(0, ctx1.wait()); + + EmptyHttpRequest req; + 
req.method(boost::beast::http::verb::head); + + C_SaferCond ctx2; + HttpResponse res; + http_client.issue(EmptyHttpRequest{req}, + [&ctx2, &res](int r, HttpResponse&& response) mutable { + res = std::move(response); + ctx2.complete(r); + }); + + HttpResponse expected_res; + client_read_request(socket, req); + client_write_response(socket, expected_res); + + ASSERT_EQ(0, ctx2.wait()); + ASSERT_EQ(expected_res, res); + + C_SaferCond ctx3; + http_client.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +TEST_F(TestMockMigrationHttpClient, IssueGet) { + boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx; + client_accept(&socket, false, &on_connect_ctx); + + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTP)); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx.wait()); + ASSERT_EQ(0, ctx1.wait()); + + EmptyHttpRequest req; + req.method(boost::beast::http::verb::get); + + C_SaferCond ctx2; + HttpResponse res; + http_client.issue(EmptyHttpRequest{req}, + [&ctx2, &res](int r, HttpResponse&& response) mutable { + res = std::move(response); + ctx2.complete(r); + }); + + HttpResponse expected_res; + expected_res.body() = "test"; + client_read_request(socket, req); + client_write_response(socket, expected_res); + + ASSERT_EQ(0, ctx2.wait()); + ASSERT_EQ(expected_res, res); + + C_SaferCond ctx3; + http_client.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +TEST_F(TestMockMigrationHttpClient, IssueSendFailed) { + boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx1; + client_accept(&socket, false, &on_connect_ctx1); + + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTP)); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx1.wait()); + ASSERT_EQ(0, ctx1.wait()); + + // close 
connection to client + boost::system::error_code ec; + socket.close(ec); + + C_SaferCond on_connect_ctx2; + client_accept(&socket, false, &on_connect_ctx2); + + // send request via closed connection + EmptyHttpRequest req; + req.method(boost::beast::http::verb::get); + + C_SaferCond ctx2; + http_client.issue(EmptyHttpRequest{req}, + [&ctx2](int r, HttpResponse&&) mutable { + ctx2.complete(r); + }); + + // connection will be reset and request retried + ASSERT_EQ(0, on_connect_ctx2.wait()); + HttpResponse expected_res; + expected_res.body() = "test"; + client_read_request(socket, req); + client_write_response(socket, expected_res); + ASSERT_EQ(0, ctx2.wait()); + + C_SaferCond ctx3; + http_client.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +TEST_F(TestMockMigrationHttpClient, IssueReceiveFailed) { + boost::asio::ip::tcp::socket socket1(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx1; + client_accept(&socket1, false, &on_connect_ctx1); + + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTP)); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx1.wait()); + ASSERT_EQ(0, ctx1.wait()); + + // send request via closed connection + EmptyHttpRequest req; + req.method(boost::beast::http::verb::get); + + C_SaferCond ctx2; + http_client.issue(EmptyHttpRequest{req}, + [&ctx2](int r, HttpResponse&&) mutable { + ctx2.complete(r); + }); + + // close connection to client after reading request + client_read_request(socket1, req); + + C_SaferCond on_connect_ctx2; + boost::asio::ip::tcp::socket socket2(*m_image_ctx->asio_engine); + client_accept(&socket2, false, &on_connect_ctx2); + + boost::system::error_code ec; + socket1.close(ec); + ASSERT_EQ(0, on_connect_ctx2.wait()); + + // connection will be reset and request retried + HttpResponse expected_res; + expected_res.body() = "test"; + client_read_request(socket2, req); + client_write_response(socket2, 
expected_res); + ASSERT_EQ(0, ctx2.wait()); + + C_SaferCond ctx3; + http_client.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +TEST_F(TestMockMigrationHttpClient, IssueResetFailed) { + m_server_port = 0; + create_acceptor(true); + + boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx1; + client_accept(&socket, false, &on_connect_ctx1); + + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTP)); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx1.wait()); + ASSERT_EQ(0, ctx1.wait()); + + // send requests then close connection + EmptyHttpRequest req; + req.method(boost::beast::http::verb::get); + + C_SaferCond ctx2; + http_client.issue(EmptyHttpRequest{req}, + [&ctx2](int r, HttpResponse&&) mutable { + ctx2.complete(r); + }); + + C_SaferCond ctx3; + http_client.issue(EmptyHttpRequest{req}, + [&ctx3](int r, HttpResponse&&) mutable { + ctx3.complete(r); + }); + + client_read_request(socket, req); + client_read_request(socket, req); + + // close connection to client and verify requests are failed + m_acceptor.reset(); + boost::system::error_code ec; + socket.close(ec); + + ASSERT_EQ(-ECONNREFUSED, ctx2.wait()); + ASSERT_EQ(-ECONNREFUSED, ctx3.wait()); + + // additional request will retry the failed connection + create_acceptor(true); + + C_SaferCond on_connect_ctx2; + client_accept(&socket, false, &on_connect_ctx2); + + C_SaferCond ctx4; + http_client.issue(EmptyHttpRequest{req}, + [&ctx4](int r, HttpResponse&&) mutable { + ctx4.complete(r); + }); + + ASSERT_EQ(0, on_connect_ctx2.wait()); + client_read_request(socket, req); + + HttpResponse expected_res; + expected_res.body() = "test"; + client_write_response(socket, expected_res); + ASSERT_EQ(0, ctx4.wait()); + + C_SaferCond ctx5; + http_client.close(&ctx5); + ASSERT_EQ(0, ctx5.wait()); +} + +TEST_F(TestMockMigrationHttpClient, IssuePipelined) { + 
boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx; + client_accept(&socket, false, &on_connect_ctx); + + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTP)); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx.wait()); + ASSERT_EQ(0, ctx1.wait()); + + // issue two pipelined (concurrent) get requests + EmptyHttpRequest req1; + req1.method(boost::beast::http::verb::get); + + C_SaferCond ctx2; + HttpResponse res1; + http_client.issue(EmptyHttpRequest{req1}, + [&ctx2, &res1](int r, HttpResponse&& response) mutable { + res1 = std::move(response); + ctx2.complete(r); + }); + + EmptyHttpRequest req2; + req2.method(boost::beast::http::verb::get); + + C_SaferCond ctx3; + HttpResponse res2; + http_client.issue(EmptyHttpRequest{req2}, + [&ctx3, &res2](int r, HttpResponse&& response) mutable { + res2 = std::move(response); + ctx3.complete(r); + }); + + client_read_request(socket, req1); + client_read_request(socket, req2); + + // read the responses sequentially + HttpResponse expected_res1; + expected_res1.body() = "test"; + client_write_response(socket, expected_res1); + ASSERT_EQ(0, ctx2.wait()); + ASSERT_EQ(expected_res1, res1); + + HttpResponse expected_res2; + expected_res2.body() = "test"; + client_write_response(socket, expected_res2); + ASSERT_EQ(0, ctx3.wait()); + ASSERT_EQ(expected_res2, res2); + + C_SaferCond ctx4; + http_client.close(&ctx4); + ASSERT_EQ(0, ctx4.wait()); +} + +TEST_F(TestMockMigrationHttpClient, IssuePipelinedRestart) { + boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx1; + client_accept(&socket, false, &on_connect_ctx1); + + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTP)); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx1.wait()); + 
ASSERT_EQ(0, ctx1.wait()); + + // issue two pipelined (concurrent) get requests + EmptyHttpRequest req1; + req1.keep_alive(false); + req1.method(boost::beast::http::verb::get); + + C_SaferCond on_connect_ctx2; + client_accept(&socket, false, &on_connect_ctx2); + + C_SaferCond ctx2; + HttpResponse res1; + http_client.issue(EmptyHttpRequest{req1}, + [&ctx2, &res1](int r, HttpResponse&& response) mutable { + res1 = std::move(response); + ctx2.complete(r); + }); + + EmptyHttpRequest req2; + req2.method(boost::beast::http::verb::get); + + C_SaferCond ctx3; + HttpResponse res2; + http_client.issue(EmptyHttpRequest{req2}, + [&ctx3, &res2](int r, HttpResponse&& response) mutable { + res2 = std::move(response); + ctx3.complete(r); + }); + + client_read_request(socket, req1); + client_read_request(socket, req2); + + // read the responses sequentially + HttpResponse expected_res1; + expected_res1.body() = "test"; + expected_res1.keep_alive(false); + client_write_response(socket, expected_res1); + ASSERT_EQ(0, ctx2.wait()); + ASSERT_EQ(expected_res1, res1); + + // second request will need to be re-sent due to 'need_eof' condition + ASSERT_EQ(0, on_connect_ctx2.wait()); + client_read_request(socket, req2); + + HttpResponse expected_res2; + expected_res2.body() = "test"; + client_write_response(socket, expected_res2); + ASSERT_EQ(0, ctx3.wait()); + ASSERT_EQ(expected_res2, res2); + + C_SaferCond ctx4; + http_client.close(&ctx4); + ASSERT_EQ(0, ctx4.wait()); +} + +TEST_F(TestMockMigrationHttpClient, ShutdownInFlight) { + boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx; + client_accept(&socket, false, &on_connect_ctx); + + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTP)); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx.wait()); + ASSERT_EQ(0, ctx1.wait()); + + EmptyHttpRequest req; + 
req.method(boost::beast::http::verb::get); + + C_SaferCond ctx2; + http_client.issue(EmptyHttpRequest{req}, + [&ctx2](int r, HttpResponse&&) mutable { + ctx2.complete(r); + }); + + client_read_request(socket, req); + + C_SaferCond ctx3; + http_client.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); + ASSERT_EQ(-ESHUTDOWN, ctx2.wait()); +} + +TEST_F(TestMockMigrationHttpClient, GetSize) { + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTP)); + + boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx; + client_accept(&socket, false, &on_connect_ctx); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx.wait()); + ASSERT_EQ(0, ctx1.wait()); + + uint64_t size = 0; + C_SaferCond ctx2; + http_client.get_size(&size, &ctx2); + + EmptyHttpRequest expected_req; + expected_req.method(boost::beast::http::verb::head); + client_read_request(socket, expected_req); + + HttpResponse expected_res; + expected_res.body() = std::string(123, '1'); + client_write_response(socket, expected_res); + + ASSERT_EQ(0, ctx2.wait()); + ASSERT_EQ(123, size); + + C_SaferCond ctx3; + http_client.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +TEST_F(TestMockMigrationHttpClient, GetSizeError) { + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTP)); + + boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx; + client_accept(&socket, false, &on_connect_ctx); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx.wait()); + ASSERT_EQ(0, ctx1.wait()); + + uint64_t size = 0; + C_SaferCond ctx2; + http_client.get_size(&size, &ctx2); + + EmptyHttpRequest expected_req; + expected_req.method(boost::beast::http::verb::head); + client_read_request(socket, expected_req); + + HttpResponse expected_res; + 
expected_res.result(boost::beast::http::status::internal_server_error); + client_write_response(socket, expected_res); + + ASSERT_EQ(-EIO, ctx2.wait()); + + C_SaferCond ctx3; + http_client.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +TEST_F(TestMockMigrationHttpClient, Read) { + MockTestImageCtx mock_test_image_ctx(*m_image_ctx); + MockHttpClient http_client(&mock_test_image_ctx, + get_local_url(URL_SCHEME_HTTP)); + + boost::asio::ip::tcp::socket socket(*m_image_ctx->asio_engine); + C_SaferCond on_connect_ctx; + client_accept(&socket, false, &on_connect_ctx); + + C_SaferCond ctx1; + http_client.open(&ctx1); + ASSERT_EQ(0, on_connect_ctx.wait()); + ASSERT_EQ(0, ctx1.wait()); + + bufferlist bl; + C_SaferCond ctx2; + http_client.read({{0, 128}, {256, 64}}, &bl, &ctx2); + + EmptyHttpRequest expected_req1; + expected_req1.method(boost::beast::http::verb::get); + expected_req1.set(boost::beast::http::field::range, "bytes=0-127"); + client_read_request(socket, expected_req1); + + EmptyHttpRequest expected_req2; + expected_req2.method(boost::beast::http::verb::get); + expected_req2.set(boost::beast::http::field::range, "bytes=256-319"); + client_read_request(socket, expected_req2); + + HttpResponse expected_res1; + expected_res1.result(boost::beast::http::status::partial_content); + expected_res1.body() = std::string(128, '1'); + client_write_response(socket, expected_res1); + + HttpResponse expected_res2; + expected_res2.result(boost::beast::http::status::partial_content); + expected_res2.body() = std::string(64, '2'); + client_write_response(socket, expected_res2); + + ASSERT_EQ(192, ctx2.wait()); + + bufferlist expect_bl; + expect_bl.append(std::string(128, '1')); + expect_bl.append(std::string(64, '2')); + ASSERT_EQ(expect_bl, bl); + + C_SaferCond ctx3; + http_client.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +} // namespace migration +} // namespace librbd diff --git a/src/test/librbd/migration/test_mock_HttpStream.cc 
b/src/test/librbd/migration/test_mock_HttpStream.cc new file mode 100644 index 00000000000..aff22b757e9 --- /dev/null +++ b/src/test/librbd/migration/test_mock_HttpStream.cc @@ -0,0 +1,194 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/librbd/test_mock_fixture.h" +#include "test/librbd/test_support.h" +#include "include/rbd_types.h" +#include "common/ceph_mutex.h" +#include "librbd/migration/HttpClient.h" +#include "librbd/migration/HttpStream.h" +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include "json_spirit/json_spirit.h" +#include <boost/beast/http.hpp> + +namespace librbd { +namespace { + +struct MockTestImageCtx : public MockImageCtx { + MockTestImageCtx(ImageCtx &image_ctx) : MockImageCtx(image_ctx) { + } +}; + +} // anonymous namespace + +namespace migration { + +template <> +struct HttpClient<MockTestImageCtx> { + static HttpClient* s_instance; + static HttpClient* create(MockTestImageCtx*, const std::string&) { + ceph_assert(s_instance != nullptr); + return s_instance; + } + + MOCK_METHOD1(open, void(Context*)); + MOCK_METHOD1(close, void(Context*)); + MOCK_METHOD2(get_size, void(uint64_t*, Context*)); + MOCK_METHOD3(do_read, void(const io::Extents&, bufferlist*, Context*)); + void read(io::Extents&& extents, bufferlist* bl, Context* ctx) { + do_read(extents, bl, ctx); + } + + HttpClient() { + s_instance = this; + } +}; + +HttpClient<MockTestImageCtx>* HttpClient<MockTestImageCtx>::s_instance = nullptr; + +} // namespace migration +} // namespace librbd + +#include "librbd/migration/HttpStream.cc" + +namespace librbd { +namespace migration { + +using ::testing::_; +using ::testing::Invoke; +using ::testing::InSequence; +using ::testing::WithArgs; + +class TestMockMigrationHttpStream : public TestMockFixture { +public: + typedef HttpStream<MockTestImageCtx> MockHttpStream; + typedef HttpClient<MockTestImageCtx> MockHttpClient; + + librbd::ImageCtx *m_image_ctx; + + void 
SetUp() override { + TestMockFixture::SetUp(); + + ASSERT_EQ(0, open_image(m_image_name, &m_image_ctx)); + json_object["url"] = "http://some.site/file"; + } + + void expect_open(MockHttpClient& mock_http_client, int r) { + EXPECT_CALL(mock_http_client, open(_)) + .WillOnce(Invoke([r](Context* ctx) { ctx->complete(r); })); + } + + void expect_close(MockHttpClient& mock_http_client, int r) { + EXPECT_CALL(mock_http_client, close(_)) + .WillOnce(Invoke([r](Context* ctx) { ctx->complete(r); })); + } + + void expect_get_size(MockHttpClient& mock_http_client, uint64_t size, int r) { + EXPECT_CALL(mock_http_client, get_size(_, _)) + .WillOnce(Invoke([size, r](uint64_t* out_size, Context* ctx) { + *out_size = size; + ctx->complete(r); + })); + } + + void expect_read(MockHttpClient& mock_http_client, io::Extents byte_extents, + const bufferlist& bl, int r) { + uint64_t len = 0; + for (auto [_, byte_len] : byte_extents) { + len += byte_len; + } + EXPECT_CALL(mock_http_client, do_read(byte_extents, _, _)) + .WillOnce(WithArgs<1, 2>(Invoke( + [len, bl, r](bufferlist* out_bl, Context* ctx) { + *out_bl = bl; + ctx->complete(r < 0 ? 
r : len); + }))); + } + + json_spirit::mObject json_object; +}; + +TEST_F(TestMockMigrationHttpStream, OpenClose) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + + auto mock_http_client = new MockHttpClient(); + expect_open(*mock_http_client, 0); + + expect_close(*mock_http_client, 0); + + MockHttpStream mock_http_stream(&mock_image_ctx, json_object); + + C_SaferCond ctx1; + mock_http_stream.open(&ctx1); + ASSERT_EQ(0, ctx1.wait()); + + C_SaferCond ctx2; + mock_http_stream.close(&ctx2); + ASSERT_EQ(0, ctx2.wait()); +} + +TEST_F(TestMockMigrationHttpStream, GetSize) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + + auto mock_http_client = new MockHttpClient(); + expect_open(*mock_http_client, 0); + + expect_get_size(*mock_http_client, 128, 0); + + expect_close(*mock_http_client, 0); + + MockHttpStream mock_http_stream(&mock_image_ctx, json_object); + + C_SaferCond ctx1; + mock_http_stream.open(&ctx1); + ASSERT_EQ(0, ctx1.wait()); + + C_SaferCond ctx2; + uint64_t size; + mock_http_stream.get_size(&size, &ctx2); + ASSERT_EQ(0, ctx2.wait()); + ASSERT_EQ(128, size); + + C_SaferCond ctx3; + mock_http_stream.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +TEST_F(TestMockMigrationHttpStream, Read) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + + auto mock_http_client = new MockHttpClient(); + expect_open(*mock_http_client, 0); + + bufferlist expect_bl; + expect_bl.append(std::string(192, '1')); + expect_read(*mock_http_client, {{0, 128}, {256, 64}}, expect_bl, 0); + + expect_close(*mock_http_client, 0); + + MockHttpStream mock_http_stream(&mock_image_ctx, json_object); + + C_SaferCond ctx1; + mock_http_stream.open(&ctx1); + ASSERT_EQ(0, ctx1.wait()); + + C_SaferCond ctx2; + bufferlist bl; + mock_http_stream.read({{0, 128}, {256, 64}}, &bl, &ctx2); + ASSERT_EQ(192, ctx2.wait()); + ASSERT_EQ(expect_bl, bl); + + C_SaferCond ctx3; + mock_http_stream.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +} 
// namespace migration +} // namespace librbd diff --git a/src/test/librbd/migration/test_mock_RawFormat.cc b/src/test/librbd/migration/test_mock_RawFormat.cc index aa04f88449b..fd83de013be 100644 --- a/src/test/librbd/migration/test_mock_RawFormat.cc +++ b/src/test/librbd/migration/test_mock_RawFormat.cc @@ -3,10 +3,9 @@ #include "test/librbd/test_mock_fixture.h" #include "test/librbd/test_support.h" -#include "test/librbd/mock/migration/MockStreamInterface.h" +#include "test/librbd/mock/migration/MockSnapshotInterface.h" #include "include/rbd_types.h" #include "common/ceph_mutex.h" -#include "librbd/migration/FileStream.h" #include "librbd/migration/RawFormat.h" #include "librbd/migration/SourceSpecBuilder.h" #include "gtest/gtest.h" @@ -28,8 +27,8 @@ namespace migration { template<> struct SourceSpecBuilder<librbd::MockTestImageCtx> { - MOCK_CONST_METHOD2(build_stream, int(const json_spirit::mObject&, - std::unique_ptr<StreamInterface>*)); + MOCK_CONST_METHOD3(build_snapshot, int(const json_spirit::mObject&, uint64_t, + std::shared_ptr<SnapshotInterface>*)); }; @@ -41,6 +40,8 @@ struct SourceSpecBuilder<librbd::MockTestImageCtx> { using ::testing::_; using ::testing::InSequence; using ::testing::Invoke; +using ::testing::ReturnRef; +using ::testing::WithArg; using ::testing::WithArgs; namespace librbd { @@ -65,42 +66,61 @@ public: json_object["stream"] = stream_obj; } - void expect_build_stream(MockSourceSpecBuilder& mock_source_spec_builder, - MockStreamInterface* mock_stream_interface, int r) { - EXPECT_CALL(mock_source_spec_builder, build_stream(_, _)) - .WillOnce(WithArgs<1>(Invoke([mock_stream_interface, r] - (std::unique_ptr<StreamInterface>* ptr) { - ptr->reset(mock_stream_interface); + void expect_build_snapshot(MockSourceSpecBuilder& mock_source_spec_builder, + uint64_t index, + MockSnapshotInterface* mock_snapshot_interface, + int r) { + EXPECT_CALL(mock_source_spec_builder, build_snapshot(_, index, _)) + 
.WillOnce(WithArgs<2>(Invoke([mock_snapshot_interface, r] + (std::shared_ptr<SnapshotInterface>* ptr) { + ptr->reset(mock_snapshot_interface); return r; }))); } - void expect_stream_open(MockStreamInterface& mock_stream_interface, int r) { - EXPECT_CALL(mock_stream_interface, open(_)) - .WillOnce(Invoke([r](Context* ctx) { ctx->complete(r); })); + void expect_snapshot_open(MockSnapshotInterface& mock_snapshot_interface, + int r) { + EXPECT_CALL(mock_snapshot_interface, open(_, _)) + .WillOnce(WithArg<1>(Invoke([r](Context* ctx) { ctx->complete(r); }))); } - void expect_stream_close(MockStreamInterface& mock_stream_interface, int r) { - EXPECT_CALL(mock_stream_interface, close(_)) + void expect_snapshot_close(MockSnapshotInterface& mock_snapshot_interface, + int r) { + EXPECT_CALL(mock_snapshot_interface, close(_)) .WillOnce(Invoke([r](Context* ctx) { ctx->complete(r); })); } - void expect_stream_get_size(MockStreamInterface& mock_stream_interface, - uint64_t size, int r) { - EXPECT_CALL(mock_stream_interface, get_size(_, _)) - .WillOnce(Invoke([size, r](uint64_t* out_size, Context* ctx) { - *out_size = size; + void expect_snapshot_get_info(MockSnapshotInterface& mock_snapshot_interface, + const SnapInfo& snap_info) { + EXPECT_CALL(mock_snapshot_interface, get_snap_info()) + .WillOnce(ReturnRef(snap_info)); + } + + void expect_snapshot_read(MockSnapshotInterface& mock_snapshot_interface, + const io::Extents& image_extents, + const bufferlist& bl, int r) { + EXPECT_CALL(mock_snapshot_interface, read(_, image_extents, _)) + .WillOnce(WithArgs<0, 2>(Invoke([bl, image_extents, r] + (io::AioCompletion* aio_comp, io::ReadResult& read_result) { + aio_comp->read_result = std::move(read_result); + aio_comp->read_result.set_image_extents(image_extents); + aio_comp->set_request_count(1); + auto ctx = new io::ReadResult::C_ImageReadRequest(aio_comp, 0, + image_extents); + ctx->bl = std::move(bl); ctx->complete(r); - })); + }))); } - void expect_stream_read(MockStreamInterface& 
mock_stream_interface, - const io::Extents& byte_extents, - const bufferlist& bl, int r) { - EXPECT_CALL(mock_stream_interface, read(byte_extents, _, _)) - .WillOnce(WithArgs<1, 2>(Invoke([bl, r] - (bufferlist* out_bl, Context* ctx) { - *out_bl = bl; + void expect_snapshot_list_snap(MockSnapshotInterface& mock_snapshot_interface, + const io::Extents& image_extents, + const io::SparseExtents& sparse_extents, + int r) { + EXPECT_CALL(mock_snapshot_interface, list_snap(image_extents, _, _)) + .WillOnce(WithArgs<1, 2>(Invoke( + [sparse_extents, r](io::SparseExtents* out_sparse_extents, + Context* ctx) { + out_sparse_extents->insert(sparse_extents); ctx->complete(r); }))); } @@ -121,13 +141,13 @@ TEST_F(TestMockMigrationRawFormat, OpenClose) { InSequence seq; MockSourceSpecBuilder mock_source_spec_builder; - auto mock_stream_interface = new MockStreamInterface(); - expect_build_stream(mock_source_spec_builder, mock_stream_interface, 0); + auto mock_snapshot_interface = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, CEPH_NOSNAP, + mock_snapshot_interface, 0); - expect_stream_open(*mock_stream_interface, 0); - expect_stream_get_size(*mock_stream_interface, 0, 0); + expect_snapshot_open(*mock_snapshot_interface, 0); - expect_stream_close(*mock_stream_interface, 0); + expect_snapshot_close(*mock_snapshot_interface, 0); MockRawFormat mock_raw_format(&mock_image_ctx, json_object, &mock_source_spec_builder); @@ -147,13 +167,48 @@ TEST_F(TestMockMigrationRawFormat, OpenError) { InSequence seq; MockSourceSpecBuilder mock_source_spec_builder; - auto mock_stream_interface = new MockStreamInterface(); - expect_build_stream(mock_source_spec_builder, mock_stream_interface, 0); + auto mock_snapshot_interface = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, CEPH_NOSNAP, + mock_snapshot_interface, 0); + + expect_snapshot_open(*mock_snapshot_interface, -ENOENT); + + expect_snapshot_close(*mock_snapshot_interface, 0); + 
expect_close(mock_image_ctx, 0); + + MockRawFormat mock_raw_format(&mock_image_ctx, json_object, + &mock_source_spec_builder); + + C_SaferCond ctx; + mock_raw_format.open(&ctx); + ASSERT_EQ(-ENOENT, ctx.wait()); +} + +TEST_F(TestMockMigrationRawFormat, OpenSnapshotError) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + MockSourceSpecBuilder mock_source_spec_builder; + + auto mock_snapshot_interface_head = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, CEPH_NOSNAP, + mock_snapshot_interface_head, 0); + + auto mock_snapshot_interface_1 = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, 1, + mock_snapshot_interface_1, 0); - expect_stream_open(*mock_stream_interface, -ENOENT); + expect_snapshot_open(*mock_snapshot_interface_1, -ENOENT); + expect_snapshot_open(*mock_snapshot_interface_head, 0); + expect_snapshot_close(*mock_snapshot_interface_1, 0); + expect_snapshot_close(*mock_snapshot_interface_head, 0); expect_close(mock_image_ctx, 0); + json_spirit::mArray snapshots; + snapshots.push_back(json_spirit::mObject{}); + json_object["snapshots"] = snapshots; + MockRawFormat mock_raw_format(&mock_image_ctx, json_object, &mock_source_spec_builder); @@ -168,13 +223,13 @@ TEST_F(TestMockMigrationRawFormat, GetSnapshots) { InSequence seq; MockSourceSpecBuilder mock_source_spec_builder; - auto mock_stream_interface = new MockStreamInterface(); - expect_build_stream(mock_source_spec_builder, mock_stream_interface, 0); + auto mock_snapshot_interface = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, CEPH_NOSNAP, + mock_snapshot_interface, 0); - expect_stream_open(*mock_stream_interface, 0); - expect_stream_get_size(*mock_stream_interface, 0, 0); + expect_snapshot_open(*mock_snapshot_interface, 0); - expect_stream_close(*mock_stream_interface, 0); + expect_snapshot_close(*mock_snapshot_interface, 0); MockRawFormat mock_raw_format(&mock_image_ctx, json_object, 
&mock_source_spec_builder); @@ -200,15 +255,16 @@ TEST_F(TestMockMigrationRawFormat, GetImageSize) { InSequence seq; MockSourceSpecBuilder mock_source_spec_builder; - auto mock_stream_interface = new MockStreamInterface(); - expect_build_stream(mock_source_spec_builder, mock_stream_interface, 0); + auto mock_snapshot_interface = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, CEPH_NOSNAP, + mock_snapshot_interface, 0); - expect_stream_open(*mock_stream_interface, 0); - expect_stream_get_size(*mock_stream_interface, 0, 0); + expect_snapshot_open(*mock_snapshot_interface, 0); - expect_stream_get_size(*mock_stream_interface, 123, 0); + SnapInfo snap_info{{}, {}, 123, {}, 0, 0, {}}; + expect_snapshot_get_info(*mock_snapshot_interface, snap_info); - expect_stream_close(*mock_stream_interface, 0); + expect_snapshot_close(*mock_snapshot_interface, 0); MockRawFormat mock_raw_format(&mock_image_ctx, json_object, &mock_source_spec_builder); @@ -228,19 +284,19 @@ TEST_F(TestMockMigrationRawFormat, GetImageSize) { ASSERT_EQ(0, ctx3.wait()); } -TEST_F(TestMockMigrationRawFormat, GetImageSizeSnapshot) { +TEST_F(TestMockMigrationRawFormat, GetImageSizeSnapshotDNE) { MockTestImageCtx mock_image_ctx(*m_image_ctx); InSequence seq; MockSourceSpecBuilder mock_source_spec_builder; - auto mock_stream_interface = new MockStreamInterface(); - expect_build_stream(mock_source_spec_builder, mock_stream_interface, 0); + auto mock_snapshot_interface = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, CEPH_NOSNAP, + mock_snapshot_interface, 0); - expect_stream_open(*mock_stream_interface, 0); - expect_stream_get_size(*mock_stream_interface, 0, 0); + expect_snapshot_open(*mock_snapshot_interface, 0); - expect_stream_close(*mock_stream_interface, 0); + expect_snapshot_close(*mock_snapshot_interface, 0); MockRawFormat mock_raw_format(&mock_image_ctx, json_object, &mock_source_spec_builder); @@ -252,7 +308,7 @@ 
TEST_F(TestMockMigrationRawFormat, GetImageSizeSnapshot) { C_SaferCond ctx2; uint64_t size; mock_raw_format.get_image_size(0, &size, &ctx2); - ASSERT_EQ(-EINVAL, ctx2.wait()); + ASSERT_EQ(-ENOENT, ctx2.wait()); C_SaferCond ctx3; mock_raw_format.close(&ctx3); @@ -265,17 +321,17 @@ TEST_F(TestMockMigrationRawFormat, Read) { InSequence seq; MockSourceSpecBuilder mock_source_spec_builder; - auto mock_stream_interface = new MockStreamInterface(); - expect_build_stream(mock_source_spec_builder, mock_stream_interface, 0); + auto mock_snapshot_interface = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, CEPH_NOSNAP, + mock_snapshot_interface, 0); - expect_stream_open(*mock_stream_interface, 0); - expect_stream_get_size(*mock_stream_interface, 0, 0); + expect_snapshot_open(*mock_snapshot_interface, 0); bufferlist expect_bl; expect_bl.append(std::string(123, '1')); - expect_stream_read(*mock_stream_interface, {{123, 123}}, expect_bl, 0); + expect_snapshot_read(*mock_snapshot_interface, {{123, 123}}, expect_bl, 0); - expect_stream_close(*mock_stream_interface, 0); + expect_snapshot_close(*mock_snapshot_interface, 0); MockRawFormat mock_raw_format(&mock_image_ctx, json_object, &mock_source_spec_builder); @@ -305,13 +361,20 @@ TEST_F(TestMockMigrationRawFormat, ListSnaps) { InSequence seq; MockSourceSpecBuilder mock_source_spec_builder; - auto mock_stream_interface = new MockStreamInterface(); - expect_build_stream(mock_source_spec_builder, mock_stream_interface, 0); + auto mock_snapshot_interface = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, CEPH_NOSNAP, + mock_snapshot_interface, 0); - expect_stream_open(*mock_stream_interface, 0); - expect_stream_get_size(*mock_stream_interface, 0, 0); + expect_snapshot_open(*mock_snapshot_interface, 0); - expect_stream_close(*mock_stream_interface, 0); + SnapInfo snap_info{{}, {}, 123, {}, 0, 0, {}}; + expect_snapshot_get_info(*mock_snapshot_interface, snap_info); + 
io::SparseExtents sparse_extents; + sparse_extents.insert(0, 123, {io::SPARSE_EXTENT_STATE_DATA, 123}); + expect_snapshot_list_snap(*mock_snapshot_interface, {{0, 123}}, + sparse_extents, 0); + + expect_snapshot_close(*mock_snapshot_interface, 0); MockRawFormat mock_raw_format(&mock_image_ctx, json_object, &mock_source_spec_builder); @@ -322,9 +385,135 @@ TEST_F(TestMockMigrationRawFormat, ListSnaps) { C_SaferCond ctx2; io::SnapshotDelta snapshot_delta; - mock_raw_format.list_snaps({{0, 123}}, {}, 0, &snapshot_delta, {}, &ctx2); + mock_raw_format.list_snaps({{0, 123}}, {CEPH_NOSNAP}, 0, &snapshot_delta, {}, + &ctx2); ASSERT_EQ(0, ctx2.wait()); + io::SnapshotDelta expected_snapshot_delta; + expected_snapshot_delta[{CEPH_NOSNAP, CEPH_NOSNAP}] = sparse_extents; + ASSERT_EQ(expected_snapshot_delta, snapshot_delta); + + C_SaferCond ctx3; + mock_raw_format.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +TEST_F(TestMockMigrationRawFormat, ListSnapsError) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + MockSourceSpecBuilder mock_source_spec_builder; + + auto mock_snapshot_interface = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, CEPH_NOSNAP, + mock_snapshot_interface, 0); + + + expect_snapshot_open(*mock_snapshot_interface, 0); + + SnapInfo snap_info{{}, {}, 123, {}, 0, 0, {}}; + expect_snapshot_get_info(*mock_snapshot_interface, snap_info); + io::SparseExtents sparse_extents; + sparse_extents.insert(0, 123, {io::SPARSE_EXTENT_STATE_DATA, 123}); + expect_snapshot_list_snap(*mock_snapshot_interface, {{0, 123}}, + sparse_extents, -EINVAL); + + expect_snapshot_close(*mock_snapshot_interface, 0); + + MockRawFormat mock_raw_format(&mock_image_ctx, json_object, + &mock_source_spec_builder); + + C_SaferCond ctx1; + mock_raw_format.open(&ctx1); + ASSERT_EQ(0, ctx1.wait()); + + C_SaferCond ctx2; + io::SnapshotDelta snapshot_delta; + mock_raw_format.list_snaps({{0, 123}}, {CEPH_NOSNAP}, 0, &snapshot_delta, {}, + &ctx2); + 
ASSERT_EQ(-EINVAL, ctx2.wait()); + + C_SaferCond ctx3; + mock_raw_format.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +TEST_F(TestMockMigrationRawFormat, ListSnapsMerge) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + MockSourceSpecBuilder mock_source_spec_builder; + + auto mock_snapshot_interface_head = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, CEPH_NOSNAP, + mock_snapshot_interface_head, 0); + + auto mock_snapshot_interface_1 = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, 1, + mock_snapshot_interface_1, 0); + + auto mock_snapshot_interface_2 = new MockSnapshotInterface(); + expect_build_snapshot(mock_source_spec_builder, 2, + mock_snapshot_interface_2, 0); + + + expect_snapshot_open(*mock_snapshot_interface_1, 0); + expect_snapshot_open(*mock_snapshot_interface_2, 0); + expect_snapshot_open(*mock_snapshot_interface_head, 0); + + SnapInfo snap_info_head{{}, {}, 256, {}, 0, 0, {}}; + SnapInfo snap_info_1{snap_info_head}; + snap_info_1.size = 123; + expect_snapshot_get_info(*mock_snapshot_interface_1, snap_info_1); + io::SparseExtents sparse_extents_1; + sparse_extents_1.insert(0, 123, {io::SPARSE_EXTENT_STATE_DATA, 123}); + expect_snapshot_list_snap(*mock_snapshot_interface_1, {{0, 123}}, + sparse_extents_1, 0); + + SnapInfo snap_info_2{snap_info_head}; + snap_info_2.size = 64; + expect_snapshot_get_info(*mock_snapshot_interface_2, snap_info_2); + io::SparseExtents sparse_extents_2; + sparse_extents_2.insert(0, 32, {io::SPARSE_EXTENT_STATE_DATA, 32}); + expect_snapshot_list_snap(*mock_snapshot_interface_2, {{0, 123}}, + sparse_extents_2, 0); + + expect_snapshot_get_info(*mock_snapshot_interface_head, snap_info_head); + io::SparseExtents sparse_extents_head; + sparse_extents_head.insert(0, 16, {io::SPARSE_EXTENT_STATE_DATA, 16}); + expect_snapshot_list_snap(*mock_snapshot_interface_head, {{0, 123}}, + sparse_extents_head, 0); + + 
expect_snapshot_close(*mock_snapshot_interface_1, 0); + expect_snapshot_close(*mock_snapshot_interface_2, 0); + expect_snapshot_close(*mock_snapshot_interface_head, 0); + + json_spirit::mArray snapshots; + snapshots.push_back(json_spirit::mObject{}); + snapshots.push_back(json_spirit::mObject{}); + json_object["snapshots"] = snapshots; + + MockRawFormat mock_raw_format(&mock_image_ctx, json_object, + &mock_source_spec_builder); + + C_SaferCond ctx1; + mock_raw_format.open(&ctx1); + ASSERT_EQ(0, ctx1.wait()); + + C_SaferCond ctx2; + io::SnapshotDelta snapshot_delta; + mock_raw_format.list_snaps({{0, 123}}, {1, CEPH_NOSNAP}, 0, &snapshot_delta, + {}, &ctx2); + ASSERT_EQ(0, ctx2.wait()); + + io::SnapshotDelta expected_snapshot_delta; + expected_snapshot_delta[{1, 1}] = sparse_extents_1; + sparse_extents_2.erase(0, 16); + sparse_extents_2.insert(64, 59, {io::SPARSE_EXTENT_STATE_ZEROED, 59}); + expected_snapshot_delta[{CEPH_NOSNAP, 2}] = sparse_extents_2; + expected_snapshot_delta[{CEPH_NOSNAP, CEPH_NOSNAP}] = sparse_extents_head; + ASSERT_EQ(expected_snapshot_delta, snapshot_delta); + C_SaferCond ctx3; mock_raw_format.close(&ctx3); ASSERT_EQ(0, ctx3.wait()); diff --git a/src/test/librbd/migration/test_mock_RawSnapshot.cc b/src/test/librbd/migration/test_mock_RawSnapshot.cc new file mode 100644 index 00000000000..3ce4b5c9daa --- /dev/null +++ b/src/test/librbd/migration/test_mock_RawSnapshot.cc @@ -0,0 +1,255 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/librbd/test_mock_fixture.h" +#include "test/librbd/test_support.h" +#include "test/librbd/mock/migration/MockStreamInterface.h" +#include "include/rbd_types.h" +#include "common/ceph_mutex.h" +#include "librbd/migration/FileStream.h" +#include "librbd/migration/RawSnapshot.h" +#include "librbd/migration/SourceSpecBuilder.h" +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include "json_spirit/json_spirit.h" + +namespace librbd { +namespace { 
+ +struct MockTestImageCtx : public MockImageCtx { + MockTestImageCtx(ImageCtx &image_ctx) : MockImageCtx(image_ctx) { + } +}; + +} // anonymous namespace + +namespace migration { + +template<> +struct SourceSpecBuilder<librbd::MockTestImageCtx> { + + MOCK_CONST_METHOD2(build_stream, int(const json_spirit::mObject&, + std::shared_ptr<StreamInterface>*)); + +}; + +} // namespace migration +} // namespace librbd + +#include "librbd/migration/RawSnapshot.cc" + +using ::testing::_; +using ::testing::InSequence; +using ::testing::Invoke; +using ::testing::WithArgs; + +namespace librbd { +namespace migration { + +using ::testing::Invoke; + +class TestMockMigrationRawSnapshot : public TestMockFixture { +public: + typedef RawSnapshot<MockTestImageCtx> MockRawSnapshot; + typedef SourceSpecBuilder<MockTestImageCtx> MockSourceSpecBuilder; + + librbd::ImageCtx *m_image_ctx; + + void SetUp() override { + TestMockFixture::SetUp(); + + ASSERT_EQ(0, open_image(m_image_name, &m_image_ctx)); + + json_spirit::mObject stream_obj; + stream_obj["type"] = "file"; + json_object["stream"] = stream_obj; + } + + void expect_build_stream(MockSourceSpecBuilder& mock_source_spec_builder, + MockStreamInterface* mock_stream_interface, int r) { + EXPECT_CALL(mock_source_spec_builder, build_stream(_, _)) + .WillOnce(WithArgs<1>(Invoke([mock_stream_interface, r] + (std::shared_ptr<StreamInterface>* ptr) { + ptr->reset(mock_stream_interface); + return r; + }))); + } + + void expect_stream_open(MockStreamInterface& mock_stream_interface, int r) { + EXPECT_CALL(mock_stream_interface, open(_)) + .WillOnce(Invoke([r](Context* ctx) { ctx->complete(r); })); + } + + void expect_stream_close(MockStreamInterface& mock_stream_interface, int r) { + EXPECT_CALL(mock_stream_interface, close(_)) + .WillOnce(Invoke([r](Context* ctx) { ctx->complete(r); })); + } + + void expect_stream_get_size(MockStreamInterface& mock_stream_interface, + uint64_t size, int r) { + EXPECT_CALL(mock_stream_interface, get_size(_, _)) + 
.WillOnce(Invoke([size, r](uint64_t* out_size, Context* ctx) { + *out_size = size; + ctx->complete(r); + })); + } + + void expect_stream_read(MockStreamInterface& mock_stream_interface, + const io::Extents& byte_extents, + const bufferlist& bl, int r) { + EXPECT_CALL(mock_stream_interface, read(byte_extents, _, _)) + .WillOnce(WithArgs<1, 2>(Invoke([bl, r] + (bufferlist* out_bl, Context* ctx) { + *out_bl = bl; + ctx->complete(r); + }))); + } + + json_spirit::mObject json_object; +}; + +TEST_F(TestMockMigrationRawSnapshot, OpenClose) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + MockSourceSpecBuilder mock_source_spec_builder; + + auto mock_stream_interface = new MockStreamInterface(); + expect_build_stream(mock_source_spec_builder, mock_stream_interface, 0); + + expect_stream_open(*mock_stream_interface, 0); + expect_stream_get_size(*mock_stream_interface, 123, 0); + + expect_stream_close(*mock_stream_interface, 0); + + json_object["name"] = "snap1"; + MockRawSnapshot mock_raw_snapshot(&mock_image_ctx, json_object, + &mock_source_spec_builder, 1); + + C_SaferCond ctx1; + mock_raw_snapshot.open(nullptr, &ctx1); + ASSERT_EQ(0, ctx1.wait()); + + auto snap_info = mock_raw_snapshot.get_snap_info(); + ASSERT_EQ("snap1", snap_info.name); + ASSERT_EQ(123, snap_info.size); + + C_SaferCond ctx2; + mock_raw_snapshot.close(&ctx2); + ASSERT_EQ(0, ctx2.wait()); +} + +TEST_F(TestMockMigrationRawSnapshot, OpenError) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + MockSourceSpecBuilder mock_source_spec_builder; + + auto mock_stream_interface = new MockStreamInterface(); + expect_build_stream(mock_source_spec_builder, mock_stream_interface, 0); + + expect_stream_open(*mock_stream_interface, -ENOENT); + + MockRawSnapshot mock_raw_snapshot(&mock_image_ctx, json_object, + &mock_source_spec_builder, 0); + + C_SaferCond ctx; + mock_raw_snapshot.open(nullptr, &ctx); + ASSERT_EQ(-ENOENT, ctx.wait()); +} + 
+TEST_F(TestMockMigrationRawSnapshot, GetSizeError) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + MockSourceSpecBuilder mock_source_spec_builder; + + auto mock_stream_interface = new MockStreamInterface(); + expect_build_stream(mock_source_spec_builder, mock_stream_interface, 0); + + expect_stream_open(*mock_stream_interface, 0); + expect_stream_get_size(*mock_stream_interface, 0, -EINVAL); + + expect_stream_close(*mock_stream_interface, 0); + + MockRawSnapshot mock_raw_snapshot(&mock_image_ctx, json_object, + &mock_source_spec_builder, 0); + + C_SaferCond ctx; + mock_raw_snapshot.open(nullptr, &ctx); + ASSERT_EQ(-EINVAL, ctx.wait()); +} + +TEST_F(TestMockMigrationRawSnapshot, Read) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + MockSourceSpecBuilder mock_source_spec_builder; + + auto mock_stream_interface = new MockStreamInterface(); + expect_build_stream(mock_source_spec_builder, mock_stream_interface, 0); + + expect_stream_open(*mock_stream_interface, 0); + expect_stream_get_size(*mock_stream_interface, 0, 0); + + bufferlist expect_bl; + expect_bl.append(std::string(123, '1')); + expect_stream_read(*mock_stream_interface, {{123, 123}}, expect_bl, 0); + + expect_stream_close(*mock_stream_interface, 0); + + MockRawSnapshot mock_raw_snapshot(&mock_image_ctx, json_object, + &mock_source_spec_builder, 0); + + C_SaferCond ctx1; + mock_raw_snapshot.open(nullptr, &ctx1); + ASSERT_EQ(0, ctx1.wait()); + + C_SaferCond ctx2; + auto aio_comp = io::AioCompletion::create_and_start( + &ctx2, m_image_ctx, io::AIO_TYPE_READ); + bufferlist bl; + io::ReadResult read_result{&bl}; + mock_raw_snapshot.read(aio_comp, {{123, 123}}, std::move(read_result), 0, 0, + {}); + ASSERT_EQ(123, ctx2.wait()); + ASSERT_EQ(expect_bl, bl); + + C_SaferCond ctx3; + mock_raw_snapshot.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +TEST_F(TestMockMigrationRawSnapshot, ListSnap) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + 
MockSourceSpecBuilder mock_source_spec_builder; + + auto mock_stream_interface = new MockStreamInterface(); + expect_build_stream(mock_source_spec_builder, mock_stream_interface, 0); + + expect_stream_open(*mock_stream_interface, 0); + expect_stream_get_size(*mock_stream_interface, 0, 0); + + expect_stream_close(*mock_stream_interface, 0); + + MockRawSnapshot mock_raw_snapshot(&mock_image_ctx, json_object, + &mock_source_spec_builder, 0); + + C_SaferCond ctx1; + mock_raw_snapshot.open(nullptr, &ctx1); + ASSERT_EQ(0, ctx1.wait()); + + C_SaferCond ctx2; + io::SparseExtents sparse_extents; + mock_raw_snapshot.list_snap({{0, 123}}, 0, &sparse_extents, {}, &ctx2); + ASSERT_EQ(0, ctx2.wait()); + + C_SaferCond ctx3; + mock_raw_snapshot.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +} // namespace migration +} // namespace librbd diff --git a/src/test/librbd/migration/test_mock_S3Stream.cc b/src/test/librbd/migration/test_mock_S3Stream.cc new file mode 100644 index 00000000000..2f2097f7926 --- /dev/null +++ b/src/test/librbd/migration/test_mock_S3Stream.cc @@ -0,0 +1,238 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/librbd/test_mock_fixture.h" +#include "test/librbd/test_support.h" +#include "include/rbd_types.h" +#include "common/ceph_mutex.h" +#include "librbd/migration/HttpClient.h" +#include "librbd/migration/S3Stream.h" +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include "json_spirit/json_spirit.h" +#include <boost/algorithm/string/predicate.hpp> +#include <boost/beast/http.hpp> + +namespace librbd { +namespace { + +struct MockTestImageCtx : public MockImageCtx { + MockTestImageCtx(ImageCtx &image_ctx) : MockImageCtx(image_ctx) { + } +}; + +} // anonymous namespace + +namespace migration { + +template <> +struct HttpClient<MockTestImageCtx> { + static HttpClient* s_instance; + static HttpClient* create(MockTestImageCtx*, const std::string&) { + ceph_assert(s_instance != nullptr); + 
return s_instance; + } + + HttpProcessorInterface* http_processor = nullptr; + void set_http_processor(HttpProcessorInterface* http_processor) { + this->http_processor = http_processor; + } + + MOCK_METHOD1(open, void(Context*)); + MOCK_METHOD1(close, void(Context*)); + MOCK_METHOD2(get_size, void(uint64_t*, Context*)); + MOCK_METHOD3(do_read, void(const io::Extents&, bufferlist*, Context*)); + void read(io::Extents&& extents, bufferlist* bl, Context* ctx) { + do_read(extents, bl, ctx); + } + + HttpClient() { + s_instance = this; + } +}; + +HttpClient<MockTestImageCtx>* HttpClient<MockTestImageCtx>::s_instance = nullptr; + +} // namespace migration +} // namespace librbd + +#include "librbd/migration/S3Stream.cc" + +namespace librbd { +namespace migration { + +using ::testing::_; +using ::testing::Invoke; +using ::testing::InSequence; +using ::testing::WithArgs; + +class TestMockMigrationS3Stream : public TestMockFixture { +public: + typedef S3Stream<MockTestImageCtx> MockS3Stream; + typedef HttpClient<MockTestImageCtx> MockHttpClient; + + using EmptyBody = boost::beast::http::empty_body; + using EmptyRequest = boost::beast::http::request<EmptyBody>; + + librbd::ImageCtx *m_image_ctx; + + void SetUp() override { + TestMockFixture::SetUp(); + + ASSERT_EQ(0, open_image(m_image_name, &m_image_ctx)); + json_object["url"] = "http://some.site/bucket/file"; + json_object["access_key"] = "0555b35654ad1656d804"; + json_object["secret_key"] = "h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=="; + } + + void expect_open(MockHttpClient& mock_http_client, int r) { + EXPECT_CALL(mock_http_client, open(_)) + .WillOnce(Invoke([r](Context* ctx) { ctx->complete(r); })); + } + + void expect_close(MockHttpClient& mock_http_client, int r) { + EXPECT_CALL(mock_http_client, close(_)) + .WillOnce(Invoke([r](Context* ctx) { ctx->complete(r); })); + } + + void expect_get_size(MockHttpClient& mock_http_client, uint64_t size, int r) { + EXPECT_CALL(mock_http_client, get_size(_, _)) + 
.WillOnce(Invoke([size, r](uint64_t* out_size, Context* ctx) { + *out_size = size; + ctx->complete(r); + })); + } + + void expect_read(MockHttpClient& mock_http_client, io::Extents byte_extents, + const bufferlist& bl, int r) { + uint64_t len = 0; + for (auto [_, byte_len] : byte_extents) { + len += byte_len; + } + EXPECT_CALL(mock_http_client, do_read(byte_extents, _, _)) + .WillOnce(WithArgs<1, 2>(Invoke( + [len, bl, r](bufferlist* out_bl, Context* ctx) { + *out_bl = bl; + ctx->complete(r < 0 ? r : len); + }))); + } + + json_spirit::mObject json_object; +}; + +TEST_F(TestMockMigrationS3Stream, OpenClose) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + + auto mock_http_client = new MockHttpClient(); + expect_open(*mock_http_client, 0); + + expect_close(*mock_http_client, 0); + + MockS3Stream mock_http_stream(&mock_image_ctx, json_object); + + C_SaferCond ctx1; + mock_http_stream.open(&ctx1); + ASSERT_EQ(0, ctx1.wait()); + + C_SaferCond ctx2; + mock_http_stream.close(&ctx2); + ASSERT_EQ(0, ctx2.wait()); +} + +TEST_F(TestMockMigrationS3Stream, GetSize) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + + auto mock_http_client = new MockHttpClient(); + expect_open(*mock_http_client, 0); + + expect_get_size(*mock_http_client, 128, 0); + + expect_close(*mock_http_client, 0); + + MockS3Stream mock_http_stream(&mock_image_ctx, json_object); + + C_SaferCond ctx1; + mock_http_stream.open(&ctx1); + ASSERT_EQ(0, ctx1.wait()); + + C_SaferCond ctx2; + uint64_t size; + mock_http_stream.get_size(&size, &ctx2); + ASSERT_EQ(0, ctx2.wait()); + ASSERT_EQ(128, size); + + C_SaferCond ctx3; + mock_http_stream.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +TEST_F(TestMockMigrationS3Stream, Read) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + + auto mock_http_client = new MockHttpClient(); + expect_open(*mock_http_client, 0); + + bufferlist expect_bl; + expect_bl.append(std::string(192, '1')); + 
expect_read(*mock_http_client, {{0, 128}, {256, 64}}, expect_bl, 0); + + expect_close(*mock_http_client, 0); + + MockS3Stream mock_http_stream(&mock_image_ctx, json_object); + + C_SaferCond ctx1; + mock_http_stream.open(&ctx1); + ASSERT_EQ(0, ctx1.wait()); + + C_SaferCond ctx2; + bufferlist bl; + mock_http_stream.read({{0, 128}, {256, 64}}, &bl, &ctx2); + ASSERT_EQ(192, ctx2.wait()); + ASSERT_EQ(expect_bl, bl); + + C_SaferCond ctx3; + mock_http_stream.close(&ctx3); + ASSERT_EQ(0, ctx3.wait()); +} + +TEST_F(TestMockMigrationS3Stream, ProcessRequest) { + MockTestImageCtx mock_image_ctx(*m_image_ctx); + + InSequence seq; + + auto mock_http_client = new MockHttpClient(); + expect_open(*mock_http_client, 0); + + expect_close(*mock_http_client, 0); + + MockS3Stream mock_http_stream(&mock_image_ctx, json_object); + + C_SaferCond ctx1; + mock_http_stream.open(&ctx1); + ASSERT_EQ(0, ctx1.wait()); + + EmptyRequest request; + request.method(boost::beast::http::verb::get); + request.target("/bucket/resource"); + mock_http_client->http_processor->process_request(request); + + // basic test for date and known portion of authorization + ASSERT_EQ(1U, request.count(boost::beast::http::field::date)); + ASSERT_EQ(1U, request.count(boost::beast::http::field::authorization)); + ASSERT_TRUE(boost::algorithm::starts_with( + request[boost::beast::http::field::authorization], + "AWS 0555b35654ad1656d804:")); + + C_SaferCond ctx2; + mock_http_stream.close(&ctx2); + ASSERT_EQ(0, ctx2.wait()); +} + +} // namespace migration +} // namespace librbd diff --git a/src/test/librbd/migration/test_mock_Utils.cc b/src/test/librbd/migration/test_mock_Utils.cc new file mode 100644 index 00000000000..917c191dde6 --- /dev/null +++ b/src/test/librbd/migration/test_mock_Utils.cc @@ -0,0 +1,47 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/librbd/test_mock_fixture.h" +#include "test/librbd/test_support.h" +#include 
"librbd/migration/Utils.h" +#include "gtest/gtest.h" +#include "gmock/gmock.h" + +namespace librbd { +namespace migration { +namespace util { + +class TestMockMigrationUtils : public TestMockFixture { +public: +}; + +TEST_F(TestMockMigrationUtils, ParseUrl) { + UrlSpec url_spec; + ASSERT_EQ(-EINVAL, parse_url(g_ceph_context, "", &url_spec)); + ASSERT_EQ(-EINVAL, parse_url(g_ceph_context, "jttp://google.com/path", + &url_spec)); + ASSERT_EQ(-EINVAL, parse_url(g_ceph_context, "http://google.com:absd/path", + &url_spec)); + + ASSERT_EQ(0, parse_url(g_ceph_context, "ceph.io/path", &url_spec)); + ASSERT_EQ(UrlSpec(URL_SCHEME_HTTP, "ceph.io", "80", "/path"), url_spec); + + ASSERT_EQ(0, parse_url(g_ceph_context, "http://google.com/path", &url_spec)); + ASSERT_EQ(UrlSpec(URL_SCHEME_HTTP, "google.com", "80", "/path"), url_spec); + + ASSERT_EQ(0, parse_url(g_ceph_context, "https://ceph.io/", &url_spec)); + ASSERT_EQ(UrlSpec(URL_SCHEME_HTTPS, "ceph.io", "443", "/"), url_spec); + + ASSERT_EQ(0, parse_url(g_ceph_context, + "http://google.com:1234/some/other/path", &url_spec)); + ASSERT_EQ(UrlSpec(URL_SCHEME_HTTP, "google.com", "1234", "/some/other/path"), + url_spec); + + ASSERT_EQ(0, parse_url(g_ceph_context, + "http://1.2.3.4/", &url_spec)); + ASSERT_EQ(UrlSpec(URL_SCHEME_HTTP, "1.2.3.4", "80", "/"), url_spec); +} + +} // namespace util +} // namespace migration +} // namespace librbd diff --git a/src/test/librbd/mock/MockImageCtx.h b/src/test/librbd/mock/MockImageCtx.h index 1e176bd21ef..429a58410e9 100644 --- a/src/test/librbd/mock/MockImageCtx.h +++ b/src/test/librbd/mock/MockImageCtx.h @@ -12,6 +12,7 @@ #include "test/librbd/mock/MockJournal.h" #include "test/librbd/mock/MockObjectMap.h" #include "test/librbd/mock/MockOperations.h" +#include "test/librbd/mock/MockPluginRegistry.h" #include "test/librbd/mock/MockReadahead.h" #include "test/librbd/mock/io/MockImageDispatcher.h" #include "test/librbd/mock/io/MockObjectDispatcher.h" @@ -86,6 +87,7 @@ struct MockImageCtx { 
io_image_dispatcher(new io::MockImageDispatcher()), io_object_dispatcher(new io::MockObjectDispatcher()), op_work_queue(new MockContextWQ()), + plugin_registry(new MockPluginRegistry()), readahead_max_bytes(image_ctx.readahead_max_bytes), event_socket(image_ctx.event_socket), parent(NULL), operations(new MockOperations()), @@ -126,6 +128,7 @@ struct MockImageCtx { delete operations; delete image_watcher; delete op_work_queue; + delete plugin_registry; delete io_image_dispatcher; delete io_object_dispatcher; } @@ -294,6 +297,8 @@ struct MockImageCtx { io::MockObjectDispatcher *io_object_dispatcher; MockContextWQ *op_work_queue; + MockPluginRegistry* plugin_registry; + MockReadahead readahead; uint64_t readahead_max_bytes; diff --git a/src/test/librbd/mock/MockPluginRegistry.h b/src/test/librbd/mock/MockPluginRegistry.h new file mode 100644 index 00000000000..8854a742606 --- /dev/null +++ b/src/test/librbd/mock/MockPluginRegistry.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_TEST_LIBRBD_MOCK_PLUGIN_REGISTRY_H +#define CEPH_TEST_LIBRBD_MOCK_PLUGIN_REGISTRY_H + +#include <gmock/gmock.h> + +class Context; + +namespace librbd { + +struct MockPluginRegistry{ + MOCK_METHOD2(init, void(const std::string&, Context*)); + MOCK_METHOD1(acquired_exclusive_lock, void(Context*)); + MOCK_METHOD1(prerelease_exclusive_lock, void(Context*)); +}; + +} // namespace librbd + +#endif // CEPH_TEST_LIBRBD_MOCK_PLUGIN_REGISTRY_H diff --git a/src/test/librbd/mock/crypto/MockCryptoInterface.h b/src/test/librbd/mock/crypto/MockCryptoInterface.h index 0745583624e..1263fba95b8 100644 --- a/src/test/librbd/mock/crypto/MockCryptoInterface.h +++ b/src/test/librbd/mock/crypto/MockCryptoInterface.h @@ -19,6 +19,10 @@ struct MockCryptoInterface : CryptoInterface { uint64_t get_block_size() const override { return 4096; } + + uint64_t get_data_offset() const override { + return 4 * 1024 * 1024; + } }; } // namespace 
crypto diff --git a/src/test/librbd/mock/io/MockImageDispatcher.h b/src/test/librbd/mock/io/MockImageDispatcher.h index 7d63044cda8..bd1c962e8d0 100644 --- a/src/test/librbd/mock/io/MockImageDispatcher.h +++ b/src/test/librbd/mock/io/MockImageDispatcher.h @@ -22,6 +22,7 @@ public: MOCK_METHOD1(shut_down, void(Context*)); MOCK_METHOD1(register_dispatch, void(ImageDispatchInterface*)); + MOCK_METHOD1(exists, bool(ImageDispatchLayer)); MOCK_METHOD2(shut_down_dispatch, void(ImageDispatchLayer, Context*)); MOCK_METHOD1(invalidate_cache, void(Context *)); diff --git a/src/test/librbd/mock/io/MockObjectDispatcher.h b/src/test/librbd/mock/io/MockObjectDispatcher.h index 688744bcbe5..5e700397bb0 100644 --- a/src/test/librbd/mock/io/MockObjectDispatcher.h +++ b/src/test/librbd/mock/io/MockObjectDispatcher.h @@ -22,6 +22,7 @@ public: MOCK_METHOD1(shut_down, void(Context*)); MOCK_METHOD1(register_dispatch, void(ObjectDispatchInterface*)); + MOCK_METHOD1(exists, bool(ObjectDispatchLayer)); MOCK_METHOD2(shut_down_dispatch, void(ObjectDispatchLayer, Context*)); MOCK_METHOD2(flush, void(FlushSource, Context*)); diff --git a/src/test/librbd/mock/migration/MockSnapshotInterface.h b/src/test/librbd/mock/migration/MockSnapshotInterface.h new file mode 100644 index 00000000000..abb6d1a08a7 --- /dev/null +++ b/src/test/librbd/mock/migration/MockSnapshotInterface.h @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_TEST_LIBRBD_MOCK_MIGRATION_MOCK_SNAPSHOT_INTERFACE_H +#define CEPH_TEST_LIBRBD_MOCK_MIGRATION_MOCK_SNAPSHOT_INTERFACE_H + +#include "include/buffer.h" +#include "gmock/gmock.h" +#include "librbd/io/AioCompletion.h" +#include "librbd/io/ReadResult.h" +#include "librbd/io/Types.h" +#include "librbd/migration/SnapshotInterface.h" + +namespace librbd { +namespace migration { + +struct MockSnapshotInterface : public SnapshotInterface { + MOCK_METHOD2(open, void(SnapshotInterface*, Context*)); + 
MOCK_METHOD1(close, void(Context*)); + + MOCK_CONST_METHOD0(get_snap_info, const SnapInfo&()); + + MOCK_METHOD3(read, void(io::AioCompletion*, const io::Extents&, + io::ReadResult&)); + void read(io::AioCompletion* aio_comp, io::Extents&& image_extents, + io::ReadResult&& read_result, int op_flags, int read_flags, + const ZTracer::Trace &parent_trace) override { + read(aio_comp, image_extents, read_result); + } + + MOCK_METHOD3(list_snap, void(const io::Extents&, io::SparseExtents*, + Context*)); + void list_snap(io::Extents&& image_extents, int list_snaps_flags, + io::SparseExtents* sparse_extents, + const ZTracer::Trace &parent_trace, + Context* on_finish) override { + list_snap(image_extents, sparse_extents, on_finish); + } +}; + +} // namespace migration +} // namespace librbd + +#endif // CEPH_TEST_LIBRBD_MOCK_MIGRATION_MOCK_SNAPSHOT_INTERFACE_H diff --git a/src/test/librbd/test_DeepCopy.cc b/src/test/librbd/test_DeepCopy.cc index 9f75089598a..741a5d54485 100644 --- a/src/test/librbd/test_DeepCopy.cc +++ b/src/test/librbd/test_DeepCopy.cc @@ -122,8 +122,8 @@ struct TestDeepCopy : public TestFixture { std::cout << "snap: " << (src_snap_name ? 
src_snap_name : "null") << ", block " << offset << "~" << read_size << " differs" << std::endl; - // std::cout << "src block: " << std::endl; src_bl.hexdump(std::cout); - // std::cout << "dst block: " << std::endl; dst_bl.hexdump(std::cout); + std::cout << "src block: " << std::endl; src_bl.hexdump(std::cout); + std::cout << "dst block: " << std::endl; dst_bl.hexdump(std::cout); } EXPECT_TRUE(src_bl.contents_equal(dst_bl)); offset += read_size; diff --git a/src/test/librbd/test_ImageWatcher.cc b/src/test/librbd/test_ImageWatcher.cc index a3cdfdb5e34..f02c7b37b81 100644 --- a/src/test/librbd/test_ImageWatcher.cc +++ b/src/test/librbd/test_ImageWatcher.cc @@ -175,6 +175,41 @@ public: *id = payload.async_request_id; } return true; + case NOTIFY_OP_SNAP_RENAME: + { + SnapRenamePayload payload; + payload.decode(7, iter); + *id = payload.async_request_id; + } + return true; + case NOTIFY_OP_SNAP_REMOVE: + { + SnapRemovePayload payload; + payload.decode(7, iter); + *id = payload.async_request_id; + } + return true; + case NOTIFY_OP_SNAP_PROTECT: + { + SnapProtectPayload payload; + payload.decode(7, iter); + *id = payload.async_request_id; + } + return true; + case NOTIFY_OP_SNAP_UNPROTECT: + { + SnapUnprotectPayload payload; + payload.decode(7, iter); + *id = payload.async_request_id; + } + return true; + case NOTIFY_OP_RENAME: + { + RenamePayload payload; + payload.decode(7, iter); + *id = payload.async_request_id; + } + return true; case NOTIFY_OP_REBUILD_OBJECT_MAP: { RebuildObjectMapPayload payload; @@ -293,7 +328,90 @@ struct SnapCreateTask { C_SaferCond ctx; ictx->image_watcher->notify_snap_create(0, cls::rbd::UserSnapshotNamespace(), "snap", 0, *progress_context, &ctx); - ASSERT_EQ(0, ctx.wait()); + result = ctx.wait(); + } +}; + +struct SnapRenameTask { + librbd::ImageCtx *ictx; + int result = 0; + + SnapRenameTask(librbd::ImageCtx *ictx) + : ictx(ictx) { + } + + void operator()() { + std::shared_lock l{ictx->owner_lock}; + C_SaferCond ctx; + 
ictx->image_watcher->notify_snap_rename(0, 1, "snap-rename", &ctx); + result = ctx.wait(); + } +}; + +struct SnapRemoveTask { + librbd::ImageCtx *ictx; + int result = 0; + + SnapRemoveTask(librbd::ImageCtx *ictx) + : ictx(ictx) { + } + + void operator()() { + std::shared_lock l{ictx->owner_lock}; + C_SaferCond ctx; + ictx->image_watcher->notify_snap_remove( + 0, cls::rbd::UserSnapshotNamespace(), "snap", &ctx); + result = ctx.wait(); + } +}; + +struct SnapProtectTask { + librbd::ImageCtx *ictx; + int result = 0; + + SnapProtectTask(librbd::ImageCtx *ictx) + : ictx(ictx) { + } + + void operator()() { + std::shared_lock l{ictx->owner_lock}; + C_SaferCond ctx; + ictx->image_watcher->notify_snap_protect( + 0, cls::rbd::UserSnapshotNamespace(), "snap", &ctx); + result = ctx.wait(); + } +}; + +struct SnapUnprotectTask { + librbd::ImageCtx *ictx; + int result = 0; + + SnapUnprotectTask(librbd::ImageCtx *ictx) + : ictx(ictx) { + } + + void operator()() { + std::shared_lock l{ictx->owner_lock}; + C_SaferCond ctx; + ictx->image_watcher->notify_snap_unprotect( + 0, cls::rbd::UserSnapshotNamespace(), "snap", &ctx); + result = ctx.wait(); + } +}; + +struct RenameTask { + librbd::ImageCtx *ictx; + int result = 0; + + RenameTask(librbd::ImageCtx *ictx) + : ictx(ictx) { + } + + void operator()() { + std::shared_lock l{ictx->owner_lock}; + C_SaferCond ctx; + ictx->image_watcher->notify_rename(0, "new_name", &ctx); + result = ctx.wait(); } }; @@ -505,14 +623,23 @@ TEST_F(TestImageWatcher, NotifySnapRename) { m_notify_acks = {{NOTIFY_OP_SNAP_RENAME, create_response_message(0)}}; - std::shared_lock l{ictx->owner_lock}; - C_SaferCond notify_ctx; - ictx->image_watcher->notify_snap_rename(1, "snap-rename", ¬ify_ctx); - ASSERT_EQ(0, notify_ctx.wait()); + SnapRenameTask snap_rename_task(ictx); + boost::thread thread(boost::ref(snap_rename_task)); + + ASSERT_TRUE(wait_for_notifies(*ictx)); NotifyOps expected_notify_ops; expected_notify_ops += NOTIFY_OP_SNAP_RENAME; 
ASSERT_EQ(expected_notify_ops, m_notifies); + + AsyncRequestId async_request_id; + ASSERT_TRUE(extract_async_request_id(NOTIFY_OP_SNAP_RENAME, + &async_request_id)); + + ASSERT_EQ(0, notify_async_complete(ictx, async_request_id, 0)); + + ASSERT_TRUE(thread.timed_join(boost::posix_time::seconds(10))); + ASSERT_EQ(0, snap_rename_task.result); } TEST_F(TestImageWatcher, NotifySnapRenameError) { @@ -529,7 +656,7 @@ TEST_F(TestImageWatcher, NotifySnapRenameError) { std::shared_lock l{ictx->owner_lock}; C_SaferCond notify_ctx; - ictx->image_watcher->notify_snap_rename(1, "snap-rename", ¬ify_ctx); + ictx->image_watcher->notify_snap_rename(0, 1, "snap-rename", ¬ify_ctx); ASSERT_EQ(-EEXIST, notify_ctx.wait()); NotifyOps expected_notify_ops; @@ -549,16 +676,23 @@ TEST_F(TestImageWatcher, NotifySnapRemove) { m_notify_acks = {{NOTIFY_OP_SNAP_REMOVE, create_response_message(0)}}; - std::shared_lock l{ictx->owner_lock}; - C_SaferCond notify_ctx; - ictx->image_watcher->notify_snap_remove(cls::rbd::UserSnapshotNamespace(), - "snap", - ¬ify_ctx); - ASSERT_EQ(0, notify_ctx.wait()); + SnapRemoveTask snap_remove_task(ictx); + boost::thread thread(boost::ref(snap_remove_task)); + + ASSERT_TRUE(wait_for_notifies(*ictx)); NotifyOps expected_notify_ops; expected_notify_ops += NOTIFY_OP_SNAP_REMOVE; ASSERT_EQ(expected_notify_ops, m_notifies); + + AsyncRequestId async_request_id; + ASSERT_TRUE(extract_async_request_id(NOTIFY_OP_SNAP_REMOVE, + &async_request_id)); + + ASSERT_EQ(0, notify_async_complete(ictx, async_request_id, 0)); + + ASSERT_TRUE(thread.timed_join(boost::posix_time::seconds(10))); + ASSERT_EQ(0, snap_remove_task.result); } TEST_F(TestImageWatcher, NotifySnapProtect) { @@ -573,16 +707,23 @@ TEST_F(TestImageWatcher, NotifySnapProtect) { m_notify_acks = {{NOTIFY_OP_SNAP_PROTECT, create_response_message(0)}}; - std::shared_lock l{ictx->owner_lock}; - C_SaferCond notify_ctx; - ictx->image_watcher->notify_snap_protect(cls::rbd::UserSnapshotNamespace(), - "snap", - ¬ify_ctx); - 
ASSERT_EQ(0, notify_ctx.wait()); + SnapProtectTask snap_protect_task(ictx); + boost::thread thread(boost::ref(snap_protect_task)); + + ASSERT_TRUE(wait_for_notifies(*ictx)); NotifyOps expected_notify_ops; expected_notify_ops += NOTIFY_OP_SNAP_PROTECT; ASSERT_EQ(expected_notify_ops, m_notifies); + + AsyncRequestId async_request_id; + ASSERT_TRUE(extract_async_request_id(NOTIFY_OP_SNAP_PROTECT, + &async_request_id)); + + ASSERT_EQ(0, notify_async_complete(ictx, async_request_id, 0)); + + ASSERT_TRUE(thread.timed_join(boost::posix_time::seconds(10))); + ASSERT_EQ(0, snap_protect_task.result); } TEST_F(TestImageWatcher, NotifySnapUnprotect) { @@ -597,16 +738,23 @@ TEST_F(TestImageWatcher, NotifySnapUnprotect) { m_notify_acks = {{NOTIFY_OP_SNAP_UNPROTECT, create_response_message(0)}}; - std::shared_lock l{ictx->owner_lock}; - C_SaferCond notify_ctx; - ictx->image_watcher->notify_snap_unprotect(cls::rbd::UserSnapshotNamespace(), - "snap", - ¬ify_ctx); - ASSERT_EQ(0, notify_ctx.wait()); + SnapUnprotectTask snap_unprotect_task(ictx); + boost::thread thread(boost::ref(snap_unprotect_task)); + + ASSERT_TRUE(wait_for_notifies(*ictx)); NotifyOps expected_notify_ops; expected_notify_ops += NOTIFY_OP_SNAP_UNPROTECT; ASSERT_EQ(expected_notify_ops, m_notifies); + + AsyncRequestId async_request_id; + ASSERT_TRUE(extract_async_request_id(NOTIFY_OP_SNAP_UNPROTECT, + &async_request_id)); + + ASSERT_EQ(0, notify_async_complete(ictx, async_request_id, 0)); + + ASSERT_TRUE(thread.timed_join(boost::posix_time::seconds(10))); + ASSERT_EQ(0, snap_unprotect_task.result); } TEST_F(TestImageWatcher, NotifyRename) { @@ -621,14 +769,23 @@ TEST_F(TestImageWatcher, NotifyRename) { m_notify_acks = {{NOTIFY_OP_RENAME, create_response_message(0)}}; - std::shared_lock l{ictx->owner_lock}; - C_SaferCond notify_ctx; - ictx->image_watcher->notify_rename("new_name", ¬ify_ctx); - ASSERT_EQ(0, notify_ctx.wait()); + RenameTask rename_task(ictx); + boost::thread thread(boost::ref(rename_task)); + + 
ASSERT_TRUE(wait_for_notifies(*ictx)); NotifyOps expected_notify_ops; expected_notify_ops += NOTIFY_OP_RENAME; ASSERT_EQ(expected_notify_ops, m_notifies); + + AsyncRequestId async_request_id; + ASSERT_TRUE(extract_async_request_id(NOTIFY_OP_RENAME, + &async_request_id)); + + ASSERT_EQ(0, notify_async_complete(ictx, async_request_id, 0)); + + ASSERT_TRUE(thread.timed_join(boost::posix_time::seconds(10))); + ASSERT_EQ(0, rename_task.result); } TEST_F(TestImageWatcher, NotifyAsyncTimedOut) { diff --git a/src/test/librbd/test_Migration.cc b/src/test/librbd/test_Migration.cc index 5410eab553e..5f96d620e26 100644 --- a/src/test/librbd/test_Migration.cc +++ b/src/test/librbd/test_Migration.cc @@ -144,11 +144,8 @@ struct TestMigration : public TestFixture { std::cout << description << ", block " << offset << "~" << read_size << " differs" << std::endl; - char *c = getenv("TEST_RBD_MIGRATION_VERBOSE"); - if (c != NULL && *c != '\0') { - std::cout << "src block: " << src_ictx->id << ": " << std::endl; src_bl.hexdump(std::cout); - std::cout << "dst block: " << dst_ictx->id << ": " << std::endl; dst_bl.hexdump(std::cout); - } + std::cout << "src block: " << src_ictx->id << ": " << std::endl; src_bl.hexdump(std::cout); + std::cout << "dst block: " << dst_ictx->id << ": " << std::endl; dst_bl.hexdump(std::cout); } EXPECT_TRUE(src_bl.contents_equal(dst_bl)); offset += read_size; diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index bbd40071363..fe7fe1a5ff6 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -8781,6 +8781,161 @@ TEST_F(TestLibRBD, WriteZeroesThickProvision) { ASSERT_EQ(0, image.close()); } +TEST_F(TestLibRBD, ConcurentOperations) +{ + REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK); + + librbd::RBD rbd; + librados::IoCtx ioctx; + ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx)); + std::string name = get_temp_image_name(); + int order = 0; + uint64_t size = 2 << 20; + ASSERT_EQ(0, 
create_image_pp(rbd, ioctx, name.c_str(), size, &order)); + + // Test creating/removing many snapshots simultaneously + + std::vector<librbd::Image> images(10); + std::vector<librbd::RBD::AioCompletion *> comps; + + for (auto &image : images) { + auto comp = new librbd::RBD::AioCompletion(NULL, NULL); + ASSERT_EQ(0, rbd.aio_open(ioctx, image, name.c_str(), NULL, comp)); + comps.push_back(comp); + } + + for (auto &comp : comps) { + ASSERT_EQ(0, comp->wait_for_complete()); + ASSERT_EQ(1, comp->is_complete()); + ASSERT_EQ(0, comp->get_return_value()); + comp->release(); + } + comps.clear(); + + std::vector<std::thread> threads; + int i = 0; + for (auto &image : images) { + std::string snap_name = "snap" + stringify(i++); + threads.emplace_back([&image, snap_name]() { + int r = image.snap_create(snap_name.c_str()); + ceph_assert(r == 0); + }); + } + + for (auto &t : threads) { + t.join(); + } + threads.clear(); + + i = 0; + for (auto &image : images) { + std::string snap_name = "snap" + stringify(i++); + threads.emplace_back([&image, snap_name](){ + int r = image.snap_remove(snap_name.c_str()); + ceph_assert(r == 0); + }); + } + + for (auto &t : threads) { + t.join(); + } + threads.clear(); + + for (auto &image : images) { + auto comp = new librbd::RBD::AioCompletion(NULL, NULL); + ASSERT_EQ(0, image.aio_close(comp)); + comps.push_back(comp); + } + + for (auto &comp : comps) { + ASSERT_EQ(0, comp->wait_for_complete()); + ASSERT_EQ(1, comp->is_complete()); + ASSERT_EQ(0, comp->get_return_value()); + comp->release(); + } + comps.clear(); + + // Test shutdown + { + librbd::Image image1, image2, image3; + ASSERT_EQ(0, rbd.open(ioctx, image1, name.c_str(), NULL)); + ASSERT_EQ(0, rbd.open(ioctx, image2, name.c_str(), NULL)); + ASSERT_EQ(0, rbd.open(ioctx, image3, name.c_str(), NULL)); + + ASSERT_EQ(0, image1.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE)); + + struct Watcher : public librbd::QuiesceWatchCtx { + size_t count = 0; + + ceph::mutex lock = ceph::make_mutex("lock"); + 
ceph::condition_variable cv; + + void handle_quiesce() override { + std::unique_lock locker(lock); + count++; + cv.notify_one(); + } + + void handle_unquiesce() override { + } + + bool wait_for_quiesce(size_t c) { + std::unique_lock locker(lock); + return cv.wait_for(locker, seconds(60), + [this, c]() { return count >= c; }); + } + } watcher; + uint64_t handle; + ASSERT_EQ(0, image2.quiesce_watch(&watcher, &handle)); + + auto close1_comp = new librbd::RBD::AioCompletion(NULL, NULL); + + std::thread create_snap1([&image1, close1_comp]() { + int r = image1.snap_create("snap1"); + ceph_assert(r == 0); + r = image1.aio_close(close1_comp); + ceph_assert(r == 0); + }); + + ASSERT_TRUE(watcher.wait_for_quiesce(1)); + + std::thread create_snap2([&image2]() { + int r = image2.snap_create("snap2"); + ceph_assert(r == 0); + }); + + std::thread create_snap3([&image3]() { + int r = image3.snap_create("snap3"); + ceph_assert(r == 0); + }); + + image2.quiesce_complete(handle, 0); + create_snap1.join(); + + ASSERT_TRUE(watcher.wait_for_quiesce(2)); + image2.quiesce_complete(handle, 0); + + ASSERT_TRUE(watcher.wait_for_quiesce(3)); + image2.quiesce_complete(handle, 0); + + ASSERT_EQ(0, close1_comp->wait_for_complete()); + ASSERT_EQ(1, close1_comp->is_complete()); + ASSERT_EQ(0, close1_comp->get_return_value()); + + create_snap2.join(); + create_snap3.join(); + + ASSERT_EQ(0, image2.quiesce_unwatch(handle)); + ASSERT_EQ(0, image2.snap_remove("snap1")); + ASSERT_EQ(0, image2.snap_remove("snap2")); + ASSERT_EQ(0, image2.snap_remove("snap3")); + } + + ASSERT_EQ(0, rbd.remove(ioctx, name.c_str())); + ioctx.close(); +} + + // poorman's ceph_assert() namespace ceph { void __ceph_assert_fail(const char *assertion, const char *file, int line, diff --git a/src/test/objectstore/Allocator_test.cc b/src/test/objectstore/Allocator_test.cc index 78e018b7a57..43f2fbf51ec 100644 --- a/src/test/objectstore/Allocator_test.cc +++ b/src/test/objectstore/Allocator_test.cc @@ -341,12 +341,14 @@ 
TEST_P(AllocTest, test_dump_fragmentation_score) //allocate want_size = ( rng() % one_alloc_max ) / alloc_unit * alloc_unit + alloc_unit; tmp.clear(); - uint64_t r = alloc->allocate(want_size, alloc_unit, 0, 0, &tmp); - for (auto& t: tmp) { - if (t.length > 0) - allocated.push_back(t); - } - allocated_cnt += r; + int64_t r = alloc->allocate(want_size, alloc_unit, 0, 0, &tmp); + if (r > 0) { + for (auto& t: tmp) { + if (t.length > 0) + allocated.push_back(t); + } + allocated_cnt += r; + } } else { //free ceph_assert(allocated.size() > 0); @@ -480,6 +482,24 @@ TEST_P(AllocTest, test_alloc_contiguous) alloc->shutdown(); } +TEST_P(AllocTest, test_alloc_47883) +{ + uint64_t block = 0x1000; + uint64_t size = 1599858540544ul; + + init_alloc(size, block); + + alloc->init_add_free(0x1b970000, 0x26000); + alloc->init_add_free(0x1747e9d5000, 0x493000); + alloc->init_add_free(0x1747ee6a000, 0x196000); + + PExtentVector extents; + auto need = 0x3f980000; + auto got = alloc->allocate(need, 0x10000, 0, (int64_t)0, &extents); + EXPECT_GT(got, 0); + EXPECT_EQ(got, 0x630000); +} + INSTANTIATE_TEST_SUITE_P( Allocator, AllocTest, diff --git a/src/test/objectstore/CMakeLists.txt b/src/test/objectstore/CMakeLists.txt index 233baa46ebf..340855657c4 100644 --- a/src/test/objectstore/CMakeLists.txt +++ b/src/test/objectstore/CMakeLists.txt @@ -163,9 +163,9 @@ add_ceph_unittest(unittest_memstore_clone) target_link_libraries(unittest_memstore_clone os global) if(WITH_BLUESTORE) - add_executable(ceph_test_bmap_alloc_replay - bmap_allocator_replay_test.cc) - target_link_libraries(ceph_test_bmap_alloc_replay os global ${UNITTEST_LIBS}) - install(TARGETS ceph_test_bmap_alloc_replay + add_executable(ceph_test_alloc_replay + allocator_replay_test.cc) + target_link_libraries(ceph_test_alloc_replay os global ${UNITTEST_LIBS}) + install(TARGETS ceph_test_alloc_replay DESTINATION bin) endif() diff --git a/src/test/objectstore/bmap_allocator_replay_test.cc 
b/src/test/objectstore/allocator_replay_test.cc index e721fe62e17..8f4fe18f017 100644 --- a/src/test/objectstore/bmap_allocator_replay_test.cc +++ b/src/test/objectstore/allocator_replay_test.cc @@ -1,7 +1,7 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab /* - * Bitmap allocator replay tool. + * Allocator replay tool. * Author: Igor Fedotov, ifedotov@suse.com */ #include <iostream> @@ -9,12 +9,15 @@ #include "common/ceph_argparse.h" #include "common/debug.h" #include "common/Cycles.h" +#include "common/errno.h" +#include "common/ceph_json.h" +#include "common/admin_socket.h" #include "global/global_init.h" #include "os/bluestore/Allocator.h" void usage(const string &name) { - cerr << "Usage: " << name << " <log_to_replay> " + cerr << "Usage: " << name << " <log_to_replay> <raw_duplicate|free_dump>" << std::endl; } @@ -211,6 +214,100 @@ int replay_and_check_for_duplicate(char* fname) return 0; } +/* +* This replays allocator dump (in JSON) reported by + "ceph daemon <osd> bluestore allocator dump <name>" + command and applies custom method to it +*/ +int replay_free_dump_and_apply(char* fname, + std::function<int (Allocator*, const string& aname)> fn) +{ + string alloc_type; + string alloc_name; + uint64_t capacity = 0; + uint64_t alloc_unit = 0; + + JSONParser p; + std::cout << "parsing..." 
<< std::endl; + bool b = p.parse(fname); + if (!b) { + std::cerr << "Failed to parse json: " << fname << std::endl; + return -1; + } + + JSONObj::data_val v; + ceph_assert(p.is_object()); + + auto *o = p.find_obj("allocator_type"); + ceph_assert(o); + alloc_type = o->get_data_val().str; + + o = p.find_obj("allocator_name"); + ceph_assert(o); + alloc_name = o->get_data_val().str; + + o = p.find_obj("capacity"); + ceph_assert(o); + decode_json_obj(capacity, o); + o = p.find_obj("alloc_unit"); + ceph_assert(o); + decode_json_obj(alloc_unit, o); + + o = p.find_obj("extents"); + ceph_assert(o); + ceph_assert(o->is_array()); + std::cout << "parsing completed!" << std::endl; + + unique_ptr<Allocator> alloc; + alloc.reset(Allocator::create(g_ceph_context, alloc_type, + capacity, alloc_unit, alloc_name)); + + auto it = o->find_first(); + while (!it.end()) { + auto *item_obj = *it; + uint64_t offset = 0; + uint64_t length = 0; + string offset_str, length_str; + + bool b = JSONDecoder::decode_json("offset", offset_str, item_obj); + ceph_assert(b); + b = JSONDecoder::decode_json("length", length_str, item_obj); + ceph_assert(b); + + char* p; + offset = strtol(offset_str.c_str(), &p, 16); + length = strtol(length_str.c_str(), &p, 16); + + alloc->init_add_free(offset, length); + + ++it; + } + + int r = fn(alloc.get(), alloc_name); + + return r; +} + +void dump_alloc(Allocator* alloc, const string& aname) +{ + AdminSocket* admin_socket = g_ceph_context->get_admin_socket(); + ceph_assert(admin_socket); + + ceph::bufferlist in, out; + ostringstream err; + + string cmd = "{\"prefix\": \"bluestore allocator dump " + aname + "\"}"; + auto r = admin_socket->execute_command( + { cmd }, + in, err, &out); + if (r != 0) { + cerr << "failure querying: " << cpp_strerror(r) << std::endl; + } + else { + std::cout << std::string(out.c_str(), out.length()) << std::endl; + } +} + int main(int argc, char **argv) { vector<const char*> args; @@ -220,10 +317,27 @@ int main(int argc, char **argv) 
common_init_finish(g_ceph_context); g_ceph_context->_conf.apply_changes(nullptr); - if (argc < 2) { + if (argc < 3) { usage(argv[0]); return 1; } - - return replay_and_check_for_duplicate(argv[1]); + if (strcmp(argv[2], "raw_duplicate") == 0) { + return replay_and_check_for_duplicate(argv[1]); + } else if (strcmp(argv[2], "free_dump") == 0) { + return replay_free_dump_and_apply(argv[1], + [&](Allocator* a, const string& aname) { + ceph_assert(a); + std::cout << "Fragmentation:" << a->get_fragmentation() + << std::endl; + std::cout << "Fragmentation score:" << a->get_fragmentation_score() + << std::endl; + std::cout << "Free:" << std::hex << a->get_free() << std::dec + << std::endl; + { + // stub to implement various testing stuff on properly initialized allocator + // e.g. one can dump allocator back via dump_alloc(a, aname); + } + return 0; + }); + } } diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc index e8d25fc6dc7..370f28f0453 100644 --- a/src/test/objectstore/store_test.cc +++ b/src/test/objectstore/store_test.cc @@ -7893,8 +7893,8 @@ TEST_P(StoreTest, KVDBHistogramTest) { ASSERT_EQ(r, 0); } - Formatter *f = Formatter::create("store_test", "json-pretty", "json-pretty"); - store->generate_db_histogram(f); + std::unique_ptr<Formatter> f(Formatter::create("store_test", "json-pretty", "json-pretty")); + store->generate_db_histogram(f.get()); f->flush(cout); cout << std::endl; } @@ -7937,8 +7937,8 @@ TEST_P(StoreTest, KVDBStatsTest) { ASSERT_EQ(r, 0); } - Formatter *f = Formatter::create("store_test", "json-pretty", "json-pretty"); - store->get_db_statistics(f); + std::unique_ptr<Formatter> f(Formatter::create("store_test", "json-pretty", "json-pretty")); + store->get_db_statistics(f.get()); f->flush(cout); cout << std::endl; } @@ -8482,8 +8482,8 @@ TEST_P(StoreTest, BluestoreStatistics) { ASSERT_EQ(static_cast<int>(bl.length()), r); ASSERT_TRUE(bl_eq(bl, readback)); } - Formatter *f = Formatter::create("store_test", 
"json-pretty", "json-pretty"); - EXPECT_NO_THROW(store->get_db_statistics(f)); + std::unique_ptr<Formatter> f(Formatter::create("store_test", "json-pretty", "json-pretty")); + EXPECT_NO_THROW(store->get_db_statistics(f.get())); f->flush(cout); cout << std::endl; } diff --git a/src/test/objectstore/test_bluestore_types.cc b/src/test/objectstore/test_bluestore_types.cc index da811be305b..6ddf717d605 100644 --- a/src/test/objectstore/test_bluestore_types.cc +++ b/src/test/objectstore/test_bluestore_types.cc @@ -345,8 +345,7 @@ TEST(Blob, put_ref) auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); BlueStore::Blob b; - b.shared_blob = new BlueStore::SharedBlob(nullptr); - b.shared_blob->get(); // hack to avoid dtor from running + b.shared_blob = new BlueStore::SharedBlob(coll.get()); b.dirty_blob().allocated_test(bluestore_pextent_t(0x40715000, 0x2000)); b.dirty_blob().allocated_test( bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x8000)); @@ -379,8 +378,7 @@ TEST(Blob, put_ref) { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(0, mas * 2)); @@ -401,8 +399,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(123, mas * 2)); @@ -426,8 +423,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas)); 
@@ -465,8 +461,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas)); @@ -507,8 +502,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas * 6)); @@ -540,8 +534,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas * 4)); @@ -579,8 +572,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas * 4)); @@ -635,8 +627,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas * 4)); @@ -691,8 +682,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); PExtentVector r; 
b.allocated_test(bluestore_pextent_t(1, mas * 8)); @@ -735,8 +725,7 @@ TEST(Blob, put_ref) // verify csum chunk size if factored in properly { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(0, mas*4)); @@ -754,8 +743,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); b.allocated_test(bluestore_pextent_t(0x40101000, 0x4000)); b.allocated_test(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, @@ -777,8 +765,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); b.allocated_test(bluestore_pextent_t(1, 0x5000)); b.allocated_test(bluestore_pextent_t(2, 0x5000)); @@ -796,8 +783,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); b.allocated_test(bluestore_pextent_t(1, 0x7000)); b.allocated_test(bluestore_pextent_t(2, 0x7000)); @@ -826,8 +812,7 @@ TEST(Blob, put_ref) auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); BlueStore::Blob B; - B.shared_blob = new BlueStore::SharedBlob(nullptr); - B.shared_blob->get(); // hack to avoid dtor from running + B.shared_blob = new BlueStore::SharedBlob(coll.get()); bluestore_blob_t& b = B.dirty_blob(); b.allocated_test(bluestore_pextent_t(1, 0x5000)); 
b.allocated_test(bluestore_pextent_t(2, 0x7000)); @@ -917,9 +902,7 @@ TEST(Blob, split) { BlueStore::Blob L, R; L.shared_blob = new BlueStore::SharedBlob(coll.get()); - L.shared_blob->get(); // hack to avoid dtor from running R.shared_blob = new BlueStore::SharedBlob(coll.get()); - R.shared_blob->get(); // hack to avoid dtor from running L.dirty_blob().allocated_test(bluestore_pextent_t(0x2000, 0x2000)); L.dirty_blob().init_csum(Checksummer::CSUM_CRC32C, 12, 0x2000); L.get_ref(coll.get(), 0, 0x2000); @@ -940,9 +923,7 @@ TEST(Blob, split) { BlueStore::Blob L, R; L.shared_blob = new BlueStore::SharedBlob(coll.get()); - L.shared_blob->get(); // hack to avoid dtor from running R.shared_blob = new BlueStore::SharedBlob(coll.get()); - R.shared_blob->get(); // hack to avoid dtor from running L.dirty_blob().allocated_test(bluestore_pextent_t(0x2000, 0x1000)); L.dirty_blob().allocated_test(bluestore_pextent_t(0x12000, 0x1000)); L.dirty_blob().init_csum(Checksummer::CSUM_CRC32C, 12, 0x2000); @@ -1147,6 +1128,14 @@ TEST(ExtentMap, has_any_lextents) ASSERT_FALSE(em.has_any_lextents(500, 1000)); } +void erase_and_delete(BlueStore::ExtentMap& em, size_t v) +{ + auto d = em.find(v); + ASSERT_NE(d, em.extent_map.end()); + em.extent_map.erase(d); + delete &*d; +} + TEST(ExtentMap, compress_extent_map) { BlueStore store(g_ceph_context, "", 4096); @@ -1176,8 +1165,7 @@ TEST(ExtentMap, compress_extent_map) ASSERT_EQ(0, em.compress_extent_map(100000, 1000)); ASSERT_EQ(2, em.compress_extent_map(0, 100000)); ASSERT_EQ(2u, em.extent_map.size()); - - em.extent_map.erase(em.find(100)); + erase_and_delete(em, 100); em.extent_map.insert(*new BlueStore::Extent(100, 0, 100, b2)); em.extent_map.insert(*new BlueStore::Extent(200, 100, 100, b3)); em.extent_map.insert(*new BlueStore::Extent(300, 200, 100, b2)); @@ -1194,9 +1182,9 @@ TEST(ExtentMap, compress_extent_map) ASSERT_EQ(0, em.compress_extent_map(800, 1000)); ASSERT_EQ(2, em.compress_extent_map(100, 500)); ASSERT_EQ(7u, 
em.extent_map.size()); - em.extent_map.erase(em.find(300)); - em.extent_map.erase(em.find(500)); - em.extent_map.erase(em.find(700)); + erase_and_delete(em, 300); + erase_and_delete(em, 500); + erase_and_delete(em, 700); em.extent_map.insert(*new BlueStore::Extent(400, 300, 100, b2)); em.extent_map.insert(*new BlueStore::Extent(500, 400, 100, b2)); em.extent_map.insert(*new BlueStore::Extent(700, 500, 100, b2)); @@ -1204,6 +1192,17 @@ TEST(ExtentMap, compress_extent_map) ASSERT_EQ(6u, em.extent_map.size()); } + +void clear_and_dispose(BlueStore::old_extent_map_t& old_em) +{ + auto oep = old_em.begin(); + while (oep != old_em.end()) { + auto &lo = *oep; + oep = old_em.erase(oep); + delete &lo; + } +} + TEST(GarbageCollector, BasicTest) { BlueStore::OnodeCacheShard *oc = BlueStore::OnodeCacheShard::create( @@ -1277,9 +1276,8 @@ TEST(GarbageCollector, BasicTest) auto v = p{100ul, 10ul}; ASSERT_EQ(*it, v); } - em.clear(); - old_extents.clear(); + clear_and_dispose(old_extents); } /* original disposition @@ -1363,7 +1361,7 @@ TEST(GarbageCollector, BasicTest) } em.clear(); - old_extents.clear(); + clear_and_dispose(old_extents); } /* original disposition @@ -1404,7 +1402,7 @@ TEST(GarbageCollector, BasicTest) auto& to_collect = gc.get_extents_to_collect(); ASSERT_EQ(to_collect.num_intervals(), 0u); em.clear(); - old_extents.clear(); + clear_and_dispose(old_extents); } /* original disposition @@ -1495,7 +1493,7 @@ TEST(GarbageCollector, BasicTest) } em.clear(); - old_extents.clear(); + clear_and_dispose(old_extents); } } diff --git a/src/test/objectstore/test_idempotent.cc b/src/test/objectstore/test_idempotent.cc index c89ea9b70fe..0889375ed17 100644 --- a/src/test/objectstore/test_idempotent.cc +++ b/src/test/objectstore/test_idempotent.cc @@ -13,6 +13,7 @@ */ #include <iostream> +#include <iterator> #include <sstream> #include <boost/scoped_ptr.hpp> #include "os/filestore/FileStore.h" @@ -31,14 +32,10 @@ void usage(const string &name) { template <typename T> typename 
T::iterator rand_choose(T &cont) { - if (cont.size() == 0) { - return cont.end(); + if (std::empty(cont)) { + return std::end(cont); } - int index = rand() % cont.size(); - typename T::iterator retval = cont.begin(); - - for (; index > 0; --index) ++retval; - return retval; + return std::next(std::begin(cont), rand() % cont.size()); } int main(int argc, char **argv) { diff --git a/src/test/osd/RadosModel.h b/src/test/osd/RadosModel.h index b2e2496fd21..d5689f6067a 100644 --- a/src/test/osd/RadosModel.h +++ b/src/test/osd/RadosModel.h @@ -6,6 +6,7 @@ #include "include/rados/librados.hpp" #include <iostream> +#include <iterator> #include <sstream> #include <map> #include <set> @@ -32,14 +33,10 @@ class TestOpStat; template <typename T> typename T::iterator rand_choose(T &cont) { - if (cont.size() == 0) { - return cont.end(); + if (std::empty(cont)) { + return std::end(cont); } - int index = rand() % cont.size(); - typename T::iterator retval = cont.begin(); - - for (; index > 0; --index) ++retval; - return retval; + return std::next(std::begin(cont), rand() % cont.size()); } enum TestOpType { diff --git a/src/test/pybind/test_rados.py b/src/test/pybind/test_rados.py index 39607d35e10..8ff6312b261 100644 --- a/src/test/pybind/test_rados.py +++ b/src/test/pybind/test_rados.py @@ -578,6 +578,26 @@ class TestIoctx(object): self.ioctx.operate_read_op(read_op, "hw") eq(list(iter), []) + def test_remove_omap_ramge2(self): + keys = ("1", "2", "3", "4") + values = (b"a", b"bb", b"ccc", b"dddd") + with WriteOpCtx() as write_op: + self.ioctx.set_omap(write_op, keys, values) + self.ioctx.operate_write_op(write_op, "test_obj") + with ReadOpCtx() as read_op: + iter, ret = self.ioctx.get_omap_vals_by_keys(read_op, keys) + eq(ret, 0) + self.ioctx.operate_read_op(read_op, "test_obj") + eq(list(iter), list(zip(keys, values))) + with WriteOpCtx() as write_op: + self.ioctx.remove_omap_range2(write_op, "1", "4") + self.ioctx.operate_write_op(write_op, "test_obj") + with ReadOpCtx() as 
read_op: + iter, ret = self.ioctx.get_omap_vals_by_keys(read_op, keys) + eq(ret, 0) + self.ioctx.operate_read_op(read_op, "test_obj") + eq(list(iter), [("4", b"dddd")]) + def test_xattrs_op(self): xattrs = dict(a=b'1', b=b'2', c=b'3', d=b'a\0b', e=b'\0') with WriteOpCtx() as write_op: @@ -989,6 +1009,22 @@ class TestIoctx(object): [i.remove() for i in self.ioctx.list_objects()] + def test_aio_setxattr(self): + lock = threading.Condition() + count = [0] + def cb(blah): + with lock: + count[0] += 1 + lock.notify() + return 0 + comp = self.ioctx.aio_setxattr("obj", "key", b'value', cb) + comp.wait_for_complete() + with lock: + while count[0] < 1: + lock.wait() + eq(comp.get_return_value(), 0) + eq(self.ioctx.get_xattr("obj", "key"), b'value') + def test_applications(self): cmd = {"prefix":"osd dump", "format":"json"} ret, buf, errs = self.rados.mon_command(json.dumps(cmd), b'') diff --git a/src/test/rgw/rgw_multi/tests_ps.py b/src/test/rgw/rgw_multi/tests_ps.py index 269d358f529..d074644a5ea 100644 --- a/src/test/rgw/rgw_multi/tests_ps.py +++ b/src/test/rgw/rgw_multi/tests_ps.py @@ -873,6 +873,10 @@ def test_ps_s3_topic_on_master(): assert_equal(topic_arn, result['GetTopicResponse']['GetTopicResult']['Topic']['TopicArn']) assert_equal(endpoint_address, result['GetTopicResponse']['GetTopicResult']['Topic']['EndPoint']['EndpointAddress']) # Note that endpoint args may be ordered differently in the result + result = topic_conf3.get_attributes() + assert_equal(topic_arn, result['Attributes']['TopicArn']) + json_endpoint = json.loads(result['Attributes']['EndPoint']) + assert_equal(endpoint_address, json_endpoint['EndpointAddress']) # delete topic 1 result = topic_conf1.del_config() @@ -881,6 +885,12 @@ def test_ps_s3_topic_on_master(): # try to get a deleted topic _, status = topic_conf1.get_config() assert_equal(status, 404) + try: + topic_conf1.get_attributes() + except: + print('topic already deleted - this is expected') + else: + assert False, 'topic 1 should be 
deleted at this point' # get the remaining 2 topics result, status = topic_conf1.get_list() diff --git a/src/test/rgw/rgw_multi/zone_ps.py b/src/test/rgw/rgw_multi/zone_ps.py index 241bbe8a26e..9f55626d211 100644 --- a/src/test/rgw/rgw_multi/zone_ps.py +++ b/src/test/rgw/rgw_multi/zone_ps.py @@ -187,6 +187,7 @@ class PSTopicS3: POST ?Action=CreateTopic&Name=<topic name>[&OpaqueData=<data>[&push-endpoint=<endpoint>&[<arg1>=<value1>...]]] POST ?Action=ListTopics POST ?Action=GetTopic&TopicArn=<topic-arn> + POST ?Action=GetTopicAttributes&TopicArn=<topic-arn> POST ?Action=DeleteTopic&TopicArn=<topic-arn> """ def __init__(self, conn, topic_name, region, endpoint_args=None, opaque_data=None): @@ -239,6 +240,10 @@ class PSTopicS3: dict_response = xmltodict.parse(data) return dict_response, status + def get_attributes(self): + """get topic attributes""" + return self.client.get_topic_attributes(TopicArn=self.topic_arn) + def set_config(self): """set topic""" result = self.client.create_topic(Name=self.topic_name, Attributes=self.attributes) diff --git a/src/test/test_snap_mapper.cc b/src/test/test_snap_mapper.cc index 9b6dbdd2b0c..50730080d8d 100644 --- a/src/test/test_snap_mapper.cc +++ b/src/test/test_snap_mapper.cc @@ -1,4 +1,5 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +#include <iterator> #include <map> #include <set> #include <boost/scoped_ptr.hpp> @@ -16,14 +17,10 @@ using namespace std; template <typename T> typename T::iterator rand_choose(T &cont) { - if (cont.size() == 0) { - return cont.end(); + if (std::empty(cont)) { + return std::end(cont); } - int index = rand() % cont.size(); - typename T::iterator retval = cont.begin(); - - for (; index > 0; --index) ++retval; - return retval; + return std::next(std::begin(cont), rand() % cont.size()); } string random_string(size_t size) diff --git a/src/test/xattr_bench.cc b/src/test/xattr_bench.cc index edbfae25bef..7fac235be49 100644 --- a/src/test/xattr_bench.cc +++ 
b/src/test/xattr_bench.cc @@ -16,6 +16,7 @@ #include <time.h> #include <string.h> #include <iostream> +#include <iterator> #include <sstream> #include "os/filestore/FileStore.h" #include "include/Context.h" @@ -40,14 +41,10 @@ const int THREADS = 5; template <typename T> typename T::iterator rand_choose(T &cont) { - if (cont.size() == 0) { - return cont.end(); + if (std::empty(cont) == 0) { + return std::end(cont); } - int index = rand() % cont.size(); - typename T::iterator retval = cont.begin(); - - for (; index > 0; --index) ++retval; - return retval; + return std::next(std::begin(cont), rand() % cont.size()); } class OnApplied : public Context { diff --git a/src/tools/crimson/CMakeLists.txt b/src/tools/crimson/CMakeLists.txt index fb4baf7a94a..19a2cfa9170 100644 --- a/src/tools/crimson/CMakeLists.txt +++ b/src/tools/crimson/CMakeLists.txt @@ -1,5 +1,8 @@ -add_executable(perf_crimson_msgr perf_crimson_msgr.cc) -target_link_libraries(perf_crimson_msgr crimson) +add_executable(perf-crimson-msgr perf_crimson_msgr.cc) +target_link_libraries(perf-crimson-msgr crimson) -add_executable(perf_async_msgr perf_async_msgr.cc) -target_link_libraries(perf_async_msgr ceph-common global ${ALLOC_LIBS}) +add_executable(perf-async-msgr perf_async_msgr.cc) +target_link_libraries(perf-async-msgr ceph-common global ${ALLOC_LIBS}) + +add_executable(perf-staged-fltree perf_staged_fltree.cc) +target_link_libraries(perf-staged-fltree crimson-seastore) diff --git a/src/tools/crimson/perf_crimson_msgr.cc b/src/tools/crimson/perf_crimson_msgr.cc index 1efef3a2f57..e76f273a921 100644 --- a/src/tools/crimson/perf_crimson_msgr.cc +++ b/src/tools/crimson/perf_crimson_msgr.cc @@ -40,7 +40,7 @@ seastar::future<T*> create_sharded(Args... 
args) { auto sharded_obj = seastar::make_lw_shared<seastar::sharded<T>>(); return sharded_obj->start(args...).then([sharded_obj]() { seastar::engine().at_exit([sharded_obj]() { - return sharded_obj->stop().finally([sharded_obj] {}); + return sharded_obj->stop().then([sharded_obj] {}); }); return sharded_obj.get(); }); @@ -152,8 +152,8 @@ static seastar::future<> run( msg_data.append_zero(msg_len); } - seastar::future<> ms_dispatch(crimson::net::Connection* c, - MessageRef m) override { + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef c, MessageRef m) override { ceph_assert(m->get_type() == CEPH_MSG_OSD_OP); // server replies with MOSDOp to generate server-side write workload @@ -166,7 +166,8 @@ static seastar::future<> run( bufferlist data(msg_data); rep->write(0, msg_len, data); rep->set_tid(m->get_tid()); - return c->send(std::move(rep)); + std::ignore = c->send(std::move(rep)); + return {seastar::now()}; } seastar::future<> init(bool v1_crc_enabled, const entity_addr_t& addr) { @@ -180,17 +181,21 @@ static seastar::future<> run( msgr->set_crc_header(); msgr->set_crc_data(); } - return msgr->bind(entity_addrvec_t{addr}).then([this] { - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(*this); - return msgr->start(chained_dispatchers); - }); + return msgr->bind(entity_addrvec_t{addr}).safe_then([this] { + return msgr->start({this}); + }, crimson::net::Messenger::bind_ertr::all_same_way( + [addr] (const std::error_code& e) { + logger().error("Server: " + "there is another instance running at {}", addr); + ceph_abort(); + })); }); } seastar::future<> shutdown() { logger().info("{} shutdown...", lname); return seastar::smp::submit_to(msgr_sid, [this] { ceph_assert(msgr); + msgr->stop(); return msgr->shutdown(); }); } @@ -302,8 +307,8 @@ static seastar::future<> run( void ms_handle_connect(crimson::net::ConnectionRef conn) override { conn_stats.connected_time = mono_clock::now(); } - 
seastar::future<> ms_dispatch(crimson::net::Connection* c, - MessageRef m) override { + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef, MessageRef m) override { // server replies with MOSDOp to generate server-side write workload ceph_assert(m->get_type() == CEPH_MSG_OSD_OP); @@ -322,7 +327,7 @@ static seastar::future<> run( ++(conn_stats.received_count); depth.signal(1); - return seastar::now(); + return {seastar::now()}; } // should start messenger at this shard? @@ -332,9 +337,7 @@ static seastar::future<> run( } seastar::future<> init(bool v1_crc_enabled) { - auto chained_dispatchers = seastar::make_lw_shared<ChainedDispatchers>(); - chained_dispatchers->push_back(*this); - return container().invoke_on_all([v1_crc_enabled, chained_dispatchers] (auto& client) mutable { + return container().invoke_on_all([v1_crc_enabled] (auto& client) { if (client.is_active()) { client.msgr = crimson::net::Messenger::create(entity_name_t::OSD(client.sid), client.lname, client.sid); client.msgr->set_default_policy(crimson::net::SocketPolicy::lossy_client(0)); @@ -345,7 +348,7 @@ static seastar::future<> run( client.msgr->set_crc_header(); client.msgr->set_crc_data(); } - return client.msgr->start(chained_dispatchers); + return client.msgr->start({&client}); } return seastar::now(); }); @@ -356,6 +359,7 @@ static seastar::future<> run( if (client.is_active()) { logger().info("{} shutdown...", client.lname); ceph_assert(client.msgr); + client.msgr->stop(); return client.msgr->shutdown().then([&client] { return client.stop_dispatch_messages(); }); @@ -617,7 +621,7 @@ static seastar::future<> run( } ).handle_exception_type([] (const DepthBroken& e) { // ok, stopped by stop_dispatch_messages() - }).finally([this, conn] { + }).then([this, conn] { std::chrono::duration<double> dur_conn = conn_stats.connected_time - conn_stats.connecting_time; std::chrono::duration<double> dur_msg = mono_clock::now() - conn_stats.start_time; unsigned ops = 
conn_stats.received_count - conn_stats.start_count; @@ -666,9 +670,9 @@ static seastar::future<> run( }).then([client, ramptime = client_conf.ramptime, msgtime = client_conf.msgtime] { return client->dispatch_with_timer(ramptime, msgtime); - }).finally([client] { + }).then([client] { return client->shutdown(); - }).finally([server, fp_server = std::move(fp_server)] () mutable { + }).then([server, fp_server = std::move(fp_server)] () mutable { return server->shutdown().then([cleanup = std::move(fp_server)] {}); }); } else if (mode == perf_mode_t::client) { @@ -681,7 +685,7 @@ static seastar::future<> run( }).then([client, ramptime = client_conf.ramptime, msgtime = client_conf.msgtime] { return client->dispatch_with_timer(ramptime, msgtime); - }).finally([client] { + }).then([client] { return client->shutdown(); }); } else { // mode == perf_mode_t::server @@ -692,7 +696,7 @@ static seastar::future<> run( ).then([server] { return server->wait(); // shutdown - }).finally([server, fp_server = std::move(fp_server)] () mutable { + }).then([server, fp_server = std::move(fp_server)] () mutable { return server->shutdown().then([cleanup = std::move(fp_server)] {}); }); } diff --git a/src/tools/crimson/perf_staged_fltree.cc b/src/tools/crimson/perf_staged_fltree.cc new file mode 100644 index 00000000000..bd05cb39e94 --- /dev/null +++ b/src/tools/crimson/perf_staged_fltree.cc @@ -0,0 +1,132 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <boost/program_options.hpp> + +#include <seastar/core/app-template.hh> +#include <seastar/core/thread.hh> + +#include "crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" + +using namespace crimson::os::seastore::onode; +namespace bpo = boost::program_options; + +template <bool TRACK> +class PerfTree : public TMTestState { 
+ public: + PerfTree(bool is_dummy) : is_dummy{is_dummy} {} + + seastar::future<> run(KVPool& kvs) { + return start(kvs).then([this] { + return tree->run().handle_error( + crimson::ct_error::all_same_way([] { + ceph_abort("runtime error"); + }) + ); + }).then([this] { + return stop(); + }); + } + + private: + seastar::future<> start(KVPool& kvs) { + if (is_dummy) { + tree = std::make_unique<TreeBuilder<TRACK>>( + kvs, NodeExtentManager::create_dummy(true)); + return tree->bootstrap().handle_error( + crimson::ct_error::all_same_way([] { + ceph_abort("Unable to mkfs"); + }) + ); + } else { + return tm_setup().then([this, &kvs] { + tree = std::make_unique<TreeBuilder<TRACK>>( + kvs, NodeExtentManager::create_seastore(*tm)); + return tree->bootstrap(); + }).handle_error( + crimson::ct_error::all_same_way([] { + ceph_abort("Unable to mkfs"); + }) + ); + } + } + + seastar::future<> stop() { + tree.reset(); + if (is_dummy) { + return seastar::now(); + } else { + return tm_teardown(); + } + } + + bool is_dummy; + std::unique_ptr<TreeBuilder<TRACK>> tree; +}; + +template <bool TRACK> +seastar::future<> run(const bpo::variables_map& config) { + return seastar::async([&config]{ + auto backend = config["backend"].as<std::string>(); + bool is_dummy; + if (backend == "dummy") { + is_dummy = true; + } else if (backend == "seastore") { + is_dummy = false; + } else { + ceph_abort(false && "invalid backend"); + } + auto str_sizes = config["str-sizes"].as<std::vector<size_t>>(); + auto onode_sizes = config["onode-sizes"].as<std::vector<size_t>>(); + auto range2 = config["range2"].as<std::vector<int>>(); + ceph_assert(range2.size() == 2); + auto range1 = config["range1"].as<std::vector<unsigned>>(); + ceph_assert(range1.size() == 2); + auto range0 = config["range0"].as<std::vector<unsigned>>(); + ceph_assert(range0.size() == 2); + + KVPool kvs{str_sizes, onode_sizes, + {range2[0], range2[1]}, + {range1[0], range1[1]}, + {range0[0], range0[1]}}; + PerfTree<TRACK> perf{is_dummy}; + 
perf.run(kvs).get0(); + }); +} + + +int main(int argc, char** argv) +{ + seastar::app_template app; + app.add_options() + ("backend", bpo::value<std::string>()->default_value("dummy"), + "tree backend: dummy, seastore") + ("tracked", bpo::value<bool>()->default_value(false), + "track inserted cursors") + ("str-sizes", bpo::value<std::vector<size_t>>()->default_value( + {8, 11, 64, 256, 301, 320}), + "sizes of ns/oid strings") + ("onode-sizes", bpo::value<std::vector<size_t>>()->default_value( + {8, 16, 128, 512, 576, 640}), + "sizes of onode") + ("range2", bpo::value<std::vector<int>>()->default_value( + {0, 128}), + "range of shard-pool-crush [a, b)") + ("range1", bpo::value<std::vector<unsigned>>()->default_value( + {0, 10}), + "range of ns-oid strings [a, b)") + ("range0", bpo::value<std::vector<unsigned>>()->default_value( + {0, 4}), + "range of snap-gen [a, b)"); + return app.run(argc, argv, [&app] { + auto&& config = app.configuration(); + auto tracked = config["tracked"].as<bool>(); + if (tracked) { + return run<true>(config); + } else { + return run<false>(config); + } + }); +} diff --git a/src/tools/immutable_object_cache/ObjectCacheStore.cc b/src/tools/immutable_object_cache/ObjectCacheStore.cc index d3faf515bea..18f64250ff7 100644 --- a/src/tools/immutable_object_cache/ObjectCacheStore.cc +++ b/src/tools/immutable_object_cache/ObjectCacheStore.cc @@ -3,7 +3,13 @@ #include "ObjectCacheStore.h" #include "Utils.h" +#if __has_include(<filesystem>) +#include <filesystem> +namespace fs = std::filesystem; +#else #include <experimental/filesystem> +namespace fs = std::experimental::filesystem; +#endif #define dout_context g_ceph_context #define dout_subsys ceph_subsys_immutable_obj_cache @@ -11,7 +17,6 @@ #define dout_prefix *_dout << "ceph::cache::ObjectCacheStore: " << this << " " \ << __func__ << ": " -namespace efs = std::experimental::filesystem; namespace ceph { namespace immutable_obj_cache { @@ -117,15 +122,15 @@ int ObjectCacheStore::init(bool reset) { 
// TODO(dehao): fsck and reuse existing cache objects if (reset) { try { - if (efs::exists(m_cache_root_dir)) { + if (fs::exists(m_cache_root_dir)) { // remove all sub folders - for (auto& p : efs::directory_iterator(m_cache_root_dir)) { - efs::remove_all(p.path()); + for (auto& p : fs::directory_iterator(m_cache_root_dir)) { + fs::remove_all(p.path()); } } else { - efs::create_directories(m_cache_root_dir); + fs::create_directories(m_cache_root_dir); } - } catch (const efs::filesystem_error& e) { + } catch (const fs::filesystem_error& e) { lderr(m_cct) << "failed to initialize cache store directory: " << e.what() << dendl; return -e.code().value(); @@ -346,12 +351,12 @@ std::string ObjectCacheStore::get_cache_file_path(std::string cache_file_name, ldout(m_cct, 20) << "creating cache dir: " << cache_file_dir <<dendl; std::error_code ec; std::string new_dir = m_cache_root_dir + cache_file_dir; - if (efs::exists(new_dir, ec)) { + if (fs::exists(new_dir, ec)) { ldout(m_cct, 20) << "cache dir exists: " << cache_file_dir <<dendl; return new_dir + cache_file_name; } - if (!efs::create_directories(new_dir, ec)) { + if (!fs::create_directories(new_dir, ec)) { ldout(m_cct, 5) << "fail to create cache dir: " << new_dir << "error: " << ec.message() << dendl; return ""; diff --git a/src/tools/rbd/Utils.cc b/src/tools/rbd/Utils.cc index 6f2dfdcc1af..27789df1d6e 100644 --- a/src/tools/rbd/Utils.cc +++ b/src/tools/rbd/Utils.cc @@ -45,7 +45,7 @@ static std::string mgr_command_args_to_str( int ProgressContext::update_progress(uint64_t offset, uint64_t total) { if (progress) { int pc = total ? 
(offset * 100ull / total) : 0; - if (pc != last_pc) { + if (pc > last_pc) { cerr << "\r" << operation << ": " << pc << "% complete..."; cerr.flush(); diff --git a/src/tools/rbd_mirror/CMakeLists.txt b/src/tools/rbd_mirror/CMakeLists.txt index 5a89b6c3c9a..f260d978632 100644 --- a/src/tools/rbd_mirror/CMakeLists.txt +++ b/src/tools/rbd_mirror/CMakeLists.txt @@ -85,5 +85,6 @@ target_link_libraries(rbd-mirror cls_journal_client global heap_profiler - ${ALLOC_LIBS}) + ${ALLOC_LIBS} + OpenSSL::SSL) install(TARGETS rbd-mirror DESTINATION bin) diff --git a/src/tools/rbd_nbd/rbd-nbd.cc b/src/tools/rbd_nbd/rbd-nbd.cc index 11ccdf0e4b4..d2e11405f8d 100644 --- a/src/tools/rbd_nbd/rbd-nbd.cc +++ b/src/tools/rbd_nbd/rbd-nbd.cc @@ -1594,7 +1594,6 @@ static int do_map(int argc, const char *argv[], Config *cfg, bool reconnect) global_init_postfork_start(g_ceph_context); } - g_ceph_context->_conf.finalize_reexpand_meta(); common_init_finish(g_ceph_context); global_init_chdir(g_ceph_context); diff --git a/src/tools/rbd_wnbd/rbd_wnbd.cc b/src/tools/rbd_wnbd/rbd_wnbd.cc index eb7a9a52301..fbf7392d184 100644 --- a/src/tools/rbd_wnbd/rbd_wnbd.cc +++ b/src/tools/rbd_wnbd/rbd_wnbd.cc @@ -396,7 +396,10 @@ int load_mapping_config_from_registry(string devpath, Config* cfg) auto reg_key = RegistryKey( g_ceph_context, HKEY_LOCAL_MACHINE, strKey.c_str(), false); if (!reg_key.hKey) { - return -EINVAL; + if (reg_key.missingKey) + return -ENOENT; + else + return -EINVAL; } reg_key.get("devpath", cfg->devpath); @@ -587,7 +590,7 @@ Service options: --hard-disconnect Skip attempting a soft disconnect --soft-disconnect-timeout Cummulative soft disconnect timeout in seconds, used when disconnecting existing mappings. A hard - disconnect will be issuedwhen hitting the timeout. + disconnect will be issued when hitting the timeout --service-thread-count The number of workers used when mapping or unmapping images. 
Default: 8 @@ -1135,6 +1138,8 @@ static int parse_args(std::vector<const char*>& args, cmd = Service; } else if (strcmp(*args.begin(), "stats") == 0) { cmd = Stats; + } else if (strcmp(*args.begin(), "help") == 0) { + return HELP_INFO; } else { *err_msg << "rbd-wnbd: unknown command: " << *args.begin(); return -EINVAL; diff --git a/src/vstart.sh b/src/vstart.sh index 88a161f7d6a..749a125d461 100755 --- a/src/vstart.sh +++ b/src/vstart.sh @@ -1574,6 +1574,7 @@ echo "" { echo "export PYTHONPATH=$PYBIND:$CYTHON_PYTHONPATH:$CEPH_PYTHON_COMMON\$PYTHONPATH" echo "export LD_LIBRARY_PATH=$CEPH_LIB:\$LD_LIBRARY_PATH" + echo "export PATH=$CEPH_DIR/bin:\$PATH" if [ "$CEPH_DIR" != "$PWD" ]; then echo "export CEPH_CONF=$conf_fn" |